Commit 989a2979205dd34269382b357e6d4b4b6956b889

Authored by Eric Dumazet
Committed by David S. Miller
1 parent e5700aff14

fasync: RCU and fine grained locking

kill_fasync() uses a central rwlock, which makes it a candidate for RCU
conversion to avoid cache line ping-pong on SMP.

fasync_remove_entry() and fasync_add_entry() can disable IRQs for a short
section instead of during the whole list scan.

Use a spinlock per fasync_struct to synchronize kill_fasync_rcu() and
fasync_{remove|add}_entry(). This spinlock is IRQ safe, so sock_fasync()
doesn't need its own implementation and can use fasync_helper(), which
reduces code size and complexity.

We can remove the direct use of __kill_fasync() in net/socket.c and rename
it to kill_fasync_rcu().

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 3 changed files with 59 additions and 92 deletions Inline Diff

1 /* 1 /*
2 * linux/fs/fcntl.c 2 * linux/fs/fcntl.c
3 * 3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */ 5 */
6 6
7 #include <linux/syscalls.h> 7 #include <linux/syscalls.h>
8 #include <linux/init.h> 8 #include <linux/init.h>
9 #include <linux/mm.h> 9 #include <linux/mm.h>
10 #include <linux/fs.h> 10 #include <linux/fs.h>
11 #include <linux/file.h> 11 #include <linux/file.h>
12 #include <linux/fdtable.h> 12 #include <linux/fdtable.h>
13 #include <linux/capability.h> 13 #include <linux/capability.h>
14 #include <linux/dnotify.h> 14 #include <linux/dnotify.h>
15 #include <linux/slab.h> 15 #include <linux/slab.h>
16 #include <linux/module.h> 16 #include <linux/module.h>
17 #include <linux/security.h> 17 #include <linux/security.h>
18 #include <linux/ptrace.h> 18 #include <linux/ptrace.h>
19 #include <linux/signal.h> 19 #include <linux/signal.h>
20 #include <linux/rcupdate.h> 20 #include <linux/rcupdate.h>
21 #include <linux/pid_namespace.h> 21 #include <linux/pid_namespace.h>
22 22
23 #include <asm/poll.h> 23 #include <asm/poll.h>
24 #include <asm/siginfo.h> 24 #include <asm/siginfo.h>
25 #include <asm/uaccess.h> 25 #include <asm/uaccess.h>
26 26
27 void set_close_on_exec(unsigned int fd, int flag) 27 void set_close_on_exec(unsigned int fd, int flag)
28 { 28 {
29 struct files_struct *files = current->files; 29 struct files_struct *files = current->files;
30 struct fdtable *fdt; 30 struct fdtable *fdt;
31 spin_lock(&files->file_lock); 31 spin_lock(&files->file_lock);
32 fdt = files_fdtable(files); 32 fdt = files_fdtable(files);
33 if (flag) 33 if (flag)
34 FD_SET(fd, fdt->close_on_exec); 34 FD_SET(fd, fdt->close_on_exec);
35 else 35 else
36 FD_CLR(fd, fdt->close_on_exec); 36 FD_CLR(fd, fdt->close_on_exec);
37 spin_unlock(&files->file_lock); 37 spin_unlock(&files->file_lock);
38 } 38 }
39 39
40 static int get_close_on_exec(unsigned int fd) 40 static int get_close_on_exec(unsigned int fd)
41 { 41 {
42 struct files_struct *files = current->files; 42 struct files_struct *files = current->files;
43 struct fdtable *fdt; 43 struct fdtable *fdt;
44 int res; 44 int res;
45 rcu_read_lock(); 45 rcu_read_lock();
46 fdt = files_fdtable(files); 46 fdt = files_fdtable(files);
47 res = FD_ISSET(fd, fdt->close_on_exec); 47 res = FD_ISSET(fd, fdt->close_on_exec);
48 rcu_read_unlock(); 48 rcu_read_unlock();
49 return res; 49 return res;
50 } 50 }
51 51
52 SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) 52 SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
53 { 53 {
54 int err = -EBADF; 54 int err = -EBADF;
55 struct file * file, *tofree; 55 struct file * file, *tofree;
56 struct files_struct * files = current->files; 56 struct files_struct * files = current->files;
57 struct fdtable *fdt; 57 struct fdtable *fdt;
58 58
59 if ((flags & ~O_CLOEXEC) != 0) 59 if ((flags & ~O_CLOEXEC) != 0)
60 return -EINVAL; 60 return -EINVAL;
61 61
62 if (unlikely(oldfd == newfd)) 62 if (unlikely(oldfd == newfd))
63 return -EINVAL; 63 return -EINVAL;
64 64
65 spin_lock(&files->file_lock); 65 spin_lock(&files->file_lock);
66 err = expand_files(files, newfd); 66 err = expand_files(files, newfd);
67 file = fcheck(oldfd); 67 file = fcheck(oldfd);
68 if (unlikely(!file)) 68 if (unlikely(!file))
69 goto Ebadf; 69 goto Ebadf;
70 if (unlikely(err < 0)) { 70 if (unlikely(err < 0)) {
71 if (err == -EMFILE) 71 if (err == -EMFILE)
72 goto Ebadf; 72 goto Ebadf;
73 goto out_unlock; 73 goto out_unlock;
74 } 74 }
75 /* 75 /*
76 * We need to detect attempts to do dup2() over allocated but still 76 * We need to detect attempts to do dup2() over allocated but still
77 * not finished descriptor. NB: OpenBSD avoids that at the price of 77 * not finished descriptor. NB: OpenBSD avoids that at the price of
78 * extra work in their equivalent of fget() - they insert struct 78 * extra work in their equivalent of fget() - they insert struct
79 * file immediately after grabbing descriptor, mark it larval if 79 * file immediately after grabbing descriptor, mark it larval if
80 * more work (e.g. actual opening) is needed and make sure that 80 * more work (e.g. actual opening) is needed and make sure that
81 * fget() treats larval files as absent. Potentially interesting, 81 * fget() treats larval files as absent. Potentially interesting,
82 * but while extra work in fget() is trivial, locking implications 82 * but while extra work in fget() is trivial, locking implications
83 * and amount of surgery on open()-related paths in VFS are not. 83 * and amount of surgery on open()-related paths in VFS are not.
84 * FreeBSD fails with -EBADF in the same situation, NetBSD "solution" 84 * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
85 * deadlocks in rather amusing ways, AFAICS. All of that is out of 85 * deadlocks in rather amusing ways, AFAICS. All of that is out of
86 * scope of POSIX or SUS, since neither considers shared descriptor 86 * scope of POSIX or SUS, since neither considers shared descriptor
87 * tables and this condition does not arise without those. 87 * tables and this condition does not arise without those.
88 */ 88 */
89 err = -EBUSY; 89 err = -EBUSY;
90 fdt = files_fdtable(files); 90 fdt = files_fdtable(files);
91 tofree = fdt->fd[newfd]; 91 tofree = fdt->fd[newfd];
92 if (!tofree && FD_ISSET(newfd, fdt->open_fds)) 92 if (!tofree && FD_ISSET(newfd, fdt->open_fds))
93 goto out_unlock; 93 goto out_unlock;
94 get_file(file); 94 get_file(file);
95 rcu_assign_pointer(fdt->fd[newfd], file); 95 rcu_assign_pointer(fdt->fd[newfd], file);
96 FD_SET(newfd, fdt->open_fds); 96 FD_SET(newfd, fdt->open_fds);
97 if (flags & O_CLOEXEC) 97 if (flags & O_CLOEXEC)
98 FD_SET(newfd, fdt->close_on_exec); 98 FD_SET(newfd, fdt->close_on_exec);
99 else 99 else
100 FD_CLR(newfd, fdt->close_on_exec); 100 FD_CLR(newfd, fdt->close_on_exec);
101 spin_unlock(&files->file_lock); 101 spin_unlock(&files->file_lock);
102 102
103 if (tofree) 103 if (tofree)
104 filp_close(tofree, files); 104 filp_close(tofree, files);
105 105
106 return newfd; 106 return newfd;
107 107
108 Ebadf: 108 Ebadf:
109 err = -EBADF; 109 err = -EBADF;
110 out_unlock: 110 out_unlock:
111 spin_unlock(&files->file_lock); 111 spin_unlock(&files->file_lock);
112 return err; 112 return err;
113 } 113 }
114 114
115 SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) 115 SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
116 { 116 {
117 if (unlikely(newfd == oldfd)) { /* corner case */ 117 if (unlikely(newfd == oldfd)) { /* corner case */
118 struct files_struct *files = current->files; 118 struct files_struct *files = current->files;
119 int retval = oldfd; 119 int retval = oldfd;
120 120
121 rcu_read_lock(); 121 rcu_read_lock();
122 if (!fcheck_files(files, oldfd)) 122 if (!fcheck_files(files, oldfd))
123 retval = -EBADF; 123 retval = -EBADF;
124 rcu_read_unlock(); 124 rcu_read_unlock();
125 return retval; 125 return retval;
126 } 126 }
127 return sys_dup3(oldfd, newfd, 0); 127 return sys_dup3(oldfd, newfd, 0);
128 } 128 }
129 129
130 SYSCALL_DEFINE1(dup, unsigned int, fildes) 130 SYSCALL_DEFINE1(dup, unsigned int, fildes)
131 { 131 {
132 int ret = -EBADF; 132 int ret = -EBADF;
133 struct file *file = fget(fildes); 133 struct file *file = fget(fildes);
134 134
135 if (file) { 135 if (file) {
136 ret = get_unused_fd(); 136 ret = get_unused_fd();
137 if (ret >= 0) 137 if (ret >= 0)
138 fd_install(ret, file); 138 fd_install(ret, file);
139 else 139 else
140 fput(file); 140 fput(file);
141 } 141 }
142 return ret; 142 return ret;
143 } 143 }
144 144
145 #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME) 145 #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
146 146
147 static int setfl(int fd, struct file * filp, unsigned long arg) 147 static int setfl(int fd, struct file * filp, unsigned long arg)
148 { 148 {
149 struct inode * inode = filp->f_path.dentry->d_inode; 149 struct inode * inode = filp->f_path.dentry->d_inode;
150 int error = 0; 150 int error = 0;
151 151
152 /* 152 /*
153 * O_APPEND cannot be cleared if the file is marked as append-only 153 * O_APPEND cannot be cleared if the file is marked as append-only
154 * and the file is open for write. 154 * and the file is open for write.
155 */ 155 */
156 if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode)) 156 if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode))
157 return -EPERM; 157 return -EPERM;
158 158
159 /* O_NOATIME can only be set by the owner or superuser */ 159 /* O_NOATIME can only be set by the owner or superuser */
160 if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME)) 160 if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
161 if (!is_owner_or_cap(inode)) 161 if (!is_owner_or_cap(inode))
162 return -EPERM; 162 return -EPERM;
163 163
164 /* required for strict SunOS emulation */ 164 /* required for strict SunOS emulation */
165 if (O_NONBLOCK != O_NDELAY) 165 if (O_NONBLOCK != O_NDELAY)
166 if (arg & O_NDELAY) 166 if (arg & O_NDELAY)
167 arg |= O_NONBLOCK; 167 arg |= O_NONBLOCK;
168 168
169 if (arg & O_DIRECT) { 169 if (arg & O_DIRECT) {
170 if (!filp->f_mapping || !filp->f_mapping->a_ops || 170 if (!filp->f_mapping || !filp->f_mapping->a_ops ||
171 !filp->f_mapping->a_ops->direct_IO) 171 !filp->f_mapping->a_ops->direct_IO)
172 return -EINVAL; 172 return -EINVAL;
173 } 173 }
174 174
175 if (filp->f_op && filp->f_op->check_flags) 175 if (filp->f_op && filp->f_op->check_flags)
176 error = filp->f_op->check_flags(arg); 176 error = filp->f_op->check_flags(arg);
177 if (error) 177 if (error)
178 return error; 178 return error;
179 179
180 /* 180 /*
181 * ->fasync() is responsible for setting the FASYNC bit. 181 * ->fasync() is responsible for setting the FASYNC bit.
182 */ 182 */
183 if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op && 183 if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op &&
184 filp->f_op->fasync) { 184 filp->f_op->fasync) {
185 error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); 185 error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
186 if (error < 0) 186 if (error < 0)
187 goto out; 187 goto out;
188 if (error > 0) 188 if (error > 0)
189 error = 0; 189 error = 0;
190 } 190 }
191 spin_lock(&filp->f_lock); 191 spin_lock(&filp->f_lock);
192 filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK); 192 filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
193 spin_unlock(&filp->f_lock); 193 spin_unlock(&filp->f_lock);
194 194
195 out: 195 out:
196 return error; 196 return error;
197 } 197 }
198 198
199 static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, 199 static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
200 int force) 200 int force)
201 { 201 {
202 write_lock_irq(&filp->f_owner.lock); 202 write_lock_irq(&filp->f_owner.lock);
203 if (force || !filp->f_owner.pid) { 203 if (force || !filp->f_owner.pid) {
204 put_pid(filp->f_owner.pid); 204 put_pid(filp->f_owner.pid);
205 filp->f_owner.pid = get_pid(pid); 205 filp->f_owner.pid = get_pid(pid);
206 filp->f_owner.pid_type = type; 206 filp->f_owner.pid_type = type;
207 207
208 if (pid) { 208 if (pid) {
209 const struct cred *cred = current_cred(); 209 const struct cred *cred = current_cred();
210 filp->f_owner.uid = cred->uid; 210 filp->f_owner.uid = cred->uid;
211 filp->f_owner.euid = cred->euid; 211 filp->f_owner.euid = cred->euid;
212 } 212 }
213 } 213 }
214 write_unlock_irq(&filp->f_owner.lock); 214 write_unlock_irq(&filp->f_owner.lock);
215 } 215 }
216 216
217 int __f_setown(struct file *filp, struct pid *pid, enum pid_type type, 217 int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
218 int force) 218 int force)
219 { 219 {
220 int err; 220 int err;
221 221
222 err = security_file_set_fowner(filp); 222 err = security_file_set_fowner(filp);
223 if (err) 223 if (err)
224 return err; 224 return err;
225 225
226 f_modown(filp, pid, type, force); 226 f_modown(filp, pid, type, force);
227 return 0; 227 return 0;
228 } 228 }
229 EXPORT_SYMBOL(__f_setown); 229 EXPORT_SYMBOL(__f_setown);
230 230
231 int f_setown(struct file *filp, unsigned long arg, int force) 231 int f_setown(struct file *filp, unsigned long arg, int force)
232 { 232 {
233 enum pid_type type; 233 enum pid_type type;
234 struct pid *pid; 234 struct pid *pid;
235 int who = arg; 235 int who = arg;
236 int result; 236 int result;
237 type = PIDTYPE_PID; 237 type = PIDTYPE_PID;
238 if (who < 0) { 238 if (who < 0) {
239 type = PIDTYPE_PGID; 239 type = PIDTYPE_PGID;
240 who = -who; 240 who = -who;
241 } 241 }
242 rcu_read_lock(); 242 rcu_read_lock();
243 pid = find_vpid(who); 243 pid = find_vpid(who);
244 result = __f_setown(filp, pid, type, force); 244 result = __f_setown(filp, pid, type, force);
245 rcu_read_unlock(); 245 rcu_read_unlock();
246 return result; 246 return result;
247 } 247 }
248 EXPORT_SYMBOL(f_setown); 248 EXPORT_SYMBOL(f_setown);
249 249
250 void f_delown(struct file *filp) 250 void f_delown(struct file *filp)
251 { 251 {
252 f_modown(filp, NULL, PIDTYPE_PID, 1); 252 f_modown(filp, NULL, PIDTYPE_PID, 1);
253 } 253 }
254 254
255 pid_t f_getown(struct file *filp) 255 pid_t f_getown(struct file *filp)
256 { 256 {
257 pid_t pid; 257 pid_t pid;
258 read_lock(&filp->f_owner.lock); 258 read_lock(&filp->f_owner.lock);
259 pid = pid_vnr(filp->f_owner.pid); 259 pid = pid_vnr(filp->f_owner.pid);
260 if (filp->f_owner.pid_type == PIDTYPE_PGID) 260 if (filp->f_owner.pid_type == PIDTYPE_PGID)
261 pid = -pid; 261 pid = -pid;
262 read_unlock(&filp->f_owner.lock); 262 read_unlock(&filp->f_owner.lock);
263 return pid; 263 return pid;
264 } 264 }
265 265
266 static int f_setown_ex(struct file *filp, unsigned long arg) 266 static int f_setown_ex(struct file *filp, unsigned long arg)
267 { 267 {
268 struct f_owner_ex * __user owner_p = (void * __user)arg; 268 struct f_owner_ex * __user owner_p = (void * __user)arg;
269 struct f_owner_ex owner; 269 struct f_owner_ex owner;
270 struct pid *pid; 270 struct pid *pid;
271 int type; 271 int type;
272 int ret; 272 int ret;
273 273
274 ret = copy_from_user(&owner, owner_p, sizeof(owner)); 274 ret = copy_from_user(&owner, owner_p, sizeof(owner));
275 if (ret) 275 if (ret)
276 return ret; 276 return ret;
277 277
278 switch (owner.type) { 278 switch (owner.type) {
279 case F_OWNER_TID: 279 case F_OWNER_TID:
280 type = PIDTYPE_MAX; 280 type = PIDTYPE_MAX;
281 break; 281 break;
282 282
283 case F_OWNER_PID: 283 case F_OWNER_PID:
284 type = PIDTYPE_PID; 284 type = PIDTYPE_PID;
285 break; 285 break;
286 286
287 case F_OWNER_PGRP: 287 case F_OWNER_PGRP:
288 type = PIDTYPE_PGID; 288 type = PIDTYPE_PGID;
289 break; 289 break;
290 290
291 default: 291 default:
292 return -EINVAL; 292 return -EINVAL;
293 } 293 }
294 294
295 rcu_read_lock(); 295 rcu_read_lock();
296 pid = find_vpid(owner.pid); 296 pid = find_vpid(owner.pid);
297 if (owner.pid && !pid) 297 if (owner.pid && !pid)
298 ret = -ESRCH; 298 ret = -ESRCH;
299 else 299 else
300 ret = __f_setown(filp, pid, type, 1); 300 ret = __f_setown(filp, pid, type, 1);
301 rcu_read_unlock(); 301 rcu_read_unlock();
302 302
303 return ret; 303 return ret;
304 } 304 }
305 305
306 static int f_getown_ex(struct file *filp, unsigned long arg) 306 static int f_getown_ex(struct file *filp, unsigned long arg)
307 { 307 {
308 struct f_owner_ex * __user owner_p = (void * __user)arg; 308 struct f_owner_ex * __user owner_p = (void * __user)arg;
309 struct f_owner_ex owner; 309 struct f_owner_ex owner;
310 int ret = 0; 310 int ret = 0;
311 311
312 read_lock(&filp->f_owner.lock); 312 read_lock(&filp->f_owner.lock);
313 owner.pid = pid_vnr(filp->f_owner.pid); 313 owner.pid = pid_vnr(filp->f_owner.pid);
314 switch (filp->f_owner.pid_type) { 314 switch (filp->f_owner.pid_type) {
315 case PIDTYPE_MAX: 315 case PIDTYPE_MAX:
316 owner.type = F_OWNER_TID; 316 owner.type = F_OWNER_TID;
317 break; 317 break;
318 318
319 case PIDTYPE_PID: 319 case PIDTYPE_PID:
320 owner.type = F_OWNER_PID; 320 owner.type = F_OWNER_PID;
321 break; 321 break;
322 322
323 case PIDTYPE_PGID: 323 case PIDTYPE_PGID:
324 owner.type = F_OWNER_PGRP; 324 owner.type = F_OWNER_PGRP;
325 break; 325 break;
326 326
327 default: 327 default:
328 WARN_ON(1); 328 WARN_ON(1);
329 ret = -EINVAL; 329 ret = -EINVAL;
330 break; 330 break;
331 } 331 }
332 read_unlock(&filp->f_owner.lock); 332 read_unlock(&filp->f_owner.lock);
333 333
334 if (!ret) 334 if (!ret)
335 ret = copy_to_user(owner_p, &owner, sizeof(owner)); 335 ret = copy_to_user(owner_p, &owner, sizeof(owner));
336 return ret; 336 return ret;
337 } 337 }
338 338
339 static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, 339 static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
340 struct file *filp) 340 struct file *filp)
341 { 341 {
342 long err = -EINVAL; 342 long err = -EINVAL;
343 343
344 switch (cmd) { 344 switch (cmd) {
345 case F_DUPFD: 345 case F_DUPFD:
346 case F_DUPFD_CLOEXEC: 346 case F_DUPFD_CLOEXEC:
347 if (arg >= rlimit(RLIMIT_NOFILE)) 347 if (arg >= rlimit(RLIMIT_NOFILE))
348 break; 348 break;
349 err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0); 349 err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0);
350 if (err >= 0) { 350 if (err >= 0) {
351 get_file(filp); 351 get_file(filp);
352 fd_install(err, filp); 352 fd_install(err, filp);
353 } 353 }
354 break; 354 break;
355 case F_GETFD: 355 case F_GETFD:
356 err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; 356 err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
357 break; 357 break;
358 case F_SETFD: 358 case F_SETFD:
359 err = 0; 359 err = 0;
360 set_close_on_exec(fd, arg & FD_CLOEXEC); 360 set_close_on_exec(fd, arg & FD_CLOEXEC);
361 break; 361 break;
362 case F_GETFL: 362 case F_GETFL:
363 err = filp->f_flags; 363 err = filp->f_flags;
364 break; 364 break;
365 case F_SETFL: 365 case F_SETFL:
366 err = setfl(fd, filp, arg); 366 err = setfl(fd, filp, arg);
367 break; 367 break;
368 case F_GETLK: 368 case F_GETLK:
369 err = fcntl_getlk(filp, (struct flock __user *) arg); 369 err = fcntl_getlk(filp, (struct flock __user *) arg);
370 break; 370 break;
371 case F_SETLK: 371 case F_SETLK:
372 case F_SETLKW: 372 case F_SETLKW:
373 err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg); 373 err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg);
374 break; 374 break;
375 case F_GETOWN: 375 case F_GETOWN:
376 /* 376 /*
377 * XXX If f_owner is a process group, the 377 * XXX If f_owner is a process group, the
378 * negative return value will get converted 378 * negative return value will get converted
379 * into an error. Oops. If we keep the 379 * into an error. Oops. If we keep the
380 * current syscall conventions, the only way 380 * current syscall conventions, the only way
381 * to fix this will be in libc. 381 * to fix this will be in libc.
382 */ 382 */
383 err = f_getown(filp); 383 err = f_getown(filp);
384 force_successful_syscall_return(); 384 force_successful_syscall_return();
385 break; 385 break;
386 case F_SETOWN: 386 case F_SETOWN:
387 err = f_setown(filp, arg, 1); 387 err = f_setown(filp, arg, 1);
388 break; 388 break;
389 case F_GETOWN_EX: 389 case F_GETOWN_EX:
390 err = f_getown_ex(filp, arg); 390 err = f_getown_ex(filp, arg);
391 break; 391 break;
392 case F_SETOWN_EX: 392 case F_SETOWN_EX:
393 err = f_setown_ex(filp, arg); 393 err = f_setown_ex(filp, arg);
394 break; 394 break;
395 case F_GETSIG: 395 case F_GETSIG:
396 err = filp->f_owner.signum; 396 err = filp->f_owner.signum;
397 break; 397 break;
398 case F_SETSIG: 398 case F_SETSIG:
399 /* arg == 0 restores default behaviour. */ 399 /* arg == 0 restores default behaviour. */
400 if (!valid_signal(arg)) { 400 if (!valid_signal(arg)) {
401 break; 401 break;
402 } 402 }
403 err = 0; 403 err = 0;
404 filp->f_owner.signum = arg; 404 filp->f_owner.signum = arg;
405 break; 405 break;
406 case F_GETLEASE: 406 case F_GETLEASE:
407 err = fcntl_getlease(filp); 407 err = fcntl_getlease(filp);
408 break; 408 break;
409 case F_SETLEASE: 409 case F_SETLEASE:
410 err = fcntl_setlease(fd, filp, arg); 410 err = fcntl_setlease(fd, filp, arg);
411 break; 411 break;
412 case F_NOTIFY: 412 case F_NOTIFY:
413 err = fcntl_dirnotify(fd, filp, arg); 413 err = fcntl_dirnotify(fd, filp, arg);
414 break; 414 break;
415 default: 415 default:
416 break; 416 break;
417 } 417 }
418 return err; 418 return err;
419 } 419 }
420 420
421 SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) 421 SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
422 { 422 {
423 struct file *filp; 423 struct file *filp;
424 long err = -EBADF; 424 long err = -EBADF;
425 425
426 filp = fget(fd); 426 filp = fget(fd);
427 if (!filp) 427 if (!filp)
428 goto out; 428 goto out;
429 429
430 err = security_file_fcntl(filp, cmd, arg); 430 err = security_file_fcntl(filp, cmd, arg);
431 if (err) { 431 if (err) {
432 fput(filp); 432 fput(filp);
433 return err; 433 return err;
434 } 434 }
435 435
436 err = do_fcntl(fd, cmd, arg, filp); 436 err = do_fcntl(fd, cmd, arg, filp);
437 437
438 fput(filp); 438 fput(filp);
439 out: 439 out:
440 return err; 440 return err;
441 } 441 }
442 442
443 #if BITS_PER_LONG == 32 443 #if BITS_PER_LONG == 32
444 SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, 444 SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
445 unsigned long, arg) 445 unsigned long, arg)
446 { 446 {
447 struct file * filp; 447 struct file * filp;
448 long err; 448 long err;
449 449
450 err = -EBADF; 450 err = -EBADF;
451 filp = fget(fd); 451 filp = fget(fd);
452 if (!filp) 452 if (!filp)
453 goto out; 453 goto out;
454 454
455 err = security_file_fcntl(filp, cmd, arg); 455 err = security_file_fcntl(filp, cmd, arg);
456 if (err) { 456 if (err) {
457 fput(filp); 457 fput(filp);
458 return err; 458 return err;
459 } 459 }
460 err = -EBADF; 460 err = -EBADF;
461 461
462 switch (cmd) { 462 switch (cmd) {
463 case F_GETLK64: 463 case F_GETLK64:
464 err = fcntl_getlk64(filp, (struct flock64 __user *) arg); 464 err = fcntl_getlk64(filp, (struct flock64 __user *) arg);
465 break; 465 break;
466 case F_SETLK64: 466 case F_SETLK64:
467 case F_SETLKW64: 467 case F_SETLKW64:
468 err = fcntl_setlk64(fd, filp, cmd, 468 err = fcntl_setlk64(fd, filp, cmd,
469 (struct flock64 __user *) arg); 469 (struct flock64 __user *) arg);
470 break; 470 break;
471 default: 471 default:
472 err = do_fcntl(fd, cmd, arg, filp); 472 err = do_fcntl(fd, cmd, arg, filp);
473 break; 473 break;
474 } 474 }
475 fput(filp); 475 fput(filp);
476 out: 476 out:
477 return err; 477 return err;
478 } 478 }
479 #endif 479 #endif
480 480
481 /* Table to convert sigio signal codes into poll band bitmaps */ 481 /* Table to convert sigio signal codes into poll band bitmaps */
482 482
483 static const long band_table[NSIGPOLL] = { 483 static const long band_table[NSIGPOLL] = {
484 POLLIN | POLLRDNORM, /* POLL_IN */ 484 POLLIN | POLLRDNORM, /* POLL_IN */
485 POLLOUT | POLLWRNORM | POLLWRBAND, /* POLL_OUT */ 485 POLLOUT | POLLWRNORM | POLLWRBAND, /* POLL_OUT */
486 POLLIN | POLLRDNORM | POLLMSG, /* POLL_MSG */ 486 POLLIN | POLLRDNORM | POLLMSG, /* POLL_MSG */
487 POLLERR, /* POLL_ERR */ 487 POLLERR, /* POLL_ERR */
488 POLLPRI | POLLRDBAND, /* POLL_PRI */ 488 POLLPRI | POLLRDBAND, /* POLL_PRI */
489 POLLHUP | POLLERR /* POLL_HUP */ 489 POLLHUP | POLLERR /* POLL_HUP */
490 }; 490 };
491 491
492 static inline int sigio_perm(struct task_struct *p, 492 static inline int sigio_perm(struct task_struct *p,
493 struct fown_struct *fown, int sig) 493 struct fown_struct *fown, int sig)
494 { 494 {
495 const struct cred *cred; 495 const struct cred *cred;
496 int ret; 496 int ret;
497 497
498 rcu_read_lock(); 498 rcu_read_lock();
499 cred = __task_cred(p); 499 cred = __task_cred(p);
500 ret = ((fown->euid == 0 || 500 ret = ((fown->euid == 0 ||
501 fown->euid == cred->suid || fown->euid == cred->uid || 501 fown->euid == cred->suid || fown->euid == cred->uid ||
502 fown->uid == cred->suid || fown->uid == cred->uid) && 502 fown->uid == cred->suid || fown->uid == cred->uid) &&
503 !security_file_send_sigiotask(p, fown, sig)); 503 !security_file_send_sigiotask(p, fown, sig));
504 rcu_read_unlock(); 504 rcu_read_unlock();
505 return ret; 505 return ret;
506 } 506 }
507 507
508 static void send_sigio_to_task(struct task_struct *p, 508 static void send_sigio_to_task(struct task_struct *p,
509 struct fown_struct *fown, 509 struct fown_struct *fown,
510 int fd, int reason, int group) 510 int fd, int reason, int group)
511 { 511 {
512 /* 512 /*
513 * F_SETSIG can change ->signum lockless in parallel, make 513 * F_SETSIG can change ->signum lockless in parallel, make
514 * sure we read it once and use the same value throughout. 514 * sure we read it once and use the same value throughout.
515 */ 515 */
516 int signum = ACCESS_ONCE(fown->signum); 516 int signum = ACCESS_ONCE(fown->signum);
517 517
518 if (!sigio_perm(p, fown, signum)) 518 if (!sigio_perm(p, fown, signum))
519 return; 519 return;
520 520
521 switch (signum) { 521 switch (signum) {
522 siginfo_t si; 522 siginfo_t si;
523 default: 523 default:
524 /* Queue a rt signal with the appropriate fd as its 524 /* Queue a rt signal with the appropriate fd as its
525 value. We use SI_SIGIO as the source, not 525 value. We use SI_SIGIO as the source, not
526 SI_KERNEL, since kernel signals always get 526 SI_KERNEL, since kernel signals always get
527 delivered even if we can't queue. Failure to 527 delivered even if we can't queue. Failure to
528 queue in this case _should_ be reported; we fall 528 queue in this case _should_ be reported; we fall
529 back to SIGIO in that case. --sct */ 529 back to SIGIO in that case. --sct */
530 si.si_signo = signum; 530 si.si_signo = signum;
531 si.si_errno = 0; 531 si.si_errno = 0;
532 si.si_code = reason; 532 si.si_code = reason;
533 /* Make sure we are called with one of the POLL_* 533 /* Make sure we are called with one of the POLL_*
534 reasons, otherwise we could leak kernel stack into 534 reasons, otherwise we could leak kernel stack into
535 userspace. */ 535 userspace. */
536 BUG_ON((reason & __SI_MASK) != __SI_POLL); 536 BUG_ON((reason & __SI_MASK) != __SI_POLL);
537 if (reason - POLL_IN >= NSIGPOLL) 537 if (reason - POLL_IN >= NSIGPOLL)
538 si.si_band = ~0L; 538 si.si_band = ~0L;
539 else 539 else
540 si.si_band = band_table[reason - POLL_IN]; 540 si.si_band = band_table[reason - POLL_IN];
541 si.si_fd = fd; 541 si.si_fd = fd;
542 if (!do_send_sig_info(signum, &si, p, group)) 542 if (!do_send_sig_info(signum, &si, p, group))
543 break; 543 break;
544 /* fall-through: fall back on the old plain SIGIO signal */ 544 /* fall-through: fall back on the old plain SIGIO signal */
545 case 0: 545 case 0:
546 do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, group); 546 do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, group);
547 } 547 }
548 } 548 }
549 549
550 void send_sigio(struct fown_struct *fown, int fd, int band) 550 void send_sigio(struct fown_struct *fown, int fd, int band)
551 { 551 {
552 struct task_struct *p; 552 struct task_struct *p;
553 enum pid_type type; 553 enum pid_type type;
554 struct pid *pid; 554 struct pid *pid;
555 int group = 1; 555 int group = 1;
556 556
557 read_lock(&fown->lock); 557 read_lock(&fown->lock);
558 558
559 type = fown->pid_type; 559 type = fown->pid_type;
560 if (type == PIDTYPE_MAX) { 560 if (type == PIDTYPE_MAX) {
561 group = 0; 561 group = 0;
562 type = PIDTYPE_PID; 562 type = PIDTYPE_PID;
563 } 563 }
564 564
565 pid = fown->pid; 565 pid = fown->pid;
566 if (!pid) 566 if (!pid)
567 goto out_unlock_fown; 567 goto out_unlock_fown;
568 568
569 read_lock(&tasklist_lock); 569 read_lock(&tasklist_lock);
570 do_each_pid_task(pid, type, p) { 570 do_each_pid_task(pid, type, p) {
571 send_sigio_to_task(p, fown, fd, band, group); 571 send_sigio_to_task(p, fown, fd, band, group);
572 } while_each_pid_task(pid, type, p); 572 } while_each_pid_task(pid, type, p);
573 read_unlock(&tasklist_lock); 573 read_unlock(&tasklist_lock);
574 out_unlock_fown: 574 out_unlock_fown:
575 read_unlock(&fown->lock); 575 read_unlock(&fown->lock);
576 } 576 }
577 577
578 static void send_sigurg_to_task(struct task_struct *p, 578 static void send_sigurg_to_task(struct task_struct *p,
579 struct fown_struct *fown, int group) 579 struct fown_struct *fown, int group)
580 { 580 {
581 if (sigio_perm(p, fown, SIGURG)) 581 if (sigio_perm(p, fown, SIGURG))
582 do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, group); 582 do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, group);
583 } 583 }
584 584
585 int send_sigurg(struct fown_struct *fown) 585 int send_sigurg(struct fown_struct *fown)
586 { 586 {
587 struct task_struct *p; 587 struct task_struct *p;
588 enum pid_type type; 588 enum pid_type type;
589 struct pid *pid; 589 struct pid *pid;
590 int group = 1; 590 int group = 1;
591 int ret = 0; 591 int ret = 0;
592 592
593 read_lock(&fown->lock); 593 read_lock(&fown->lock);
594 594
595 type = fown->pid_type; 595 type = fown->pid_type;
596 if (type == PIDTYPE_MAX) { 596 if (type == PIDTYPE_MAX) {
597 group = 0; 597 group = 0;
598 type = PIDTYPE_PID; 598 type = PIDTYPE_PID;
599 } 599 }
600 600
601 pid = fown->pid; 601 pid = fown->pid;
602 if (!pid) 602 if (!pid)
603 goto out_unlock_fown; 603 goto out_unlock_fown;
604 604
605 ret = 1; 605 ret = 1;
606 606
607 read_lock(&tasklist_lock); 607 read_lock(&tasklist_lock);
608 do_each_pid_task(pid, type, p) { 608 do_each_pid_task(pid, type, p) {
609 send_sigurg_to_task(p, fown, group); 609 send_sigurg_to_task(p, fown, group);
610 } while_each_pid_task(pid, type, p); 610 } while_each_pid_task(pid, type, p);
611 read_unlock(&tasklist_lock); 611 read_unlock(&tasklist_lock);
612 out_unlock_fown: 612 out_unlock_fown:
613 read_unlock(&fown->lock); 613 read_unlock(&fown->lock);
614 return ret; 614 return ret;
615 } 615 }
616 616
617 static DEFINE_RWLOCK(fasync_lock); 617 static DEFINE_SPINLOCK(fasync_lock);
618 static struct kmem_cache *fasync_cache __read_mostly; 618 static struct kmem_cache *fasync_cache __read_mostly;
619 619
620 static void fasync_free_rcu(struct rcu_head *head)
621 {
622 kmem_cache_free(fasync_cache,
623 container_of(head, struct fasync_struct, fa_rcu));
624 }
625
620 /* 626 /*
621 * Remove a fasync entry. If successfully removed, return 627 * Remove a fasync entry. If successfully removed, return
622 * positive and clear the FASYNC flag. If no entry exists, 628 * positive and clear the FASYNC flag. If no entry exists,
623 * do nothing and return 0. 629 * do nothing and return 0.
624 * 630 *
625 * NOTE! It is very important that the FASYNC flag always 631 * NOTE! It is very important that the FASYNC flag always
626 * match the state "is the filp on a fasync list". 632 * match the state "is the filp on a fasync list".
627 * 633 *
628 * We always take the 'filp->f_lock', in since fasync_lock
629 * needs to be irq-safe.
630 */ 634 */
631 static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) 635 static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
632 { 636 {
633 struct fasync_struct *fa, **fp; 637 struct fasync_struct *fa, **fp;
634 int result = 0; 638 int result = 0;
635 639
636 spin_lock(&filp->f_lock); 640 spin_lock(&filp->f_lock);
637 write_lock_irq(&fasync_lock); 641 spin_lock(&fasync_lock);
638 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { 642 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
639 if (fa->fa_file != filp) 643 if (fa->fa_file != filp)
640 continue; 644 continue;
645
646 spin_lock_irq(&fa->fa_lock);
647 fa->fa_file = NULL;
648 spin_unlock_irq(&fa->fa_lock);
649
641 *fp = fa->fa_next; 650 *fp = fa->fa_next;
642 kmem_cache_free(fasync_cache, fa); 651 call_rcu(&fa->fa_rcu, fasync_free_rcu);
643 filp->f_flags &= ~FASYNC; 652 filp->f_flags &= ~FASYNC;
644 result = 1; 653 result = 1;
645 break; 654 break;
646 } 655 }
647 write_unlock_irq(&fasync_lock); 656 spin_unlock(&fasync_lock);
648 spin_unlock(&filp->f_lock); 657 spin_unlock(&filp->f_lock);
649 return result; 658 return result;
650 } 659 }
651 660
652 /* 661 /*
653 * Add a fasync entry. Return negative on error, positive if 662 * Add a fasync entry. Return negative on error, positive if
654 * added, and zero if did nothing but change an existing one. 663 * added, and zero if did nothing but change an existing one.
655 * 664 *
656 * NOTE! It is very important that the FASYNC flag always 665 * NOTE! It is very important that the FASYNC flag always
657 * match the state "is the filp on a fasync list". 666 * match the state "is the filp on a fasync list".
658 */ 667 */
659 static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp) 668 static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
660 { 669 {
661 struct fasync_struct *new, *fa, **fp; 670 struct fasync_struct *new, *fa, **fp;
662 int result = 0; 671 int result = 0;
663 672
664 new = kmem_cache_alloc(fasync_cache, GFP_KERNEL); 673 new = kmem_cache_alloc(fasync_cache, GFP_KERNEL);
665 if (!new) 674 if (!new)
666 return -ENOMEM; 675 return -ENOMEM;
667 676
668 spin_lock(&filp->f_lock); 677 spin_lock(&filp->f_lock);
669 write_lock_irq(&fasync_lock); 678 spin_lock(&fasync_lock);
670 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { 679 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
671 if (fa->fa_file != filp) 680 if (fa->fa_file != filp)
672 continue; 681 continue;
682
683 spin_lock_irq(&fa->fa_lock);
673 fa->fa_fd = fd; 684 fa->fa_fd = fd;
685 spin_unlock_irq(&fa->fa_lock);
686
674 kmem_cache_free(fasync_cache, new); 687 kmem_cache_free(fasync_cache, new);
675 goto out; 688 goto out;
676 } 689 }
677 690
691 spin_lock_init(&new->fa_lock);
678 new->magic = FASYNC_MAGIC; 692 new->magic = FASYNC_MAGIC;
679 new->fa_file = filp; 693 new->fa_file = filp;
680 new->fa_fd = fd; 694 new->fa_fd = fd;
681 new->fa_next = *fapp; 695 new->fa_next = *fapp;
682 *fapp = new; 696 rcu_assign_pointer(*fapp, new);
683 result = 1; 697 result = 1;
684 filp->f_flags |= FASYNC; 698 filp->f_flags |= FASYNC;
685 699
686 out: 700 out:
687 write_unlock_irq(&fasync_lock); 701 spin_unlock(&fasync_lock);
688 spin_unlock(&filp->f_lock); 702 spin_unlock(&filp->f_lock);
689 return result; 703 return result;
690 } 704 }
691 705
692 /* 706 /*
693 * fasync_helper() is used by almost all character device drivers 707 * fasync_helper() is used by almost all character device drivers
694 * to set up the fasync queue, and for regular files by the file 708 * to set up the fasync queue, and for regular files by the file
695 * lease code. It returns negative on error, 0 if it did no changes 709 * lease code. It returns negative on error, 0 if it did no changes
696 * and positive if it added/deleted the entry. 710 * and positive if it added/deleted the entry.
697 */ 711 */
698 int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp) 712 int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
699 { 713 {
700 if (!on) 714 if (!on)
701 return fasync_remove_entry(filp, fapp); 715 return fasync_remove_entry(filp, fapp);
702 return fasync_add_entry(fd, filp, fapp); 716 return fasync_add_entry(fd, filp, fapp);
703 } 717 }
704 718
705 EXPORT_SYMBOL(fasync_helper); 719 EXPORT_SYMBOL(fasync_helper);
706 720
707 void __kill_fasync(struct fasync_struct *fa, int sig, int band) 721 /*
722 * rcu_read_lock() is held
723 */
724 static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
708 { 725 {
709 while (fa) { 726 while (fa) {
710 struct fown_struct * fown; 727 struct fown_struct *fown;
711 if (fa->magic != FASYNC_MAGIC) { 728 if (fa->magic != FASYNC_MAGIC) {
712 printk(KERN_ERR "kill_fasync: bad magic number in " 729 printk(KERN_ERR "kill_fasync: bad magic number in "
713 "fasync_struct!\n"); 730 "fasync_struct!\n");
714 return; 731 return;
715 } 732 }
716 fown = &fa->fa_file->f_owner; 733 spin_lock(&fa->fa_lock);
717 /* Don't send SIGURG to processes which have not set a 734 if (fa->fa_file) {
718 queued signum: SIGURG has its own default signalling 735 fown = &fa->fa_file->f_owner;
719 mechanism. */ 736 /* Don't send SIGURG to processes which have not set a
720 if (!(sig == SIGURG && fown->signum == 0)) 737 queued signum: SIGURG has its own default signalling
721 send_sigio(fown, fa->fa_fd, band); 738 mechanism. */
722 fa = fa->fa_next; 739 if (!(sig == SIGURG && fown->signum == 0))
740 send_sigio(fown, fa->fa_fd, band);
741 }
742 spin_unlock(&fa->fa_lock);
743 fa = rcu_dereference(fa->fa_next);
723 } 744 }
724 } 745 }
725 746
726 EXPORT_SYMBOL(__kill_fasync);
727
728 void kill_fasync(struct fasync_struct **fp, int sig, int band) 747 void kill_fasync(struct fasync_struct **fp, int sig, int band)
729 { 748 {
730 /* First a quick test without locking: usually 749 /* First a quick test without locking: usually
731 * the list is empty. 750 * the list is empty.
732 */ 751 */
733 if (*fp) { 752 if (*fp) {
734 read_lock(&fasync_lock); 753 rcu_read_lock();
735 /* reread *fp after obtaining the lock */ 754 kill_fasync_rcu(rcu_dereference(*fp), sig, band);
736 __kill_fasync(*fp, sig, band); 755 rcu_read_unlock();
737 read_unlock(&fasync_lock);
738 } 756 }
739 } 757 }
740 EXPORT_SYMBOL(kill_fasync); 758 EXPORT_SYMBOL(kill_fasync);
741 759
742 static int __init fasync_init(void) 760 static int __init fasync_init(void)
743 { 761 {
744 fasync_cache = kmem_cache_create("fasync_cache", 762 fasync_cache = kmem_cache_create("fasync_cache",
745 sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL); 763 sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL);
1 #ifndef _LINUX_FS_H 1 #ifndef _LINUX_FS_H
2 #define _LINUX_FS_H 2 #define _LINUX_FS_H
3 3
4 /* 4 /*
5 * This file has definitions for some important file table 5 * This file has definitions for some important file table
6 * structures etc. 6 * structures etc.
7 */ 7 */
8 8
9 #include <linux/limits.h> 9 #include <linux/limits.h>
10 #include <linux/ioctl.h> 10 #include <linux/ioctl.h>
11 11
12 /* 12 /*
13 * It's silly to have NR_OPEN bigger than NR_FILE, but you can change 13 * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
14 * the file limit at runtime and only root can increase the per-process 14 * the file limit at runtime and only root can increase the per-process
15 * nr_file rlimit, so it's safe to set up a ridiculously high absolute 15 * nr_file rlimit, so it's safe to set up a ridiculously high absolute
16 * upper limit on files-per-process. 16 * upper limit on files-per-process.
17 * 17 *
18 * Some programs (notably those using select()) may have to be 18 * Some programs (notably those using select()) may have to be
19 * recompiled to take full advantage of the new limits.. 19 * recompiled to take full advantage of the new limits..
20 */ 20 */
21 21
22 /* Fixed constants first: */ 22 /* Fixed constants first: */
23 #undef NR_OPEN 23 #undef NR_OPEN
24 #define INR_OPEN 1024 /* Initial setting for nfile rlimits */ 24 #define INR_OPEN 1024 /* Initial setting for nfile rlimits */
25 25
26 #define BLOCK_SIZE_BITS 10 26 #define BLOCK_SIZE_BITS 10
27 #define BLOCK_SIZE (1<<BLOCK_SIZE_BITS) 27 #define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
28 28
29 #define SEEK_SET 0 /* seek relative to beginning of file */ 29 #define SEEK_SET 0 /* seek relative to beginning of file */
30 #define SEEK_CUR 1 /* seek relative to current file position */ 30 #define SEEK_CUR 1 /* seek relative to current file position */
31 #define SEEK_END 2 /* seek relative to end of file */ 31 #define SEEK_END 2 /* seek relative to end of file */
32 #define SEEK_MAX SEEK_END 32 #define SEEK_MAX SEEK_END
33 33
34 /* And dynamically-tunable limits and defaults: */ 34 /* And dynamically-tunable limits and defaults: */
35 struct files_stat_struct { 35 struct files_stat_struct {
36 int nr_files; /* read only */ 36 int nr_files; /* read only */
37 int nr_free_files; /* read only */ 37 int nr_free_files; /* read only */
38 int max_files; /* tunable */ 38 int max_files; /* tunable */
39 }; 39 };
40 40
41 struct inodes_stat_t { 41 struct inodes_stat_t {
42 int nr_inodes; 42 int nr_inodes;
43 int nr_unused; 43 int nr_unused;
44 int dummy[5]; /* padding for sysctl ABI compatibility */ 44 int dummy[5]; /* padding for sysctl ABI compatibility */
45 }; 45 };
46 46
47 47
48 #define NR_FILE 8192 /* this can well be larger on a larger system */ 48 #define NR_FILE 8192 /* this can well be larger on a larger system */
49 49
50 #define MAY_EXEC 1 50 #define MAY_EXEC 1
51 #define MAY_WRITE 2 51 #define MAY_WRITE 2
52 #define MAY_READ 4 52 #define MAY_READ 4
53 #define MAY_APPEND 8 53 #define MAY_APPEND 8
54 #define MAY_ACCESS 16 54 #define MAY_ACCESS 16
55 #define MAY_OPEN 32 55 #define MAY_OPEN 32
56 56
57 /* 57 /*
58 * flags in file.f_mode. Note that FMODE_READ and FMODE_WRITE must correspond 58 * flags in file.f_mode. Note that FMODE_READ and FMODE_WRITE must correspond
59 * to O_WRONLY and O_RDWR via the strange trick in __dentry_open() 59 * to O_WRONLY and O_RDWR via the strange trick in __dentry_open()
60 */ 60 */
61 61
62 /* file is open for reading */ 62 /* file is open for reading */
63 #define FMODE_READ ((__force fmode_t)0x1) 63 #define FMODE_READ ((__force fmode_t)0x1)
64 /* file is open for writing */ 64 /* file is open for writing */
65 #define FMODE_WRITE ((__force fmode_t)0x2) 65 #define FMODE_WRITE ((__force fmode_t)0x2)
66 /* file is seekable */ 66 /* file is seekable */
67 #define FMODE_LSEEK ((__force fmode_t)0x4) 67 #define FMODE_LSEEK ((__force fmode_t)0x4)
68 /* file can be accessed using pread */ 68 /* file can be accessed using pread */
69 #define FMODE_PREAD ((__force fmode_t)0x8) 69 #define FMODE_PREAD ((__force fmode_t)0x8)
70 /* file can be accessed using pwrite */ 70 /* file can be accessed using pwrite */
71 #define FMODE_PWRITE ((__force fmode_t)0x10) 71 #define FMODE_PWRITE ((__force fmode_t)0x10)
72 /* File is opened for execution with sys_execve / sys_uselib */ 72 /* File is opened for execution with sys_execve / sys_uselib */
73 #define FMODE_EXEC ((__force fmode_t)0x20) 73 #define FMODE_EXEC ((__force fmode_t)0x20)
74 /* File is opened with O_NDELAY (only set for block devices) */ 74 /* File is opened with O_NDELAY (only set for block devices) */
75 #define FMODE_NDELAY ((__force fmode_t)0x40) 75 #define FMODE_NDELAY ((__force fmode_t)0x40)
76 /* File is opened with O_EXCL (only set for block devices) */ 76 /* File is opened with O_EXCL (only set for block devices) */
77 #define FMODE_EXCL ((__force fmode_t)0x80) 77 #define FMODE_EXCL ((__force fmode_t)0x80)
78 /* File is opened using open(.., 3, ..) and is writeable only for ioctls 78 /* File is opened using open(.., 3, ..) and is writeable only for ioctls
79 (specialy hack for floppy.c) */ 79 (specialy hack for floppy.c) */
80 #define FMODE_WRITE_IOCTL ((__force fmode_t)0x100) 80 #define FMODE_WRITE_IOCTL ((__force fmode_t)0x100)
81 81
82 /* 82 /*
83 * Don't update ctime and mtime. 83 * Don't update ctime and mtime.
84 * 84 *
85 * Currently a special hack for the XFS open_by_handle ioctl, but we'll 85 * Currently a special hack for the XFS open_by_handle ioctl, but we'll
86 * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon. 86 * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon.
87 */ 87 */
88 #define FMODE_NOCMTIME ((__force fmode_t)0x800) 88 #define FMODE_NOCMTIME ((__force fmode_t)0x800)
89 89
90 /* Expect random access pattern */ 90 /* Expect random access pattern */
91 #define FMODE_RANDOM ((__force fmode_t)0x1000) 91 #define FMODE_RANDOM ((__force fmode_t)0x1000)
92 92
93 /* 93 /*
94 * The below are the various read and write types that we support. Some of 94 * The below are the various read and write types that we support. Some of
95 * them include behavioral modifiers that send information down to the 95 * them include behavioral modifiers that send information down to the
96 * block layer and IO scheduler. Terminology: 96 * block layer and IO scheduler. Terminology:
97 * 97 *
98 * The block layer uses device plugging to defer IO a little bit, in 98 * The block layer uses device plugging to defer IO a little bit, in
99 * the hope that we will see more IO very shortly. This increases 99 * the hope that we will see more IO very shortly. This increases
100 * coalescing of adjacent IO and thus reduces the number of IOs we 100 * coalescing of adjacent IO and thus reduces the number of IOs we
101 * have to send to the device. It also allows for better queuing, 101 * have to send to the device. It also allows for better queuing,
102 * if the IO isn't mergeable. If the caller is going to be waiting 102 * if the IO isn't mergeable. If the caller is going to be waiting
103 * for the IO, then he must ensure that the device is unplugged so 103 * for the IO, then he must ensure that the device is unplugged so
104 * that the IO is dispatched to the driver. 104 * that the IO is dispatched to the driver.
105 * 105 *
106 * All IO is handled async in Linux. This is fine for background 106 * All IO is handled async in Linux. This is fine for background
107 * writes, but for reads or writes that someone waits for completion 107 * writes, but for reads or writes that someone waits for completion
108 * on, we want to notify the block layer and IO scheduler so that they 108 * on, we want to notify the block layer and IO scheduler so that they
109 * know about it. That allows them to make better scheduling 109 * know about it. That allows them to make better scheduling
110 * decisions. So when the below references 'sync' and 'async', it 110 * decisions. So when the below references 'sync' and 'async', it
111 * is referencing this priority hint. 111 * is referencing this priority hint.
112 * 112 *
113 * With that in mind, the available types are: 113 * With that in mind, the available types are:
114 * 114 *
115 * READ A normal read operation. Device will be plugged. 115 * READ A normal read operation. Device will be plugged.
116 * READ_SYNC A synchronous read. Device is not plugged, caller can 116 * READ_SYNC A synchronous read. Device is not plugged, caller can
117 * immediately wait on this read without caring about 117 * immediately wait on this read without caring about
118 * unplugging. 118 * unplugging.
119 * READA Used for read-ahead operations. Lower priority, and the 119 * READA Used for read-ahead operations. Lower priority, and the
120 * block layer could (in theory) choose to ignore this 120 * block layer could (in theory) choose to ignore this
121 * request if it runs into resource problems. 121 * request if it runs into resource problems.
122 * WRITE A normal async write. Device will be plugged. 122 * WRITE A normal async write. Device will be plugged.
123 * SWRITE Like WRITE, but a special case for ll_rw_block() that 123 * SWRITE Like WRITE, but a special case for ll_rw_block() that
124 * tells it to lock the buffer first. Normally a buffer 124 * tells it to lock the buffer first. Normally a buffer
125 * must be locked before doing IO. 125 * must be locked before doing IO.
126 * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down 126 * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down
127 * the hint that someone will be waiting on this IO 127 * the hint that someone will be waiting on this IO
128 * shortly. The device must still be unplugged explicitly, 128 * shortly. The device must still be unplugged explicitly,
129 * WRITE_SYNC_PLUG does not do this as we could be 129 * WRITE_SYNC_PLUG does not do this as we could be
130 * submitting more writes before we actually wait on any 130 * submitting more writes before we actually wait on any
131 * of them. 131 * of them.
132 * WRITE_SYNC Like WRITE_SYNC_PLUG, but also unplugs the device 132 * WRITE_SYNC Like WRITE_SYNC_PLUG, but also unplugs the device
133 * immediately after submission. The write equivalent 133 * immediately after submission. The write equivalent
134 * of READ_SYNC. 134 * of READ_SYNC.
135 * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. 135 * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only.
136 * SWRITE_SYNC 136 * SWRITE_SYNC
137 * SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer. 137 * SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer.
138 * See SWRITE. 138 * See SWRITE.
139 * WRITE_BARRIER Like WRITE, but tells the block layer that all 139 * WRITE_BARRIER Like WRITE, but tells the block layer that all
140 * previously submitted writes must be safely on storage 140 * previously submitted writes must be safely on storage
141 * before this one is started. Also guarantees that when 141 * before this one is started. Also guarantees that when
142 * this write is complete, it itself is also safely on 142 * this write is complete, it itself is also safely on
143 * storage. Prevents reordering of writes on both sides 143 * storage. Prevents reordering of writes on both sides
144 * of this IO. 144 * of this IO.
145 * 145 *
146 */ 146 */
147 #define RW_MASK 1 147 #define RW_MASK 1
148 #define RWA_MASK 2 148 #define RWA_MASK 2
149 #define READ 0 149 #define READ 0
150 #define WRITE 1 150 #define WRITE 1
151 #define READA 2 /* read-ahead - don't block if no resources */ 151 #define READA 2 /* read-ahead - don't block if no resources */
152 #define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */ 152 #define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */
153 #define READ_SYNC (READ | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG)) 153 #define READ_SYNC (READ | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG))
154 #define READ_META (READ | (1 << BIO_RW_META)) 154 #define READ_META (READ | (1 << BIO_RW_META))
155 #define WRITE_SYNC_PLUG (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) 155 #define WRITE_SYNC_PLUG (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE))
156 #define WRITE_SYNC (WRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) 156 #define WRITE_SYNC (WRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG))
157 #define WRITE_ODIRECT_PLUG (WRITE | (1 << BIO_RW_SYNCIO)) 157 #define WRITE_ODIRECT_PLUG (WRITE | (1 << BIO_RW_SYNCIO))
158 #define WRITE_META (WRITE | (1 << BIO_RW_META)) 158 #define WRITE_META (WRITE | (1 << BIO_RW_META))
159 #define SWRITE_SYNC_PLUG \ 159 #define SWRITE_SYNC_PLUG \
160 (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) 160 (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE))
161 #define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) 161 #define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG))
162 #define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER)) 162 #define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER))
163 163
164 /* 164 /*
165 * These aren't really reads or writes, they pass down information about 165 * These aren't really reads or writes, they pass down information about
166 * parts of device that are now unused by the file system. 166 * parts of device that are now unused by the file system.
167 */ 167 */
168 #define DISCARD_NOBARRIER (WRITE | (1 << BIO_RW_DISCARD)) 168 #define DISCARD_NOBARRIER (WRITE | (1 << BIO_RW_DISCARD))
169 #define DISCARD_BARRIER (DISCARD_NOBARRIER | (1 << BIO_RW_BARRIER)) 169 #define DISCARD_BARRIER (DISCARD_NOBARRIER | (1 << BIO_RW_BARRIER))
170 170
171 #define SEL_IN 1 171 #define SEL_IN 1
172 #define SEL_OUT 2 172 #define SEL_OUT 2
173 #define SEL_EX 4 173 #define SEL_EX 4
174 174
175 /* public flags for file_system_type */ 175 /* public flags for file_system_type */
176 #define FS_REQUIRES_DEV 1 176 #define FS_REQUIRES_DEV 1
177 #define FS_BINARY_MOUNTDATA 2 177 #define FS_BINARY_MOUNTDATA 2
178 #define FS_HAS_SUBTYPE 4 178 #define FS_HAS_SUBTYPE 4
179 #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ 179 #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */
180 #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() 180 #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move()
181 * during rename() internally. 181 * during rename() internally.
182 */ 182 */
183 183
184 /* 184 /*
185 * These are the fs-independent mount-flags: up to 32 flags are supported 185 * These are the fs-independent mount-flags: up to 32 flags are supported
186 */ 186 */
187 #define MS_RDONLY 1 /* Mount read-only */ 187 #define MS_RDONLY 1 /* Mount read-only */
188 #define MS_NOSUID 2 /* Ignore suid and sgid bits */ 188 #define MS_NOSUID 2 /* Ignore suid and sgid bits */
189 #define MS_NODEV 4 /* Disallow access to device special files */ 189 #define MS_NODEV 4 /* Disallow access to device special files */
190 #define MS_NOEXEC 8 /* Disallow program execution */ 190 #define MS_NOEXEC 8 /* Disallow program execution */
191 #define MS_SYNCHRONOUS 16 /* Writes are synced at once */ 191 #define MS_SYNCHRONOUS 16 /* Writes are synced at once */
192 #define MS_REMOUNT 32 /* Alter flags of a mounted FS */ 192 #define MS_REMOUNT 32 /* Alter flags of a mounted FS */
193 #define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */ 193 #define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */
194 #define MS_DIRSYNC 128 /* Directory modifications are synchronous */ 194 #define MS_DIRSYNC 128 /* Directory modifications are synchronous */
195 #define MS_NOATIME 1024 /* Do not update access times. */ 195 #define MS_NOATIME 1024 /* Do not update access times. */
196 #define MS_NODIRATIME 2048 /* Do not update directory access times */ 196 #define MS_NODIRATIME 2048 /* Do not update directory access times */
197 #define MS_BIND 4096 197 #define MS_BIND 4096
198 #define MS_MOVE 8192 198 #define MS_MOVE 8192
199 #define MS_REC 16384 199 #define MS_REC 16384
200 #define MS_VERBOSE 32768 /* War is peace. Verbosity is silence. 200 #define MS_VERBOSE 32768 /* War is peace. Verbosity is silence.
201 MS_VERBOSE is deprecated. */ 201 MS_VERBOSE is deprecated. */
202 #define MS_SILENT 32768 202 #define MS_SILENT 32768
203 #define MS_POSIXACL (1<<16) /* VFS does not apply the umask */ 203 #define MS_POSIXACL (1<<16) /* VFS does not apply the umask */
204 #define MS_UNBINDABLE (1<<17) /* change to unbindable */ 204 #define MS_UNBINDABLE (1<<17) /* change to unbindable */
205 #define MS_PRIVATE (1<<18) /* change to private */ 205 #define MS_PRIVATE (1<<18) /* change to private */
206 #define MS_SLAVE (1<<19) /* change to slave */ 206 #define MS_SLAVE (1<<19) /* change to slave */
207 #define MS_SHARED (1<<20) /* change to shared */ 207 #define MS_SHARED (1<<20) /* change to shared */
208 #define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */ 208 #define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */
209 #define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ 209 #define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */
210 #define MS_I_VERSION (1<<23) /* Update inode I_version field */ 210 #define MS_I_VERSION (1<<23) /* Update inode I_version field */
211 #define MS_STRICTATIME (1<<24) /* Always perform atime updates */ 211 #define MS_STRICTATIME (1<<24) /* Always perform atime updates */
212 #define MS_ACTIVE (1<<30) 212 #define MS_ACTIVE (1<<30)
213 #define MS_NOUSER (1<<31) 213 #define MS_NOUSER (1<<31)
214 214
215 /* 215 /*
216 * Superblock flags that can be altered by MS_REMOUNT 216 * Superblock flags that can be altered by MS_REMOUNT
217 */ 217 */
218 #define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION) 218 #define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION)
219 219
220 /* 220 /*
221 * Old magic mount flag and mask 221 * Old magic mount flag and mask
222 */ 222 */
223 #define MS_MGC_VAL 0xC0ED0000 223 #define MS_MGC_VAL 0xC0ED0000
224 #define MS_MGC_MSK 0xffff0000 224 #define MS_MGC_MSK 0xffff0000
225 225
226 /* Inode flags - they have nothing to superblock flags now */ 226 /* Inode flags - they have nothing to superblock flags now */
227 227
228 #define S_SYNC 1 /* Writes are synced at once */ 228 #define S_SYNC 1 /* Writes are synced at once */
229 #define S_NOATIME 2 /* Do not update access times */ 229 #define S_NOATIME 2 /* Do not update access times */
230 #define S_APPEND 4 /* Append-only file */ 230 #define S_APPEND 4 /* Append-only file */
231 #define S_IMMUTABLE 8 /* Immutable file */ 231 #define S_IMMUTABLE 8 /* Immutable file */
232 #define S_DEAD 16 /* removed, but still open directory */ 232 #define S_DEAD 16 /* removed, but still open directory */
233 #define S_NOQUOTA 32 /* Inode is not counted to quota */ 233 #define S_NOQUOTA 32 /* Inode is not counted to quota */
234 #define S_DIRSYNC 64 /* Directory modifications are synchronous */ 234 #define S_DIRSYNC 64 /* Directory modifications are synchronous */
235 #define S_NOCMTIME 128 /* Do not update file c/mtime */ 235 #define S_NOCMTIME 128 /* Do not update file c/mtime */
236 #define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */ 236 #define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */
237 #define S_PRIVATE 512 /* Inode is fs-internal */ 237 #define S_PRIVATE 512 /* Inode is fs-internal */
238 238
239 /* 239 /*
240 * Note that nosuid etc flags are inode-specific: setting some file-system 240 * Note that nosuid etc flags are inode-specific: setting some file-system
241 * flags just means all the inodes inherit those flags by default. It might be 241 * flags just means all the inodes inherit those flags by default. It might be
242 * possible to override it selectively if you really wanted to with some 242 * possible to override it selectively if you really wanted to with some
243 * ioctl() that is not currently implemented. 243 * ioctl() that is not currently implemented.
244 * 244 *
245 * Exception: MS_RDONLY is always applied to the entire file system. 245 * Exception: MS_RDONLY is always applied to the entire file system.
246 * 246 *
247 * Unfortunately, it is possible to change a filesystems flags with it mounted 247 * Unfortunately, it is possible to change a filesystems flags with it mounted
248 * with files in use. This means that all of the inodes will not have their 248 * with files in use. This means that all of the inodes will not have their
249 * i_flags updated. Hence, i_flags no longer inherit the superblock mount 249 * i_flags updated. Hence, i_flags no longer inherit the superblock mount
250 * flags, so these have to be checked separately. -- rmk@arm.uk.linux.org 250 * flags, so these have to be checked separately. -- rmk@arm.uk.linux.org
251 */ 251 */
252 #define __IS_FLG(inode,flg) ((inode)->i_sb->s_flags & (flg)) 252 #define __IS_FLG(inode,flg) ((inode)->i_sb->s_flags & (flg))
253 253
254 #define IS_RDONLY(inode) ((inode)->i_sb->s_flags & MS_RDONLY) 254 #define IS_RDONLY(inode) ((inode)->i_sb->s_flags & MS_RDONLY)
255 #define IS_SYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS) || \ 255 #define IS_SYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS) || \
256 ((inode)->i_flags & S_SYNC)) 256 ((inode)->i_flags & S_SYNC))
257 #define IS_DIRSYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \ 257 #define IS_DIRSYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \
258 ((inode)->i_flags & (S_SYNC|S_DIRSYNC))) 258 ((inode)->i_flags & (S_SYNC|S_DIRSYNC)))
259 #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK) 259 #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK)
260 #define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME) 260 #define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME)
261 #define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION) 261 #define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION)
262 262
263 #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) 263 #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA)
264 #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) 264 #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND)
265 #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) 265 #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE)
266 #define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL) 266 #define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL)
267 267
268 #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) 268 #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD)
269 #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME) 269 #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME)
270 #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE) 270 #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE)
271 #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE) 271 #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE)
272 272
273 /* the read-only stuff doesn't really belong here, but any other place is 273 /* the read-only stuff doesn't really belong here, but any other place is
274 probably as bad and I don't want to create yet another include file. */ 274 probably as bad and I don't want to create yet another include file. */
275 275
276 #define BLKROSET _IO(0x12,93) /* set device read-only (0 = read-write) */ 276 #define BLKROSET _IO(0x12,93) /* set device read-only (0 = read-write) */
#define BLKROGET	_IO(0x12,94)	/* get read-only status (0 = read_write) */
#define BLKRRPART	_IO(0x12,95)	/* re-read the partition table */
#define BLKGETSIZE	_IO(0x12,96)	/* return device size / 512 (long *arg) */
#define BLKFLSBUF	_IO(0x12,97)	/* flush the buffer cache */
#define BLKRASET	_IO(0x12,98)	/* set read-ahead for the block device */
#define BLKRAGET	_IO(0x12,99)	/* get the current read-ahead setting */
#define BLKFRASET	_IO(0x12,100)	/* set filesystem (mm/filemap.c) read-ahead */
#define BLKFRAGET	_IO(0x12,101)	/* get filesystem (mm/filemap.c) read-ahead */
#define BLKSECTSET	_IO(0x12,102)	/* set max sectors per request (ll_rw_blk.c) */
#define BLKSECTGET	_IO(0x12,103)	/* get max sectors per request (ll_rw_blk.c) */
#define BLKSSZGET	_IO(0x12,104)	/* get the block device sector size */
#if 0
#define BLKPG		_IO(0x12,105)	/* see blkpg.h */

/* Some people are morons.  Do not use sizeof! */

#define BLKELVGET	_IOR(0x12,106,size_t)	/* elevator get */
#define BLKELVSET	_IOW(0x12,107,size_t)	/* elevator set */
/*
 * This was here just to show that the number is taken -
 * probably all these _IO(0x12,*) ioctls should be moved to blkpg.h.
 */
#endif
/* A jump here: 108-111 have been used for various private purposes. */
#define BLKBSZGET	_IOR(0x12,112,size_t)
#define BLKBSZSET	_IOW(0x12,113,size_t)
#define BLKGETSIZE64	_IOR(0x12,114,size_t)	/* return device size in bytes (u64 *arg) */
#define BLKTRACESETUP	_IOWR(0x12,115,struct blk_user_trace_setup)
#define BLKTRACESTART	_IO(0x12,116)
#define BLKTRACESTOP	_IO(0x12,117)
#define BLKTRACETEARDOWN	_IO(0x12,118)
#define BLKDISCARD	_IO(0x12,119)
#define BLKIOMIN	_IO(0x12,120)
#define BLKIOOPT	_IO(0x12,121)
#define BLKALIGNOFF	_IO(0x12,122)
#define BLKPBSZGET	_IO(0x12,123)
#define BLKDISCARDZEROES	_IO(0x12,124)
312 312
/* Block-mapping and freeze/thaw ioctls. */
#define BMAP_IOCTL	1		/* obsolete - kept for compatibility */
#define FIBMAP		_IO(0x00,1)	/* bmap access */
#define FIGETBSZ	_IO(0x00,2)	/* get the block size used for bmap */
#define FIFREEZE	_IOWR('X', 119, int)	/* Freeze */
#define FITHAW		_IOWR('X', 120, int)	/* Thaw */

/*
 * Inode flag / version ioctls.  The FS_IOC32_* variants use the same
 * group and number as their FS_IOC_* counterparts but are encoded with
 * an int argument instead of a long one.
 */
#define FS_IOC_GETFLAGS		_IOR('f', 1, long)
#define FS_IOC_SETFLAGS		_IOW('f', 2, long)
#define FS_IOC_GETVERSION	_IOR('v', 1, long)
#define FS_IOC_SETVERSION	_IOW('v', 2, long)
#define FS_IOC_FIEMAP		_IOWR('f', 11, struct fiemap)
#define FS_IOC32_GETFLAGS	_IOR('f', 1, int)
#define FS_IOC32_SETFLAGS	_IOW('f', 2, int)
#define FS_IOC32_GETVERSION	_IOR('v', 1, int)
#define FS_IOC32_SETVERSION	_IOW('v', 2, int)
328 328
/*
 * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
 */
#define FS_SECRM_FL		0x00000001	/* Secure deletion */
#define FS_UNRM_FL		0x00000002	/* Undelete */
#define FS_COMPR_FL		0x00000004	/* Compress file */
#define FS_SYNC_FL		0x00000008	/* Synchronous updates */
#define FS_IMMUTABLE_FL		0x00000010	/* Immutable file */
#define FS_APPEND_FL		0x00000020	/* Writes to file may only append */
#define FS_NODUMP_FL		0x00000040	/* Do not dump file */
#define FS_NOATIME_FL		0x00000080	/* Do not update atime */

/* Reserved for compression usage... */
#define FS_DIRTY_FL		0x00000100
#define FS_COMPRBLK_FL		0x00000200	/* One or more compressed clusters */
#define FS_NOCOMP_FL		0x00000400	/* Don't compress */
#define FS_ECOMPR_FL		0x00000800	/* Compression error */
/* End compression flags --- maybe not all used */

/* FS_BTREE_FL and FS_INDEX_FL are two names for the same bit. */
#define FS_BTREE_FL		0x00001000	/* btree format dir */
#define FS_INDEX_FL		0x00001000	/* hash-indexed directory */
#define FS_IMAGIC_FL		0x00002000	/* AFS directory */
#define FS_JOURNAL_DATA_FL	0x00004000	/* Reserved for ext3 */
#define FS_NOTAIL_FL		0x00008000	/* File tail should not be merged */
#define FS_DIRSYNC_FL		0x00010000	/* dirsync behaviour (directories only) */
#define FS_TOPDIR_FL		0x00020000	/* Top of directory hierarchies */
#define FS_EXTENT_FL		0x00080000	/* Extents */
#define FS_DIRECTIO_FL		0x00100000	/* Use direct i/o */
#define FS_RESERVED_FL		0x80000000	/* Reserved for ext2 lib */

/* Masks over the flag bits above. */
#define FS_FL_USER_VISIBLE	0x0003DFFF	/* User visible flags */
#define FS_FL_USER_MODIFIABLE	0x000380FF	/* User modifiable flags */
359 359
360 360
/* SYNC_FILE_RANGE_* flags: three distinct bits that may be or-ed together. */
#define SYNC_FILE_RANGE_WAIT_BEFORE	1
#define SYNC_FILE_RANGE_WRITE		2
#define SYNC_FILE_RANGE_WAIT_AFTER	4
364 364
365 #ifdef __KERNEL__ 365 #ifdef __KERNEL__
366 366
367 #include <linux/linkage.h> 367 #include <linux/linkage.h>
368 #include <linux/wait.h> 368 #include <linux/wait.h>
369 #include <linux/types.h> 369 #include <linux/types.h>
370 #include <linux/kdev_t.h> 370 #include <linux/kdev_t.h>
371 #include <linux/dcache.h> 371 #include <linux/dcache.h>
372 #include <linux/path.h> 372 #include <linux/path.h>
373 #include <linux/stat.h> 373 #include <linux/stat.h>
374 #include <linux/cache.h> 374 #include <linux/cache.h>
375 #include <linux/kobject.h> 375 #include <linux/kobject.h>
376 #include <linux/list.h> 376 #include <linux/list.h>
377 #include <linux/radix-tree.h> 377 #include <linux/radix-tree.h>
378 #include <linux/prio_tree.h> 378 #include <linux/prio_tree.h>
379 #include <linux/init.h> 379 #include <linux/init.h>
380 #include <linux/pid.h> 380 #include <linux/pid.h>
381 #include <linux/mutex.h> 381 #include <linux/mutex.h>
382 #include <linux/capability.h> 382 #include <linux/capability.h>
383 #include <linux/semaphore.h> 383 #include <linux/semaphore.h>
384 #include <linux/fiemap.h> 384 #include <linux/fiemap.h>
385 385
386 #include <asm/atomic.h> 386 #include <asm/atomic.h>
387 #include <asm/byteorder.h> 387 #include <asm/byteorder.h>
388 388
389 struct export_operations; 389 struct export_operations;
390 struct hd_geometry; 390 struct hd_geometry;
391 struct iovec; 391 struct iovec;
392 struct nameidata; 392 struct nameidata;
393 struct kiocb; 393 struct kiocb;
394 struct pipe_inode_info; 394 struct pipe_inode_info;
395 struct poll_table_struct; 395 struct poll_table_struct;
396 struct kstatfs; 396 struct kstatfs;
397 struct vm_area_struct; 397 struct vm_area_struct;
398 struct vfsmount; 398 struct vfsmount;
399 struct cred; 399 struct cred;
400 400
401 extern void __init inode_init(void); 401 extern void __init inode_init(void);
402 extern void __init inode_init_early(void); 402 extern void __init inode_init_early(void);
403 extern void __init files_init(unsigned long); 403 extern void __init files_init(unsigned long);
404 404
405 extern struct files_stat_struct files_stat; 405 extern struct files_stat_struct files_stat;
406 extern int get_max_files(void); 406 extern int get_max_files(void);
407 extern int sysctl_nr_open; 407 extern int sysctl_nr_open;
408 extern struct inodes_stat_t inodes_stat; 408 extern struct inodes_stat_t inodes_stat;
409 extern int leases_enable, lease_break_time; 409 extern int leases_enable, lease_break_time;
410 #ifdef CONFIG_DNOTIFY 410 #ifdef CONFIG_DNOTIFY
411 extern int dir_notify_enable; 411 extern int dir_notify_enable;
412 #endif 412 #endif
413 413
414 struct buffer_head; 414 struct buffer_head;
415 typedef int (get_block_t)(struct inode *inode, sector_t iblock, 415 typedef int (get_block_t)(struct inode *inode, sector_t iblock,
416 struct buffer_head *bh_result, int create); 416 struct buffer_head *bh_result, int create);
417 typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, 417 typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
418 ssize_t bytes, void *private); 418 ssize_t bytes, void *private);
419 419
/*
 * Attribute flags.  Or these together to describe which attributes
 * have been changed; each flag occupies its own bit.
 */
#define ATTR_MODE	(1 << 0)
#define ATTR_UID	(1 << 1)
#define ATTR_GID	(1 << 2)
#define ATTR_SIZE	(1 << 3)
#define ATTR_ATIME	(1 << 4)
#define ATTR_MTIME	(1 << 5)
#define ATTR_CTIME	(1 << 6)
#define ATTR_ATIME_SET	(1 << 7)
#define ATTR_MTIME_SET	(1 << 8)
#define ATTR_FORCE	(1 << 9)	/* Not a change, but a change it */
#define ATTR_ATTR_FLAG	(1 << 10)
#define ATTR_KILL_SUID	(1 << 11)
#define ATTR_KILL_SGID	(1 << 12)
#define ATTR_FILE	(1 << 13)
#define ATTR_KILL_PRIV	(1 << 14)
#define ATTR_OPEN	(1 << 15)	/* Truncating from open(O_TRUNC) */
#define ATTR_TIMES_SET	(1 << 16)
441 441
442 /* 442 /*
443 * This is the Inode Attributes structure, used for notify_change(). It 443 * This is the Inode Attributes structure, used for notify_change(). It
444 * uses the above definitions as flags, to know which values have changed. 444 * uses the above definitions as flags, to know which values have changed.
445 * Also, in this manner, a Filesystem can look at only the values it cares 445 * Also, in this manner, a Filesystem can look at only the values it cares
446 * about. Basically, these are the attributes that the VFS layer can 446 * about. Basically, these are the attributes that the VFS layer can
447 * request to change from the FS layer. 447 * request to change from the FS layer.
448 * 448 *
449 * Derek Atkins <warlord@MIT.EDU> 94-10-20 449 * Derek Atkins <warlord@MIT.EDU> 94-10-20
450 */ 450 */
451 struct iattr { 451 struct iattr {
452 unsigned int ia_valid; 452 unsigned int ia_valid;
453 umode_t ia_mode; 453 umode_t ia_mode;
454 uid_t ia_uid; 454 uid_t ia_uid;
455 gid_t ia_gid; 455 gid_t ia_gid;
456 loff_t ia_size; 456 loff_t ia_size;
457 struct timespec ia_atime; 457 struct timespec ia_atime;
458 struct timespec ia_mtime; 458 struct timespec ia_mtime;
459 struct timespec ia_ctime; 459 struct timespec ia_ctime;
460 460
461 /* 461 /*
462 * Not an attribute, but an auxilary info for filesystems wanting to 462 * Not an attribute, but an auxilary info for filesystems wanting to
463 * implement an ftruncate() like method. NOTE: filesystem should 463 * implement an ftruncate() like method. NOTE: filesystem should
464 * check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL). 464 * check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL).
465 */ 465 */
466 struct file *ia_file; 466 struct file *ia_file;
467 }; 467 };
468 468
469 /* 469 /*
470 * Includes for diskquotas. 470 * Includes for diskquotas.
471 */ 471 */
472 #include <linux/quota.h> 472 #include <linux/quota.h>
473 473
474 /** 474 /**
475 * enum positive_aop_returns - aop return codes with specific semantics 475 * enum positive_aop_returns - aop return codes with specific semantics
476 * 476 *
477 * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has 477 * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has
478 * completed, that the page is still locked, and 478 * completed, that the page is still locked, and
479 * should be considered active. The VM uses this hint 479 * should be considered active. The VM uses this hint
480 * to return the page to the active list -- it won't 480 * to return the page to the active list -- it won't
481 * be a candidate for writeback again in the near 481 * be a candidate for writeback again in the near
482 * future. Other callers must be careful to unlock 482 * future. Other callers must be careful to unlock
483 * the page if they get this return. Returned by 483 * the page if they get this return. Returned by
484 * writepage(); 484 * writepage();
485 * 485 *
486 * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has 486 * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has
487 * unlocked it and the page might have been truncated. 487 * unlocked it and the page might have been truncated.
488 * The caller should back up to acquiring a new page and 488 * The caller should back up to acquiring a new page and
489 * trying again. The aop will be taking reasonable 489 * trying again. The aop will be taking reasonable
490 * precautions not to livelock. If the caller held a page 490 * precautions not to livelock. If the caller held a page
491 * reference, it should drop it before retrying. Returned 491 * reference, it should drop it before retrying. Returned
492 * by readpage(). 492 * by readpage().
493 * 493 *
494 * address_space_operation functions return these large constants to indicate 494 * address_space_operation functions return these large constants to indicate
495 * special semantics to the caller. These are much larger than the bytes in a 495 * special semantics to the caller. These are much larger than the bytes in a
496 * page to allow for functions that return the number of bytes operated on in a 496 * page to allow for functions that return the number of bytes operated on in a
497 * given page. 497 * given page.
498 */ 498 */
499 499
enum positive_aop_returns {
	/* returned by writepage() */
	AOP_WRITEPAGE_ACTIVATE	= 0x80000,
	/* returned by readpage() */
	AOP_TRUNCATED_PAGE	= 0x80001,
};
504 504
/* AOP_FLAG_* bits. */
#define AOP_FLAG_UNINTERRUPTIBLE	0x0001	/* will not do a short write */
#define AOP_FLAG_CONT_EXPAND		0x0002	/* called from cont_expand */
/*
 * Used by a filesystem to direct helper code (eg buffer layer)
 * to clear GFP_FS from alloc.
 */
#define AOP_FLAG_NOFS			0x0004
510 510
511 /* 511 /*
512 * oh the beauties of C type declarations. 512 * oh the beauties of C type declarations.
513 */ 513 */
514 struct page; 514 struct page;
515 struct address_space; 515 struct address_space;
516 struct writeback_control; 516 struct writeback_control;
517 517
/*
 * Iterator state over an array of iovec segments.
 * iov_iter_init() fills in every member; iov_iter_count() reads count.
 */
struct iov_iter {
	const struct iovec	*iov;		/* segment array handed to iov_iter_init() */
	unsigned long		nr_segs;	/* number of segments handed to iov_iter_init() */
	size_t			iov_offset;	/* reset to 0 by iov_iter_init() */
	size_t			count;		/* value reported by iov_iter_count() */
};
524 524
525 size_t iov_iter_copy_from_user_atomic(struct page *page, 525 size_t iov_iter_copy_from_user_atomic(struct page *page,
526 struct iov_iter *i, unsigned long offset, size_t bytes); 526 struct iov_iter *i, unsigned long offset, size_t bytes);
527 size_t iov_iter_copy_from_user(struct page *page, 527 size_t iov_iter_copy_from_user(struct page *page,
528 struct iov_iter *i, unsigned long offset, size_t bytes); 528 struct iov_iter *i, unsigned long offset, size_t bytes);
529 void iov_iter_advance(struct iov_iter *i, size_t bytes); 529 void iov_iter_advance(struct iov_iter *i, size_t bytes);
530 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); 530 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
531 size_t iov_iter_single_seg_count(struct iov_iter *i); 531 size_t iov_iter_single_seg_count(struct iov_iter *i);
532 532
533 static inline void iov_iter_init(struct iov_iter *i, 533 static inline void iov_iter_init(struct iov_iter *i,
534 const struct iovec *iov, unsigned long nr_segs, 534 const struct iovec *iov, unsigned long nr_segs,
535 size_t count, size_t written) 535 size_t count, size_t written)
536 { 536 {
537 i->iov = iov; 537 i->iov = iov;
538 i->nr_segs = nr_segs; 538 i->nr_segs = nr_segs;
539 i->iov_offset = 0; 539 i->iov_offset = 0;
540 i->count = count + written; 540 i->count = count + written;
541 541
542 iov_iter_advance(i, written); 542 iov_iter_advance(i, written);
543 } 543 }
544 544
545 static inline size_t iov_iter_count(struct iov_iter *i) 545 static inline size_t iov_iter_count(struct iov_iter *i)
546 { 546 {
547 return i->count; 547 return i->count;
548 } 548 }
549 549
550 /* 550 /*
551 * "descriptor" for what we're up to with a read. 551 * "descriptor" for what we're up to with a read.
552 * This allows us to use the same read code yet 552 * This allows us to use the same read code yet
553 * have multiple different users of the data that 553 * have multiple different users of the data that
554 * we read from a file. 554 * we read from a file.
555 * 555 *
556 * The simplest case just copies the data to user 556 * The simplest case just copies the data to user
557 * mode. 557 * mode.
558 */ 558 */
559 typedef struct { 559 typedef struct {
560 size_t written; 560 size_t written;
561 size_t count; 561 size_t count;
562 union { 562 union {
563 char __user *buf; 563 char __user *buf;
564 void *data; 564 void *data;
565 } arg; 565 } arg;
566 int error; 566 int error;
567 } read_descriptor_t; 567 } read_descriptor_t;
568 568
569 typedef int (*read_actor_t)(read_descriptor_t *, struct page *, 569 typedef int (*read_actor_t)(read_descriptor_t *, struct page *,
570 unsigned long, unsigned long); 570 unsigned long, unsigned long);
571 571
572 struct address_space_operations { 572 struct address_space_operations {
573 int (*writepage)(struct page *page, struct writeback_control *wbc); 573 int (*writepage)(struct page *page, struct writeback_control *wbc);
574 int (*readpage)(struct file *, struct page *); 574 int (*readpage)(struct file *, struct page *);
575 void (*sync_page)(struct page *); 575 void (*sync_page)(struct page *);
576 576
577 /* Write back some dirty pages from this mapping. */ 577 /* Write back some dirty pages from this mapping. */
578 int (*writepages)(struct address_space *, struct writeback_control *); 578 int (*writepages)(struct address_space *, struct writeback_control *);
579 579
580 /* Set a page dirty. Return true if this dirtied it */ 580 /* Set a page dirty. Return true if this dirtied it */
581 int (*set_page_dirty)(struct page *page); 581 int (*set_page_dirty)(struct page *page);
582 582
583 int (*readpages)(struct file *filp, struct address_space *mapping, 583 int (*readpages)(struct file *filp, struct address_space *mapping,
584 struct list_head *pages, unsigned nr_pages); 584 struct list_head *pages, unsigned nr_pages);
585 585
586 int (*write_begin)(struct file *, struct address_space *mapping, 586 int (*write_begin)(struct file *, struct address_space *mapping,
587 loff_t pos, unsigned len, unsigned flags, 587 loff_t pos, unsigned len, unsigned flags,
588 struct page **pagep, void **fsdata); 588 struct page **pagep, void **fsdata);
589 int (*write_end)(struct file *, struct address_space *mapping, 589 int (*write_end)(struct file *, struct address_space *mapping,
590 loff_t pos, unsigned len, unsigned copied, 590 loff_t pos, unsigned len, unsigned copied,
591 struct page *page, void *fsdata); 591 struct page *page, void *fsdata);
592 592
593 /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ 593 /* Unfortunately this kludge is needed for FIBMAP. Don't use it */
594 sector_t (*bmap)(struct address_space *, sector_t); 594 sector_t (*bmap)(struct address_space *, sector_t);
595 void (*invalidatepage) (struct page *, unsigned long); 595 void (*invalidatepage) (struct page *, unsigned long);
596 int (*releasepage) (struct page *, gfp_t); 596 int (*releasepage) (struct page *, gfp_t);
597 ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, 597 ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
598 loff_t offset, unsigned long nr_segs); 598 loff_t offset, unsigned long nr_segs);
599 int (*get_xip_mem)(struct address_space *, pgoff_t, int, 599 int (*get_xip_mem)(struct address_space *, pgoff_t, int,
600 void **, unsigned long *); 600 void **, unsigned long *);
601 /* migrate the contents of a page to the specified target */ 601 /* migrate the contents of a page to the specified target */
602 int (*migratepage) (struct address_space *, 602 int (*migratepage) (struct address_space *,
603 struct page *, struct page *); 603 struct page *, struct page *);
604 int (*launder_page) (struct page *); 604 int (*launder_page) (struct page *);
605 int (*is_partially_uptodate) (struct page *, read_descriptor_t *, 605 int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
606 unsigned long); 606 unsigned long);
607 int (*error_remove_page)(struct address_space *, struct page *); 607 int (*error_remove_page)(struct address_space *, struct page *);
608 }; 608 };
609 609
610 /* 610 /*
611 * pagecache_write_begin/pagecache_write_end must be used by general code 611 * pagecache_write_begin/pagecache_write_end must be used by general code
612 * to write into the pagecache. 612 * to write into the pagecache.
613 */ 613 */
614 int pagecache_write_begin(struct file *, struct address_space *mapping, 614 int pagecache_write_begin(struct file *, struct address_space *mapping,
615 loff_t pos, unsigned len, unsigned flags, 615 loff_t pos, unsigned len, unsigned flags,
616 struct page **pagep, void **fsdata); 616 struct page **pagep, void **fsdata);
617 617
618 int pagecache_write_end(struct file *, struct address_space *mapping, 618 int pagecache_write_end(struct file *, struct address_space *mapping,
619 loff_t pos, unsigned len, unsigned copied, 619 loff_t pos, unsigned len, unsigned copied,
620 struct page *page, void *fsdata); 620 struct page *page, void *fsdata);
621 621
622 struct backing_dev_info; 622 struct backing_dev_info;
623 struct address_space { 623 struct address_space {
624 struct inode *host; /* owner: inode, block_device */ 624 struct inode *host; /* owner: inode, block_device */
625 struct radix_tree_root page_tree; /* radix tree of all pages */ 625 struct radix_tree_root page_tree; /* radix tree of all pages */
626 spinlock_t tree_lock; /* and lock protecting it */ 626 spinlock_t tree_lock; /* and lock protecting it */
627 unsigned int i_mmap_writable;/* count VM_SHARED mappings */ 627 unsigned int i_mmap_writable;/* count VM_SHARED mappings */
628 struct prio_tree_root i_mmap; /* tree of private and shared mappings */ 628 struct prio_tree_root i_mmap; /* tree of private and shared mappings */
629 struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ 629 struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
630 spinlock_t i_mmap_lock; /* protect tree, count, list */ 630 spinlock_t i_mmap_lock; /* protect tree, count, list */
631 unsigned int truncate_count; /* Cover race condition with truncate */ 631 unsigned int truncate_count; /* Cover race condition with truncate */
632 unsigned long nrpages; /* number of total pages */ 632 unsigned long nrpages; /* number of total pages */
633 pgoff_t writeback_index;/* writeback starts here */ 633 pgoff_t writeback_index;/* writeback starts here */
634 const struct address_space_operations *a_ops; /* methods */ 634 const struct address_space_operations *a_ops; /* methods */
635 unsigned long flags; /* error bits/gfp mask */ 635 unsigned long flags; /* error bits/gfp mask */
636 struct backing_dev_info *backing_dev_info; /* device readahead, etc */ 636 struct backing_dev_info *backing_dev_info; /* device readahead, etc */
637 spinlock_t private_lock; /* for use by the address_space */ 637 spinlock_t private_lock; /* for use by the address_space */
638 struct list_head private_list; /* ditto */ 638 struct list_head private_list; /* ditto */
639 struct address_space *assoc_mapping; /* ditto */ 639 struct address_space *assoc_mapping; /* ditto */
640 } __attribute__((aligned(sizeof(long)))); 640 } __attribute__((aligned(sizeof(long))));
641 /* 641 /*
642 * On most architectures that alignment is already the case; but 642 * On most architectures that alignment is already the case; but
643 * must be enforced here for CRIS, to let the least signficant bit 643 * must be enforced here for CRIS, to let the least signficant bit
644 * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON. 644 * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON.
645 */ 645 */
646 646
647 struct block_device { 647 struct block_device {
648 dev_t bd_dev; /* not a kdev_t - it's a search key */ 648 dev_t bd_dev; /* not a kdev_t - it's a search key */
649 struct inode * bd_inode; /* will die */ 649 struct inode * bd_inode; /* will die */
650 struct super_block * bd_super; 650 struct super_block * bd_super;
651 int bd_openers; 651 int bd_openers;
652 struct mutex bd_mutex; /* open/close mutex */ 652 struct mutex bd_mutex; /* open/close mutex */
653 struct list_head bd_inodes; 653 struct list_head bd_inodes;
654 void * bd_holder; 654 void * bd_holder;
655 int bd_holders; 655 int bd_holders;
656 #ifdef CONFIG_SYSFS 656 #ifdef CONFIG_SYSFS
657 struct list_head bd_holder_list; 657 struct list_head bd_holder_list;
658 #endif 658 #endif
659 struct block_device * bd_contains; 659 struct block_device * bd_contains;
660 unsigned bd_block_size; 660 unsigned bd_block_size;
661 struct hd_struct * bd_part; 661 struct hd_struct * bd_part;
662 /* number of times partitions within this device have been opened. */ 662 /* number of times partitions within this device have been opened. */
663 unsigned bd_part_count; 663 unsigned bd_part_count;
664 int bd_invalidated; 664 int bd_invalidated;
665 struct gendisk * bd_disk; 665 struct gendisk * bd_disk;
666 struct list_head bd_list; 666 struct list_head bd_list;
667 /* 667 /*
668 * Private data. You must have bd_claim'ed the block_device 668 * Private data. You must have bd_claim'ed the block_device
669 * to use this. NOTE: bd_claim allows an owner to claim 669 * to use this. NOTE: bd_claim allows an owner to claim
670 * the same device multiple times, the owner must take special 670 * the same device multiple times, the owner must take special
671 * care to not mess up bd_private for that case. 671 * care to not mess up bd_private for that case.
672 */ 672 */
673 unsigned long bd_private; 673 unsigned long bd_private;
674 674
675 /* The counter of freeze processes */ 675 /* The counter of freeze processes */
676 int bd_fsfreeze_count; 676 int bd_fsfreeze_count;
677 /* Mutex for freeze */ 677 /* Mutex for freeze */
678 struct mutex bd_fsfreeze_mutex; 678 struct mutex bd_fsfreeze_mutex;
679 }; 679 };
680 680
/*
 * Radix-tree tags used to mark dirty and writeback pages within the
 * pagecache radix trees.
 */
#define PAGECACHE_TAG_DIRTY		0
#define PAGECACHE_TAG_WRITEBACK		1
687 687
688 int mapping_tagged(struct address_space *mapping, int tag); 688 int mapping_tagged(struct address_space *mapping, int tag);
689 689
690 /* 690 /*
691 * Might pages of this file be mapped into userspace? 691 * Might pages of this file be mapped into userspace?
692 */ 692 */
693 static inline int mapping_mapped(struct address_space *mapping) 693 static inline int mapping_mapped(struct address_space *mapping)
694 { 694 {
695 return !prio_tree_empty(&mapping->i_mmap) || 695 return !prio_tree_empty(&mapping->i_mmap) ||
696 !list_empty(&mapping->i_mmap_nonlinear); 696 !list_empty(&mapping->i_mmap_nonlinear);
697 } 697 }
698 698
699 /* 699 /*
700 * Might pages of this file have been modified in userspace? 700 * Might pages of this file have been modified in userspace?
701 * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff 701 * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff
702 * marks vma as VM_SHARED if it is shared, and the file was opened for 702 * marks vma as VM_SHARED if it is shared, and the file was opened for
703 * writing i.e. vma may be mprotected writable even if now readonly. 703 * writing i.e. vma may be mprotected writable even if now readonly.
704 */ 704 */
705 static inline int mapping_writably_mapped(struct address_space *mapping) 705 static inline int mapping_writably_mapped(struct address_space *mapping)
706 { 706 {
707 return mapping->i_mmap_writable != 0; 707 return mapping->i_mmap_writable != 0;
708 } 708 }
709 709
/*
 * Use sequence counter to get consistent i_size on 32-bit processors.
 *
 * On 32-bit SMP builds __NEED_I_SIZE_ORDERED is defined (struct inode
 * tests it to add its i_size_seqcount member) and
 * i_size_ordered_init(inode) initialises that counter.  On every other
 * configuration the macro expands to an empty statement.
 *
 * The macro argument is parenthesised so that any pointer expression,
 * not just a plain identifier, expands correctly.
 */
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
#include <linux/seqlock.h>
#define __NEED_I_SIZE_ORDERED
#define i_size_ordered_init(inode) seqcount_init(&(inode)->i_size_seqcount)
#else
#define i_size_ordered_init(inode) do { } while (0)
#endif
720 720
struct posix_acl;
/*
 * Sentinel pointer value, distinct from NULL.
 * NOTE(review): presumably marks an ACL slot that has not been looked up
 * yet - confirm against the users of this macro.
 */
#define ACL_NOT_CACHED	((void *)(-1))
723 723
/*
 * struct inode - per-inode state.
 *
 * Several members exist only under a configuration option (quota,
 * fsnotify, inotify, security, POSIX ACLs) or, for i_size_seqcount, only
 * when __NEED_I_SIZE_ORDERED is defined.
 */
struct inode {
	struct hlist_node	i_hash;
	struct list_head	i_list;		/* backing dev IO list */
	struct list_head	i_sb_list;
	struct list_head	i_dentry;
	unsigned long		i_ino;
	atomic_t		i_count;
	unsigned int		i_nlink;
	uid_t			i_uid;
	gid_t			i_gid;
	dev_t			i_rdev;		/* decoded by iminor()/imajor() */
	unsigned int		i_blkbits;
	u64			i_version;
	loff_t			i_size;		/* use i_size_read()/i_size_write() */
#ifdef __NEED_I_SIZE_ORDERED
	seqcount_t		i_size_seqcount; /* orders i_size accesses on 32-bit SMP */
#endif
	struct timespec		i_atime;
	struct timespec		i_mtime;
	struct timespec		i_ctime;
	blkcnt_t		i_blocks;
	unsigned short          i_bytes;
	umode_t			i_mode;
	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */
	struct mutex		i_mutex;	/* lockdep subclasses: enum inode_i_mutex_lock_class */
	struct rw_semaphore	i_alloc_sem;
	const struct inode_operations	*i_op;
	const struct file_operations	*i_fop;	/* former ->i_op->default_file_ops */
	struct super_block	*i_sb;
	struct file_lock	*i_flock;
	struct address_space	*i_mapping;
	struct address_space	i_data;
#ifdef CONFIG_QUOTA
	struct dquot		*i_dquot[MAXQUOTAS];
#endif
	struct list_head	i_devices;
	/* Anonymous union: the three pointers share storage. */
	union {
		struct pipe_inode_info	*i_pipe;
		struct block_device	*i_bdev;
		struct cdev		*i_cdev;
	};

	__u32			i_generation;

#ifdef CONFIG_FSNOTIFY
	__u32			i_fsnotify_mask; /* all events this inode cares about */
	struct hlist_head	i_fsnotify_mark_entries; /* fsnotify mark entries */
#endif

#ifdef CONFIG_INOTIFY
	struct list_head	inotify_watches; /* watches on this inode */
	struct mutex		inotify_mutex;	/* protects the watches list */
#endif

	unsigned long		i_state;
	unsigned long		dirtied_when;	/* jiffies of first dirtying */

	unsigned int		i_flags;

	atomic_t		i_writecount;
#ifdef CONFIG_SECURITY
	void			*i_security;
#endif
#ifdef CONFIG_FS_POSIX_ACL
	struct posix_acl	*i_acl;
	struct posix_acl	*i_default_acl;
#endif
	void			*i_private; /* fs or device private pointer */
};
793 793
/*
 * inode->i_mutex nesting subclasses for the lock validator:
 *
 * 0: the object of the current VFS operation
 * 1: parent
 * 2: child/target
 * 3: xattr
 * 4: quota file
 *
 * The locking order between these classes is
 * parent -> child -> normal -> xattr -> quota
 */
enum inode_i_mutex_lock_class
{
	I_MUTEX_NORMAL,
	I_MUTEX_PARENT,
	I_MUTEX_CHILD,
	I_MUTEX_XATTR,
	I_MUTEX_QUOTA
};
813 813
814 /* 814 /*
815 * NOTE: in a 32bit arch with a preemptable kernel and 815 * NOTE: in a 32bit arch with a preemptable kernel and
816 * an UP compile the i_size_read/write must be atomic 816 * an UP compile the i_size_read/write must be atomic
817 * with respect to the local cpu (unlike with preempt disabled), 817 * with respect to the local cpu (unlike with preempt disabled),
818 * but they don't need to be atomic with respect to other cpus like in 818 * but they don't need to be atomic with respect to other cpus like in
819 * true SMP (so they need either to either locally disable irq around 819 * true SMP (so they need either to either locally disable irq around
820 * the read or for example on x86 they can be still implemented as a 820 * the read or for example on x86 they can be still implemented as a
821 * cmpxchg8b without the need of the lock prefix). For SMP compiles 821 * cmpxchg8b without the need of the lock prefix). For SMP compiles
822 * and 64bit archs it makes no difference if preempt is enabled or not. 822 * and 64bit archs it makes no difference if preempt is enabled or not.
823 */ 823 */
824 static inline loff_t i_size_read(const struct inode *inode) 824 static inline loff_t i_size_read(const struct inode *inode)
825 { 825 {
826 #if BITS_PER_LONG==32 && defined(CONFIG_SMP) 826 #if BITS_PER_LONG==32 && defined(CONFIG_SMP)
827 loff_t i_size; 827 loff_t i_size;
828 unsigned int seq; 828 unsigned int seq;
829 829
830 do { 830 do {
831 seq = read_seqcount_begin(&inode->i_size_seqcount); 831 seq = read_seqcount_begin(&inode->i_size_seqcount);
832 i_size = inode->i_size; 832 i_size = inode->i_size;
833 } while (read_seqcount_retry(&inode->i_size_seqcount, seq)); 833 } while (read_seqcount_retry(&inode->i_size_seqcount, seq));
834 return i_size; 834 return i_size;
835 #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) 835 #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
836 loff_t i_size; 836 loff_t i_size;
837 837
838 preempt_disable(); 838 preempt_disable();
839 i_size = inode->i_size; 839 i_size = inode->i_size;
840 preempt_enable(); 840 preempt_enable();
841 return i_size; 841 return i_size;
842 #else 842 #else
843 return inode->i_size; 843 return inode->i_size;
844 #endif 844 #endif
845 } 845 }
846 846
847 /* 847 /*
848 * NOTE: unlike i_size_read(), i_size_write() does need locking around it 848 * NOTE: unlike i_size_read(), i_size_write() does need locking around it
849 * (normally i_mutex), otherwise on 32bit/SMP an update of i_size_seqcount 849 * (normally i_mutex), otherwise on 32bit/SMP an update of i_size_seqcount
850 * can be lost, resulting in subsequent i_size_read() calls spinning forever. 850 * can be lost, resulting in subsequent i_size_read() calls spinning forever.
851 */ 851 */
852 static inline void i_size_write(struct inode *inode, loff_t i_size) 852 static inline void i_size_write(struct inode *inode, loff_t i_size)
853 { 853 {
854 #if BITS_PER_LONG==32 && defined(CONFIG_SMP) 854 #if BITS_PER_LONG==32 && defined(CONFIG_SMP)
855 write_seqcount_begin(&inode->i_size_seqcount); 855 write_seqcount_begin(&inode->i_size_seqcount);
856 inode->i_size = i_size; 856 inode->i_size = i_size;
857 write_seqcount_end(&inode->i_size_seqcount); 857 write_seqcount_end(&inode->i_size_seqcount);
858 #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) 858 #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
859 preempt_disable(); 859 preempt_disable();
860 inode->i_size = i_size; 860 inode->i_size = i_size;
861 preempt_enable(); 861 preempt_enable();
862 #else 862 #else
863 inode->i_size = i_size; 863 inode->i_size = i_size;
864 #endif 864 #endif
865 } 865 }
866 866
867 static inline unsigned iminor(const struct inode *inode) 867 static inline unsigned iminor(const struct inode *inode)
868 { 868 {
869 return MINOR(inode->i_rdev); 869 return MINOR(inode->i_rdev);
870 } 870 }
871 871
872 static inline unsigned imajor(const struct inode *inode) 872 static inline unsigned imajor(const struct inode *inode)
873 { 873 {
874 return MAJOR(inode->i_rdev); 874 return MAJOR(inode->i_rdev);
875 } 875 }
876 876
877 extern struct block_device *I_BDEV(struct inode *inode); 877 extern struct block_device *I_BDEV(struct inode *inode);
878 878
/*
 * Owner information for asynchronous I/O signalling.  Embedded in
 * struct file as f_owner and passed to send_sigio().
 */
struct fown_struct {
	rwlock_t lock;          /* protects pid, uid, euid fields */
	struct pid *pid;	/* pid or -pgrp where SIGIO should be sent */
	enum pid_type pid_type;	/* Kind of process group SIGIO should be sent to */
	uid_t uid, euid;	/* uid/euid of process setting the owner */
	int signum;		/* posix.1b rt signal to be delivered on IO */
};
886 886
/*
 * Track a single file's readahead state.
 *
 * Embedded in struct file as f_ra.  ra_has_index() treats
 * [start, start + size) as the current readahead window.
 */
struct file_ra_state {
	pgoff_t start;			/* where readahead started */
	unsigned int size;		/* # of readahead pages */
	unsigned int async_size;	/* do asynchronous readahead when
					   there are only # of pages ahead */

	unsigned int ra_pages;		/* Maximum readahead window */
	unsigned int mmap_miss;		/* Cache miss stat for mmap accesses */
	loff_t prev_pos;		/* Cache last read() position */
};
900 900
901 /* 901 /*
902 * Check if @index falls in the readahead windows. 902 * Check if @index falls in the readahead windows.
903 */ 903 */
904 static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) 904 static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
905 { 905 {
906 return (index >= ra->start && 906 return (index >= ra->start &&
907 index < ra->start + ra->size); 907 index < ra->start + ra->size);
908 } 908 }
909 909
/*
 * Bits kept in file->f_mnt_write_state (CONFIG_DEBUG_WRITECOUNT only):
 * TAKEN is set by file_take_write(), RELEASED is OR-ed in by
 * file_release_write().
 */
#define FILE_MNT_WRITE_TAKEN	1
#define FILE_MNT_WRITE_RELEASED	2
912 912
/*
 * struct file - state of one open file.
 *
 * f_count is manipulated through get_file()/file_count();
 * f_mnt_write_state exists only with CONFIG_DEBUG_WRITECOUNT and holds the
 * FILE_MNT_WRITE_* bits.
 */
struct file {
	/*
	 * fu_list becomes invalid after file_free is called and queued via
	 * fu_rcuhead for RCU freeing
	 */
	union {
		struct list_head	fu_list;
		struct rcu_head 	fu_rcuhead;
	} f_u;
	struct path		f_path;
	/* Shorthand accessors for the two members of f_path. */
#define f_dentry	f_path.dentry
#define f_vfsmnt	f_path.mnt
	const struct file_operations	*f_op;
	spinlock_t		f_lock;  /* f_ep_links, f_flags, no IRQ */
	atomic_long_t		f_count;
	unsigned int 		f_flags;
	fmode_t			f_mode;
	loff_t			f_pos;
	struct fown_struct	f_owner;
	const struct cred	*f_cred;
	struct file_ra_state	f_ra;

	u64			f_version;
#ifdef CONFIG_SECURITY
	void			*f_security;
#endif
	/* needed for tty driver, and maybe others */
	void			*private_data;

#ifdef CONFIG_EPOLL
	/* Used by fs/eventpoll.c to link all the hooks to this file */
	struct list_head	f_ep_links;
#endif /* #ifdef CONFIG_EPOLL */
	struct address_space	*f_mapping;
#ifdef CONFIG_DEBUG_WRITECOUNT
	unsigned long f_mnt_write_state;
#endif
};
951 extern spinlock_t files_lock; 951 extern spinlock_t files_lock;
952 #define file_list_lock() spin_lock(&files_lock); 952 #define file_list_lock() spin_lock(&files_lock);
953 #define file_list_unlock() spin_unlock(&files_lock); 953 #define file_list_unlock() spin_unlock(&files_lock);
954 954
/* get_file(x): atomically increment x->f_count. */
#define get_file(x)	atomic_long_inc(&(x)->f_count)
/* file_count(x): atomically read x->f_count. */
#define file_count(x)	atomic_long_read(&(x)->f_count)
957 957
958 #ifdef CONFIG_DEBUG_WRITECOUNT 958 #ifdef CONFIG_DEBUG_WRITECOUNT
959 static inline void file_take_write(struct file *f) 959 static inline void file_take_write(struct file *f)
960 { 960 {
961 WARN_ON(f->f_mnt_write_state != 0); 961 WARN_ON(f->f_mnt_write_state != 0);
962 f->f_mnt_write_state = FILE_MNT_WRITE_TAKEN; 962 f->f_mnt_write_state = FILE_MNT_WRITE_TAKEN;
963 } 963 }
964 static inline void file_release_write(struct file *f) 964 static inline void file_release_write(struct file *f)
965 { 965 {
966 f->f_mnt_write_state |= FILE_MNT_WRITE_RELEASED; 966 f->f_mnt_write_state |= FILE_MNT_WRITE_RELEASED;
967 } 967 }
968 static inline void file_reset_write(struct file *f) 968 static inline void file_reset_write(struct file *f)
969 { 969 {
970 f->f_mnt_write_state = 0; 970 f->f_mnt_write_state = 0;
971 } 971 }
972 static inline void file_check_state(struct file *f) 972 static inline void file_check_state(struct file *f)
973 { 973 {
974 /* 974 /*
975 * At this point, either both or neither of these bits 975 * At this point, either both or neither of these bits
976 * should be set. 976 * should be set.
977 */ 977 */
978 WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN); 978 WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN);
979 WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_RELEASED); 979 WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_RELEASED);
980 } 980 }
981 static inline int file_check_writeable(struct file *f) 981 static inline int file_check_writeable(struct file *f)
982 { 982 {
983 if (f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN) 983 if (f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN)
984 return 0; 984 return 0;
985 printk(KERN_WARNING "writeable file with no " 985 printk(KERN_WARNING "writeable file with no "
986 "mnt_want_write()\n"); 986 "mnt_want_write()\n");
987 WARN_ON(1); 987 WARN_ON(1);
988 return -EINVAL; 988 return -EINVAL;
989 } 989 }
990 #else /* !CONFIG_DEBUG_WRITECOUNT */ 990 #else /* !CONFIG_DEBUG_WRITECOUNT */
/* Write-count debugging is compiled out: the helpers below do nothing. */
static inline void file_take_write(struct file *filp)
{
}

static inline void file_release_write(struct file *filp)
{
}

static inline void file_reset_write(struct file *filp)
{
}

static inline void file_check_state(struct file *filp)
{
}

/* Without debugging every file is reported as writeable (returns 0). */
static inline int file_check_writeable(struct file *filp)
{
	return 0;
}
999 #endif /* CONFIG_DEBUG_WRITECOUNT */ 999 #endif /* CONFIG_DEBUG_WRITECOUNT */
1000 1000
/* 2^31 - 1. */
#define MAX_NON_LFS	((1UL<<31) - 1)

/*
 * Page cache limit.  Filesystems should put this into their s_maxbytes
 * limits, otherwise bad things can happen in the VM.
 *
 * 32-bit: PAGE_CACHE_SIZE * 2^(BITS_PER_LONG-1) - 1, computed in u64.
 * 64-bit: 2^63 - 1.
 */
#if BITS_PER_LONG==32
#define MAX_LFS_FILESIZE	(((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
#elif BITS_PER_LONG==64
#define MAX_LFS_FILESIZE 	0x7fffffffffffffffUL
#endif
1010 1010
/*
 * File lock flag bits.  Each value is a distinct power of two; 4 is not
 * assigned here.
 * NOTE(review): presumably stored in file_lock::fl_flags (an unsigned char,
 * so 128 is the highest bit that fits) - confirm against fs/locks.c.
 */
#define FL_POSIX	1
#define FL_FLOCK	2
#define FL_ACCESS	8	/* not trying to lock, just looking */
#define FL_EXISTS	16	/* when unlocking, test for existence */
#define FL_LEASE	32	/* lease held on this file */
#define FL_CLOSE	64	/* unlock on close */
#define FL_SLEEP	128	/* A blocking lock */

/*
 * Special return value from posix_lock_file() and vfs_lock_file() for
 * asynchronous locking.
 */
#define FILE_LOCK_DEFERRED 1
1024 1024
1025 /* 1025 /*
1026 * The POSIX file lock owner is determined by 1026 * The POSIX file lock owner is determined by
1027 * the "struct files_struct" in the thread group 1027 * the "struct files_struct" in the thread group
1028 * (or NULL for no owner - BSD locks). 1028 * (or NULL for no owner - BSD locks).
1029 * 1029 *
1030 * Lockd stuffs a "host" pointer into this. 1030 * Lockd stuffs a "host" pointer into this.
1031 */ 1031 */
1032 typedef struct files_struct *fl_owner_t; 1032 typedef struct files_struct *fl_owner_t;
1033 1033
/*
 * Filesystem callbacks attached to a lock; referenced from
 * file_lock::fl_ops.
 */
struct file_lock_operations {
	void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
	void (*fl_release_private)(struct file_lock *);
};
1038 1038
/*
 * Lock manager callbacks attached to a lock; referenced from
 * file_lock::fl_lmops.
 */
struct lock_manager_operations {
	int (*fl_compare_owner)(struct file_lock *, struct file_lock *);
	void (*fl_notify)(struct file_lock *);	/* unblock callback */
	int (*fl_grant)(struct file_lock *, struct file_lock *, int);
	void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
	void (*fl_release_private)(struct file_lock *);
	void (*fl_break)(struct file_lock *);
	int (*fl_mylease)(struct file_lock *, struct file_lock *);
	int (*fl_change)(struct file_lock **, int);
};
1049 1049
/*
 * Handle passed to locks_start_grace()/locks_end_grace(); it only carries
 * list linkage.
 */
struct lock_manager {
	struct list_head list;
};
1053 1053
1054 void locks_start_grace(struct lock_manager *); 1054 void locks_start_grace(struct lock_manager *);
1055 void locks_end_grace(struct lock_manager *); 1055 void locks_end_grace(struct lock_manager *);
1056 int locks_in_grace(void); 1056 int locks_in_grace(void);
1057 1057
1058 /* that will die - we need it for nfs_lock_info */ 1058 /* that will die - we need it for nfs_lock_info */
1059 #include <linux/nfs_fs_i.h> 1059 #include <linux/nfs_fs_i.h>
1060 1060
/*
 * struct file_lock - one lock or lease.
 *
 * fl_ops and fl_lmops point to optional callback tables.  fl_u holds
 * per-user private data: NFS, NFSv4 or AFS, sharing storage.
 */
struct file_lock {
	struct file_lock *fl_next;	/* singly linked list for this inode  */
	struct list_head fl_link;	/* doubly linked list of all locks */
	struct list_head fl_block;	/* circular list of blocked processes */
	fl_owner_t fl_owner;
	unsigned char fl_flags;
	unsigned char fl_type;
	unsigned int fl_pid;
	struct pid *fl_nspid;
	wait_queue_head_t fl_wait;
	struct file *fl_file;
	loff_t fl_start;
	loff_t fl_end;

	struct fasync_struct *	fl_fasync; /* for lease break notifications */
	unsigned long fl_break_time;	/* for nonblocking lease breaks */

	const struct file_lock_operations *fl_ops;	/* Callbacks for filesystems */
	const struct lock_manager_operations *fl_lmops;	/* Callbacks for lockmanagers */
	union {
		struct nfs_lock_info	nfs_fl;
		struct nfs4_lock_info	nfs4_fl;
		struct {
			struct list_head link;	/* link in AFS vnode's pending_locks list */
			int state;		/* state of grant or error if -ve */
		} afs;
	} fl_u;
};
1089 1089
/*
 * The following constants reflect the upper bound of the file/locking space.
 *
 * INT_LIMIT(x) yields the largest positive value of the signed integer
 * type @x (all bits set except the top one), with type @x.  The value is
 * built in unsigned long long and then converted, which avoids shifting a
 * 1 into the sign bit of a signed type (undefined behaviour) and also
 * gives the right answer for types narrower than int.
 */
#ifndef OFFSET_MAX
#define INT_LIMIT(x)	((x)((1ULL << (sizeof(x)*8 - 1)) - 1))
#define OFFSET_MAX	INT_LIMIT(loff_t)
#define OFFT_OFFSET_MAX	INT_LIMIT(off_t)
#endif
1096 1096
1097 #include <linux/fcntl.h> 1097 #include <linux/fcntl.h>
1098 1098
1099 extern void send_sigio(struct fown_struct *fown, int fd, int band); 1099 extern void send_sigio(struct fown_struct *fown, int fd, int band);
1100 1100
1101 #ifdef CONFIG_FILE_LOCKING 1101 #ifdef CONFIG_FILE_LOCKING
1102 extern int fcntl_getlk(struct file *, struct flock __user *); 1102 extern int fcntl_getlk(struct file *, struct flock __user *);
1103 extern int fcntl_setlk(unsigned int, struct file *, unsigned int, 1103 extern int fcntl_setlk(unsigned int, struct file *, unsigned int,
1104 struct flock __user *); 1104 struct flock __user *);
1105 1105
1106 #if BITS_PER_LONG == 32 1106 #if BITS_PER_LONG == 32
1107 extern int fcntl_getlk64(struct file *, struct flock64 __user *); 1107 extern int fcntl_getlk64(struct file *, struct flock64 __user *);
1108 extern int fcntl_setlk64(unsigned int, struct file *, unsigned int, 1108 extern int fcntl_setlk64(unsigned int, struct file *, unsigned int,
1109 struct flock64 __user *); 1109 struct flock64 __user *);
1110 #endif 1110 #endif
1111 1111
1112 extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg); 1112 extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
1113 extern int fcntl_getlease(struct file *filp); 1113 extern int fcntl_getlease(struct file *filp);
1114 1114
1115 /* fs/locks.c */ 1115 /* fs/locks.c */
1116 extern void locks_init_lock(struct file_lock *); 1116 extern void locks_init_lock(struct file_lock *);
1117 extern void locks_copy_lock(struct file_lock *, struct file_lock *); 1117 extern void locks_copy_lock(struct file_lock *, struct file_lock *);
1118 extern void __locks_copy_lock(struct file_lock *, const struct file_lock *); 1118 extern void __locks_copy_lock(struct file_lock *, const struct file_lock *);
1119 extern void locks_remove_posix(struct file *, fl_owner_t); 1119 extern void locks_remove_posix(struct file *, fl_owner_t);
1120 extern void locks_remove_flock(struct file *); 1120 extern void locks_remove_flock(struct file *);
1121 extern void locks_release_private(struct file_lock *); 1121 extern void locks_release_private(struct file_lock *);
1122 extern void posix_test_lock(struct file *, struct file_lock *); 1122 extern void posix_test_lock(struct file *, struct file_lock *);
1123 extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); 1123 extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *);
1124 extern int posix_lock_file_wait(struct file *, struct file_lock *); 1124 extern int posix_lock_file_wait(struct file *, struct file_lock *);
1125 extern int posix_unblock_lock(struct file *, struct file_lock *); 1125 extern int posix_unblock_lock(struct file *, struct file_lock *);
1126 extern int vfs_test_lock(struct file *, struct file_lock *); 1126 extern int vfs_test_lock(struct file *, struct file_lock *);
1127 extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); 1127 extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *);
1128 extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); 1128 extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl);
1129 extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); 1129 extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl);
1130 extern int __break_lease(struct inode *inode, unsigned int flags); 1130 extern int __break_lease(struct inode *inode, unsigned int flags);
1131 extern void lease_get_mtime(struct inode *, struct timespec *time); 1131 extern void lease_get_mtime(struct inode *, struct timespec *time);
1132 extern int generic_setlease(struct file *, long, struct file_lock **); 1132 extern int generic_setlease(struct file *, long, struct file_lock **);
1133 extern int vfs_setlease(struct file *, long, struct file_lock **); 1133 extern int vfs_setlease(struct file *, long, struct file_lock **);
1134 extern int lease_modify(struct file_lock **, int); 1134 extern int lease_modify(struct file_lock **, int);
1135 extern int lock_may_read(struct inode *, loff_t start, unsigned long count); 1135 extern int lock_may_read(struct inode *, loff_t start, unsigned long count);
1136 extern int lock_may_write(struct inode *, loff_t start, unsigned long count); 1136 extern int lock_may_write(struct inode *, loff_t start, unsigned long count);
1137 #else /* !CONFIG_FILE_LOCKING */ 1137 #else /* !CONFIG_FILE_LOCKING */
1138 static inline int fcntl_getlk(struct file *file, struct flock __user *user) 1138 static inline int fcntl_getlk(struct file *file, struct flock __user *user)
1139 { 1139 {
1140 return -EINVAL; 1140 return -EINVAL;
1141 } 1141 }
1142 1142
1143 static inline int fcntl_setlk(unsigned int fd, struct file *file, 1143 static inline int fcntl_setlk(unsigned int fd, struct file *file,
1144 unsigned int cmd, struct flock __user *user) 1144 unsigned int cmd, struct flock __user *user)
1145 { 1145 {
1146 return -EACCES; 1146 return -EACCES;
1147 } 1147 }
1148 1148
1149 #if BITS_PER_LONG == 32 1149 #if BITS_PER_LONG == 32
1150 static inline int fcntl_getlk64(struct file *file, struct flock64 __user *user) 1150 static inline int fcntl_getlk64(struct file *file, struct flock64 __user *user)
1151 { 1151 {
1152 return -EINVAL; 1152 return -EINVAL;
1153 } 1153 }
1154 1154
1155 static inline int fcntl_setlk64(unsigned int fd, struct file *file, 1155 static inline int fcntl_setlk64(unsigned int fd, struct file *file,
1156 unsigned int cmd, struct flock64 __user *user) 1156 unsigned int cmd, struct flock64 __user *user)
1157 { 1157 {
1158 return -EACCES; 1158 return -EACCES;
1159 } 1159 }
1160 #endif 1160 #endif
/*
 * File locking is compiled out, so leases cannot be set: fail with -EINVAL
 * rather than pretending the lease was installed.
 */
static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
{
	return -EINVAL;
}

/*
 * No lease can exist without file locking, so report F_UNLCK.  Returning 0
 * here would be read by callers as F_RDLCK, i.e. "a read lease is held".
 */
static inline int fcntl_getlease(struct file *filp)
{
	return F_UNLCK;
}
1170 1170
/* File locking is compiled out: nothing to initialise. */
static inline void locks_init_lock(struct file_lock *fl)
{
}
1175 1175
/*
 * File locking is compiled out: copying a lock is a no-op.
 *
 * The source lock is const-qualified to match the real prototype used when
 * CONFIG_FILE_LOCKING is enabled, so callers holding a const pointer
 * compile cleanly in both configurations.
 */
static inline void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl)
{
	return;
}
1180 1180
1181 static inline void locks_copy_lock(struct file_lock *new, struct file_lock *fl) 1181 static inline void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
1182 { 1182 {
1183 return; 1183 return;
1184 } 1184 }
1185 1185
1186 static inline void locks_remove_posix(struct file *filp, fl_owner_t owner) 1186 static inline void locks_remove_posix(struct file *filp, fl_owner_t owner)
1187 { 1187 {
1188 return; 1188 return;
1189 } 1189 }
1190 1190
1191 static inline void locks_remove_flock(struct file *filp) 1191 static inline void locks_remove_flock(struct file *filp)
1192 { 1192 {
1193 return; 1193 return;
1194 } 1194 }
1195 1195
1196 static inline void posix_test_lock(struct file *filp, struct file_lock *fl) 1196 static inline void posix_test_lock(struct file *filp, struct file_lock *fl)
1197 { 1197 {
1198 return; 1198 return;
1199 } 1199 }
1200 1200
/*
 * File locking is compiled out: requests to take a lock fail with -ENOLCK,
 * unblocking reports -ENOENT, and testing or cancelling a lock reports 0.
 */
static inline int
posix_lock_file(struct file *filp, struct file_lock *fl,
		struct file_lock *conflock)
{
	return -ENOLCK;
}

static inline int
posix_lock_file_wait(struct file *filp, struct file_lock *fl)
{
	return -ENOLCK;
}

static inline int
posix_unblock_lock(struct file *filp, struct file_lock *waiter)
{
	return -ENOENT;
}

static inline int
vfs_test_lock(struct file *filp, struct file_lock *fl)
{
	return 0;
}

static inline int
vfs_lock_file(struct file *filp, unsigned int cmd,
	      struct file_lock *fl, struct file_lock *conf)
{
	return -ENOLCK;
}

static inline int
vfs_cancel_lock(struct file *filp, struct file_lock *fl)
{
	return 0;
}

static inline int
flock_lock_file_wait(struct file *filp, struct file_lock *request)
{
	return -ENOLCK;
}
1239 1239
1240 static inline int __break_lease(struct inode *inode, unsigned int mode) 1240 static inline int __break_lease(struct inode *inode, unsigned int mode)
1241 { 1241 {
1242 return 0; 1242 return 0;
1243 } 1243 }
1244 1244
1245 static inline void lease_get_mtime(struct inode *inode, struct timespec *time) 1245 static inline void lease_get_mtime(struct inode *inode, struct timespec *time)
1246 { 1246 {
1247 return; 1247 return;
1248 } 1248 }
1249 1249
1250 static inline int generic_setlease(struct file *filp, long arg, 1250 static inline int generic_setlease(struct file *filp, long arg,
1251 struct file_lock **flp) 1251 struct file_lock **flp)
1252 { 1252 {
1253 return -EINVAL; 1253 return -EINVAL;
1254 } 1254 }
1255 1255
1256 static inline int vfs_setlease(struct file *filp, long arg, 1256 static inline int vfs_setlease(struct file *filp, long arg,
1257 struct file_lock **lease) 1257 struct file_lock **lease)
1258 { 1258 {
1259 return -EINVAL; 1259 return -EINVAL;
1260 } 1260 }
1261 1261
1262 static inline int lease_modify(struct file_lock **before, int arg) 1262 static inline int lease_modify(struct file_lock **before, int arg)
1263 { 1263 {
1264 return -EINVAL; 1264 return -EINVAL;
1265 } 1265 }
1266 1266
1267 static inline int lock_may_read(struct inode *inode, loff_t start, 1267 static inline int lock_may_read(struct inode *inode, loff_t start,
1268 unsigned long len) 1268 unsigned long len)
1269 { 1269 {
1270 return 1; 1270 return 1;
1271 } 1271 }
1272 1272
1273 static inline int lock_may_write(struct inode *inode, loff_t start, 1273 static inline int lock_may_write(struct inode *inode, loff_t start,
1274 unsigned long len) 1274 unsigned long len)
1275 { 1275 {
1276 return 1; 1276 return 1;
1277 } 1277 }
1278 1278
1279 #endif /* !CONFIG_FILE_LOCKING */ 1279 #endif /* !CONFIG_FILE_LOCKING */
1280 1280
1281 1281
1282 struct fasync_struct { 1282 struct fasync_struct {
1283 int magic; 1283 spinlock_t fa_lock;
1284 int fa_fd; 1284 int magic;
1285 struct fasync_struct *fa_next; /* singly linked list */ 1285 int fa_fd;
1286 struct file *fa_file; 1286 struct fasync_struct *fa_next; /* singly linked list */
1287 struct file *fa_file;
1288 struct rcu_head fa_rcu;
1287 }; 1289 };
1288 1290
1289 #define FASYNC_MAGIC 0x4601 1291 #define FASYNC_MAGIC 0x4601
1290 1292
1291 /* SMP safe fasync helpers: */ 1293 /* SMP safe fasync helpers: */
1292 extern int fasync_helper(int, struct file *, int, struct fasync_struct **); 1294 extern int fasync_helper(int, struct file *, int, struct fasync_struct **);
1293 /* can be called from interrupts */ 1295 /* can be called from interrupts */
1294 extern void kill_fasync(struct fasync_struct **, int, int); 1296 extern void kill_fasync(struct fasync_struct **, int, int);
1295 /* only for net: no internal synchronization */
1296 extern void __kill_fasync(struct fasync_struct *, int, int);
1297 1297
1298 extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force); 1298 extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
1299 extern int f_setown(struct file *filp, unsigned long arg, int force); 1299 extern int f_setown(struct file *filp, unsigned long arg, int force);
1300 extern void f_delown(struct file *filp); 1300 extern void f_delown(struct file *filp);
1301 extern pid_t f_getown(struct file *filp); 1301 extern pid_t f_getown(struct file *filp);
1302 extern int send_sigurg(struct fown_struct *fown); 1302 extern int send_sigurg(struct fown_struct *fown);
1303 1303
1304 /* 1304 /*
1305 * Umount options 1305 * Umount options
1306 */ 1306 */
1307 1307
1308 #define MNT_FORCE 0x00000001 /* Attempt to forcibily umount */ 1308 #define MNT_FORCE 0x00000001 /* Attempt to forcibily umount */
1309 #define MNT_DETACH 0x00000002 /* Just detach from the tree */ 1309 #define MNT_DETACH 0x00000002 /* Just detach from the tree */
1310 #define MNT_EXPIRE 0x00000004 /* Mark for expiry */ 1310 #define MNT_EXPIRE 0x00000004 /* Mark for expiry */
1311 #define UMOUNT_NOFOLLOW 0x00000008 /* Don't follow symlink on umount */ 1311 #define UMOUNT_NOFOLLOW 0x00000008 /* Don't follow symlink on umount */
1312 #define UMOUNT_UNUSED 0x80000000 /* Flag guaranteed to be unused */ 1312 #define UMOUNT_UNUSED 0x80000000 /* Flag guaranteed to be unused */
1313 1313
1314 extern struct list_head super_blocks; 1314 extern struct list_head super_blocks;
1315 extern spinlock_t sb_lock; 1315 extern spinlock_t sb_lock;
1316 1316
1317 #define sb_entry(list) list_entry((list), struct super_block, s_list) 1317 #define sb_entry(list) list_entry((list), struct super_block, s_list)
1318 #define S_BIAS (1<<30) 1318 #define S_BIAS (1<<30)
1319 struct super_block { 1319 struct super_block {
1320 struct list_head s_list; /* Keep this first */ 1320 struct list_head s_list; /* Keep this first */
1321 dev_t s_dev; /* search index; _not_ kdev_t */ 1321 dev_t s_dev; /* search index; _not_ kdev_t */
1322 unsigned char s_dirt; 1322 unsigned char s_dirt;
1323 unsigned char s_blocksize_bits; 1323 unsigned char s_blocksize_bits;
1324 unsigned long s_blocksize; 1324 unsigned long s_blocksize;
1325 loff_t s_maxbytes; /* Max file size */ 1325 loff_t s_maxbytes; /* Max file size */
1326 struct file_system_type *s_type; 1326 struct file_system_type *s_type;
1327 const struct super_operations *s_op; 1327 const struct super_operations *s_op;
1328 const struct dquot_operations *dq_op; 1328 const struct dquot_operations *dq_op;
1329 const struct quotactl_ops *s_qcop; 1329 const struct quotactl_ops *s_qcop;
1330 const struct export_operations *s_export_op; 1330 const struct export_operations *s_export_op;
1331 unsigned long s_flags; 1331 unsigned long s_flags;
1332 unsigned long s_magic; 1332 unsigned long s_magic;
1333 struct dentry *s_root; 1333 struct dentry *s_root;
1334 struct rw_semaphore s_umount; 1334 struct rw_semaphore s_umount;
1335 struct mutex s_lock; 1335 struct mutex s_lock;
1336 int s_count; 1336 int s_count;
1337 int s_need_sync; 1337 int s_need_sync;
1338 atomic_t s_active; 1338 atomic_t s_active;
1339 #ifdef CONFIG_SECURITY 1339 #ifdef CONFIG_SECURITY
1340 void *s_security; 1340 void *s_security;
1341 #endif 1341 #endif
1342 struct xattr_handler **s_xattr; 1342 struct xattr_handler **s_xattr;
1343 1343
1344 struct list_head s_inodes; /* all inodes */ 1344 struct list_head s_inodes; /* all inodes */
1345 struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ 1345 struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */
1346 struct list_head s_files; 1346 struct list_head s_files;
1347 /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ 1347 /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */
1348 struct list_head s_dentry_lru; /* unused dentry lru */ 1348 struct list_head s_dentry_lru; /* unused dentry lru */
1349 int s_nr_dentry_unused; /* # of dentry on lru */ 1349 int s_nr_dentry_unused; /* # of dentry on lru */
1350 1350
1351 struct block_device *s_bdev; 1351 struct block_device *s_bdev;
1352 struct backing_dev_info *s_bdi; 1352 struct backing_dev_info *s_bdi;
1353 struct mtd_info *s_mtd; 1353 struct mtd_info *s_mtd;
1354 struct list_head s_instances; 1354 struct list_head s_instances;
1355 struct quota_info s_dquot; /* Diskquota specific options */ 1355 struct quota_info s_dquot; /* Diskquota specific options */
1356 1356
1357 int s_frozen; 1357 int s_frozen;
1358 wait_queue_head_t s_wait_unfrozen; 1358 wait_queue_head_t s_wait_unfrozen;
1359 1359
1360 char s_id[32]; /* Informational name */ 1360 char s_id[32]; /* Informational name */
1361 1361
1362 void *s_fs_info; /* Filesystem private info */ 1362 void *s_fs_info; /* Filesystem private info */
1363 fmode_t s_mode; 1363 fmode_t s_mode;
1364 1364
1365 /* Granularity of c/m/atime in ns. 1365 /* Granularity of c/m/atime in ns.
1366 Cannot be worse than a second */ 1366 Cannot be worse than a second */
1367 u32 s_time_gran; 1367 u32 s_time_gran;
1368 1368
1369 /* 1369 /*
1370 * The next field is for VFS *only*. No filesystems have any business 1370 * The next field is for VFS *only*. No filesystems have any business
1371 * even looking at it. You had been warned. 1371 * even looking at it. You had been warned.
1372 */ 1372 */
1373 struct mutex s_vfs_rename_mutex; /* Kludge */ 1373 struct mutex s_vfs_rename_mutex; /* Kludge */
1374 1374
1375 /* 1375 /*
1376 * Filesystem subtype. If non-empty the filesystem type field 1376 * Filesystem subtype. If non-empty the filesystem type field
1377 * in /proc/mounts will be "type.subtype" 1377 * in /proc/mounts will be "type.subtype"
1378 */ 1378 */
1379 char *s_subtype; 1379 char *s_subtype;
1380 1380
1381 /* 1381 /*
1382 * Saved mount options for lazy filesystems using 1382 * Saved mount options for lazy filesystems using
1383 * generic_show_options() 1383 * generic_show_options()
1384 */ 1384 */
1385 char *s_options; 1385 char *s_options;
1386 }; 1386 };
1387 1387
1388 extern struct timespec current_fs_time(struct super_block *sb); 1388 extern struct timespec current_fs_time(struct super_block *sb);
1389 1389
1390 /* 1390 /*
1391 * Snapshotting support. 1391 * Snapshotting support.
1392 */ 1392 */
1393 enum { 1393 enum {
1394 SB_UNFROZEN = 0, 1394 SB_UNFROZEN = 0,
1395 SB_FREEZE_WRITE = 1, 1395 SB_FREEZE_WRITE = 1,
1396 SB_FREEZE_TRANS = 2, 1396 SB_FREEZE_TRANS = 2,
1397 }; 1397 };
1398 1398
1399 #define vfs_check_frozen(sb, level) \ 1399 #define vfs_check_frozen(sb, level) \
1400 wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level))) 1400 wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level)))
1401 1401
1402 #define get_fs_excl() atomic_inc(&current->fs_excl) 1402 #define get_fs_excl() atomic_inc(&current->fs_excl)
1403 #define put_fs_excl() atomic_dec(&current->fs_excl) 1403 #define put_fs_excl() atomic_dec(&current->fs_excl)
1404 #define has_fs_excl() atomic_read(&current->fs_excl) 1404 #define has_fs_excl() atomic_read(&current->fs_excl)
1405 1405
1406 #define is_owner_or_cap(inode) \ 1406 #define is_owner_or_cap(inode) \
1407 ((current_fsuid() == (inode)->i_uid) || capable(CAP_FOWNER)) 1407 ((current_fsuid() == (inode)->i_uid) || capable(CAP_FOWNER))
1408 1408
1409 /* not quite ready to be deprecated, but... */ 1409 /* not quite ready to be deprecated, but... */
1410 extern void lock_super(struct super_block *); 1410 extern void lock_super(struct super_block *);
1411 extern void unlock_super(struct super_block *); 1411 extern void unlock_super(struct super_block *);
1412 1412
1413 /* 1413 /*
1414 * VFS helper functions.. 1414 * VFS helper functions..
1415 */ 1415 */
1416 extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *); 1416 extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *);
1417 extern int vfs_mkdir(struct inode *, struct dentry *, int); 1417 extern int vfs_mkdir(struct inode *, struct dentry *, int);
1418 extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t); 1418 extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t);
1419 extern int vfs_symlink(struct inode *, struct dentry *, const char *); 1419 extern int vfs_symlink(struct inode *, struct dentry *, const char *);
1420 extern int vfs_link(struct dentry *, struct inode *, struct dentry *); 1420 extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
1421 extern int vfs_rmdir(struct inode *, struct dentry *); 1421 extern int vfs_rmdir(struct inode *, struct dentry *);
1422 extern int vfs_unlink(struct inode *, struct dentry *); 1422 extern int vfs_unlink(struct inode *, struct dentry *);
1423 extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); 1423 extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
1424 1424
1425 /* 1425 /*
1426 * VFS dentry helper functions. 1426 * VFS dentry helper functions.
1427 */ 1427 */
1428 extern void dentry_unhash(struct dentry *dentry); 1428 extern void dentry_unhash(struct dentry *dentry);
1429 1429
1430 /* 1430 /*
1431 * VFS file helper functions. 1431 * VFS file helper functions.
1432 */ 1432 */
1433 extern int file_permission(struct file *, int); 1433 extern int file_permission(struct file *, int);
1434 1434
1435 /* 1435 /*
1436 * VFS FS_IOC_FIEMAP helper definitions. 1436 * VFS FS_IOC_FIEMAP helper definitions.
1437 */ 1437 */
1438 struct fiemap_extent_info { 1438 struct fiemap_extent_info {
1439 unsigned int fi_flags; /* Flags as passed from user */ 1439 unsigned int fi_flags; /* Flags as passed from user */
1440 unsigned int fi_extents_mapped; /* Number of mapped extents */ 1440 unsigned int fi_extents_mapped; /* Number of mapped extents */
1441 unsigned int fi_extents_max; /* Size of fiemap_extent array */ 1441 unsigned int fi_extents_max; /* Size of fiemap_extent array */
1442 struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent 1442 struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent
1443 * array */ 1443 * array */
1444 }; 1444 };
1445 int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, 1445 int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical,
1446 u64 phys, u64 len, u32 flags); 1446 u64 phys, u64 len, u32 flags);
1447 int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); 1447 int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags);
1448 1448
1449 /* 1449 /*
1450 * File types 1450 * File types
1451 * 1451 *
1452 * NOTE! These match bits 12..15 of stat.st_mode 1452 * NOTE! These match bits 12..15 of stat.st_mode
1453 * (ie "(i_mode >> 12) & 15"). 1453 * (ie "(i_mode >> 12) & 15").
1454 */ 1454 */
1455 #define DT_UNKNOWN 0 1455 #define DT_UNKNOWN 0
1456 #define DT_FIFO 1 1456 #define DT_FIFO 1
1457 #define DT_CHR 2 1457 #define DT_CHR 2
1458 #define DT_DIR 4 1458 #define DT_DIR 4
1459 #define DT_BLK 6 1459 #define DT_BLK 6
1460 #define DT_REG 8 1460 #define DT_REG 8
1461 #define DT_LNK 10 1461 #define DT_LNK 10
1462 #define DT_SOCK 12 1462 #define DT_SOCK 12
1463 #define DT_WHT 14 1463 #define DT_WHT 14
1464 1464
1465 /* 1465 /*
1466 * This is the "filldir" function type, used by readdir() to let 1466 * This is the "filldir" function type, used by readdir() to let
1467 * the kernel specify what kind of dirent layout it wants to have. 1467 * the kernel specify what kind of dirent layout it wants to have.
1468 * This allows the kernel to read directories into kernel space or 1468 * This allows the kernel to read directories into kernel space or
1469 * to have different dirent layouts depending on the binary type. 1469 * to have different dirent layouts depending on the binary type.
1470 */ 1470 */
1471 typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned); 1471 typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned);
1472 struct block_device_operations; 1472 struct block_device_operations;
1473 1473
1474 /* These macros are for out of kernel modules to test that 1474 /* These macros are for out of kernel modules to test that
1475 * the kernel supports the unlocked_ioctl and compat_ioctl 1475 * the kernel supports the unlocked_ioctl and compat_ioctl
1476 * fields in struct file_operations. */ 1476 * fields in struct file_operations. */
1477 #define HAVE_COMPAT_IOCTL 1 1477 #define HAVE_COMPAT_IOCTL 1
1478 #define HAVE_UNLOCKED_IOCTL 1 1478 #define HAVE_UNLOCKED_IOCTL 1
1479 1479
1480 /* 1480 /*
1481 * NOTE: 1481 * NOTE:
1482 * read, write, poll, fsync, readv, writev, unlocked_ioctl and compat_ioctl 1482 * read, write, poll, fsync, readv, writev, unlocked_ioctl and compat_ioctl
1483 * can be called without the big kernel lock held in all filesystems. 1483 * can be called without the big kernel lock held in all filesystems.
1484 */ 1484 */
1485 struct file_operations { 1485 struct file_operations {
1486 struct module *owner; 1486 struct module *owner;
1487 loff_t (*llseek) (struct file *, loff_t, int); 1487 loff_t (*llseek) (struct file *, loff_t, int);
1488 ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); 1488 ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
1489 ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); 1489 ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
1490 ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); 1490 ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
1491 ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); 1491 ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
1492 int (*readdir) (struct file *, void *, filldir_t); 1492 int (*readdir) (struct file *, void *, filldir_t);
1493 unsigned int (*poll) (struct file *, struct poll_table_struct *); 1493 unsigned int (*poll) (struct file *, struct poll_table_struct *);
1494 int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long); 1494 int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long);
1495 long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); 1495 long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
1496 long (*compat_ioctl) (struct file *, unsigned int, unsigned long); 1496 long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
1497 int (*mmap) (struct file *, struct vm_area_struct *); 1497 int (*mmap) (struct file *, struct vm_area_struct *);
1498 int (*open) (struct inode *, struct file *); 1498 int (*open) (struct inode *, struct file *);
1499 int (*flush) (struct file *, fl_owner_t id); 1499 int (*flush) (struct file *, fl_owner_t id);
1500 int (*release) (struct inode *, struct file *); 1500 int (*release) (struct inode *, struct file *);
1501 int (*fsync) (struct file *, struct dentry *, int datasync); 1501 int (*fsync) (struct file *, struct dentry *, int datasync);
1502 int (*aio_fsync) (struct kiocb *, int datasync); 1502 int (*aio_fsync) (struct kiocb *, int datasync);
1503 int (*fasync) (int, struct file *, int); 1503 int (*fasync) (int, struct file *, int);
1504 int (*lock) (struct file *, int, struct file_lock *); 1504 int (*lock) (struct file *, int, struct file_lock *);
1505 ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); 1505 ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
1506 unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); 1506 unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
1507 int (*check_flags)(int); 1507 int (*check_flags)(int);
1508 int (*flock) (struct file *, int, struct file_lock *); 1508 int (*flock) (struct file *, int, struct file_lock *);
1509 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); 1509 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
1510 ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); 1510 ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
1511 int (*setlease)(struct file *, long, struct file_lock **); 1511 int (*setlease)(struct file *, long, struct file_lock **);
1512 }; 1512 };
1513 1513
1514 struct inode_operations { 1514 struct inode_operations {
1515 int (*create) (struct inode *,struct dentry *,int, struct nameidata *); 1515 int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
1516 struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); 1516 struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *);
1517 int (*link) (struct dentry *,struct inode *,struct dentry *); 1517 int (*link) (struct dentry *,struct inode *,struct dentry *);
1518 int (*unlink) (struct inode *,struct dentry *); 1518 int (*unlink) (struct inode *,struct dentry *);
1519 int (*symlink) (struct inode *,struct dentry *,const char *); 1519 int (*symlink) (struct inode *,struct dentry *,const char *);
1520 int (*mkdir) (struct inode *,struct dentry *,int); 1520 int (*mkdir) (struct inode *,struct dentry *,int);
1521 int (*rmdir) (struct inode *,struct dentry *); 1521 int (*rmdir) (struct inode *,struct dentry *);
1522 int (*mknod) (struct inode *,struct dentry *,int,dev_t); 1522 int (*mknod) (struct inode *,struct dentry *,int,dev_t);
1523 int (*rename) (struct inode *, struct dentry *, 1523 int (*rename) (struct inode *, struct dentry *,
1524 struct inode *, struct dentry *); 1524 struct inode *, struct dentry *);
1525 int (*readlink) (struct dentry *, char __user *,int); 1525 int (*readlink) (struct dentry *, char __user *,int);
1526 void * (*follow_link) (struct dentry *, struct nameidata *); 1526 void * (*follow_link) (struct dentry *, struct nameidata *);
1527 void (*put_link) (struct dentry *, struct nameidata *, void *); 1527 void (*put_link) (struct dentry *, struct nameidata *, void *);
1528 void (*truncate) (struct inode *); 1528 void (*truncate) (struct inode *);
1529 int (*permission) (struct inode *, int); 1529 int (*permission) (struct inode *, int);
1530 int (*check_acl)(struct inode *, int); 1530 int (*check_acl)(struct inode *, int);
1531 int (*setattr) (struct dentry *, struct iattr *); 1531 int (*setattr) (struct dentry *, struct iattr *);
1532 int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); 1532 int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
1533 int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); 1533 int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
1534 ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); 1534 ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
1535 ssize_t (*listxattr) (struct dentry *, char *, size_t); 1535 ssize_t (*listxattr) (struct dentry *, char *, size_t);
1536 int (*removexattr) (struct dentry *, const char *); 1536 int (*removexattr) (struct dentry *, const char *);
1537 void (*truncate_range)(struct inode *, loff_t, loff_t); 1537 void (*truncate_range)(struct inode *, loff_t, loff_t);
1538 long (*fallocate)(struct inode *inode, int mode, loff_t offset, 1538 long (*fallocate)(struct inode *inode, int mode, loff_t offset,
1539 loff_t len); 1539 loff_t len);
1540 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, 1540 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
1541 u64 len); 1541 u64 len);
1542 }; 1542 };
1543 1543
1544 struct seq_file; 1544 struct seq_file;
1545 1545
1546 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, 1546 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
1547 unsigned long nr_segs, unsigned long fast_segs, 1547 unsigned long nr_segs, unsigned long fast_segs,
1548 struct iovec *fast_pointer, 1548 struct iovec *fast_pointer,
1549 struct iovec **ret_pointer); 1549 struct iovec **ret_pointer);
1550 1550
1551 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); 1551 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
1552 extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); 1552 extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
1553 extern ssize_t vfs_readv(struct file *, const struct iovec __user *, 1553 extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
1554 unsigned long, loff_t *); 1554 unsigned long, loff_t *);
1555 extern ssize_t vfs_writev(struct file *, const struct iovec __user *, 1555 extern ssize_t vfs_writev(struct file *, const struct iovec __user *,
1556 unsigned long, loff_t *); 1556 unsigned long, loff_t *);
1557 1557
1558 struct super_operations { 1558 struct super_operations {
1559 struct inode *(*alloc_inode)(struct super_block *sb); 1559 struct inode *(*alloc_inode)(struct super_block *sb);
1560 void (*destroy_inode)(struct inode *); 1560 void (*destroy_inode)(struct inode *);
1561 1561
1562 void (*dirty_inode) (struct inode *); 1562 void (*dirty_inode) (struct inode *);
1563 int (*write_inode) (struct inode *, struct writeback_control *wbc); 1563 int (*write_inode) (struct inode *, struct writeback_control *wbc);
1564 void (*drop_inode) (struct inode *); 1564 void (*drop_inode) (struct inode *);
1565 void (*delete_inode) (struct inode *); 1565 void (*delete_inode) (struct inode *);
1566 void (*put_super) (struct super_block *); 1566 void (*put_super) (struct super_block *);
1567 void (*write_super) (struct super_block *); 1567 void (*write_super) (struct super_block *);
1568 int (*sync_fs)(struct super_block *sb, int wait); 1568 int (*sync_fs)(struct super_block *sb, int wait);
1569 int (*freeze_fs) (struct super_block *); 1569 int (*freeze_fs) (struct super_block *);
1570 int (*unfreeze_fs) (struct super_block *); 1570 int (*unfreeze_fs) (struct super_block *);
1571 int (*statfs) (struct dentry *, struct kstatfs *); 1571 int (*statfs) (struct dentry *, struct kstatfs *);
1572 int (*remount_fs) (struct super_block *, int *, char *); 1572 int (*remount_fs) (struct super_block *, int *, char *);
1573 void (*clear_inode) (struct inode *); 1573 void (*clear_inode) (struct inode *);
1574 void (*umount_begin) (struct super_block *); 1574 void (*umount_begin) (struct super_block *);
1575 1575
1576 int (*show_options)(struct seq_file *, struct vfsmount *); 1576 int (*show_options)(struct seq_file *, struct vfsmount *);
1577 int (*show_stats)(struct seq_file *, struct vfsmount *); 1577 int (*show_stats)(struct seq_file *, struct vfsmount *);
1578 #ifdef CONFIG_QUOTA 1578 #ifdef CONFIG_QUOTA
1579 ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); 1579 ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
1580 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); 1580 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
1581 #endif 1581 #endif
1582 int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); 1582 int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
1583 }; 1583 };
1584 1584
1585 /* 1585 /*
1586 * Inode state bits. Protected by inode_lock. 1586 * Inode state bits. Protected by inode_lock.
1587 * 1587 *
1588 * Three bits determine the dirty state of the inode, I_DIRTY_SYNC, 1588 * Three bits determine the dirty state of the inode, I_DIRTY_SYNC,
1589 * I_DIRTY_DATASYNC and I_DIRTY_PAGES. 1589 * I_DIRTY_DATASYNC and I_DIRTY_PAGES.
1590 * 1590 *
1591 * Four bits define the lifetime of an inode. Initially, inodes are I_NEW, 1591 * Four bits define the lifetime of an inode. Initially, inodes are I_NEW,
1592 * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at 1592 * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at
1593 * various stages of removing an inode. 1593 * various stages of removing an inode.
1594 * 1594 *
1595 * Two bits are used for locking and completion notification, I_NEW and I_SYNC. 1595 * Two bits are used for locking and completion notification, I_NEW and I_SYNC.
1596 * 1596 *
1597 * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on 1597 * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on
1598 * fdatasync(). i_atime is the usual cause. 1598 * fdatasync(). i_atime is the usual cause.
1599 * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of 1599 * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of
1600 * these changes separately from I_DIRTY_SYNC so that we 1600 * these changes separately from I_DIRTY_SYNC so that we
1601 * don't have to write inode on fdatasync() when only 1601 * don't have to write inode on fdatasync() when only
1602 * mtime has changed in it. 1602 * mtime has changed in it.
1603 * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. 1603 * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean.
1604 * I_NEW Serves as both a mutex and completion notification. 1604 * I_NEW Serves as both a mutex and completion notification.
1605 * New inodes set I_NEW. If two processes both create 1605 * New inodes set I_NEW. If two processes both create
1606 * the same inode, one of them will release its inode and 1606 * the same inode, one of them will release its inode and
1607 * wait for I_NEW to be released before returning. 1607 * wait for I_NEW to be released before returning.
1608 * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can 1608 * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can
1609 * also cause waiting on I_NEW, without I_NEW actually 1609 * also cause waiting on I_NEW, without I_NEW actually
1610 * being set. find_inode() uses this to prevent returning 1610 * being set. find_inode() uses this to prevent returning
1611 * nearly-dead inodes. 1611 * nearly-dead inodes.
1612 * I_WILL_FREE Must be set when calling write_inode_now() if i_count 1612 * I_WILL_FREE Must be set when calling write_inode_now() if i_count
1613 * is zero. I_FREEING must be set when I_WILL_FREE is 1613 * is zero. I_FREEING must be set when I_WILL_FREE is
1614 * cleared. 1614 * cleared.
1615 * I_FREEING Set when inode is about to be freed but still has dirty 1615 * I_FREEING Set when inode is about to be freed but still has dirty
1616 * pages or buffers attached or the inode itself is still 1616 * pages or buffers attached or the inode itself is still
1617 * dirty. 1617 * dirty.
1618 * I_CLEAR Set by clear_inode(). In this state the inode is clean 1618 * I_CLEAR Set by clear_inode(). In this state the inode is clean
1619 * and can be destroyed. 1619 * and can be destroyed.
1620 * 1620 *
1621 * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are 1621 * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are
1622 * prohibited for many purposes. iget() must wait for 1622 * prohibited for many purposes. iget() must wait for
1623 * the inode to be completely released, then create it 1623 * the inode to be completely released, then create it
1624 * anew. Other functions will just ignore such inodes, 1624 * anew. Other functions will just ignore such inodes,
1625 * if appropriate. I_NEW is used for waiting. 1625 * if appropriate. I_NEW is used for waiting.
1626 * 1626 *
1627 * I_SYNC Synchonized write of dirty inode data. The bits is 1627 * I_SYNC Synchonized write of dirty inode data. The bits is
1628 * set during data writeback, and cleared with a wakeup 1628 * set during data writeback, and cleared with a wakeup
1629 * on the bit address once it is done. 1629 * on the bit address once it is done.
1630 * 1630 *
1631 * Q: What is the difference between I_WILL_FREE and I_FREEING? 1631 * Q: What is the difference between I_WILL_FREE and I_FREEING?
1632 */ 1632 */
1633 #define I_DIRTY_SYNC 1 1633 #define I_DIRTY_SYNC 1
1634 #define I_DIRTY_DATASYNC 2 1634 #define I_DIRTY_DATASYNC 2
1635 #define I_DIRTY_PAGES 4 1635 #define I_DIRTY_PAGES 4
1636 #define __I_NEW 3 1636 #define __I_NEW 3
1637 #define I_NEW (1 << __I_NEW) 1637 #define I_NEW (1 << __I_NEW)
1638 #define I_WILL_FREE 16 1638 #define I_WILL_FREE 16
1639 #define I_FREEING 32 1639 #define I_FREEING 32
1640 #define I_CLEAR 64 1640 #define I_CLEAR 64
1641 #define __I_SYNC 7 1641 #define __I_SYNC 7
1642 #define I_SYNC (1 << __I_SYNC) 1642 #define I_SYNC (1 << __I_SYNC)
1643 1643
1644 #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) 1644 #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
1645 1645
1646 extern void __mark_inode_dirty(struct inode *, int); 1646 extern void __mark_inode_dirty(struct inode *, int);
1647 static inline void mark_inode_dirty(struct inode *inode) 1647 static inline void mark_inode_dirty(struct inode *inode)
1648 { 1648 {
1649 __mark_inode_dirty(inode, I_DIRTY); 1649 __mark_inode_dirty(inode, I_DIRTY);
1650 } 1650 }
1651 1651
1652 static inline void mark_inode_dirty_sync(struct inode *inode) 1652 static inline void mark_inode_dirty_sync(struct inode *inode)
1653 { 1653 {
1654 __mark_inode_dirty(inode, I_DIRTY_SYNC); 1654 __mark_inode_dirty(inode, I_DIRTY_SYNC);
1655 } 1655 }
1656 1656
1657 /** 1657 /**
1658 * inc_nlink - directly increment an inode's link count 1658 * inc_nlink - directly increment an inode's link count
1659 * @inode: inode 1659 * @inode: inode
1660 * 1660 *
1661 * This is a low-level filesystem helper to replace any 1661 * This is a low-level filesystem helper to replace any
1662 * direct filesystem manipulation of i_nlink. Currently, 1662 * direct filesystem manipulation of i_nlink. Currently,
1663 * it is only here for parity with dec_nlink(). 1663 * it is only here for parity with dec_nlink().
1664 */ 1664 */
1665 static inline void inc_nlink(struct inode *inode) 1665 static inline void inc_nlink(struct inode *inode)
1666 { 1666 {
1667 inode->i_nlink++; 1667 inode->i_nlink++;
1668 } 1668 }
1669 1669
/*
 * inode_inc_link_count - raise @inode's link count via inc_nlink(), then
 * pass the inode to mark_inode_dirty().
 */
static inline void inode_inc_link_count(struct inode *inode)
{
	inc_nlink(inode);		/* i_nlink + 1 */
	mark_inode_dirty(inode);	/* then flag the inode dirty */
}
1675 1675
1676 /** 1676 /**
1677 * drop_nlink - directly drop an inode's link count 1677 * drop_nlink - directly drop an inode's link count
1678 * @inode: inode 1678 * @inode: inode
1679 * 1679 *
1680 * This is a low-level filesystem helper to replace any 1680 * This is a low-level filesystem helper to replace any
1681 * direct filesystem manipulation of i_nlink. In cases 1681 * direct filesystem manipulation of i_nlink. In cases
1682 * where we are attempting to track writes to the 1682 * where we are attempting to track writes to the
1683 * filesystem, a decrement to zero means an imminent 1683 * filesystem, a decrement to zero means an imminent
1684 * write when the file is truncated and actually unlinked 1684 * write when the file is truncated and actually unlinked
1685 * on the filesystem. 1685 * on the filesystem.
1686 */ 1686 */
1687 static inline void drop_nlink(struct inode *inode) 1687 static inline void drop_nlink(struct inode *inode)
1688 { 1688 {
1689 inode->i_nlink--; 1689 inode->i_nlink--;
1690 } 1690 }
1691 1691
1692 /** 1692 /**
1693 * clear_nlink - directly zero an inode's link count 1693 * clear_nlink - directly zero an inode's link count
1694 * @inode: inode 1694 * @inode: inode
1695 * 1695 *
1696 * This is a low-level filesystem helper to replace any 1696 * This is a low-level filesystem helper to replace any
1697 * direct filesystem manipulation of i_nlink. See 1697 * direct filesystem manipulation of i_nlink. See
1698 * drop_nlink() for why we care about i_nlink hitting zero. 1698 * drop_nlink() for why we care about i_nlink hitting zero.
1699 */ 1699 */
1700 static inline void clear_nlink(struct inode *inode) 1700 static inline void clear_nlink(struct inode *inode)
1701 { 1701 {
1702 inode->i_nlink = 0; 1702 inode->i_nlink = 0;
1703 } 1703 }
1704 1704
/*
 * inode_dec_link_count - lower @inode's link count via drop_nlink(), then
 * pass the inode to mark_inode_dirty().
 */
static inline void inode_dec_link_count(struct inode *inode)
{
	drop_nlink(inode);		/* i_nlink - 1 */
	mark_inode_dirty(inode);	/* then flag the inode dirty */
}
1710 1710
1711 /** 1711 /**
1712 * inode_inc_iversion - increments i_version 1712 * inode_inc_iversion - increments i_version
1713 * @inode: inode that need to be updated 1713 * @inode: inode that need to be updated
1714 * 1714 *
1715 * Every time the inode is modified, the i_version field will be incremented. 1715 * Every time the inode is modified, the i_version field will be incremented.
1716 * The filesystem has to be mounted with i_version flag 1716 * The filesystem has to be mounted with i_version flag
1717 */ 1717 */
1718 1718
1719 static inline void inode_inc_iversion(struct inode *inode) 1719 static inline void inode_inc_iversion(struct inode *inode)
1720 { 1720 {
1721 spin_lock(&inode->i_lock); 1721 spin_lock(&inode->i_lock);
1722 inode->i_version++; 1722 inode->i_version++;
1723 spin_unlock(&inode->i_lock); 1723 spin_unlock(&inode->i_lock);
1724 } 1724 }
1725 1725
1726 extern void touch_atime(struct vfsmount *mnt, struct dentry *dentry); 1726 extern void touch_atime(struct vfsmount *mnt, struct dentry *dentry);
1727 static inline void file_accessed(struct file *file) 1727 static inline void file_accessed(struct file *file)
1728 { 1728 {
1729 if (!(file->f_flags & O_NOATIME)) 1729 if (!(file->f_flags & O_NOATIME))
1730 touch_atime(file->f_path.mnt, file->f_path.dentry); 1730 touch_atime(file->f_path.mnt, file->f_path.dentry);
1731 } 1731 }
1732 1732
/* Prototype only; the definition is not part of this header. */
extern int sync_inode(struct inode *inode, struct writeback_control *wbc);
1734 1734
1735 struct file_system_type { 1735 struct file_system_type {
1736 const char *name; 1736 const char *name;
1737 int fs_flags; 1737 int fs_flags;
1738 int (*get_sb) (struct file_system_type *, int, 1738 int (*get_sb) (struct file_system_type *, int,
1739 const char *, void *, struct vfsmount *); 1739 const char *, void *, struct vfsmount *);
1740 void (*kill_sb) (struct super_block *); 1740 void (*kill_sb) (struct super_block *);
1741 struct module *owner; 1741 struct module *owner;
1742 struct file_system_type * next; 1742 struct file_system_type * next;
1743 struct list_head fs_supers; 1743 struct list_head fs_supers;
1744 1744
1745 struct lock_class_key s_lock_key; 1745 struct lock_class_key s_lock_key;
1746 struct lock_class_key s_umount_key; 1746 struct lock_class_key s_umount_key;
1747 1747
1748 struct lock_class_key i_lock_key; 1748 struct lock_class_key i_lock_key;
1749 struct lock_class_key i_mutex_key; 1749 struct lock_class_key i_mutex_key;
1750 struct lock_class_key i_mutex_dir_key; 1750 struct lock_class_key i_mutex_dir_key;
1751 struct lock_class_key i_alloc_sem_key; 1751 struct lock_class_key i_alloc_sem_key;
1752 }; 1752 };
1753 1753
/*
 * Superblock helpers (prototypes only).  The get_sb_*() family all take
 * a fill_super callback and the vfsmount to work on.
 */
extern int get_sb_ns(struct file_system_type *fs_type, int flags,
		     void *data,
		     int (*fill_super)(struct super_block *, void *, int),
		     struct vfsmount *mnt);
extern int get_sb_bdev(struct file_system_type *fs_type, int flags,
		       const char *dev_name, void *data,
		       int (*fill_super)(struct super_block *, void *, int),
		       struct vfsmount *mnt);
extern int get_sb_single(struct file_system_type *fs_type, int flags,
			 void *data,
			 int (*fill_super)(struct super_block *, void *, int),
			 struct vfsmount *mnt);
extern int get_sb_nodev(struct file_system_type *fs_type, int flags,
			void *data,
			int (*fill_super)(struct super_block *, void *, int),
			struct vfsmount *mnt);

/* Superblock shutdown / release. */
void generic_shutdown_super(struct super_block *sb);
void kill_block_super(struct super_block *sb);
void kill_anon_super(struct super_block *sb);
void kill_litter_super(struct super_block *sb);
void deactivate_super(struct super_block *sb);
void deactivate_locked_super(struct super_block *sb);

/* Superblock lookup / creation. */
int set_anon_super(struct super_block *s, void *data);
struct super_block *sget(struct file_system_type *type,
			 int (*test)(struct super_block *, void *),
			 int (*set)(struct super_block *, void *),
			 void *data);
extern int get_sb_pseudo(struct file_system_type *, char *,
			 const struct super_operations *ops, unsigned long,
			 struct vfsmount *mnt);
extern void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb);

int __put_super_and_need_restart(struct super_block *sb);
void put_super(struct super_block *sb);
1786 1786
/* Alas, no aliases. Too much hassle with bringing module.h everywhere */

/*
 * fops_get(fops): evaluates to @fops when it is non-NULL and
 *	try_module_get() on its owner succeeds, otherwise to NULL.
 * fops_put(fops): module_put() on the owner of a non-NULL @fops.
 * Both macros evaluate their argument more than once.
 */
#define fops_get(fops) \
	((fops) != NULL && try_module_get((fops)->owner) ? (fops) : NULL)
#define fops_put(fops) \
	do { \
		if ((fops) != NULL) \
			module_put((fops)->owner); \
	} while (0)
1792 1792
1793 extern int register_filesystem(struct file_system_type *); 1793 extern int register_filesystem(struct file_system_type *);
1794 extern int unregister_filesystem(struct file_system_type *); 1794 extern int unregister_filesystem(struct file_system_type *);
1795 extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data); 1795 extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data);
1796 #define kern_mount(type) kern_mount_data(type, NULL) 1796 #define kern_mount(type) kern_mount_data(type, NULL)
1797 extern int may_umount_tree(struct vfsmount *); 1797 extern int may_umount_tree(struct vfsmount *);
1798 extern int may_umount(struct vfsmount *); 1798 extern int may_umount(struct vfsmount *);
1799 extern long do_mount(char *, char *, char *, unsigned long, void *); 1799 extern long do_mount(char *, char *, char *, unsigned long, void *);
1800 extern struct vfsmount *collect_mounts(struct path *); 1800 extern struct vfsmount *collect_mounts(struct path *);
1801 extern void drop_collected_mounts(struct vfsmount *); 1801 extern void drop_collected_mounts(struct vfsmount *);
1802 extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, 1802 extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *,
1803 struct vfsmount *); 1803 struct vfsmount *);
1804 extern int vfs_statfs(struct dentry *, struct kstatfs *); 1804 extern int vfs_statfs(struct dentry *, struct kstatfs *);
1805 1805
1806 extern int current_umask(void); 1806 extern int current_umask(void);
1807 1807
1808 /* /sys/fs */ 1808 /* /sys/fs */
1809 extern struct kobject *fs_kobj; 1809 extern struct kobject *fs_kobj;
1810 1810
1811 extern int rw_verify_area(int, struct file *, loff_t *, size_t); 1811 extern int rw_verify_area(int, struct file *, loff_t *, size_t);
1812 1812
/* Access kinds passed as the first argument of locks_mandatory_area(). */
#define FLOCK_VERIFY_READ	1
#define FLOCK_VERIFY_WRITE	2
1815 1815
1816 #ifdef CONFIG_FILE_LOCKING 1816 #ifdef CONFIG_FILE_LOCKING
1817 extern int locks_mandatory_locked(struct inode *); 1817 extern int locks_mandatory_locked(struct inode *);
1818 extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t); 1818 extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t);
1819 1819
1820 /* 1820 /*
1821 * Candidates for mandatory locking have the setgid bit set 1821 * Candidates for mandatory locking have the setgid bit set
1822 * but no group execute bit - an otherwise meaningless combination. 1822 * but no group execute bit - an otherwise meaningless combination.
1823 */ 1823 */
1824 1824
1825 static inline int __mandatory_lock(struct inode *ino) 1825 static inline int __mandatory_lock(struct inode *ino)
1826 { 1826 {
1827 return (ino->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID; 1827 return (ino->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID;
1828 } 1828 }
1829 1829
/*
 * ... and such candidates only count when they sit on a filesystem
 * mounted with MS_MANDLOCK; anywhere else the locks stay advisory.
 *
 * Returns 1 when both IS_MANDLOCK() and __mandatory_lock() hold, else 0.
 */

static inline int mandatory_lock(struct inode *ino)
{
	if (!IS_MANDLOCK(ino))
		return 0;

	return __mandatory_lock(ino) != 0;
}
1839 1839
/*
 * locks_verify_locked - for an inode satisfying mandatory_lock(), return
 * what locks_mandatory_locked() reports; otherwise return 0.
 */
static inline int locks_verify_locked(struct inode *inode)
{
	return mandatory_lock(inode) ? locks_mandatory_locked(inode) : 0;
}
1846 1846
1847 static inline int locks_verify_truncate(struct inode *inode, 1847 static inline int locks_verify_truncate(struct inode *inode,
1848 struct file *filp, 1848 struct file *filp,
1849 loff_t size) 1849 loff_t size)
1850 { 1850 {
1851 if (inode->i_flock && mandatory_lock(inode)) 1851 if (inode->i_flock && mandatory_lock(inode))
1852 return locks_mandatory_area( 1852 return locks_mandatory_area(
1853 FLOCK_VERIFY_WRITE, inode, filp, 1853 FLOCK_VERIFY_WRITE, inode, filp,
1854 size < inode->i_size ? size : inode->i_size, 1854 size < inode->i_size ? size : inode->i_size,
1855 (size < inode->i_size ? inode->i_size - size 1855 (size < inode->i_size ? inode->i_size - size
1856 : size - inode->i_size) 1856 : size - inode->i_size)
1857 ); 1857 );
1858 return 0; 1858 return 0;
1859 } 1859 }
1860 1860
1861 static inline int break_lease(struct inode *inode, unsigned int mode) 1861 static inline int break_lease(struct inode *inode, unsigned int mode)
1862 { 1862 {
1863 if (inode->i_flock) 1863 if (inode->i_flock)
1864 return __break_lease(inode, mode); 1864 return __break_lease(inode, mode);
1865 return 0; 1865 return 0;
1866 } 1866 }
1867 #else /* !CONFIG_FILE_LOCKING */ 1867 #else /* !CONFIG_FILE_LOCKING */
1868 static inline int locks_mandatory_locked(struct inode *inode) 1868 static inline int locks_mandatory_locked(struct inode *inode)
1869 { 1869 {
1870 return 0; 1870 return 0;
1871 } 1871 }
1872 1872
1873 static inline int locks_mandatory_area(int rw, struct inode *inode, 1873 static inline int locks_mandatory_area(int rw, struct inode *inode,
1874 struct file *filp, loff_t offset, 1874 struct file *filp, loff_t offset,
1875 size_t count) 1875 size_t count)
1876 { 1876 {
1877 return 0; 1877 return 0;
1878 } 1878 }
1879 1879
1880 static inline int __mandatory_lock(struct inode *inode) 1880 static inline int __mandatory_lock(struct inode *inode)
1881 { 1881 {
1882 return 0; 1882 return 0;
1883 } 1883 }
1884 1884
1885 static inline int mandatory_lock(struct inode *inode) 1885 static inline int mandatory_lock(struct inode *inode)
1886 { 1886 {
1887 return 0; 1887 return 0;
1888 } 1888 }
1889 1889
1890 static inline int locks_verify_locked(struct inode *inode) 1890 static inline int locks_verify_locked(struct inode *inode)
1891 { 1891 {
1892 return 0; 1892 return 0;
1893 } 1893 }
1894 1894
1895 static inline int locks_verify_truncate(struct inode *inode, struct file *filp, 1895 static inline int locks_verify_truncate(struct inode *inode, struct file *filp,
1896 size_t size) 1896 size_t size)
1897 { 1897 {
1898 return 0; 1898 return 0;
1899 } 1899 }
1900 1900
/* !CONFIG_FILE_LOCKING stub: arguments are ignored, result is always 0. */
static inline int break_lease(struct inode *inode, unsigned int mode)
{
	const int nothing_to_break = 0;

	return nothing_to_break;
}
1905 1905
1906 #endif /* CONFIG_FILE_LOCKING */ 1906 #endif /* CONFIG_FILE_LOCKING */
1907 1907
1908 /* fs/open.c */ 1908 /* fs/open.c */
1909 1909
1910 extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, 1910 extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
1911 struct file *filp); 1911 struct file *filp);
1912 extern int do_fallocate(struct file *file, int mode, loff_t offset, 1912 extern int do_fallocate(struct file *file, int mode, loff_t offset,
1913 loff_t len); 1913 loff_t len);
1914 extern long do_sys_open(int dfd, const char __user *filename, int flags, 1914 extern long do_sys_open(int dfd, const char __user *filename, int flags,
1915 int mode); 1915 int mode);
1916 extern struct file *filp_open(const char *, int, int); 1916 extern struct file *filp_open(const char *, int, int);
1917 extern struct file * dentry_open(struct dentry *, struct vfsmount *, int, 1917 extern struct file * dentry_open(struct dentry *, struct vfsmount *, int,
1918 const struct cred *); 1918 const struct cred *);
1919 extern int filp_close(struct file *, fl_owner_t id); 1919 extern int filp_close(struct file *, fl_owner_t id);
1920 extern char * getname(const char __user *); 1920 extern char * getname(const char __user *);
1921 1921
1922 /* fs/ioctl.c */ 1922 /* fs/ioctl.c */
1923 1923
1924 extern int ioctl_preallocate(struct file *filp, void __user *argp); 1924 extern int ioctl_preallocate(struct file *filp, void __user *argp);
1925 1925
1926 /* fs/dcache.c */ 1926 /* fs/dcache.c */
1927 extern void __init vfs_caches_init_early(void); 1927 extern void __init vfs_caches_init_early(void);
1928 extern void __init vfs_caches_init(unsigned long); 1928 extern void __init vfs_caches_init(unsigned long);
1929 1929
1930 extern struct kmem_cache *names_cachep; 1930 extern struct kmem_cache *names_cachep;
1931 1931
1932 #define __getname_gfp(gfp) kmem_cache_alloc(names_cachep, (gfp)) 1932 #define __getname_gfp(gfp) kmem_cache_alloc(names_cachep, (gfp))
1933 #define __getname() __getname_gfp(GFP_KERNEL) 1933 #define __getname() __getname_gfp(GFP_KERNEL)
1934 #define __putname(name) kmem_cache_free(names_cachep, (void *)(name)) 1934 #define __putname(name) kmem_cache_free(names_cachep, (void *)(name))
1935 #ifndef CONFIG_AUDITSYSCALL 1935 #ifndef CONFIG_AUDITSYSCALL
1936 #define putname(name) __putname(name) 1936 #define putname(name) __putname(name)
1937 #else 1937 #else
1938 extern void putname(const char *name); 1938 extern void putname(const char *name);
1939 #endif 1939 #endif
1940 1940
1941 #ifdef CONFIG_BLOCK 1941 #ifdef CONFIG_BLOCK
1942 extern int register_blkdev(unsigned int, const char *); 1942 extern int register_blkdev(unsigned int, const char *);
1943 extern void unregister_blkdev(unsigned int, const char *); 1943 extern void unregister_blkdev(unsigned int, const char *);
1944 extern struct block_device *bdget(dev_t); 1944 extern struct block_device *bdget(dev_t);
1945 extern struct block_device *bdgrab(struct block_device *bdev); 1945 extern struct block_device *bdgrab(struct block_device *bdev);
1946 extern void bd_set_size(struct block_device *, loff_t size); 1946 extern void bd_set_size(struct block_device *, loff_t size);
1947 extern void bd_forget(struct inode *inode); 1947 extern void bd_forget(struct inode *inode);
1948 extern void bdput(struct block_device *); 1948 extern void bdput(struct block_device *);
1949 extern struct block_device *open_by_devnum(dev_t, fmode_t); 1949 extern struct block_device *open_by_devnum(dev_t, fmode_t);
1950 extern void invalidate_bdev(struct block_device *); 1950 extern void invalidate_bdev(struct block_device *);
1951 extern int sync_blockdev(struct block_device *bdev); 1951 extern int sync_blockdev(struct block_device *bdev);
1952 extern struct super_block *freeze_bdev(struct block_device *); 1952 extern struct super_block *freeze_bdev(struct block_device *);
1953 extern void emergency_thaw_all(void); 1953 extern void emergency_thaw_all(void);
1954 extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); 1954 extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
1955 extern int fsync_bdev(struct block_device *); 1955 extern int fsync_bdev(struct block_device *);
1956 #else 1956 #else
/*
 * !CONFIG_BLOCK stand-ins: the void helpers do nothing, the int helpers
 * return 0 and freeze_bdev() returns NULL.
 */
static inline void bd_forget(struct inode *inode)
{
}

static inline int sync_blockdev(struct block_device *bdev)
{
	return 0;
}

static inline void invalidate_bdev(struct block_device *bdev)
{
}

static inline struct super_block *freeze_bdev(struct block_device *sb)
{
	return NULL;
}

static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb)
{
	return 0;
}
1970 #endif 1970 #endif
extern int sync_filesystem(struct super_block *);

/* file_operations tables defined outside this header. */
extern const struct file_operations def_blk_fops;
extern const struct file_operations def_chr_fops;
extern const struct file_operations bad_sock_fops;
extern const struct file_operations def_fifo_fops;
1976 #ifdef CONFIG_BLOCK 1976 #ifdef CONFIG_BLOCK
1977 extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); 1977 extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long);
1978 extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); 1978 extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
1979 extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); 1979 extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
1980 extern int blkdev_get(struct block_device *, fmode_t); 1980 extern int blkdev_get(struct block_device *, fmode_t);
1981 extern int blkdev_put(struct block_device *, fmode_t); 1981 extern int blkdev_put(struct block_device *, fmode_t);
1982 extern int bd_claim(struct block_device *, void *); 1982 extern int bd_claim(struct block_device *, void *);
1983 extern void bd_release(struct block_device *); 1983 extern void bd_release(struct block_device *);
1984 #ifdef CONFIG_SYSFS 1984 #ifdef CONFIG_SYSFS
1985 extern int bd_claim_by_disk(struct block_device *, void *, struct gendisk *); 1985 extern int bd_claim_by_disk(struct block_device *, void *, struct gendisk *);
1986 extern void bd_release_from_disk(struct block_device *, struct gendisk *); 1986 extern void bd_release_from_disk(struct block_device *, struct gendisk *);
1987 #else 1987 #else
1988 #define bd_claim_by_disk(bdev, holder, disk) bd_claim(bdev, holder) 1988 #define bd_claim_by_disk(bdev, holder, disk) bd_claim(bdev, holder)
1989 #define bd_release_from_disk(bdev, disk) bd_release(bdev) 1989 #define bd_release_from_disk(bdev, disk) bd_release(bdev)
1990 #endif 1990 #endif
1991 #endif 1991 #endif
1992 1992
/* fs/char_dev.c */
#define CHRDEV_MAJOR_HASH_SIZE	255

/* Character device number ranges. */
extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *);
extern int register_chrdev_region(dev_t, unsigned, const char *);
extern void unregister_chrdev_region(dev_t, unsigned);

/* Registration by major plus a minor range; see register_chrdev(). */
extern int __register_chrdev(unsigned int major, unsigned int baseminor,
			     unsigned int count, const char *name,
			     const struct file_operations *fops);
extern void __unregister_chrdev(unsigned int major, unsigned int baseminor,
				unsigned int count, const char *name);

extern void chrdev_show(struct seq_file *, off_t);
2004 2004
/*
 * register_chrdev - __register_chrdev() for minors 0..255 of @major
 * @major: passed through unchanged
 * @name:  passed through unchanged
 * @fops:  passed through unchanged
 *
 * Returns whatever __register_chrdev() returns.
 */
static inline int register_chrdev(unsigned int major, const char *name,
				  const struct file_operations *fops)
{
	const unsigned int first_minor = 0;
	const unsigned int minor_count = 256;

	return __register_chrdev(major, first_minor, minor_count, name, fops);
}
2010 2010
/*
 * unregister_chrdev - __unregister_chrdev() for minors 0..255 of @major,
 * i.e. the same range register_chrdev() registers.
 */
static inline void unregister_chrdev(unsigned int major, const char *name)
{
	const unsigned int first_minor = 0;
	const unsigned int minor_count = 256;

	__unregister_chrdev(major, first_minor, minor_count, name);
}
2015 2015
/* fs/block_dev.c */
#define BDEVNAME_SIZE	32	/* Largest string for a blockdev identifier */
#define BDEVT_SIZE	10	/* Largest string for MAJ:MIN for blkdev */

/* Without CONFIG_BLOCK only BLKDEV_MAJOR_HASH_SIZE exists, and it is 0. */
#ifdef CONFIG_BLOCK
#define BLKDEV_MAJOR_HASH_SIZE	255
extern const char *__bdevname(dev_t, char *buffer);
extern const char *bdevname(struct block_device *bdev, char *buffer);
extern struct block_device *lookup_bdev(const char *);
extern struct block_device *open_bdev_exclusive(const char *, fmode_t,
						void *);
extern void close_bdev_exclusive(struct block_device *, fmode_t);
extern void blkdev_show(struct seq_file *, off_t);

#else
#define BLKDEV_MAJOR_HASH_SIZE	0
#endif
2032 2032
2033 extern void init_special_inode(struct inode *, umode_t, dev_t); 2033 extern void init_special_inode(struct inode *, umode_t, dev_t);
2034 2034
2035 /* Invalid inode operations -- fs/bad_inode.c */ 2035 /* Invalid inode operations -- fs/bad_inode.c */
2036 extern void make_bad_inode(struct inode *); 2036 extern void make_bad_inode(struct inode *);
2037 extern int is_bad_inode(struct inode *); 2037 extern int is_bad_inode(struct inode *);
2038 2038
2039 extern const struct file_operations read_pipefifo_fops; 2039 extern const struct file_operations read_pipefifo_fops;
2040 extern const struct file_operations write_pipefifo_fops; 2040 extern const struct file_operations write_pipefifo_fops;
2041 extern const struct file_operations rdwr_pipefifo_fops; 2041 extern const struct file_operations rdwr_pipefifo_fops;
2042 2042
2043 extern int fs_may_remount_ro(struct super_block *); 2043 extern int fs_may_remount_ro(struct super_block *);
2044 2044
2045 #ifdef CONFIG_BLOCK 2045 #ifdef CONFIG_BLOCK
/*
 * bio_rw(): READ, READA, or WRITE - bi_rw masked with RW_MASK | RWA_MASK.
 */
#define bio_rw(bio)		((bio)->bi_rw & (RW_MASK | RWA_MASK))

/*
 * bio_data_dir(): data direction, READ or WRITE - bit 0 of bi_rw.
 */
#define bio_data_dir(bio)	((bio)->bi_rw & 1)
2055 2055
/* Disk change / revalidation prototypes (CONFIG_BLOCK only). */
extern void check_disk_size_change(struct gendisk *disk,
				   struct block_device *bdev);
extern int revalidate_disk(struct gendisk *);
extern int check_disk_change(struct block_device *);
extern int __invalidate_device(struct block_device *);
extern int invalidate_partition(struct gendisk *, int);
2062 #endif 2062 #endif
2063 extern int invalidate_inodes(struct super_block *); 2063 extern int invalidate_inodes(struct super_block *);
2064 unsigned long invalidate_mapping_pages(struct address_space *mapping, 2064 unsigned long invalidate_mapping_pages(struct address_space *mapping,
2065 pgoff_t start, pgoff_t end); 2065 pgoff_t start, pgoff_t end);
2066 2066
2067 static inline void invalidate_remote_inode(struct inode *inode) 2067 static inline void invalidate_remote_inode(struct inode *inode)
2068 { 2068 {
2069 if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 2069 if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
2070 S_ISLNK(inode->i_mode)) 2070 S_ISLNK(inode->i_mode))
2071 invalidate_mapping_pages(inode->i_mapping, 0, -1); 2071 invalidate_mapping_pages(inode->i_mapping, 0, -1);
2072 } 2072 }
2073 extern int invalidate_inode_pages2(struct address_space *mapping); 2073 extern int invalidate_inode_pages2(struct address_space *mapping);
2074 extern int invalidate_inode_pages2_range(struct address_space *mapping, 2074 extern int invalidate_inode_pages2_range(struct address_space *mapping,
2075 pgoff_t start, pgoff_t end); 2075 pgoff_t start, pgoff_t end);
2076 extern int write_inode_now(struct inode *, int); 2076 extern int write_inode_now(struct inode *, int);
2077 extern int filemap_fdatawrite(struct address_space *); 2077 extern int filemap_fdatawrite(struct address_space *);
2078 extern int filemap_flush(struct address_space *); 2078 extern int filemap_flush(struct address_space *);
2079 extern int filemap_fdatawait(struct address_space *); 2079 extern int filemap_fdatawait(struct address_space *);
2080 extern int filemap_fdatawait_range(struct address_space *, loff_t lstart, 2080 extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
2081 loff_t lend); 2081 loff_t lend);
2082 extern int filemap_write_and_wait(struct address_space *mapping); 2082 extern int filemap_write_and_wait(struct address_space *mapping);
2083 extern int filemap_write_and_wait_range(struct address_space *mapping, 2083 extern int filemap_write_and_wait_range(struct address_space *mapping,
2084 loff_t lstart, loff_t lend); 2084 loff_t lstart, loff_t lend);
2085 extern int __filemap_fdatawrite_range(struct address_space *mapping, 2085 extern int __filemap_fdatawrite_range(struct address_space *mapping,
2086 loff_t start, loff_t end, int sync_mode); 2086 loff_t start, loff_t end, int sync_mode);
2087 extern int filemap_fdatawrite_range(struct address_space *mapping, 2087 extern int filemap_fdatawrite_range(struct address_space *mapping,
2088 loff_t start, loff_t end); 2088 loff_t start, loff_t end);
2089 2089
2090 extern int vfs_fsync_range(struct file *file, struct dentry *dentry, 2090 extern int vfs_fsync_range(struct file *file, struct dentry *dentry,
2091 loff_t start, loff_t end, int datasync); 2091 loff_t start, loff_t end, int datasync);
2092 extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync); 2092 extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync);
2093 extern int generic_write_sync(struct file *file, loff_t pos, loff_t count); 2093 extern int generic_write_sync(struct file *file, loff_t pos, loff_t count);
2094 extern void sync_supers(void); 2094 extern void sync_supers(void);
2095 extern void emergency_sync(void); 2095 extern void emergency_sync(void);
2096 extern void emergency_remount(void); 2096 extern void emergency_remount(void);
2097 #ifdef CONFIG_BLOCK 2097 #ifdef CONFIG_BLOCK
2098 extern sector_t bmap(struct inode *, sector_t); 2098 extern sector_t bmap(struct inode *, sector_t);
2099 #endif 2099 #endif
2100 extern int notify_change(struct dentry *, struct iattr *); 2100 extern int notify_change(struct dentry *, struct iattr *);
2101 extern int inode_permission(struct inode *, int); 2101 extern int inode_permission(struct inode *, int);
2102 extern int generic_permission(struct inode *, int, 2102 extern int generic_permission(struct inode *, int,
2103 int (*check_acl)(struct inode *, int)); 2103 int (*check_acl)(struct inode *, int));
2104 2104
2105 static inline bool execute_ok(struct inode *inode) 2105 static inline bool execute_ok(struct inode *inode)
2106 { 2106 {
2107 return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode); 2107 return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode);
2108 } 2108 }
2109 2109
2110 extern int get_write_access(struct inode *); 2110 extern int get_write_access(struct inode *);
2111 extern int deny_write_access(struct file *); 2111 extern int deny_write_access(struct file *);
2112 static inline void put_write_access(struct inode * inode) 2112 static inline void put_write_access(struct inode * inode)
2113 { 2113 {
2114 atomic_dec(&inode->i_writecount); 2114 atomic_dec(&inode->i_writecount);
2115 } 2115 }
2116 static inline void allow_write_access(struct file *file) 2116 static inline void allow_write_access(struct file *file)
2117 { 2117 {
2118 if (file) 2118 if (file)
2119 atomic_inc(&file->f_path.dentry->d_inode->i_writecount); 2119 atomic_inc(&file->f_path.dentry->d_inode->i_writecount);
2120 } 2120 }
2121 extern int do_pipe_flags(int *, int); 2121 extern int do_pipe_flags(int *, int);
2122 extern struct file *create_read_pipe(struct file *f, int flags); 2122 extern struct file *create_read_pipe(struct file *f, int flags);
2123 extern struct file *create_write_pipe(int flags); 2123 extern struct file *create_write_pipe(int flags);
2124 extern void free_write_pipe(struct file *); 2124 extern void free_write_pipe(struct file *);
2125 2125
2126 extern struct file *do_filp_open(int dfd, const char *pathname, 2126 extern struct file *do_filp_open(int dfd, const char *pathname,
2127 int open_flag, int mode, int acc_mode); 2127 int open_flag, int mode, int acc_mode);
2128 extern int may_open(struct path *, int, int); 2128 extern int may_open(struct path *, int, int);
2129 2129
2130 extern int kernel_read(struct file *, loff_t, char *, unsigned long); 2130 extern int kernel_read(struct file *, loff_t, char *, unsigned long);
2131 extern struct file * open_exec(const char *); 2131 extern struct file * open_exec(const char *);
2132 2132
2133 /* fs/dcache.c -- generic fs support functions */ 2133 /* fs/dcache.c -- generic fs support functions */
2134 extern int is_subdir(struct dentry *, struct dentry *); 2134 extern int is_subdir(struct dentry *, struct dentry *);
2135 extern int path_is_under(struct path *, struct path *); 2135 extern int path_is_under(struct path *, struct path *);
2136 extern ino_t find_inode_number(struct dentry *, struct qstr *); 2136 extern ino_t find_inode_number(struct dentry *, struct qstr *);
2137 2137
2138 #include <linux/err.h> 2138 #include <linux/err.h>
2139 2139
2140 /* needed for stackable file system support */ 2140 /* needed for stackable file system support */
2141 extern loff_t default_llseek(struct file *file, loff_t offset, int origin); 2141 extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
2142 2142
2143 extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin); 2143 extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin);
2144 2144
2145 extern int inode_init_always(struct super_block *, struct inode *); 2145 extern int inode_init_always(struct super_block *, struct inode *);
2146 extern void inode_init_once(struct inode *); 2146 extern void inode_init_once(struct inode *);
2147 extern void inode_add_to_lists(struct super_block *, struct inode *); 2147 extern void inode_add_to_lists(struct super_block *, struct inode *);
2148 extern void iput(struct inode *); 2148 extern void iput(struct inode *);
2149 extern struct inode * igrab(struct inode *); 2149 extern struct inode * igrab(struct inode *);
2150 extern ino_t iunique(struct super_block *, ino_t); 2150 extern ino_t iunique(struct super_block *, ino_t);
2151 extern int inode_needs_sync(struct inode *inode); 2151 extern int inode_needs_sync(struct inode *inode);
2152 extern void generic_delete_inode(struct inode *inode); 2152 extern void generic_delete_inode(struct inode *inode);
2153 extern void generic_drop_inode(struct inode *inode); 2153 extern void generic_drop_inode(struct inode *inode);
2154 extern int generic_detach_inode(struct inode *inode); 2154 extern int generic_detach_inode(struct inode *inode);
2155 2155
2156 extern struct inode *ilookup5_nowait(struct super_block *sb, 2156 extern struct inode *ilookup5_nowait(struct super_block *sb,
2157 unsigned long hashval, int (*test)(struct inode *, void *), 2157 unsigned long hashval, int (*test)(struct inode *, void *),
2158 void *data); 2158 void *data);
2159 extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval, 2159 extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
2160 int (*test)(struct inode *, void *), void *data); 2160 int (*test)(struct inode *, void *), void *data);
2161 extern struct inode *ilookup(struct super_block *sb, unsigned long ino); 2161 extern struct inode *ilookup(struct super_block *sb, unsigned long ino);
2162 2162
2163 extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *); 2163 extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *);
2164 extern struct inode * iget_locked(struct super_block *, unsigned long); 2164 extern struct inode * iget_locked(struct super_block *, unsigned long);
2165 extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); 2165 extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *);
2166 extern int insert_inode_locked(struct inode *); 2166 extern int insert_inode_locked(struct inode *);
2167 extern void unlock_new_inode(struct inode *); 2167 extern void unlock_new_inode(struct inode *);
2168 2168
2169 extern void __iget(struct inode * inode); 2169 extern void __iget(struct inode * inode);
2170 extern void iget_failed(struct inode *); 2170 extern void iget_failed(struct inode *);
2171 extern void clear_inode(struct inode *); 2171 extern void clear_inode(struct inode *);
2172 extern void destroy_inode(struct inode *); 2172 extern void destroy_inode(struct inode *);
2173 extern void __destroy_inode(struct inode *); 2173 extern void __destroy_inode(struct inode *);
2174 extern struct inode *new_inode(struct super_block *); 2174 extern struct inode *new_inode(struct super_block *);
2175 extern int should_remove_suid(struct dentry *); 2175 extern int should_remove_suid(struct dentry *);
2176 extern int file_remove_suid(struct file *); 2176 extern int file_remove_suid(struct file *);
2177 2177
2178 extern void __insert_inode_hash(struct inode *, unsigned long hashval); 2178 extern void __insert_inode_hash(struct inode *, unsigned long hashval);
2179 extern void remove_inode_hash(struct inode *); 2179 extern void remove_inode_hash(struct inode *);
2180 static inline void insert_inode_hash(struct inode *inode) { 2180 static inline void insert_inode_hash(struct inode *inode) {
2181 __insert_inode_hash(inode, inode->i_ino); 2181 __insert_inode_hash(inode, inode->i_ino);
2182 } 2182 }
2183 2183
2184 extern void file_move(struct file *f, struct list_head *list); 2184 extern void file_move(struct file *f, struct list_head *list);
2185 extern void file_kill(struct file *f); 2185 extern void file_kill(struct file *f);
2186 #ifdef CONFIG_BLOCK 2186 #ifdef CONFIG_BLOCK
2187 struct bio; 2187 struct bio;
2188 extern void submit_bio(int, struct bio *); 2188 extern void submit_bio(int, struct bio *);
2189 extern int bdev_read_only(struct block_device *); 2189 extern int bdev_read_only(struct block_device *);
2190 #endif 2190 #endif
2191 extern int set_blocksize(struct block_device *, int); 2191 extern int set_blocksize(struct block_device *, int);
2192 extern int sb_set_blocksize(struct super_block *, int); 2192 extern int sb_set_blocksize(struct super_block *, int);
2193 extern int sb_min_blocksize(struct super_block *, int); 2193 extern int sb_min_blocksize(struct super_block *, int);
2194 2194
2195 extern int generic_file_mmap(struct file *, struct vm_area_struct *); 2195 extern int generic_file_mmap(struct file *, struct vm_area_struct *);
2196 extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); 2196 extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
2197 extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); 2197 extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
2198 int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); 2198 int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
2199 extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); 2199 extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
2200 extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, 2200 extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long,
2201 loff_t *); 2201 loff_t *);
2202 extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); 2202 extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
2203 extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, 2203 extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *,
2204 unsigned long *, loff_t, loff_t *, size_t, size_t); 2204 unsigned long *, loff_t, loff_t *, size_t, size_t);
2205 extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, 2205 extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *,
2206 unsigned long, loff_t, loff_t *, size_t, ssize_t); 2206 unsigned long, loff_t, loff_t *, size_t, ssize_t);
2207 extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); 2207 extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
2208 extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); 2208 extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
2209 extern int generic_segment_checks(const struct iovec *iov, 2209 extern int generic_segment_checks(const struct iovec *iov,
2210 unsigned long *nr_segs, size_t *count, int access_flags); 2210 unsigned long *nr_segs, size_t *count, int access_flags);
2211 2211
2212 /* fs/block_dev.c */ 2212 /* fs/block_dev.c */
2213 extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, 2213 extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
2214 unsigned long nr_segs, loff_t pos); 2214 unsigned long nr_segs, loff_t pos);
2215 extern int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync); 2215 extern int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync);
2216 2216
2217 /* fs/splice.c */ 2217 /* fs/splice.c */
2218 extern ssize_t generic_file_splice_read(struct file *, loff_t *, 2218 extern ssize_t generic_file_splice_read(struct file *, loff_t *,
2219 struct pipe_inode_info *, size_t, unsigned int); 2219 struct pipe_inode_info *, size_t, unsigned int);
2220 extern ssize_t default_file_splice_read(struct file *, loff_t *, 2220 extern ssize_t default_file_splice_read(struct file *, loff_t *,
2221 struct pipe_inode_info *, size_t, unsigned int); 2221 struct pipe_inode_info *, size_t, unsigned int);
2222 extern ssize_t generic_file_splice_write(struct pipe_inode_info *, 2222 extern ssize_t generic_file_splice_write(struct pipe_inode_info *,
2223 struct file *, loff_t *, size_t, unsigned int); 2223 struct file *, loff_t *, size_t, unsigned int);
2224 extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, 2224 extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
2225 struct file *out, loff_t *, size_t len, unsigned int flags); 2225 struct file *out, loff_t *, size_t len, unsigned int flags);
2226 extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, 2226 extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
2227 size_t len, unsigned int flags); 2227 size_t len, unsigned int flags);
2228 2228
2229 extern void 2229 extern void
2230 file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); 2230 file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
2231 extern loff_t no_llseek(struct file *file, loff_t offset, int origin); 2231 extern loff_t no_llseek(struct file *file, loff_t offset, int origin);
2232 extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin); 2232 extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin);
2233 extern loff_t generic_file_llseek_unlocked(struct file *file, loff_t offset, 2233 extern loff_t generic_file_llseek_unlocked(struct file *file, loff_t offset,
2234 int origin); 2234 int origin);
2235 extern int generic_file_open(struct inode * inode, struct file * filp); 2235 extern int generic_file_open(struct inode * inode, struct file * filp);
2236 extern int nonseekable_open(struct inode * inode, struct file * filp); 2236 extern int nonseekable_open(struct inode * inode, struct file * filp);
2237 2237
2238 #ifdef CONFIG_FS_XIP 2238 #ifdef CONFIG_FS_XIP
2239 extern ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len, 2239 extern ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len,
2240 loff_t *ppos); 2240 loff_t *ppos);
2241 extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma); 2241 extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma);
2242 extern ssize_t xip_file_write(struct file *filp, const char __user *buf, 2242 extern ssize_t xip_file_write(struct file *filp, const char __user *buf,
2243 size_t len, loff_t *ppos); 2243 size_t len, loff_t *ppos);
2244 extern int xip_truncate_page(struct address_space *mapping, loff_t from); 2244 extern int xip_truncate_page(struct address_space *mapping, loff_t from);
2245 #else 2245 #else
2246 static inline int xip_truncate_page(struct address_space *mapping, loff_t from) 2246 static inline int xip_truncate_page(struct address_space *mapping, loff_t from)
2247 { 2247 {
2248 return 0; 2248 return 0;
2249 } 2249 }
2250 #endif 2250 #endif
2251 2251
2252 #ifdef CONFIG_BLOCK 2252 #ifdef CONFIG_BLOCK
2253 ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, 2253 ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
2254 struct block_device *bdev, const struct iovec *iov, loff_t offset, 2254 struct block_device *bdev, const struct iovec *iov, loff_t offset,
2255 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, 2255 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
2256 int lock_type); 2256 int lock_type);
2257 2257
2258 enum { 2258 enum {
2259 /* need locking between buffered and direct access */ 2259 /* need locking between buffered and direct access */
2260 DIO_LOCKING = 0x01, 2260 DIO_LOCKING = 0x01,
2261 2261
2262 /* filesystem does not support filling holes */ 2262 /* filesystem does not support filling holes */
2263 DIO_SKIP_HOLES = 0x02, 2263 DIO_SKIP_HOLES = 0x02,
2264 }; 2264 };
2265 2265
2266 static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, 2266 static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb,
2267 struct inode *inode, struct block_device *bdev, const struct iovec *iov, 2267 struct inode *inode, struct block_device *bdev, const struct iovec *iov,
2268 loff_t offset, unsigned long nr_segs, get_block_t get_block, 2268 loff_t offset, unsigned long nr_segs, get_block_t get_block,
2269 dio_iodone_t end_io) 2269 dio_iodone_t end_io)
2270 { 2270 {
2271 return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, 2271 return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
2272 nr_segs, get_block, end_io, 2272 nr_segs, get_block, end_io,
2273 DIO_LOCKING | DIO_SKIP_HOLES); 2273 DIO_LOCKING | DIO_SKIP_HOLES);
2274 } 2274 }
2275 2275
2276 static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb, 2276 static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb,
2277 struct inode *inode, struct block_device *bdev, const struct iovec *iov, 2277 struct inode *inode, struct block_device *bdev, const struct iovec *iov,
2278 loff_t offset, unsigned long nr_segs, get_block_t get_block, 2278 loff_t offset, unsigned long nr_segs, get_block_t get_block,
2279 dio_iodone_t end_io) 2279 dio_iodone_t end_io)
2280 { 2280 {
2281 return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, 2281 return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
2282 nr_segs, get_block, end_io, 0); 2282 nr_segs, get_block, end_io, 0);
2283 } 2283 }
2284 #endif 2284 #endif
2285 2285
2286 extern const struct file_operations generic_ro_fops; 2286 extern const struct file_operations generic_ro_fops;
2287 2287
2288 #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) 2288 #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
2289 2289
2290 extern int vfs_readlink(struct dentry *, char __user *, int, const char *); 2290 extern int vfs_readlink(struct dentry *, char __user *, int, const char *);
2291 extern int vfs_follow_link(struct nameidata *, const char *); 2291 extern int vfs_follow_link(struct nameidata *, const char *);
2292 extern int page_readlink(struct dentry *, char __user *, int); 2292 extern int page_readlink(struct dentry *, char __user *, int);
2293 extern void *page_follow_link_light(struct dentry *, struct nameidata *); 2293 extern void *page_follow_link_light(struct dentry *, struct nameidata *);
2294 extern void page_put_link(struct dentry *, struct nameidata *, void *); 2294 extern void page_put_link(struct dentry *, struct nameidata *, void *);
2295 extern int __page_symlink(struct inode *inode, const char *symname, int len, 2295 extern int __page_symlink(struct inode *inode, const char *symname, int len,
2296 int nofs); 2296 int nofs);
2297 extern int page_symlink(struct inode *inode, const char *symname, int len); 2297 extern int page_symlink(struct inode *inode, const char *symname, int len);
2298 extern const struct inode_operations page_symlink_inode_operations; 2298 extern const struct inode_operations page_symlink_inode_operations;
2299 extern int generic_readlink(struct dentry *, char __user *, int); 2299 extern int generic_readlink(struct dentry *, char __user *, int);
2300 extern void generic_fillattr(struct inode *, struct kstat *); 2300 extern void generic_fillattr(struct inode *, struct kstat *);
2301 extern int vfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); 2301 extern int vfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
2302 void __inode_add_bytes(struct inode *inode, loff_t bytes); 2302 void __inode_add_bytes(struct inode *inode, loff_t bytes);
2303 void inode_add_bytes(struct inode *inode, loff_t bytes); 2303 void inode_add_bytes(struct inode *inode, loff_t bytes);
2304 void inode_sub_bytes(struct inode *inode, loff_t bytes); 2304 void inode_sub_bytes(struct inode *inode, loff_t bytes);
2305 loff_t inode_get_bytes(struct inode *inode); 2305 loff_t inode_get_bytes(struct inode *inode);
2306 void inode_set_bytes(struct inode *inode, loff_t bytes); 2306 void inode_set_bytes(struct inode *inode, loff_t bytes);
2307 2307
2308 extern int vfs_readdir(struct file *, filldir_t, void *); 2308 extern int vfs_readdir(struct file *, filldir_t, void *);
2309 2309
2310 extern int vfs_stat(char __user *, struct kstat *); 2310 extern int vfs_stat(char __user *, struct kstat *);
2311 extern int vfs_lstat(char __user *, struct kstat *); 2311 extern int vfs_lstat(char __user *, struct kstat *);
2312 extern int vfs_fstat(unsigned int, struct kstat *); 2312 extern int vfs_fstat(unsigned int, struct kstat *);
2313 extern int vfs_fstatat(int , char __user *, struct kstat *, int); 2313 extern int vfs_fstatat(int , char __user *, struct kstat *, int);
2314 2314
2315 extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, 2315 extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
2316 unsigned long arg); 2316 unsigned long arg);
2317 extern int __generic_block_fiemap(struct inode *inode, 2317 extern int __generic_block_fiemap(struct inode *inode,
2318 struct fiemap_extent_info *fieinfo, u64 start, 2318 struct fiemap_extent_info *fieinfo, u64 start,
2319 u64 len, get_block_t *get_block); 2319 u64 len, get_block_t *get_block);
2320 extern int generic_block_fiemap(struct inode *inode, 2320 extern int generic_block_fiemap(struct inode *inode,
2321 struct fiemap_extent_info *fieinfo, u64 start, 2321 struct fiemap_extent_info *fieinfo, u64 start,
2322 u64 len, get_block_t *get_block); 2322 u64 len, get_block_t *get_block);
2323 2323
2324 extern void get_filesystem(struct file_system_type *fs); 2324 extern void get_filesystem(struct file_system_type *fs);
2325 extern void put_filesystem(struct file_system_type *fs); 2325 extern void put_filesystem(struct file_system_type *fs);
2326 extern struct file_system_type *get_fs_type(const char *name); 2326 extern struct file_system_type *get_fs_type(const char *name);
2327 extern struct super_block *get_super(struct block_device *); 2327 extern struct super_block *get_super(struct block_device *);
2328 extern struct super_block *get_active_super(struct block_device *bdev); 2328 extern struct super_block *get_active_super(struct block_device *bdev);
2329 extern struct super_block *user_get_super(dev_t); 2329 extern struct super_block *user_get_super(dev_t);
2330 extern void drop_super(struct super_block *sb); 2330 extern void drop_super(struct super_block *sb);
2331 2331
2332 extern int dcache_dir_open(struct inode *, struct file *); 2332 extern int dcache_dir_open(struct inode *, struct file *);
2333 extern int dcache_dir_close(struct inode *, struct file *); 2333 extern int dcache_dir_close(struct inode *, struct file *);
2334 extern loff_t dcache_dir_lseek(struct file *, loff_t, int); 2334 extern loff_t dcache_dir_lseek(struct file *, loff_t, int);
2335 extern int dcache_readdir(struct file *, void *, filldir_t); 2335 extern int dcache_readdir(struct file *, void *, filldir_t);
2336 extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *); 2336 extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *);
2337 extern int simple_statfs(struct dentry *, struct kstatfs *); 2337 extern int simple_statfs(struct dentry *, struct kstatfs *);
2338 extern int simple_link(struct dentry *, struct inode *, struct dentry *); 2338 extern int simple_link(struct dentry *, struct inode *, struct dentry *);
2339 extern int simple_unlink(struct inode *, struct dentry *); 2339 extern int simple_unlink(struct inode *, struct dentry *);
2340 extern int simple_rmdir(struct inode *, struct dentry *); 2340 extern int simple_rmdir(struct inode *, struct dentry *);
2341 extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); 2341 extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
2342 extern int simple_sync_file(struct file *, struct dentry *, int); 2342 extern int simple_sync_file(struct file *, struct dentry *, int);
2343 extern int simple_empty(struct dentry *); 2343 extern int simple_empty(struct dentry *);
2344 extern int simple_readpage(struct file *file, struct page *page); 2344 extern int simple_readpage(struct file *file, struct page *page);
2345 extern int simple_write_begin(struct file *file, struct address_space *mapping, 2345 extern int simple_write_begin(struct file *file, struct address_space *mapping,
2346 loff_t pos, unsigned len, unsigned flags, 2346 loff_t pos, unsigned len, unsigned flags,
2347 struct page **pagep, void **fsdata); 2347 struct page **pagep, void **fsdata);
2348 extern int simple_write_end(struct file *file, struct address_space *mapping, 2348 extern int simple_write_end(struct file *file, struct address_space *mapping,
2349 loff_t pos, unsigned len, unsigned copied, 2349 loff_t pos, unsigned len, unsigned copied,
2350 struct page *page, void *fsdata); 2350 struct page *page, void *fsdata);
2351 2351
2352 extern struct dentry *simple_lookup(struct inode *, struct dentry *, struct nameidata *); 2352 extern struct dentry *simple_lookup(struct inode *, struct dentry *, struct nameidata *);
2353 extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); 2353 extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *);
2354 extern const struct file_operations simple_dir_operations; 2354 extern const struct file_operations simple_dir_operations;
2355 extern const struct inode_operations simple_dir_inode_operations; 2355 extern const struct inode_operations simple_dir_inode_operations;
2356 struct tree_descr { char *name; const struct file_operations *ops; int mode; }; 2356 struct tree_descr { char *name; const struct file_operations *ops; int mode; };
2357 struct dentry *d_alloc_name(struct dentry *, const char *); 2357 struct dentry *d_alloc_name(struct dentry *, const char *);
2358 extern int simple_fill_super(struct super_block *, int, struct tree_descr *); 2358 extern int simple_fill_super(struct super_block *, int, struct tree_descr *);
2359 extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count); 2359 extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count);
2360 extern void simple_release_fs(struct vfsmount **mount, int *count); 2360 extern void simple_release_fs(struct vfsmount **mount, int *count);
2361 2361
2362 extern ssize_t simple_read_from_buffer(void __user *to, size_t count, 2362 extern ssize_t simple_read_from_buffer(void __user *to, size_t count,
2363 loff_t *ppos, const void *from, size_t available); 2363 loff_t *ppos, const void *from, size_t available);
2364 2364
2365 extern int simple_fsync(struct file *, struct dentry *, int); 2365 extern int simple_fsync(struct file *, struct dentry *, int);
2366 2366
2367 #ifdef CONFIG_MIGRATION 2367 #ifdef CONFIG_MIGRATION
2368 extern int buffer_migrate_page(struct address_space *, 2368 extern int buffer_migrate_page(struct address_space *,
2369 struct page *, struct page *); 2369 struct page *, struct page *);
2370 #else 2370 #else
2371 #define buffer_migrate_page NULL 2371 #define buffer_migrate_page NULL
2372 #endif 2372 #endif
2373 2373
2374 extern int inode_change_ok(const struct inode *, struct iattr *); 2374 extern int inode_change_ok(const struct inode *, struct iattr *);
2375 extern int inode_newsize_ok(const struct inode *, loff_t offset); 2375 extern int inode_newsize_ok(const struct inode *, loff_t offset);
2376 extern int __must_check inode_setattr(struct inode *, struct iattr *); 2376 extern int __must_check inode_setattr(struct inode *, struct iattr *);
2377 2377
2378 extern void file_update_time(struct file *file); 2378 extern void file_update_time(struct file *file);
2379 2379
2380 extern int generic_show_options(struct seq_file *m, struct vfsmount *mnt); 2380 extern int generic_show_options(struct seq_file *m, struct vfsmount *mnt);
2381 extern void save_mount_options(struct super_block *sb, char *options); 2381 extern void save_mount_options(struct super_block *sb, char *options);
2382 extern void replace_mount_options(struct super_block *sb, char *options); 2382 extern void replace_mount_options(struct super_block *sb, char *options);
2383 2383
2384 static inline ino_t parent_ino(struct dentry *dentry) 2384 static inline ino_t parent_ino(struct dentry *dentry)
2385 { 2385 {
2386 ino_t res; 2386 ino_t res;
2387 2387
2388 spin_lock(&dentry->d_lock); 2388 spin_lock(&dentry->d_lock);
2389 res = dentry->d_parent->d_inode->i_ino; 2389 res = dentry->d_parent->d_inode->i_ino;
2390 spin_unlock(&dentry->d_lock); 2390 spin_unlock(&dentry->d_lock);
2391 return res; 2391 return res;
2392 } 2392 }
2393 2393
2394 /* Transaction based IO helpers */ 2394 /* Transaction based IO helpers */
2395 2395
2396 /* 2396 /*
2397 * An argresp is stored in an allocated page and holds the 2397 * An argresp is stored in an allocated page and holds the
2398 * size of the argument or response, along with its content 2398 * size of the argument or response, along with its content
2399 */ 2399 */
2400 struct simple_transaction_argresp { 2400 struct simple_transaction_argresp {
2401 ssize_t size; 2401 ssize_t size;
2402 char data[0]; 2402 char data[0];
2403 }; 2403 };
2404 2404
2405 #define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp)) 2405 #define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp))
2406 2406
2407 char *simple_transaction_get(struct file *file, const char __user *buf, 2407 char *simple_transaction_get(struct file *file, const char __user *buf,
2408 size_t size); 2408 size_t size);
2409 ssize_t simple_transaction_read(struct file *file, char __user *buf, 2409 ssize_t simple_transaction_read(struct file *file, char __user *buf,
2410 size_t size, loff_t *pos); 2410 size_t size, loff_t *pos);
2411 int simple_transaction_release(struct inode *inode, struct file *file); 2411 int simple_transaction_release(struct inode *inode, struct file *file);
2412 2412
2413 void simple_transaction_set(struct file *file, size_t n); 2413 void simple_transaction_set(struct file *file, size_t n);
2414 2414
/*
 * Simple attribute files
 *
 * Their behaviour is modelled on the attributes found in sysfs:
 *
 * A write sets the value immediately, and the same open file may be
 * written to any number of times.
 *
 * A read builds a buffer from the value, which may then be consumed by
 * several read calls.  Once the attribute has been read completely, no
 * further read calls are possible until the file is opened again.
 *
 * Every attribute is the text representation of a numeric value that
 * is accessed through the get() and set() functions.
 *
 * DEFINE_SIMPLE_ATTRIBUTE() emits an open routine named <__fops>_open,
 * which passes __fmt through __simple_attr_check_format() before calling
 * simple_attr_open(), plus the file_operations table __fops that uses it.
 */
#define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt)		\
static int __fops ## _open(struct inode *inode, struct file *file)	\
{									\
	__simple_attr_check_format(__fmt, 0ull);			\
	return simple_attr_open(inode, file, __get, __set, __fmt);	\
}									\
static const struct file_operations __fops = {				\
	.owner	 = THIS_MODULE,						\
	.open	 = __fops ## _open,					\
	.release = simple_attr_release,					\
	.read	 = simple_attr_read,					\
	.write	 = simple_attr_write,					\
};
2444 2444
/*
 * Intentionally empty.  The printf format attribute makes the compiler
 * check the arguments against the format string; nothing happens at run
 * time.
 */
static inline void __attribute__((format(printf, 1, 2)))
__simple_attr_check_format(const char *fmt, ...)
{
}
2450 2450
2451 int simple_attr_open(struct inode *inode, struct file *file, 2451 int simple_attr_open(struct inode *inode, struct file *file,
2452 int (*get)(void *, u64 *), int (*set)(void *, u64), 2452 int (*get)(void *, u64 *), int (*set)(void *, u64),
2453 const char *fmt); 2453 const char *fmt);
2454 int simple_attr_release(struct inode *inode, struct file *file); 2454 int simple_attr_release(struct inode *inode, struct file *file);
2455 ssize_t simple_attr_read(struct file *file, char __user *buf, 2455 ssize_t simple_attr_read(struct file *file, char __user *buf,
2456 size_t len, loff_t *ppos); 2456 size_t len, loff_t *ppos);
2457 ssize_t simple_attr_write(struct file *file, const char __user *buf, 2457 ssize_t simple_attr_write(struct file *file, const char __user *buf,
2458 size_t len, loff_t *ppos); 2458 size_t len, loff_t *ppos);
2459 2459
2460 struct ctl_table; 2460 struct ctl_table;
2461 int proc_nr_files(struct ctl_table *table, int write, 2461 int proc_nr_files(struct ctl_table *table, int write,
2462 void __user *buffer, size_t *lenp, loff_t *ppos); 2462 void __user *buffer, size_t *lenp, loff_t *ppos);
2463 2463
2464 int __init get_filesystem_list(char *buf); 2464 int __init get_filesystem_list(char *buf);
2465 2465
/*
 * ACC_MODE() looks up the O_ACCMODE bits of x in a four-entry table.
 *
 * OPEN_FMODE() adds one to the open flags and masks the result with
 * O_ACCMODE to form an fmode_t.  The argument is parenthesised so that
 * an expression argument (e.g. "a & b") is incremented as a whole
 * instead of having "+ 1" bind to its last operand.
 */
#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
#define OPEN_FMODE(flag) ((__force fmode_t)(((flag) + 1) & O_ACCMODE))
2468 2468
2469 #endif /* __KERNEL__ */ 2469 #endif /* __KERNEL__ */
1 /* 1 /*
2 * NET An implementation of the SOCKET network access protocol. 2 * NET An implementation of the SOCKET network access protocol.
3 * 3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95 4 * Version: @(#)socket.c 1.1.93 18/02/95
5 * 5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM> 6 * Authors: Orest Zborowski, <obz@Kodak.COM>
7 * Ross Biro 7 * Ross Biro
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 * 9 *
10 * Fixes: 10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in 11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown() 12 * shutdown()
13 * Alan Cox : verify_area() fixes 13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI 14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug 15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very 16 * Alan Cox : Moved a load of checks to the very
17 * top level. 17 * top level.
18 * Alan Cox : Move address structures to/from user 18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers. 19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends. 20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the 21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers). 22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style) 23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line 24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable. 25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic, 26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr. 27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be 28 * Uphoff's max is used as max to be
29 * allowed to allocate. 29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation 30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now. 31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public 32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type 33 * for NetROM and future kernel nfsd type
34 * stuff. 34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics. 35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols. 36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n". 37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls 38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the 39 * for sockets. May have errors at the
40 * moment. 40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above. 41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations, 42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug. 43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0) 44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
45 * Tigran Aivazian : Made listen(2) backlog sanity checks 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
46 * protocol-independent 46 * protocol-independent
47 * 47 *
48 * 48 *
49 * This program is free software; you can redistribute it and/or 49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License 50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version 51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version. 52 * 2 of the License, or (at your option) any later version.
53 * 53 *
54 * 54 *
55 * This module is effectively the top level interface to the BSD socket 55 * This module is effectively the top level interface to the BSD socket
56 * paradigm. 56 * paradigm.
57 * 57 *
58 * Based upon Swansea University Computer Society NET3.039 58 * Based upon Swansea University Computer Society NET3.039
59 */ 59 */
60 60
61 #include <linux/mm.h> 61 #include <linux/mm.h>
62 #include <linux/socket.h> 62 #include <linux/socket.h>
63 #include <linux/file.h> 63 #include <linux/file.h>
64 #include <linux/net.h> 64 #include <linux/net.h>
65 #include <linux/interrupt.h> 65 #include <linux/interrupt.h>
66 #include <linux/thread_info.h> 66 #include <linux/thread_info.h>
67 #include <linux/rcupdate.h> 67 #include <linux/rcupdate.h>
68 #include <linux/netdevice.h> 68 #include <linux/netdevice.h>
69 #include <linux/proc_fs.h> 69 #include <linux/proc_fs.h>
70 #include <linux/seq_file.h> 70 #include <linux/seq_file.h>
71 #include <linux/mutex.h> 71 #include <linux/mutex.h>
72 #include <linux/wanrouter.h> 72 #include <linux/wanrouter.h>
73 #include <linux/if_bridge.h> 73 #include <linux/if_bridge.h>
74 #include <linux/if_frad.h> 74 #include <linux/if_frad.h>
75 #include <linux/if_vlan.h> 75 #include <linux/if_vlan.h>
76 #include <linux/init.h> 76 #include <linux/init.h>
77 #include <linux/poll.h> 77 #include <linux/poll.h>
78 #include <linux/cache.h> 78 #include <linux/cache.h>
79 #include <linux/module.h> 79 #include <linux/module.h>
80 #include <linux/highmem.h> 80 #include <linux/highmem.h>
81 #include <linux/mount.h> 81 #include <linux/mount.h>
82 #include <linux/security.h> 82 #include <linux/security.h>
83 #include <linux/syscalls.h> 83 #include <linux/syscalls.h>
84 #include <linux/compat.h> 84 #include <linux/compat.h>
85 #include <linux/kmod.h> 85 #include <linux/kmod.h>
86 #include <linux/audit.h> 86 #include <linux/audit.h>
87 #include <linux/wireless.h> 87 #include <linux/wireless.h>
88 #include <linux/nsproxy.h> 88 #include <linux/nsproxy.h>
89 #include <linux/magic.h> 89 #include <linux/magic.h>
90 #include <linux/slab.h> 90 #include <linux/slab.h>
91 91
92 #include <asm/uaccess.h> 92 #include <asm/uaccess.h>
93 #include <asm/unistd.h> 93 #include <asm/unistd.h>
94 94
95 #include <net/compat.h> 95 #include <net/compat.h>
96 #include <net/wext.h> 96 #include <net/wext.h>
97 97
98 #include <net/sock.h> 98 #include <net/sock.h>
99 #include <linux/netfilter.h> 99 #include <linux/netfilter.h>
100 100
101 #include <linux/if_tun.h> 101 #include <linux/if_tun.h>
102 #include <linux/ipv6_route.h> 102 #include <linux/ipv6_route.h>
103 #include <linux/route.h> 103 #include <linux/route.h>
104 #include <linux/sockios.h> 104 #include <linux/sockios.h>
105 #include <linux/atalk.h> 105 #include <linux/atalk.h>
106 106
107 static int sock_no_open(struct inode *irrelevant, struct file *dontcare); 107 static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
108 static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, 108 static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
109 unsigned long nr_segs, loff_t pos); 109 unsigned long nr_segs, loff_t pos);
110 static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, 110 static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
111 unsigned long nr_segs, loff_t pos); 111 unsigned long nr_segs, loff_t pos);
112 static int sock_mmap(struct file *file, struct vm_area_struct *vma); 112 static int sock_mmap(struct file *file, struct vm_area_struct *vma);
113 113
114 static int sock_close(struct inode *inode, struct file *file); 114 static int sock_close(struct inode *inode, struct file *file);
115 static unsigned int sock_poll(struct file *file, 115 static unsigned int sock_poll(struct file *file,
116 struct poll_table_struct *wait); 116 struct poll_table_struct *wait);
117 static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 117 static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
118 #ifdef CONFIG_COMPAT 118 #ifdef CONFIG_COMPAT
119 static long compat_sock_ioctl(struct file *file, 119 static long compat_sock_ioctl(struct file *file,
120 unsigned int cmd, unsigned long arg); 120 unsigned int cmd, unsigned long arg);
121 #endif 121 #endif
122 static int sock_fasync(int fd, struct file *filp, int on); 122 static int sock_fasync(int fd, struct file *filp, int on);
123 static ssize_t sock_sendpage(struct file *file, struct page *page, 123 static ssize_t sock_sendpage(struct file *file, struct page *page,
124 int offset, size_t size, loff_t *ppos, int more); 124 int offset, size_t size, loff_t *ppos, int more);
125 static ssize_t sock_splice_read(struct file *file, loff_t *ppos, 125 static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
126 struct pipe_inode_info *pipe, size_t len, 126 struct pipe_inode_info *pipe, size_t len,
127 unsigned int flags); 127 unsigned int flags);
128 128
129 /* 129 /*
130 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear 130 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
131 * in the operation structures but are done directly via the socketcall() multiplexor. 131 * in the operation structures but are done directly via the socketcall() multiplexor.
132 */ 132 */
133 133
134 static const struct file_operations socket_file_ops = { 134 static const struct file_operations socket_file_ops = {
135 .owner = THIS_MODULE, 135 .owner = THIS_MODULE,
136 .llseek = no_llseek, 136 .llseek = no_llseek,
137 .aio_read = sock_aio_read, 137 .aio_read = sock_aio_read,
138 .aio_write = sock_aio_write, 138 .aio_write = sock_aio_write,
139 .poll = sock_poll, 139 .poll = sock_poll,
140 .unlocked_ioctl = sock_ioctl, 140 .unlocked_ioctl = sock_ioctl,
141 #ifdef CONFIG_COMPAT 141 #ifdef CONFIG_COMPAT
142 .compat_ioctl = compat_sock_ioctl, 142 .compat_ioctl = compat_sock_ioctl,
143 #endif 143 #endif
144 .mmap = sock_mmap, 144 .mmap = sock_mmap,
145 .open = sock_no_open, /* special open code to disallow open via /proc */ 145 .open = sock_no_open, /* special open code to disallow open via /proc */
146 .release = sock_close, 146 .release = sock_close,
147 .fasync = sock_fasync, 147 .fasync = sock_fasync,
148 .sendpage = sock_sendpage, 148 .sendpage = sock_sendpage,
149 .splice_write = generic_splice_sendpage, 149 .splice_write = generic_splice_sendpage,
150 .splice_read = sock_splice_read, 150 .splice_read = sock_splice_read,
151 }; 151 };
152 152
153 /* 153 /*
154 * The protocol list. Each protocol is registered in here. 154 * The protocol list. Each protocol is registered in here.
155 */ 155 */
156 156
157 static DEFINE_SPINLOCK(net_family_lock); 157 static DEFINE_SPINLOCK(net_family_lock);
158 static const struct net_proto_family *net_families[NPROTO] __read_mostly; 158 static const struct net_proto_family *net_families[NPROTO] __read_mostly;
159 159
160 /* 160 /*
161 * Statistics counters of the socket lists 161 * Statistics counters of the socket lists
162 */ 162 */
163 163
164 static DEFINE_PER_CPU(int, sockets_in_use) = 0; 164 static DEFINE_PER_CPU(int, sockets_in_use) = 0;
165 165
166 /* 166 /*
167 * Support routines. 167 * Support routines.
168 * Move socket addresses back and forth across the kernel/user 168 * Move socket addresses back and forth across the kernel/user
169 * divide and look after the messy bits. 169 * divide and look after the messy bits.
170 */ 170 */
171 171
/*
 * Sizing notes: 108 for Unix domain, 16 for IP, 16 for IPX, 24 for IPv6,
 * about 80 for AX.25.  Must be at least one bigger than the AF_UNIX size
 * (see net/unix/af_unix.c:unix_mkname()).
 */
#define MAX_SOCK_ADDR	128
180 180
181 /** 181 /**
182 * move_addr_to_kernel - copy a socket address into kernel space 182 * move_addr_to_kernel - copy a socket address into kernel space
183 * @uaddr: Address in user space 183 * @uaddr: Address in user space
184 * @kaddr: Address in kernel space 184 * @kaddr: Address in kernel space
185 * @ulen: Length in user space 185 * @ulen: Length in user space
186 * 186 *
187 * The address is copied into kernel space. If the provided address is 187 * The address is copied into kernel space. If the provided address is
188 * too long an error code of -EINVAL is returned. If the copy gives 188 * too long an error code of -EINVAL is returned. If the copy gives
189 * invalid addresses -EFAULT is returned. On a success 0 is returned. 189 * invalid addresses -EFAULT is returned. On a success 0 is returned.
190 */ 190 */
191 191
192 int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr) 192 int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr)
193 { 193 {
194 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage)) 194 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
195 return -EINVAL; 195 return -EINVAL;
196 if (ulen == 0) 196 if (ulen == 0)
197 return 0; 197 return 0;
198 if (copy_from_user(kaddr, uaddr, ulen)) 198 if (copy_from_user(kaddr, uaddr, ulen))
199 return -EFAULT; 199 return -EFAULT;
200 return audit_sockaddr(ulen, kaddr); 200 return audit_sockaddr(ulen, kaddr);
201 } 201 }
202 202
203 /** 203 /**
204 * move_addr_to_user - copy an address to user space 204 * move_addr_to_user - copy an address to user space
205 * @kaddr: kernel space address 205 * @kaddr: kernel space address
206 * @klen: length of address in kernel 206 * @klen: length of address in kernel
207 * @uaddr: user space address 207 * @uaddr: user space address
208 * @ulen: pointer to user length field 208 * @ulen: pointer to user length field
209 * 209 *
210 * The value pointed to by ulen on entry is the buffer length available. 210 * The value pointed to by ulen on entry is the buffer length available.
211 * This is overwritten with the buffer space used. -EINVAL is returned 211 * This is overwritten with the buffer space used. -EINVAL is returned
212 * if an overlong buffer is specified or a negative buffer size. -EFAULT 212 * if an overlong buffer is specified or a negative buffer size. -EFAULT
213 * is returned if either the buffer or the length field are not 213 * is returned if either the buffer or the length field are not
214 * accessible. 214 * accessible.
215 * After copying the data up to the limit the user specifies, the true 215 * After copying the data up to the limit the user specifies, the true
216 * length of the data is written over the length limit the user 216 * length of the data is written over the length limit the user
217 * specified. Zero is returned for a success. 217 * specified. Zero is returned for a success.
218 */ 218 */
219 219
220 int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr, 220 int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr,
221 int __user *ulen) 221 int __user *ulen)
222 { 222 {
223 int err; 223 int err;
224 int len; 224 int len;
225 225
226 err = get_user(len, ulen); 226 err = get_user(len, ulen);
227 if (err) 227 if (err)
228 return err; 228 return err;
229 if (len > klen) 229 if (len > klen)
230 len = klen; 230 len = klen;
231 if (len < 0 || len > sizeof(struct sockaddr_storage)) 231 if (len < 0 || len > sizeof(struct sockaddr_storage))
232 return -EINVAL; 232 return -EINVAL;
233 if (len) { 233 if (len) {
234 if (audit_sockaddr(klen, kaddr)) 234 if (audit_sockaddr(klen, kaddr))
235 return -ENOMEM; 235 return -ENOMEM;
236 if (copy_to_user(uaddr, kaddr, len)) 236 if (copy_to_user(uaddr, kaddr, len))
237 return -EFAULT; 237 return -EFAULT;
238 } 238 }
239 /* 239 /*
240 * "fromlen shall refer to the value before truncation.." 240 * "fromlen shall refer to the value before truncation.."
241 * 1003.1g 241 * 1003.1g
242 */ 242 */
243 return __put_user(klen, ulen); 243 return __put_user(klen, ulen);
244 } 244 }
245 245
246 static struct kmem_cache *sock_inode_cachep __read_mostly; 246 static struct kmem_cache *sock_inode_cachep __read_mostly;
247 247
248 static struct inode *sock_alloc_inode(struct super_block *sb) 248 static struct inode *sock_alloc_inode(struct super_block *sb)
249 { 249 {
250 struct socket_alloc *ei; 250 struct socket_alloc *ei;
251 251
252 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); 252 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
253 if (!ei) 253 if (!ei)
254 return NULL; 254 return NULL;
255 init_waitqueue_head(&ei->socket.wait); 255 init_waitqueue_head(&ei->socket.wait);
256 256
257 ei->socket.fasync_list = NULL; 257 ei->socket.fasync_list = NULL;
258 ei->socket.state = SS_UNCONNECTED; 258 ei->socket.state = SS_UNCONNECTED;
259 ei->socket.flags = 0; 259 ei->socket.flags = 0;
260 ei->socket.ops = NULL; 260 ei->socket.ops = NULL;
261 ei->socket.sk = NULL; 261 ei->socket.sk = NULL;
262 ei->socket.file = NULL; 262 ei->socket.file = NULL;
263 263
264 return &ei->vfs_inode; 264 return &ei->vfs_inode;
265 } 265 }
266 266
267 static void sock_destroy_inode(struct inode *inode) 267 static void sock_destroy_inode(struct inode *inode)
268 { 268 {
269 kmem_cache_free(sock_inode_cachep, 269 kmem_cache_free(sock_inode_cachep,
270 container_of(inode, struct socket_alloc, vfs_inode)); 270 container_of(inode, struct socket_alloc, vfs_inode));
271 } 271 }
272 272
273 static void init_once(void *foo) 273 static void init_once(void *foo)
274 { 274 {
275 struct socket_alloc *ei = (struct socket_alloc *)foo; 275 struct socket_alloc *ei = (struct socket_alloc *)foo;
276 276
277 inode_init_once(&ei->vfs_inode); 277 inode_init_once(&ei->vfs_inode);
278 } 278 }
279 279
280 static int init_inodecache(void) 280 static int init_inodecache(void)
281 { 281 {
282 sock_inode_cachep = kmem_cache_create("sock_inode_cache", 282 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
283 sizeof(struct socket_alloc), 283 sizeof(struct socket_alloc),
284 0, 284 0,
285 (SLAB_HWCACHE_ALIGN | 285 (SLAB_HWCACHE_ALIGN |
286 SLAB_RECLAIM_ACCOUNT | 286 SLAB_RECLAIM_ACCOUNT |
287 SLAB_MEM_SPREAD), 287 SLAB_MEM_SPREAD),
288 init_once); 288 init_once);
289 if (sock_inode_cachep == NULL) 289 if (sock_inode_cachep == NULL)
290 return -ENOMEM; 290 return -ENOMEM;
291 return 0; 291 return 0;
292 } 292 }
293 293
294 static const struct super_operations sockfs_ops = { 294 static const struct super_operations sockfs_ops = {
295 .alloc_inode = sock_alloc_inode, 295 .alloc_inode = sock_alloc_inode,
296 .destroy_inode =sock_destroy_inode, 296 .destroy_inode =sock_destroy_inode,
297 .statfs = simple_statfs, 297 .statfs = simple_statfs,
298 }; 298 };
299 299
300 static int sockfs_get_sb(struct file_system_type *fs_type, 300 static int sockfs_get_sb(struct file_system_type *fs_type,
301 int flags, const char *dev_name, void *data, 301 int flags, const char *dev_name, void *data,
302 struct vfsmount *mnt) 302 struct vfsmount *mnt)
303 { 303 {
304 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC, 304 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
305 mnt); 305 mnt);
306 } 306 }
307 307
308 static struct vfsmount *sock_mnt __read_mostly; 308 static struct vfsmount *sock_mnt __read_mostly;
309 309
310 static struct file_system_type sock_fs_type = { 310 static struct file_system_type sock_fs_type = {
311 .name = "sockfs", 311 .name = "sockfs",
312 .get_sb = sockfs_get_sb, 312 .get_sb = sockfs_get_sb,
313 .kill_sb = kill_anon_super, 313 .kill_sb = kill_anon_super,
314 }; 314 };
315 315
316 /* 316 /*
317 * sockfs_dname() is called from d_path(). 317 * sockfs_dname() is called from d_path().
318 */ 318 */
319 static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen) 319 static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
320 { 320 {
321 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]", 321 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
322 dentry->d_inode->i_ino); 322 dentry->d_inode->i_ino);
323 } 323 }
324 324
325 static const struct dentry_operations sockfs_dentry_operations = { 325 static const struct dentry_operations sockfs_dentry_operations = {
326 .d_dname = sockfs_dname, 326 .d_dname = sockfs_dname,
327 }; 327 };
328 328
329 /* 329 /*
330 * Obtains the first available file descriptor and sets it up for use. 330 * Obtains the first available file descriptor and sets it up for use.
331 * 331 *
332 * These functions create file structures and maps them to fd space 332 * These functions create file structures and maps them to fd space
333 * of the current process. On success it returns file descriptor 333 * of the current process. On success it returns file descriptor
334 * and file struct implicitly stored in sock->file. 334 * and file struct implicitly stored in sock->file.
335 * Note that another thread may close file descriptor before we return 335 * Note that another thread may close file descriptor before we return
336 * from this function. We use the fact that now we do not refer 336 * from this function. We use the fact that now we do not refer
337 * to socket after mapping. If one day we will need it, this 337 * to socket after mapping. If one day we will need it, this
338 * function will increment ref. count on file by 1. 338 * function will increment ref. count on file by 1.
339 * 339 *
340 * In any case returned fd MAY BE not valid! 340 * In any case returned fd MAY BE not valid!
341 * This race condition is unavoidable 341 * This race condition is unavoidable
342 * with shared fd spaces, we cannot solve it inside kernel, 342 * with shared fd spaces, we cannot solve it inside kernel,
343 * but we take care of internal coherence yet. 343 * but we take care of internal coherence yet.
344 */ 344 */
345 345
346 static int sock_alloc_file(struct socket *sock, struct file **f, int flags) 346 static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
347 { 347 {
348 struct qstr name = { .name = "" }; 348 struct qstr name = { .name = "" };
349 struct path path; 349 struct path path;
350 struct file *file; 350 struct file *file;
351 int fd; 351 int fd;
352 352
353 fd = get_unused_fd_flags(flags); 353 fd = get_unused_fd_flags(flags);
354 if (unlikely(fd < 0)) 354 if (unlikely(fd < 0))
355 return fd; 355 return fd;
356 356
357 path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); 357 path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
358 if (unlikely(!path.dentry)) { 358 if (unlikely(!path.dentry)) {
359 put_unused_fd(fd); 359 put_unused_fd(fd);
360 return -ENOMEM; 360 return -ENOMEM;
361 } 361 }
362 path.mnt = mntget(sock_mnt); 362 path.mnt = mntget(sock_mnt);
363 363
364 path.dentry->d_op = &sockfs_dentry_operations; 364 path.dentry->d_op = &sockfs_dentry_operations;
365 d_instantiate(path.dentry, SOCK_INODE(sock)); 365 d_instantiate(path.dentry, SOCK_INODE(sock));
366 SOCK_INODE(sock)->i_fop = &socket_file_ops; 366 SOCK_INODE(sock)->i_fop = &socket_file_ops;
367 367
368 file = alloc_file(&path, FMODE_READ | FMODE_WRITE, 368 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
369 &socket_file_ops); 369 &socket_file_ops);
370 if (unlikely(!file)) { 370 if (unlikely(!file)) {
371 /* drop dentry, keep inode */ 371 /* drop dentry, keep inode */
372 atomic_inc(&path.dentry->d_inode->i_count); 372 atomic_inc(&path.dentry->d_inode->i_count);
373 path_put(&path); 373 path_put(&path);
374 put_unused_fd(fd); 374 put_unused_fd(fd);
375 return -ENFILE; 375 return -ENFILE;
376 } 376 }
377 377
378 sock->file = file; 378 sock->file = file;
379 file->f_flags = O_RDWR | (flags & O_NONBLOCK); 379 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
380 file->f_pos = 0; 380 file->f_pos = 0;
381 file->private_data = sock; 381 file->private_data = sock;
382 382
383 *f = file; 383 *f = file;
384 return fd; 384 return fd;
385 } 385 }
386 386
/*
 * Allocate a descriptor and file for @sock and, if that worked, install
 * the file into the descriptor table.  Returns the descriptor, or the
 * negative error from sock_alloc_file().
 */
int sock_map_fd(struct socket *sock, int flags)
{
	struct file *newfile;
	int fd;

	fd = sock_alloc_file(sock, &newfile, flags);
	if (unlikely(fd < 0))
		return fd;

	fd_install(fd, newfile);
	return fd;
}
397 397
398 static struct socket *sock_from_file(struct file *file, int *err) 398 static struct socket *sock_from_file(struct file *file, int *err)
399 { 399 {
400 if (file->f_op == &socket_file_ops) 400 if (file->f_op == &socket_file_ops)
401 return file->private_data; /* set in sock_map_fd */ 401 return file->private_data; /* set in sock_map_fd */
402 402
403 *err = -ENOTSOCK; 403 *err = -ENOTSOCK;
404 return NULL; 404 return NULL;
405 } 405 }
406 406
407 /** 407 /**
408 * sockfd_lookup - Go from a file number to its socket slot 408 * sockfd_lookup - Go from a file number to its socket slot
409 * @fd: file handle 409 * @fd: file handle
410 * @err: pointer to an error code return 410 * @err: pointer to an error code return
411 * 411 *
412 * The file handle passed in is locked and the socket it is bound 412 * The file handle passed in is locked and the socket it is bound
413 * too is returned. If an error occurs the err pointer is overwritten 413 * too is returned. If an error occurs the err pointer is overwritten
414 * with a negative errno code and NULL is returned. The function checks 414 * with a negative errno code and NULL is returned. The function checks
415 * for both invalid handles and passing a handle which is not a socket. 415 * for both invalid handles and passing a handle which is not a socket.
416 * 416 *
417 * On a success the socket object pointer is returned. 417 * On a success the socket object pointer is returned.
418 */ 418 */
419 419
420 struct socket *sockfd_lookup(int fd, int *err) 420 struct socket *sockfd_lookup(int fd, int *err)
421 { 421 {
422 struct file *file; 422 struct file *file;
423 struct socket *sock; 423 struct socket *sock;
424 424
425 file = fget(fd); 425 file = fget(fd);
426 if (!file) { 426 if (!file) {
427 *err = -EBADF; 427 *err = -EBADF;
428 return NULL; 428 return NULL;
429 } 429 }
430 430
431 sock = sock_from_file(file, err); 431 sock = sock_from_file(file, err);
432 if (!sock) 432 if (!sock)
433 fput(file); 433 fput(file);
434 return sock; 434 return sock;
435 } 435 }
436 436
437 static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) 437 static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
438 { 438 {
439 struct file *file; 439 struct file *file;
440 struct socket *sock; 440 struct socket *sock;
441 441
442 *err = -EBADF; 442 *err = -EBADF;
443 file = fget_light(fd, fput_needed); 443 file = fget_light(fd, fput_needed);
444 if (file) { 444 if (file) {
445 sock = sock_from_file(file, err); 445 sock = sock_from_file(file, err);
446 if (sock) 446 if (sock)
447 return sock; 447 return sock;
448 fput_light(file, *fput_needed); 448 fput_light(file, *fput_needed);
449 } 449 }
450 return NULL; 450 return NULL;
451 } 451 }
452 452
453 /** 453 /**
454 * sock_alloc - allocate a socket 454 * sock_alloc - allocate a socket
455 * 455 *
456 * Allocate a new inode and socket object. The two are bound together 456 * Allocate a new inode and socket object. The two are bound together
457 * and initialised. The socket is then returned. If we are out of inodes 457 * and initialised. The socket is then returned. If we are out of inodes
458 * NULL is returned. 458 * NULL is returned.
459 */ 459 */
460 460
461 static struct socket *sock_alloc(void) 461 static struct socket *sock_alloc(void)
462 { 462 {
463 struct inode *inode; 463 struct inode *inode;
464 struct socket *sock; 464 struct socket *sock;
465 465
466 inode = new_inode(sock_mnt->mnt_sb); 466 inode = new_inode(sock_mnt->mnt_sb);
467 if (!inode) 467 if (!inode)
468 return NULL; 468 return NULL;
469 469
470 sock = SOCKET_I(inode); 470 sock = SOCKET_I(inode);
471 471
472 kmemcheck_annotate_bitfield(sock, type); 472 kmemcheck_annotate_bitfield(sock, type);
473 inode->i_mode = S_IFSOCK | S_IRWXUGO; 473 inode->i_mode = S_IFSOCK | S_IRWXUGO;
474 inode->i_uid = current_fsuid(); 474 inode->i_uid = current_fsuid();
475 inode->i_gid = current_fsgid(); 475 inode->i_gid = current_fsgid();
476 476
477 percpu_add(sockets_in_use, 1); 477 percpu_add(sockets_in_use, 1);
478 return sock; 478 return sock;
479 } 479 }
480 480
/*
 *	In theory you can't get an open on this inode, but /proc provides
 *	a back door. Remember to keep it shut otherwise you'll let the
 *	creepy crawlies in.
 *
 *	Both arguments are ignored; every open attempt is refused with
 *	-ENXIO.
 */

static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
{
	const int refused = -ENXIO;

	return refused;
}
491 491
492 const struct file_operations bad_sock_fops = { 492 const struct file_operations bad_sock_fops = {
493 .owner = THIS_MODULE, 493 .owner = THIS_MODULE,
494 .open = sock_no_open, 494 .open = sock_no_open,
495 }; 495 };
496 496
497 /** 497 /**
498 * sock_release - close a socket 498 * sock_release - close a socket
499 * @sock: socket to close 499 * @sock: socket to close
500 * 500 *
501 * The socket is released from the protocol stack if it has a release 501 * The socket is released from the protocol stack if it has a release
502 * callback, and the inode is then released if the socket is bound to 502 * callback, and the inode is then released if the socket is bound to
503 * an inode not a file. 503 * an inode not a file.
504 */ 504 */
505 505
506 void sock_release(struct socket *sock) 506 void sock_release(struct socket *sock)
507 { 507 {
508 if (sock->ops) { 508 if (sock->ops) {
509 struct module *owner = sock->ops->owner; 509 struct module *owner = sock->ops->owner;
510 510
511 sock->ops->release(sock); 511 sock->ops->release(sock);
512 sock->ops = NULL; 512 sock->ops = NULL;
513 module_put(owner); 513 module_put(owner);
514 } 514 }
515 515
516 if (sock->fasync_list) 516 if (sock->fasync_list)
517 printk(KERN_ERR "sock_release: fasync list not empty!\n"); 517 printk(KERN_ERR "sock_release: fasync list not empty!\n");
518 518
519 percpu_sub(sockets_in_use, 1); 519 percpu_sub(sockets_in_use, 1);
520 if (!sock->file) { 520 if (!sock->file) {
521 iput(SOCK_INODE(sock)); 521 iput(SOCK_INODE(sock));
522 return; 522 return;
523 } 523 }
524 sock->file = NULL; 524 sock->file = NULL;
525 } 525 }
526 526
527 int sock_tx_timestamp(struct msghdr *msg, struct sock *sk, 527 int sock_tx_timestamp(struct msghdr *msg, struct sock *sk,
528 union skb_shared_tx *shtx) 528 union skb_shared_tx *shtx)
529 { 529 {
530 shtx->flags = 0; 530 shtx->flags = 0;
531 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) 531 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
532 shtx->hardware = 1; 532 shtx->hardware = 1;
533 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE)) 533 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
534 shtx->software = 1; 534 shtx->software = 1;
535 return 0; 535 return 0;
536 } 536 }
537 EXPORT_SYMBOL(sock_tx_timestamp); 537 EXPORT_SYMBOL(sock_tx_timestamp);
538 538
539 static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, 539 static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
540 struct msghdr *msg, size_t size) 540 struct msghdr *msg, size_t size)
541 { 541 {
542 struct sock_iocb *si = kiocb_to_siocb(iocb); 542 struct sock_iocb *si = kiocb_to_siocb(iocb);
543 int err; 543 int err;
544 544
545 si->sock = sock; 545 si->sock = sock;
546 si->scm = NULL; 546 si->scm = NULL;
547 si->msg = msg; 547 si->msg = msg;
548 si->size = size; 548 si->size = size;
549 549
550 err = security_socket_sendmsg(sock, msg, size); 550 err = security_socket_sendmsg(sock, msg, size);
551 if (err) 551 if (err)
552 return err; 552 return err;
553 553
554 return sock->ops->sendmsg(iocb, sock, msg, size); 554 return sock->ops->sendmsg(iocb, sock, msg, size);
555 } 555 }
556 556
557 int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) 557 int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
558 { 558 {
559 struct kiocb iocb; 559 struct kiocb iocb;
560 struct sock_iocb siocb; 560 struct sock_iocb siocb;
561 int ret; 561 int ret;
562 562
563 init_sync_kiocb(&iocb, NULL); 563 init_sync_kiocb(&iocb, NULL);
564 iocb.private = &siocb; 564 iocb.private = &siocb;
565 ret = __sock_sendmsg(&iocb, sock, msg, size); 565 ret = __sock_sendmsg(&iocb, sock, msg, size);
566 if (-EIOCBQUEUED == ret) 566 if (-EIOCBQUEUED == ret)
567 ret = wait_on_sync_kiocb(&iocb); 567 ret = wait_on_sync_kiocb(&iocb);
568 return ret; 568 return ret;
569 } 569 }
570 570
571 int kernel_sendmsg(struct socket *sock, struct msghdr *msg, 571 int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
572 struct kvec *vec, size_t num, size_t size) 572 struct kvec *vec, size_t num, size_t size)
573 { 573 {
574 mm_segment_t oldfs = get_fs(); 574 mm_segment_t oldfs = get_fs();
575 int result; 575 int result;
576 576
577 set_fs(KERNEL_DS); 577 set_fs(KERNEL_DS);
578 /* 578 /*
579 * the following is safe, since for compiler definitions of kvec and 579 * the following is safe, since for compiler definitions of kvec and
580 * iovec are identical, yielding the same in-core layout and alignment 580 * iovec are identical, yielding the same in-core layout and alignment
581 */ 581 */
582 msg->msg_iov = (struct iovec *)vec; 582 msg->msg_iov = (struct iovec *)vec;
583 msg->msg_iovlen = num; 583 msg->msg_iovlen = num;
584 result = sock_sendmsg(sock, msg, size); 584 result = sock_sendmsg(sock, msg, size);
585 set_fs(oldfs); 585 set_fs(oldfs);
586 return result; 586 return result;
587 } 587 }
588 588
589 static int ktime2ts(ktime_t kt, struct timespec *ts) 589 static int ktime2ts(ktime_t kt, struct timespec *ts)
590 { 590 {
591 if (kt.tv64) { 591 if (kt.tv64) {
592 *ts = ktime_to_timespec(kt); 592 *ts = ktime_to_timespec(kt);
593 return 1; 593 return 1;
594 } else { 594 } else {
595 return 0; 595 return 0;
596 } 596 }
597 } 597 }
598 598
599 /* 599 /*
600 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) 600 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
601 */ 601 */
602 void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, 602 void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
603 struct sk_buff *skb) 603 struct sk_buff *skb)
604 { 604 {
605 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP); 605 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
606 struct timespec ts[3]; 606 struct timespec ts[3];
607 int empty = 1; 607 int empty = 1;
608 struct skb_shared_hwtstamps *shhwtstamps = 608 struct skb_shared_hwtstamps *shhwtstamps =
609 skb_hwtstamps(skb); 609 skb_hwtstamps(skb);
610 610
611 /* Race occurred between timestamp enabling and packet 611 /* Race occurred between timestamp enabling and packet
612 receiving. Fill in the current time for now. */ 612 receiving. Fill in the current time for now. */
613 if (need_software_tstamp && skb->tstamp.tv64 == 0) 613 if (need_software_tstamp && skb->tstamp.tv64 == 0)
614 __net_timestamp(skb); 614 __net_timestamp(skb);
615 615
616 if (need_software_tstamp) { 616 if (need_software_tstamp) {
617 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) { 617 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
618 struct timeval tv; 618 struct timeval tv;
619 skb_get_timestamp(skb, &tv); 619 skb_get_timestamp(skb, &tv);
620 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, 620 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
621 sizeof(tv), &tv); 621 sizeof(tv), &tv);
622 } else { 622 } else {
623 skb_get_timestampns(skb, &ts[0]); 623 skb_get_timestampns(skb, &ts[0]);
624 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, 624 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
625 sizeof(ts[0]), &ts[0]); 625 sizeof(ts[0]), &ts[0]);
626 } 626 }
627 } 627 }
628 628
629 629
630 memset(ts, 0, sizeof(ts)); 630 memset(ts, 0, sizeof(ts));
631 if (skb->tstamp.tv64 && 631 if (skb->tstamp.tv64 &&
632 sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) { 632 sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) {
633 skb_get_timestampns(skb, ts + 0); 633 skb_get_timestampns(skb, ts + 0);
634 empty = 0; 634 empty = 0;
635 } 635 }
636 if (shhwtstamps) { 636 if (shhwtstamps) {
637 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) && 637 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
638 ktime2ts(shhwtstamps->syststamp, ts + 1)) 638 ktime2ts(shhwtstamps->syststamp, ts + 1))
639 empty = 0; 639 empty = 0;
640 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) && 640 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
641 ktime2ts(shhwtstamps->hwtstamp, ts + 2)) 641 ktime2ts(shhwtstamps->hwtstamp, ts + 2))
642 empty = 0; 642 empty = 0;
643 } 643 }
644 if (!empty) 644 if (!empty)
645 put_cmsg(msg, SOL_SOCKET, 645 put_cmsg(msg, SOL_SOCKET,
646 SCM_TIMESTAMPING, sizeof(ts), &ts); 646 SCM_TIMESTAMPING, sizeof(ts), &ts);
647 } 647 }
648 648
649 EXPORT_SYMBOL_GPL(__sock_recv_timestamp); 649 EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
650 650
651 inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) 651 inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
652 { 652 {
653 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount) 653 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
654 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL, 654 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
655 sizeof(__u32), &skb->dropcount); 655 sizeof(__u32), &skb->dropcount);
656 } 656 }
657 657
/*
 * Convenience wrapper: emit the timestamp control message(s) first and
 * the drop-count control message second.
 */
void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			    struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops);
665 665
666 static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock, 666 static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
667 struct msghdr *msg, size_t size, int flags) 667 struct msghdr *msg, size_t size, int flags)
668 { 668 {
669 struct sock_iocb *si = kiocb_to_siocb(iocb); 669 struct sock_iocb *si = kiocb_to_siocb(iocb);
670 670
671 si->sock = sock; 671 si->sock = sock;
672 si->scm = NULL; 672 si->scm = NULL;
673 si->msg = msg; 673 si->msg = msg;
674 si->size = size; 674 si->size = size;
675 si->flags = flags; 675 si->flags = flags;
676 676
677 return sock->ops->recvmsg(iocb, sock, msg, size, flags); 677 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
678 } 678 }
679 679
680 static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, 680 static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
681 struct msghdr *msg, size_t size, int flags) 681 struct msghdr *msg, size_t size, int flags)
682 { 682 {
683 int err = security_socket_recvmsg(sock, msg, size, flags); 683 int err = security_socket_recvmsg(sock, msg, size, flags);
684 684
685 return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags); 685 return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
686 } 686 }
687 687
688 int sock_recvmsg(struct socket *sock, struct msghdr *msg, 688 int sock_recvmsg(struct socket *sock, struct msghdr *msg,
689 size_t size, int flags) 689 size_t size, int flags)
690 { 690 {
691 struct kiocb iocb; 691 struct kiocb iocb;
692 struct sock_iocb siocb; 692 struct sock_iocb siocb;
693 int ret; 693 int ret;
694 694
695 init_sync_kiocb(&iocb, NULL); 695 init_sync_kiocb(&iocb, NULL);
696 iocb.private = &siocb; 696 iocb.private = &siocb;
697 ret = __sock_recvmsg(&iocb, sock, msg, size, flags); 697 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
698 if (-EIOCBQUEUED == ret) 698 if (-EIOCBQUEUED == ret)
699 ret = wait_on_sync_kiocb(&iocb); 699 ret = wait_on_sync_kiocb(&iocb);
700 return ret; 700 return ret;
701 } 701 }
702 702
703 static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, 703 static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
704 size_t size, int flags) 704 size_t size, int flags)
705 { 705 {
706 struct kiocb iocb; 706 struct kiocb iocb;
707 struct sock_iocb siocb; 707 struct sock_iocb siocb;
708 int ret; 708 int ret;
709 709
710 init_sync_kiocb(&iocb, NULL); 710 init_sync_kiocb(&iocb, NULL);
711 iocb.private = &siocb; 711 iocb.private = &siocb;
712 ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags); 712 ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
713 if (-EIOCBQUEUED == ret) 713 if (-EIOCBQUEUED == ret)
714 ret = wait_on_sync_kiocb(&iocb); 714 ret = wait_on_sync_kiocb(&iocb);
715 return ret; 715 return ret;
716 } 716 }
717 717
718 int kernel_recvmsg(struct socket *sock, struct msghdr *msg, 718 int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
719 struct kvec *vec, size_t num, size_t size, int flags) 719 struct kvec *vec, size_t num, size_t size, int flags)
720 { 720 {
721 mm_segment_t oldfs = get_fs(); 721 mm_segment_t oldfs = get_fs();
722 int result; 722 int result;
723 723
724 set_fs(KERNEL_DS); 724 set_fs(KERNEL_DS);
725 /* 725 /*
726 * the following is safe, since for compiler definitions of kvec and 726 * the following is safe, since for compiler definitions of kvec and
727 * iovec are identical, yielding the same in-core layout and alignment 727 * iovec are identical, yielding the same in-core layout and alignment
728 */ 728 */
729 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num; 729 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
730 result = sock_recvmsg(sock, msg, size, flags); 730 result = sock_recvmsg(sock, msg, size, flags);
731 set_fs(oldfs); 731 set_fs(oldfs);
732 return result; 732 return result;
733 } 733 }
734 734
735 static void sock_aio_dtor(struct kiocb *iocb) 735 static void sock_aio_dtor(struct kiocb *iocb)
736 { 736 {
737 kfree(iocb->private); 737 kfree(iocb->private);
738 } 738 }
739 739
740 static ssize_t sock_sendpage(struct file *file, struct page *page, 740 static ssize_t sock_sendpage(struct file *file, struct page *page,
741 int offset, size_t size, loff_t *ppos, int more) 741 int offset, size_t size, loff_t *ppos, int more)
742 { 742 {
743 struct socket *sock; 743 struct socket *sock;
744 int flags; 744 int flags;
745 745
746 sock = file->private_data; 746 sock = file->private_data;
747 747
748 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT; 748 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
749 if (more) 749 if (more)
750 flags |= MSG_MORE; 750 flags |= MSG_MORE;
751 751
752 return kernel_sendpage(sock, page, offset, size, flags); 752 return kernel_sendpage(sock, page, offset, size, flags);
753 } 753 }
754 754
755 static ssize_t sock_splice_read(struct file *file, loff_t *ppos, 755 static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
756 struct pipe_inode_info *pipe, size_t len, 756 struct pipe_inode_info *pipe, size_t len,
757 unsigned int flags) 757 unsigned int flags)
758 { 758 {
759 struct socket *sock = file->private_data; 759 struct socket *sock = file->private_data;
760 760
761 if (unlikely(!sock->ops->splice_read)) 761 if (unlikely(!sock->ops->splice_read))
762 return -EINVAL; 762 return -EINVAL;
763 763
764 return sock->ops->splice_read(sock, ppos, pipe, len, flags); 764 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
765 } 765 }
766 766
767 static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, 767 static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
768 struct sock_iocb *siocb) 768 struct sock_iocb *siocb)
769 { 769 {
770 if (!is_sync_kiocb(iocb)) { 770 if (!is_sync_kiocb(iocb)) {
771 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL); 771 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
772 if (!siocb) 772 if (!siocb)
773 return NULL; 773 return NULL;
774 iocb->ki_dtor = sock_aio_dtor; 774 iocb->ki_dtor = sock_aio_dtor;
775 } 775 }
776 776
777 siocb->kiocb = iocb; 777 siocb->kiocb = iocb;
778 iocb->private = siocb; 778 iocb->private = siocb;
779 return siocb; 779 return siocb;
780 } 780 }
781 781
782 static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, 782 static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
783 struct file *file, const struct iovec *iov, 783 struct file *file, const struct iovec *iov,
784 unsigned long nr_segs) 784 unsigned long nr_segs)
785 { 785 {
786 struct socket *sock = file->private_data; 786 struct socket *sock = file->private_data;
787 size_t size = 0; 787 size_t size = 0;
788 int i; 788 int i;
789 789
790 for (i = 0; i < nr_segs; i++) 790 for (i = 0; i < nr_segs; i++)
791 size += iov[i].iov_len; 791 size += iov[i].iov_len;
792 792
793 msg->msg_name = NULL; 793 msg->msg_name = NULL;
794 msg->msg_namelen = 0; 794 msg->msg_namelen = 0;
795 msg->msg_control = NULL; 795 msg->msg_control = NULL;
796 msg->msg_controllen = 0; 796 msg->msg_controllen = 0;
797 msg->msg_iov = (struct iovec *)iov; 797 msg->msg_iov = (struct iovec *)iov;
798 msg->msg_iovlen = nr_segs; 798 msg->msg_iovlen = nr_segs;
799 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; 799 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
800 800
801 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags); 801 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
802 } 802 }
803 803
804 static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, 804 static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
805 unsigned long nr_segs, loff_t pos) 805 unsigned long nr_segs, loff_t pos)
806 { 806 {
807 struct sock_iocb siocb, *x; 807 struct sock_iocb siocb, *x;
808 808
809 if (pos != 0) 809 if (pos != 0)
810 return -ESPIPE; 810 return -ESPIPE;
811 811
812 if (iocb->ki_left == 0) /* Match SYS5 behaviour */ 812 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
813 return 0; 813 return 0;
814 814
815 815
816 x = alloc_sock_iocb(iocb, &siocb); 816 x = alloc_sock_iocb(iocb, &siocb);
817 if (!x) 817 if (!x)
818 return -ENOMEM; 818 return -ENOMEM;
819 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); 819 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
820 } 820 }
821 821
822 static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, 822 static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
823 struct file *file, const struct iovec *iov, 823 struct file *file, const struct iovec *iov,
824 unsigned long nr_segs) 824 unsigned long nr_segs)
825 { 825 {
826 struct socket *sock = file->private_data; 826 struct socket *sock = file->private_data;
827 size_t size = 0; 827 size_t size = 0;
828 int i; 828 int i;
829 829
830 for (i = 0; i < nr_segs; i++) 830 for (i = 0; i < nr_segs; i++)
831 size += iov[i].iov_len; 831 size += iov[i].iov_len;
832 832
833 msg->msg_name = NULL; 833 msg->msg_name = NULL;
834 msg->msg_namelen = 0; 834 msg->msg_namelen = 0;
835 msg->msg_control = NULL; 835 msg->msg_control = NULL;
836 msg->msg_controllen = 0; 836 msg->msg_controllen = 0;
837 msg->msg_iov = (struct iovec *)iov; 837 msg->msg_iov = (struct iovec *)iov;
838 msg->msg_iovlen = nr_segs; 838 msg->msg_iovlen = nr_segs;
839 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; 839 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
840 if (sock->type == SOCK_SEQPACKET) 840 if (sock->type == SOCK_SEQPACKET)
841 msg->msg_flags |= MSG_EOR; 841 msg->msg_flags |= MSG_EOR;
842 842
843 return __sock_sendmsg(iocb, sock, msg, size); 843 return __sock_sendmsg(iocb, sock, msg, size);
844 } 844 }
845 845
846 static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, 846 static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
847 unsigned long nr_segs, loff_t pos) 847 unsigned long nr_segs, loff_t pos)
848 { 848 {
849 struct sock_iocb siocb, *x; 849 struct sock_iocb siocb, *x;
850 850
851 if (pos != 0) 851 if (pos != 0)
852 return -ESPIPE; 852 return -ESPIPE;
853 853
854 x = alloc_sock_iocb(iocb, &siocb); 854 x = alloc_sock_iocb(iocb, &siocb);
855 if (!x) 855 if (!x)
856 return -ENOMEM; 856 return -ENOMEM;
857 857
858 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); 858 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
859 } 859 }
860 860
861 /* 861 /*
862 * Atomic setting of ioctl hooks to avoid race 862 * Atomic setting of ioctl hooks to avoid race
863 * with module unload. 863 * with module unload.
864 */ 864 */
865 865
866 static DEFINE_MUTEX(br_ioctl_mutex); 866 static DEFINE_MUTEX(br_ioctl_mutex);
867 static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL; 867 static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL;
868 868
869 void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) 869 void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
870 { 870 {
871 mutex_lock(&br_ioctl_mutex); 871 mutex_lock(&br_ioctl_mutex);
872 br_ioctl_hook = hook; 872 br_ioctl_hook = hook;
873 mutex_unlock(&br_ioctl_mutex); 873 mutex_unlock(&br_ioctl_mutex);
874 } 874 }
875 875
876 EXPORT_SYMBOL(brioctl_set); 876 EXPORT_SYMBOL(brioctl_set);
877 877
878 static DEFINE_MUTEX(vlan_ioctl_mutex); 878 static DEFINE_MUTEX(vlan_ioctl_mutex);
879 static int (*vlan_ioctl_hook) (struct net *, void __user *arg); 879 static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
880 880
881 void vlan_ioctl_set(int (*hook) (struct net *, void __user *)) 881 void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
882 { 882 {
883 mutex_lock(&vlan_ioctl_mutex); 883 mutex_lock(&vlan_ioctl_mutex);
884 vlan_ioctl_hook = hook; 884 vlan_ioctl_hook = hook;
885 mutex_unlock(&vlan_ioctl_mutex); 885 mutex_unlock(&vlan_ioctl_mutex);
886 } 886 }
887 887
888 EXPORT_SYMBOL(vlan_ioctl_set); 888 EXPORT_SYMBOL(vlan_ioctl_set);
889 889
890 static DEFINE_MUTEX(dlci_ioctl_mutex); 890 static DEFINE_MUTEX(dlci_ioctl_mutex);
891 static int (*dlci_ioctl_hook) (unsigned int, void __user *); 891 static int (*dlci_ioctl_hook) (unsigned int, void __user *);
892 892
893 void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) 893 void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
894 { 894 {
895 mutex_lock(&dlci_ioctl_mutex); 895 mutex_lock(&dlci_ioctl_mutex);
896 dlci_ioctl_hook = hook; 896 dlci_ioctl_hook = hook;
897 mutex_unlock(&dlci_ioctl_mutex); 897 mutex_unlock(&dlci_ioctl_mutex);
898 } 898 }
899 899
900 EXPORT_SYMBOL(dlci_ioctl_set); 900 EXPORT_SYMBOL(dlci_ioctl_set);
901 901
902 static long sock_do_ioctl(struct net *net, struct socket *sock, 902 static long sock_do_ioctl(struct net *net, struct socket *sock,
903 unsigned int cmd, unsigned long arg) 903 unsigned int cmd, unsigned long arg)
904 { 904 {
905 int err; 905 int err;
906 void __user *argp = (void __user *)arg; 906 void __user *argp = (void __user *)arg;
907 907
908 err = sock->ops->ioctl(sock, cmd, arg); 908 err = sock->ops->ioctl(sock, cmd, arg);
909 909
910 /* 910 /*
911 * If this ioctl is unknown try to hand it down 911 * If this ioctl is unknown try to hand it down
912 * to the NIC driver. 912 * to the NIC driver.
913 */ 913 */
914 if (err == -ENOIOCTLCMD) 914 if (err == -ENOIOCTLCMD)
915 err = dev_ioctl(net, cmd, argp); 915 err = dev_ioctl(net, cmd, argp);
916 916
917 return err; 917 return err;
918 } 918 }
919 919
920 /* 920 /*
921 * With an ioctl, arg may well be a user mode pointer, but we don't know 921 * With an ioctl, arg may well be a user mode pointer, but we don't know
922 * what to do with it - that's up to the protocol still. 922 * what to do with it - that's up to the protocol still.
923 */ 923 */
924 924
925 static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) 925 static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
926 { 926 {
927 struct socket *sock; 927 struct socket *sock;
928 struct sock *sk; 928 struct sock *sk;
929 void __user *argp = (void __user *)arg; 929 void __user *argp = (void __user *)arg;
930 int pid, err; 930 int pid, err;
931 struct net *net; 931 struct net *net;
932 932
933 sock = file->private_data; 933 sock = file->private_data;
934 sk = sock->sk; 934 sk = sock->sk;
935 net = sock_net(sk); 935 net = sock_net(sk);
936 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { 936 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
937 err = dev_ioctl(net, cmd, argp); 937 err = dev_ioctl(net, cmd, argp);
938 } else 938 } else
939 #ifdef CONFIG_WEXT_CORE 939 #ifdef CONFIG_WEXT_CORE
940 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { 940 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
941 err = dev_ioctl(net, cmd, argp); 941 err = dev_ioctl(net, cmd, argp);
942 } else 942 } else
943 #endif 943 #endif
944 switch (cmd) { 944 switch (cmd) {
945 case FIOSETOWN: 945 case FIOSETOWN:
946 case SIOCSPGRP: 946 case SIOCSPGRP:
947 err = -EFAULT; 947 err = -EFAULT;
948 if (get_user(pid, (int __user *)argp)) 948 if (get_user(pid, (int __user *)argp))
949 break; 949 break;
950 err = f_setown(sock->file, pid, 1); 950 err = f_setown(sock->file, pid, 1);
951 break; 951 break;
952 case FIOGETOWN: 952 case FIOGETOWN:
953 case SIOCGPGRP: 953 case SIOCGPGRP:
954 err = put_user(f_getown(sock->file), 954 err = put_user(f_getown(sock->file),
955 (int __user *)argp); 955 (int __user *)argp);
956 break; 956 break;
957 case SIOCGIFBR: 957 case SIOCGIFBR:
958 case SIOCSIFBR: 958 case SIOCSIFBR:
959 case SIOCBRADDBR: 959 case SIOCBRADDBR:
960 case SIOCBRDELBR: 960 case SIOCBRDELBR:
961 err = -ENOPKG; 961 err = -ENOPKG;
962 if (!br_ioctl_hook) 962 if (!br_ioctl_hook)
963 request_module("bridge"); 963 request_module("bridge");
964 964
965 mutex_lock(&br_ioctl_mutex); 965 mutex_lock(&br_ioctl_mutex);
966 if (br_ioctl_hook) 966 if (br_ioctl_hook)
967 err = br_ioctl_hook(net, cmd, argp); 967 err = br_ioctl_hook(net, cmd, argp);
968 mutex_unlock(&br_ioctl_mutex); 968 mutex_unlock(&br_ioctl_mutex);
969 break; 969 break;
970 case SIOCGIFVLAN: 970 case SIOCGIFVLAN:
971 case SIOCSIFVLAN: 971 case SIOCSIFVLAN:
972 err = -ENOPKG; 972 err = -ENOPKG;
973 if (!vlan_ioctl_hook) 973 if (!vlan_ioctl_hook)
974 request_module("8021q"); 974 request_module("8021q");
975 975
976 mutex_lock(&vlan_ioctl_mutex); 976 mutex_lock(&vlan_ioctl_mutex);
977 if (vlan_ioctl_hook) 977 if (vlan_ioctl_hook)
978 err = vlan_ioctl_hook(net, argp); 978 err = vlan_ioctl_hook(net, argp);
979 mutex_unlock(&vlan_ioctl_mutex); 979 mutex_unlock(&vlan_ioctl_mutex);
980 break; 980 break;
981 case SIOCADDDLCI: 981 case SIOCADDDLCI:
982 case SIOCDELDLCI: 982 case SIOCDELDLCI:
983 err = -ENOPKG; 983 err = -ENOPKG;
984 if (!dlci_ioctl_hook) 984 if (!dlci_ioctl_hook)
985 request_module("dlci"); 985 request_module("dlci");
986 986
987 mutex_lock(&dlci_ioctl_mutex); 987 mutex_lock(&dlci_ioctl_mutex);
988 if (dlci_ioctl_hook) 988 if (dlci_ioctl_hook)
989 err = dlci_ioctl_hook(cmd, argp); 989 err = dlci_ioctl_hook(cmd, argp);
990 mutex_unlock(&dlci_ioctl_mutex); 990 mutex_unlock(&dlci_ioctl_mutex);
991 break; 991 break;
992 default: 992 default:
993 err = sock_do_ioctl(net, sock, cmd, arg); 993 err = sock_do_ioctl(net, sock, cmd, arg);
994 break; 994 break;
995 } 995 }
996 return err; 996 return err;
997 } 997 }
998 998
999 int sock_create_lite(int family, int type, int protocol, struct socket **res) 999 int sock_create_lite(int family, int type, int protocol, struct socket **res)
1000 { 1000 {
1001 int err; 1001 int err;
1002 struct socket *sock = NULL; 1002 struct socket *sock = NULL;
1003 1003
1004 err = security_socket_create(family, type, protocol, 1); 1004 err = security_socket_create(family, type, protocol, 1);
1005 if (err) 1005 if (err)
1006 goto out; 1006 goto out;
1007 1007
1008 sock = sock_alloc(); 1008 sock = sock_alloc();
1009 if (!sock) { 1009 if (!sock) {
1010 err = -ENOMEM; 1010 err = -ENOMEM;
1011 goto out; 1011 goto out;
1012 } 1012 }
1013 1013
1014 sock->type = type; 1014 sock->type = type;
1015 err = security_socket_post_create(sock, family, type, protocol, 1); 1015 err = security_socket_post_create(sock, family, type, protocol, 1);
1016 if (err) 1016 if (err)
1017 goto out_release; 1017 goto out_release;
1018 1018
1019 out: 1019 out:
1020 *res = sock; 1020 *res = sock;
1021 return err; 1021 return err;
1022 out_release: 1022 out_release:
1023 sock_release(sock); 1023 sock_release(sock);
1024 sock = NULL; 1024 sock = NULL;
1025 goto out; 1025 goto out;
1026 } 1026 }
1027 1027
1028 /* No kernel lock held - perfect */ 1028 /* No kernel lock held - perfect */
1029 static unsigned int sock_poll(struct file *file, poll_table *wait) 1029 static unsigned int sock_poll(struct file *file, poll_table *wait)
1030 { 1030 {
1031 struct socket *sock; 1031 struct socket *sock;
1032 1032
1033 /* 1033 /*
1034 * We can't return errors to poll, so it's either yes or no. 1034 * We can't return errors to poll, so it's either yes or no.
1035 */ 1035 */
1036 sock = file->private_data; 1036 sock = file->private_data;
1037 return sock->ops->poll(file, sock, wait); 1037 return sock->ops->poll(file, sock, wait);
1038 } 1038 }
1039 1039
1040 static int sock_mmap(struct file *file, struct vm_area_struct *vma) 1040 static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1041 { 1041 {
1042 struct socket *sock = file->private_data; 1042 struct socket *sock = file->private_data;
1043 1043
1044 return sock->ops->mmap(file, sock, vma); 1044 return sock->ops->mmap(file, sock, vma);
1045 } 1045 }
1046 1046
1047 static int sock_close(struct inode *inode, struct file *filp) 1047 static int sock_close(struct inode *inode, struct file *filp)
1048 { 1048 {
1049 /* 1049 /*
1050 * It was possible the inode is NULL we were 1050 * It was possible the inode is NULL we were
1051 * closing an unfinished socket. 1051 * closing an unfinished socket.
1052 */ 1052 */
1053 1053
1054 if (!inode) { 1054 if (!inode) {
1055 printk(KERN_DEBUG "sock_close: NULL inode\n"); 1055 printk(KERN_DEBUG "sock_close: NULL inode\n");
1056 return 0; 1056 return 0;
1057 } 1057 }
1058 sock_release(SOCKET_I(inode)); 1058 sock_release(SOCKET_I(inode));
1059 return 0; 1059 return 0;
1060 } 1060 }
1061 1061
1062 /* 1062 /*
1063 * Update the socket async list 1063 * Update the socket async list
1064 * 1064 *
1065 * Fasync_list locking strategy. 1065 * Fasync_list locking strategy.
1066 * 1066 *
1067 * 1. fasync_list is modified only under process context socket lock 1067 * 1. fasync_list is modified only under process context socket lock
1068 * i.e. under semaphore. 1068 * i.e. under semaphore.
1069 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock) 1069 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
1070 * or under socket lock. 1070 * or under socket lock
1071 * 3. fasync_list can be used from softirq context, so that
1072 * modification under socket lock have to be enhanced with
1073 * write_lock_bh(&sk->sk_callback_lock).
1074 * --ANK (990710)
1075 */ 1071 */
1076 1072
1077 static int sock_fasync(int fd, struct file *filp, int on) 1073 static int sock_fasync(int fd, struct file *filp, int on)
1078 { 1074 {
1079 struct fasync_struct *fa, *fna = NULL, **prev; 1075 struct socket *sock = filp->private_data;
1080 struct socket *sock; 1076 struct sock *sk = sock->sk;
1081 struct sock *sk;
1082 1077
1083 if (on) { 1078 if (sk == NULL)
1084 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
1085 if (fna == NULL)
1086 return -ENOMEM;
1087 }
1088
1089 sock = filp->private_data;
1090
1091 sk = sock->sk;
1092 if (sk == NULL) {
1093 kfree(fna);
1094 return -EINVAL; 1079 return -EINVAL;
1095 }
1096 1080
1097 lock_sock(sk); 1081 lock_sock(sk);
1098 1082
1099 spin_lock(&filp->f_lock); 1083 fasync_helper(fd, filp, on, &sock->fasync_list);
1100 if (on)
1101 filp->f_flags |= FASYNC;
1102 else
1103 filp->f_flags &= ~FASYNC;
1104 spin_unlock(&filp->f_lock);
1105 1084
1106 prev = &(sock->fasync_list); 1085 if (!sock->fasync_list)
1107 1086 sock_reset_flag(sk, SOCK_FASYNC);
1108 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev) 1087 else
1109 if (fa->fa_file == filp)
1110 break;
1111
1112 if (on) {
1113 if (fa != NULL) {
1114 write_lock_bh(&sk->sk_callback_lock);
1115 fa->fa_fd = fd;
1116 write_unlock_bh(&sk->sk_callback_lock);
1117
1118 kfree(fna);
1119 goto out;
1120 }
1121 fna->fa_file = filp;
1122 fna->fa_fd = fd;
1123 fna->magic = FASYNC_MAGIC;
1124 fna->fa_next = sock->fasync_list;
1125 write_lock_bh(&sk->sk_callback_lock);
1126 sock->fasync_list = fna;
1127 sock_set_flag(sk, SOCK_FASYNC); 1088 sock_set_flag(sk, SOCK_FASYNC);
1128 write_unlock_bh(&sk->sk_callback_lock);
1129 } else {
1130 if (fa != NULL) {
1131 write_lock_bh(&sk->sk_callback_lock);
1132 *prev = fa->fa_next;
1133 if (!sock->fasync_list)
1134 sock_reset_flag(sk, SOCK_FASYNC);
1135 write_unlock_bh(&sk->sk_callback_lock);
1136 kfree(fa);
1137 }
1138 }
1139 1089
1140 out: 1090 release_sock(sk);
1141 release_sock(sock->sk);
1142 return 0; 1091 return 0;
1143 } 1092 }
1144 1093
1145 /* This function may be called only under socket lock or callback_lock */ 1094 /* This function may be called only under socket lock or callback_lock */
1146 1095
1147 int sock_wake_async(struct socket *sock, int how, int band) 1096 int sock_wake_async(struct socket *sock, int how, int band)
1148 { 1097 {
1149 if (!sock || !sock->fasync_list) 1098 if (!sock || !sock->fasync_list)
1150 return -1; 1099 return -1;
1151 switch (how) { 1100 switch (how) {
1152 case SOCK_WAKE_WAITD: 1101 case SOCK_WAKE_WAITD:
1153 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) 1102 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1154 break; 1103 break;
1155 goto call_kill; 1104 goto call_kill;
1156 case SOCK_WAKE_SPACE: 1105 case SOCK_WAKE_SPACE:
1157 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags)) 1106 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1158 break; 1107 break;
1159 /* fall through */ 1108 /* fall through */
1160 case SOCK_WAKE_IO: 1109 case SOCK_WAKE_IO:
1161 call_kill: 1110 call_kill:
1162 __kill_fasync(sock->fasync_list, SIGIO, band); 1111 kill_fasync(&sock->fasync_list, SIGIO, band);
1163 break; 1112 break;
1164 case SOCK_WAKE_URG: 1113 case SOCK_WAKE_URG:
1165 __kill_fasync(sock->fasync_list, SIGURG, band); 1114 kill_fasync(&sock->fasync_list, SIGURG, band);
1166 } 1115 }
1167 return 0; 1116 return 0;
1168 } 1117 }
1169 1118
1170 static int __sock_create(struct net *net, int family, int type, int protocol, 1119 static int __sock_create(struct net *net, int family, int type, int protocol,
1171 struct socket **res, int kern) 1120 struct socket **res, int kern)
1172 { 1121 {
1173 int err; 1122 int err;
1174 struct socket *sock; 1123 struct socket *sock;
1175 const struct net_proto_family *pf; 1124 const struct net_proto_family *pf;
1176 1125
1177 /* 1126 /*
1178 * Check protocol is in range 1127 * Check protocol is in range
1179 */ 1128 */
1180 if (family < 0 || family >= NPROTO) 1129 if (family < 0 || family >= NPROTO)
1181 return -EAFNOSUPPORT; 1130 return -EAFNOSUPPORT;
1182 if (type < 0 || type >= SOCK_MAX) 1131 if (type < 0 || type >= SOCK_MAX)
1183 return -EINVAL; 1132 return -EINVAL;
1184 1133
1185 /* Compatibility. 1134 /* Compatibility.
1186 1135
1187 This uglymoron is moved from INET layer to here to avoid 1136 This uglymoron is moved from INET layer to here to avoid
1188 deadlock in module load. 1137 deadlock in module load.
1189 */ 1138 */
1190 if (family == PF_INET && type == SOCK_PACKET) { 1139 if (family == PF_INET && type == SOCK_PACKET) {
1191 static int warned; 1140 static int warned;
1192 if (!warned) { 1141 if (!warned) {
1193 warned = 1; 1142 warned = 1;
1194 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", 1143 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1195 current->comm); 1144 current->comm);
1196 } 1145 }
1197 family = PF_PACKET; 1146 family = PF_PACKET;
1198 } 1147 }
1199 1148
1200 err = security_socket_create(family, type, protocol, kern); 1149 err = security_socket_create(family, type, protocol, kern);
1201 if (err) 1150 if (err)
1202 return err; 1151 return err;
1203 1152
1204 /* 1153 /*
1205 * Allocate the socket and allow the family to set things up. if 1154 * Allocate the socket and allow the family to set things up. if
1206 * the protocol is 0, the family is instructed to select an appropriate 1155 * the protocol is 0, the family is instructed to select an appropriate
1207 * default. 1156 * default.
1208 */ 1157 */
1209 sock = sock_alloc(); 1158 sock = sock_alloc();
1210 if (!sock) { 1159 if (!sock) {
1211 if (net_ratelimit()) 1160 if (net_ratelimit())
1212 printk(KERN_WARNING "socket: no more sockets\n"); 1161 printk(KERN_WARNING "socket: no more sockets\n");
1213 return -ENFILE; /* Not exactly a match, but its the 1162 return -ENFILE; /* Not exactly a match, but its the
1214 closest posix thing */ 1163 closest posix thing */
1215 } 1164 }
1216 1165
1217 sock->type = type; 1166 sock->type = type;
1218 1167
1219 #ifdef CONFIG_MODULES 1168 #ifdef CONFIG_MODULES
1220 /* Attempt to load a protocol module if the find failed. 1169 /* Attempt to load a protocol module if the find failed.
1221 * 1170 *
1222 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user 1171 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1223 * requested real, full-featured networking support upon configuration. 1172 * requested real, full-featured networking support upon configuration.
1224 * Otherwise module support will break! 1173 * Otherwise module support will break!
1225 */ 1174 */
1226 if (net_families[family] == NULL) 1175 if (net_families[family] == NULL)
1227 request_module("net-pf-%d", family); 1176 request_module("net-pf-%d", family);
1228 #endif 1177 #endif
1229 1178
1230 rcu_read_lock(); 1179 rcu_read_lock();
1231 pf = rcu_dereference(net_families[family]); 1180 pf = rcu_dereference(net_families[family]);
1232 err = -EAFNOSUPPORT; 1181 err = -EAFNOSUPPORT;
1233 if (!pf) 1182 if (!pf)
1234 goto out_release; 1183 goto out_release;
1235 1184
1236 /* 1185 /*
1237 * We will call the ->create function, that possibly is in a loadable 1186 * We will call the ->create function, that possibly is in a loadable
1238 * module, so we have to bump that loadable module refcnt first. 1187 * module, so we have to bump that loadable module refcnt first.
1239 */ 1188 */
1240 if (!try_module_get(pf->owner)) 1189 if (!try_module_get(pf->owner))
1241 goto out_release; 1190 goto out_release;
1242 1191
1243 /* Now protected by module ref count */ 1192 /* Now protected by module ref count */
1244 rcu_read_unlock(); 1193 rcu_read_unlock();
1245 1194
1246 err = pf->create(net, sock, protocol, kern); 1195 err = pf->create(net, sock, protocol, kern);
1247 if (err < 0) 1196 if (err < 0)
1248 goto out_module_put; 1197 goto out_module_put;
1249 1198
1250 /* 1199 /*
1251 * Now to bump the refcnt of the [loadable] module that owns this 1200 * Now to bump the refcnt of the [loadable] module that owns this
1252 * socket at sock_release time we decrement its refcnt. 1201 * socket at sock_release time we decrement its refcnt.
1253 */ 1202 */
1254 if (!try_module_get(sock->ops->owner)) 1203 if (!try_module_get(sock->ops->owner))
1255 goto out_module_busy; 1204 goto out_module_busy;
1256 1205
1257 /* 1206 /*
1258 * Now that we're done with the ->create function, the [loadable] 1207 * Now that we're done with the ->create function, the [loadable]
1259 * module can have its refcnt decremented 1208 * module can have its refcnt decremented
1260 */ 1209 */
1261 module_put(pf->owner); 1210 module_put(pf->owner);
1262 err = security_socket_post_create(sock, family, type, protocol, kern); 1211 err = security_socket_post_create(sock, family, type, protocol, kern);
1263 if (err) 1212 if (err)
1264 goto out_sock_release; 1213 goto out_sock_release;
1265 *res = sock; 1214 *res = sock;
1266 1215
1267 return 0; 1216 return 0;
1268 1217
1269 out_module_busy: 1218 out_module_busy:
1270 err = -EAFNOSUPPORT; 1219 err = -EAFNOSUPPORT;
1271 out_module_put: 1220 out_module_put:
1272 sock->ops = NULL; 1221 sock->ops = NULL;
1273 module_put(pf->owner); 1222 module_put(pf->owner);
1274 out_sock_release: 1223 out_sock_release:
1275 sock_release(sock); 1224 sock_release(sock);
1276 return err; 1225 return err;
1277 1226
1278 out_release: 1227 out_release:
1279 rcu_read_unlock(); 1228 rcu_read_unlock();
1280 goto out_sock_release; 1229 goto out_sock_release;
1281 } 1230 }
1282 1231
1283 int sock_create(int family, int type, int protocol, struct socket **res) 1232 int sock_create(int family, int type, int protocol, struct socket **res)
1284 { 1233 {
1285 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); 1234 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1286 } 1235 }
1287 1236
1288 int sock_create_kern(int family, int type, int protocol, struct socket **res) 1237 int sock_create_kern(int family, int type, int protocol, struct socket **res)
1289 { 1238 {
1290 return __sock_create(&init_net, family, type, protocol, res, 1); 1239 return __sock_create(&init_net, family, type, protocol, res, 1);
1291 } 1240 }
1292 1241
1293 SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) 1242 SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1294 { 1243 {
1295 int retval; 1244 int retval;
1296 struct socket *sock; 1245 struct socket *sock;
1297 int flags; 1246 int flags;
1298 1247
1299 /* Check the SOCK_* constants for consistency. */ 1248 /* Check the SOCK_* constants for consistency. */
1300 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC); 1249 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1301 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK); 1250 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1302 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK); 1251 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1303 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK); 1252 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1304 1253
1305 flags = type & ~SOCK_TYPE_MASK; 1254 flags = type & ~SOCK_TYPE_MASK;
1306 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1255 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1307 return -EINVAL; 1256 return -EINVAL;
1308 type &= SOCK_TYPE_MASK; 1257 type &= SOCK_TYPE_MASK;
1309 1258
1310 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) 1259 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1311 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1260 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1312 1261
1313 retval = sock_create(family, type, protocol, &sock); 1262 retval = sock_create(family, type, protocol, &sock);
1314 if (retval < 0) 1263 if (retval < 0)
1315 goto out; 1264 goto out;
1316 1265
1317 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); 1266 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1318 if (retval < 0) 1267 if (retval < 0)
1319 goto out_release; 1268 goto out_release;
1320 1269
1321 out: 1270 out:
1322 /* It may be already another descriptor 8) Not kernel problem. */ 1271 /* It may be already another descriptor 8) Not kernel problem. */
1323 return retval; 1272 return retval;
1324 1273
1325 out_release: 1274 out_release:
1326 sock_release(sock); 1275 sock_release(sock);
1327 return retval; 1276 return retval;
1328 } 1277 }
1329 1278
1330 /* 1279 /*
1331 * Create a pair of connected sockets. 1280 * Create a pair of connected sockets.
1332 */ 1281 */
1333 1282
1334 SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, 1283 SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1335 int __user *, usockvec) 1284 int __user *, usockvec)
1336 { 1285 {
1337 struct socket *sock1, *sock2; 1286 struct socket *sock1, *sock2;
1338 int fd1, fd2, err; 1287 int fd1, fd2, err;
1339 struct file *newfile1, *newfile2; 1288 struct file *newfile1, *newfile2;
1340 int flags; 1289 int flags;
1341 1290
1342 flags = type & ~SOCK_TYPE_MASK; 1291 flags = type & ~SOCK_TYPE_MASK;
1343 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1292 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1344 return -EINVAL; 1293 return -EINVAL;
1345 type &= SOCK_TYPE_MASK; 1294 type &= SOCK_TYPE_MASK;
1346 1295
1347 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) 1296 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1348 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1297 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1349 1298
1350 /* 1299 /*
1351 * Obtain the first socket and check if the underlying protocol 1300 * Obtain the first socket and check if the underlying protocol
1352 * supports the socketpair call. 1301 * supports the socketpair call.
1353 */ 1302 */
1354 1303
1355 err = sock_create(family, type, protocol, &sock1); 1304 err = sock_create(family, type, protocol, &sock1);
1356 if (err < 0) 1305 if (err < 0)
1357 goto out; 1306 goto out;
1358 1307
1359 err = sock_create(family, type, protocol, &sock2); 1308 err = sock_create(family, type, protocol, &sock2);
1360 if (err < 0) 1309 if (err < 0)
1361 goto out_release_1; 1310 goto out_release_1;
1362 1311
1363 err = sock1->ops->socketpair(sock1, sock2); 1312 err = sock1->ops->socketpair(sock1, sock2);
1364 if (err < 0) 1313 if (err < 0)
1365 goto out_release_both; 1314 goto out_release_both;
1366 1315
1367 fd1 = sock_alloc_file(sock1, &newfile1, flags); 1316 fd1 = sock_alloc_file(sock1, &newfile1, flags);
1368 if (unlikely(fd1 < 0)) { 1317 if (unlikely(fd1 < 0)) {
1369 err = fd1; 1318 err = fd1;
1370 goto out_release_both; 1319 goto out_release_both;
1371 } 1320 }
1372 1321
1373 fd2 = sock_alloc_file(sock2, &newfile2, flags); 1322 fd2 = sock_alloc_file(sock2, &newfile2, flags);
1374 if (unlikely(fd2 < 0)) { 1323 if (unlikely(fd2 < 0)) {
1375 err = fd2; 1324 err = fd2;
1376 fput(newfile1); 1325 fput(newfile1);
1377 put_unused_fd(fd1); 1326 put_unused_fd(fd1);
1378 sock_release(sock2); 1327 sock_release(sock2);
1379 goto out; 1328 goto out;
1380 } 1329 }
1381 1330
1382 audit_fd_pair(fd1, fd2); 1331 audit_fd_pair(fd1, fd2);
1383 fd_install(fd1, newfile1); 1332 fd_install(fd1, newfile1);
1384 fd_install(fd2, newfile2); 1333 fd_install(fd2, newfile2);
1385 /* fd1 and fd2 may be already another descriptors. 1334 /* fd1 and fd2 may be already another descriptors.
1386 * Not kernel problem. 1335 * Not kernel problem.
1387 */ 1336 */
1388 1337
1389 err = put_user(fd1, &usockvec[0]); 1338 err = put_user(fd1, &usockvec[0]);
1390 if (!err) 1339 if (!err)
1391 err = put_user(fd2, &usockvec[1]); 1340 err = put_user(fd2, &usockvec[1]);
1392 if (!err) 1341 if (!err)
1393 return 0; 1342 return 0;
1394 1343
1395 sys_close(fd2); 1344 sys_close(fd2);
1396 sys_close(fd1); 1345 sys_close(fd1);
1397 return err; 1346 return err;
1398 1347
1399 out_release_both: 1348 out_release_both:
1400 sock_release(sock2); 1349 sock_release(sock2);
1401 out_release_1: 1350 out_release_1:
1402 sock_release(sock1); 1351 sock_release(sock1);
1403 out: 1352 out:
1404 return err; 1353 return err;
1405 } 1354 }
1406 1355
1407 /* 1356 /*
1408 * Bind a name to a socket. Nothing much to do here since it's 1357 * Bind a name to a socket. Nothing much to do here since it's
1409 * the protocol's responsibility to handle the local address. 1358 * the protocol's responsibility to handle the local address.
1410 * 1359 *
1411 * We move the socket address to kernel space before we call 1360 * We move the socket address to kernel space before we call
1412 * the protocol layer (having also checked the address is ok). 1361 * the protocol layer (having also checked the address is ok).
1413 */ 1362 */
1414 1363
1415 SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen) 1364 SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1416 { 1365 {
1417 struct socket *sock; 1366 struct socket *sock;
1418 struct sockaddr_storage address; 1367 struct sockaddr_storage address;
1419 int err, fput_needed; 1368 int err, fput_needed;
1420 1369
1421 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1370 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1422 if (sock) { 1371 if (sock) {
1423 err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address); 1372 err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address);
1424 if (err >= 0) { 1373 if (err >= 0) {
1425 err = security_socket_bind(sock, 1374 err = security_socket_bind(sock,
1426 (struct sockaddr *)&address, 1375 (struct sockaddr *)&address,
1427 addrlen); 1376 addrlen);
1428 if (!err) 1377 if (!err)
1429 err = sock->ops->bind(sock, 1378 err = sock->ops->bind(sock,
1430 (struct sockaddr *) 1379 (struct sockaddr *)
1431 &address, addrlen); 1380 &address, addrlen);
1432 } 1381 }
1433 fput_light(sock->file, fput_needed); 1382 fput_light(sock->file, fput_needed);
1434 } 1383 }
1435 return err; 1384 return err;
1436 } 1385 }
1437 1386
1438 /* 1387 /*
1439 * Perform a listen. Basically, we allow the protocol to do anything 1388 * Perform a listen. Basically, we allow the protocol to do anything
1440 * necessary for a listen, and if that works, we mark the socket as 1389 * necessary for a listen, and if that works, we mark the socket as
1441 * ready for listening. 1390 * ready for listening.
1442 */ 1391 */
1443 1392
1444 SYSCALL_DEFINE2(listen, int, fd, int, backlog) 1393 SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1445 { 1394 {
1446 struct socket *sock; 1395 struct socket *sock;
1447 int err, fput_needed; 1396 int err, fput_needed;
1448 int somaxconn; 1397 int somaxconn;
1449 1398
1450 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1399 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1451 if (sock) { 1400 if (sock) {
1452 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn; 1401 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
1453 if ((unsigned)backlog > somaxconn) 1402 if ((unsigned)backlog > somaxconn)
1454 backlog = somaxconn; 1403 backlog = somaxconn;
1455 1404
1456 err = security_socket_listen(sock, backlog); 1405 err = security_socket_listen(sock, backlog);
1457 if (!err) 1406 if (!err)
1458 err = sock->ops->listen(sock, backlog); 1407 err = sock->ops->listen(sock, backlog);
1459 1408
1460 fput_light(sock->file, fput_needed); 1409 fput_light(sock->file, fput_needed);
1461 } 1410 }
1462 return err; 1411 return err;
1463 } 1412 }
1464 1413
1465 /* 1414 /*
1466 * For accept, we attempt to create a new socket, set up the link 1415 * For accept, we attempt to create a new socket, set up the link
1467 * with the client, wake up the client, then return the new 1416 * with the client, wake up the client, then return the new
1468 * connected fd. We collect the address of the connector in kernel 1417 * connected fd. We collect the address of the connector in kernel
1469 * space and move it to user at the very end. This is unclean because 1418 * space and move it to user at the very end. This is unclean because
1470 * we open the socket then return an error. 1419 * we open the socket then return an error.
1471 * 1420 *
1472 * 1003.1g adds the ability to recvmsg() to query connection pending 1421 * 1003.1g adds the ability to recvmsg() to query connection pending
1473 * status to recvmsg. We need to add that support in a way thats 1422 * status to recvmsg. We need to add that support in a way thats
1474 * clean when we restucture accept also. 1423 * clean when we restucture accept also.
1475 */ 1424 */
1476 1425
1477 SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, 1426 SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1478 int __user *, upeer_addrlen, int, flags) 1427 int __user *, upeer_addrlen, int, flags)
1479 { 1428 {
1480 struct socket *sock, *newsock; 1429 struct socket *sock, *newsock;
1481 struct file *newfile; 1430 struct file *newfile;
1482 int err, len, newfd, fput_needed; 1431 int err, len, newfd, fput_needed;
1483 struct sockaddr_storage address; 1432 struct sockaddr_storage address;
1484 1433
1485 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1434 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1486 return -EINVAL; 1435 return -EINVAL;
1487 1436
1488 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) 1437 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1489 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1438 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1490 1439
1491 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1440 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1492 if (!sock) 1441 if (!sock)
1493 goto out; 1442 goto out;
1494 1443
1495 err = -ENFILE; 1444 err = -ENFILE;
1496 if (!(newsock = sock_alloc())) 1445 if (!(newsock = sock_alloc()))
1497 goto out_put; 1446 goto out_put;
1498 1447
1499 newsock->type = sock->type; 1448 newsock->type = sock->type;
1500 newsock->ops = sock->ops; 1449 newsock->ops = sock->ops;
1501 1450
1502 /* 1451 /*
1503 * We don't need try_module_get here, as the listening socket (sock) 1452 * We don't need try_module_get here, as the listening socket (sock)
1504 * has the protocol module (sock->ops->owner) held. 1453 * has the protocol module (sock->ops->owner) held.
1505 */ 1454 */
1506 __module_get(newsock->ops->owner); 1455 __module_get(newsock->ops->owner);
1507 1456
1508 newfd = sock_alloc_file(newsock, &newfile, flags); 1457 newfd = sock_alloc_file(newsock, &newfile, flags);
1509 if (unlikely(newfd < 0)) { 1458 if (unlikely(newfd < 0)) {
1510 err = newfd; 1459 err = newfd;
1511 sock_release(newsock); 1460 sock_release(newsock);
1512 goto out_put; 1461 goto out_put;
1513 } 1462 }
1514 1463
1515 err = security_socket_accept(sock, newsock); 1464 err = security_socket_accept(sock, newsock);
1516 if (err) 1465 if (err)
1517 goto out_fd; 1466 goto out_fd;
1518 1467
1519 err = sock->ops->accept(sock, newsock, sock->file->f_flags); 1468 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1520 if (err < 0) 1469 if (err < 0)
1521 goto out_fd; 1470 goto out_fd;
1522 1471
1523 if (upeer_sockaddr) { 1472 if (upeer_sockaddr) {
1524 if (newsock->ops->getname(newsock, (struct sockaddr *)&address, 1473 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
1525 &len, 2) < 0) { 1474 &len, 2) < 0) {
1526 err = -ECONNABORTED; 1475 err = -ECONNABORTED;
1527 goto out_fd; 1476 goto out_fd;
1528 } 1477 }
1529 err = move_addr_to_user((struct sockaddr *)&address, 1478 err = move_addr_to_user((struct sockaddr *)&address,
1530 len, upeer_sockaddr, upeer_addrlen); 1479 len, upeer_sockaddr, upeer_addrlen);
1531 if (err < 0) 1480 if (err < 0)
1532 goto out_fd; 1481 goto out_fd;
1533 } 1482 }
1534 1483
1535 /* File flags are not inherited via accept() unlike another OSes. */ 1484 /* File flags are not inherited via accept() unlike another OSes. */
1536 1485
1537 fd_install(newfd, newfile); 1486 fd_install(newfd, newfile);
1538 err = newfd; 1487 err = newfd;
1539 1488
1540 out_put: 1489 out_put:
1541 fput_light(sock->file, fput_needed); 1490 fput_light(sock->file, fput_needed);
1542 out: 1491 out:
1543 return err; 1492 return err;
1544 out_fd: 1493 out_fd:
1545 fput(newfile); 1494 fput(newfile);
1546 put_unused_fd(newfd); 1495 put_unused_fd(newfd);
1547 goto out_put; 1496 goto out_put;
1548 } 1497 }
1549 1498
1550 SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr, 1499 SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1551 int __user *, upeer_addrlen) 1500 int __user *, upeer_addrlen)
1552 { 1501 {
1553 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0); 1502 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
1554 } 1503 }
1555 1504
1556 /* 1505 /*
1557 * Attempt to connect to a socket with the server address. The address 1506 * Attempt to connect to a socket with the server address. The address
1558 * is in user space so we verify it is OK and move it to kernel space. 1507 * is in user space so we verify it is OK and move it to kernel space.
1559 * 1508 *
1560 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to 1509 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1561 * break bindings 1510 * break bindings
1562 * 1511 *
1563 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and 1512 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1564 * other SEQPACKET protocols that take time to connect() as it doesn't 1513 * other SEQPACKET protocols that take time to connect() as it doesn't
1565 * include the -EINPROGRESS status for such sockets. 1514 * include the -EINPROGRESS status for such sockets.
1566 */ 1515 */
1567 1516
1568 SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr, 1517 SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1569 int, addrlen) 1518 int, addrlen)
1570 { 1519 {
1571 struct socket *sock; 1520 struct socket *sock;
1572 struct sockaddr_storage address; 1521 struct sockaddr_storage address;
1573 int err, fput_needed; 1522 int err, fput_needed;
1574 1523
1575 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1524 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1576 if (!sock) 1525 if (!sock)
1577 goto out; 1526 goto out;
1578 err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address); 1527 err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address);
1579 if (err < 0) 1528 if (err < 0)
1580 goto out_put; 1529 goto out_put;
1581 1530
1582 err = 1531 err =
1583 security_socket_connect(sock, (struct sockaddr *)&address, addrlen); 1532 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1584 if (err) 1533 if (err)
1585 goto out_put; 1534 goto out_put;
1586 1535
1587 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen, 1536 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1588 sock->file->f_flags); 1537 sock->file->f_flags);
1589 out_put: 1538 out_put:
1590 fput_light(sock->file, fput_needed); 1539 fput_light(sock->file, fput_needed);
1591 out: 1540 out:
1592 return err; 1541 return err;
1593 } 1542 }
1594 1543
1595 /* 1544 /*
1596 * Get the local address ('name') of a socket object. Move the obtained 1545 * Get the local address ('name') of a socket object. Move the obtained
1597 * name to user space. 1546 * name to user space.
1598 */ 1547 */
1599 1548
1600 SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr, 1549 SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1601 int __user *, usockaddr_len) 1550 int __user *, usockaddr_len)
1602 { 1551 {
1603 struct socket *sock; 1552 struct socket *sock;
1604 struct sockaddr_storage address; 1553 struct sockaddr_storage address;
1605 int len, err, fput_needed; 1554 int len, err, fput_needed;
1606 1555
1607 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1556 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1608 if (!sock) 1557 if (!sock)
1609 goto out; 1558 goto out;
1610 1559
1611 err = security_socket_getsockname(sock); 1560 err = security_socket_getsockname(sock);
1612 if (err) 1561 if (err)
1613 goto out_put; 1562 goto out_put;
1614 1563
1615 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0); 1564 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1616 if (err) 1565 if (err)
1617 goto out_put; 1566 goto out_put;
1618 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, usockaddr_len); 1567 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, usockaddr_len);
1619 1568
1620 out_put: 1569 out_put:
1621 fput_light(sock->file, fput_needed); 1570 fput_light(sock->file, fput_needed);
1622 out: 1571 out:
1623 return err; 1572 return err;
1624 } 1573 }
1625 1574
1626 /* 1575 /*
1627 * Get the remote address ('name') of a socket object. Move the obtained 1576 * Get the remote address ('name') of a socket object. Move the obtained
1628 * name to user space. 1577 * name to user space.
1629 */ 1578 */
1630 1579
1631 SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr, 1580 SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1632 int __user *, usockaddr_len) 1581 int __user *, usockaddr_len)
1633 { 1582 {
1634 struct socket *sock; 1583 struct socket *sock;
1635 struct sockaddr_storage address; 1584 struct sockaddr_storage address;
1636 int len, err, fput_needed; 1585 int len, err, fput_needed;
1637 1586
1638 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1587 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1639 if (sock != NULL) { 1588 if (sock != NULL) {
1640 err = security_socket_getpeername(sock); 1589 err = security_socket_getpeername(sock);
1641 if (err) { 1590 if (err) {
1642 fput_light(sock->file, fput_needed); 1591 fput_light(sock->file, fput_needed);
1643 return err; 1592 return err;
1644 } 1593 }
1645 1594
1646 err = 1595 err =
1647 sock->ops->getname(sock, (struct sockaddr *)&address, &len, 1596 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
1648 1); 1597 1);
1649 if (!err) 1598 if (!err)
1650 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, 1599 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr,
1651 usockaddr_len); 1600 usockaddr_len);
1652 fput_light(sock->file, fput_needed); 1601 fput_light(sock->file, fput_needed);
1653 } 1602 }
1654 return err; 1603 return err;
1655 } 1604 }
1656 1605
1657 /* 1606 /*
1658 * Send a datagram to a given address. We move the address into kernel 1607 * Send a datagram to a given address. We move the address into kernel
1659 * space and check the user space data area is readable before invoking 1608 * space and check the user space data area is readable before invoking
1660 * the protocol. 1609 * the protocol.
1661 */ 1610 */
1662 1611
1663 SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len, 1612 SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1664 unsigned, flags, struct sockaddr __user *, addr, 1613 unsigned, flags, struct sockaddr __user *, addr,
1665 int, addr_len) 1614 int, addr_len)
1666 { 1615 {
1667 struct socket *sock; 1616 struct socket *sock;
1668 struct sockaddr_storage address; 1617 struct sockaddr_storage address;
1669 int err; 1618 int err;
1670 struct msghdr msg; 1619 struct msghdr msg;
1671 struct iovec iov; 1620 struct iovec iov;
1672 int fput_needed; 1621 int fput_needed;
1673 1622
1674 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1623 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1675 if (!sock) 1624 if (!sock)
1676 goto out; 1625 goto out;
1677 1626
1678 iov.iov_base = buff; 1627 iov.iov_base = buff;
1679 iov.iov_len = len; 1628 iov.iov_len = len;
1680 msg.msg_name = NULL; 1629 msg.msg_name = NULL;
1681 msg.msg_iov = &iov; 1630 msg.msg_iov = &iov;
1682 msg.msg_iovlen = 1; 1631 msg.msg_iovlen = 1;
1683 msg.msg_control = NULL; 1632 msg.msg_control = NULL;
1684 msg.msg_controllen = 0; 1633 msg.msg_controllen = 0;
1685 msg.msg_namelen = 0; 1634 msg.msg_namelen = 0;
1686 if (addr) { 1635 if (addr) {
1687 err = move_addr_to_kernel(addr, addr_len, (struct sockaddr *)&address); 1636 err = move_addr_to_kernel(addr, addr_len, (struct sockaddr *)&address);
1688 if (err < 0) 1637 if (err < 0)
1689 goto out_put; 1638 goto out_put;
1690 msg.msg_name = (struct sockaddr *)&address; 1639 msg.msg_name = (struct sockaddr *)&address;
1691 msg.msg_namelen = addr_len; 1640 msg.msg_namelen = addr_len;
1692 } 1641 }
1693 if (sock->file->f_flags & O_NONBLOCK) 1642 if (sock->file->f_flags & O_NONBLOCK)
1694 flags |= MSG_DONTWAIT; 1643 flags |= MSG_DONTWAIT;
1695 msg.msg_flags = flags; 1644 msg.msg_flags = flags;
1696 err = sock_sendmsg(sock, &msg, len); 1645 err = sock_sendmsg(sock, &msg, len);
1697 1646
1698 out_put: 1647 out_put:
1699 fput_light(sock->file, fput_needed); 1648 fput_light(sock->file, fput_needed);
1700 out: 1649 out:
1701 return err; 1650 return err;
1702 } 1651 }
1703 1652
1704 /* 1653 /*
1705 * Send a datagram down a socket. 1654 * Send a datagram down a socket.
1706 */ 1655 */
1707 1656
1708 SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len, 1657 SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
1709 unsigned, flags) 1658 unsigned, flags)
1710 { 1659 {
1711 return sys_sendto(fd, buff, len, flags, NULL, 0); 1660 return sys_sendto(fd, buff, len, flags, NULL, 0);
1712 } 1661 }
1713 1662
1714 /* 1663 /*
1715 * Receive a frame from the socket and optionally record the address of the 1664 * Receive a frame from the socket and optionally record the address of the
1716 * sender. We verify the buffers are writable and if needed move the 1665 * sender. We verify the buffers are writable and if needed move the
1717 * sender address from kernel to user space. 1666 * sender address from kernel to user space.
1718 */ 1667 */
1719 1668
1720 SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, 1669 SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
1721 unsigned, flags, struct sockaddr __user *, addr, 1670 unsigned, flags, struct sockaddr __user *, addr,
1722 int __user *, addr_len) 1671 int __user *, addr_len)
1723 { 1672 {
1724 struct socket *sock; 1673 struct socket *sock;
1725 struct iovec iov; 1674 struct iovec iov;
1726 struct msghdr msg; 1675 struct msghdr msg;
1727 struct sockaddr_storage address; 1676 struct sockaddr_storage address;
1728 int err, err2; 1677 int err, err2;
1729 int fput_needed; 1678 int fput_needed;
1730 1679
1731 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1680 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1732 if (!sock) 1681 if (!sock)
1733 goto out; 1682 goto out;
1734 1683
1735 msg.msg_control = NULL; 1684 msg.msg_control = NULL;
1736 msg.msg_controllen = 0; 1685 msg.msg_controllen = 0;
1737 msg.msg_iovlen = 1; 1686 msg.msg_iovlen = 1;
1738 msg.msg_iov = &iov; 1687 msg.msg_iov = &iov;
1739 iov.iov_len = size; 1688 iov.iov_len = size;
1740 iov.iov_base = ubuf; 1689 iov.iov_base = ubuf;
1741 msg.msg_name = (struct sockaddr *)&address; 1690 msg.msg_name = (struct sockaddr *)&address;
1742 msg.msg_namelen = sizeof(address); 1691 msg.msg_namelen = sizeof(address);
1743 if (sock->file->f_flags & O_NONBLOCK) 1692 if (sock->file->f_flags & O_NONBLOCK)
1744 flags |= MSG_DONTWAIT; 1693 flags |= MSG_DONTWAIT;
1745 err = sock_recvmsg(sock, &msg, size, flags); 1694 err = sock_recvmsg(sock, &msg, size, flags);
1746 1695
1747 if (err >= 0 && addr != NULL) { 1696 if (err >= 0 && addr != NULL) {
1748 err2 = move_addr_to_user((struct sockaddr *)&address, 1697 err2 = move_addr_to_user((struct sockaddr *)&address,
1749 msg.msg_namelen, addr, addr_len); 1698 msg.msg_namelen, addr, addr_len);
1750 if (err2 < 0) 1699 if (err2 < 0)
1751 err = err2; 1700 err = err2;
1752 } 1701 }
1753 1702
1754 fput_light(sock->file, fput_needed); 1703 fput_light(sock->file, fput_needed);
1755 out: 1704 out:
1756 return err; 1705 return err;
1757 } 1706 }
1758 1707
1759 /* 1708 /*
1760 * Receive a datagram from a socket. 1709 * Receive a datagram from a socket.
1761 */ 1710 */
1762 1711
1763 asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size, 1712 asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1764 unsigned flags) 1713 unsigned flags)
1765 { 1714 {
1766 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); 1715 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1767 } 1716 }
1768 1717
1769 /* 1718 /*
1770 * Set a socket option. Because we don't know the option lengths we have 1719 * Set a socket option. Because we don't know the option lengths we have
1771 * to pass the user mode parameter for the protocols to sort out. 1720 * to pass the user mode parameter for the protocols to sort out.
1772 */ 1721 */
1773 1722
1774 SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, 1723 SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1775 char __user *, optval, int, optlen) 1724 char __user *, optval, int, optlen)
1776 { 1725 {
1777 int err, fput_needed; 1726 int err, fput_needed;
1778 struct socket *sock; 1727 struct socket *sock;
1779 1728
1780 if (optlen < 0) 1729 if (optlen < 0)
1781 return -EINVAL; 1730 return -EINVAL;
1782 1731
1783 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1732 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1784 if (sock != NULL) { 1733 if (sock != NULL) {
1785 err = security_socket_setsockopt(sock, level, optname); 1734 err = security_socket_setsockopt(sock, level, optname);
1786 if (err) 1735 if (err)
1787 goto out_put; 1736 goto out_put;
1788 1737
1789 if (level == SOL_SOCKET) 1738 if (level == SOL_SOCKET)
1790 err = 1739 err =
1791 sock_setsockopt(sock, level, optname, optval, 1740 sock_setsockopt(sock, level, optname, optval,
1792 optlen); 1741 optlen);
1793 else 1742 else
1794 err = 1743 err =
1795 sock->ops->setsockopt(sock, level, optname, optval, 1744 sock->ops->setsockopt(sock, level, optname, optval,
1796 optlen); 1745 optlen);
1797 out_put: 1746 out_put:
1798 fput_light(sock->file, fput_needed); 1747 fput_light(sock->file, fput_needed);
1799 } 1748 }
1800 return err; 1749 return err;
1801 } 1750 }
1802 1751
1803 /* 1752 /*
1804 * Get a socket option. Because we don't know the option lengths we have 1753 * Get a socket option. Because we don't know the option lengths we have
1805 * to pass a user mode parameter for the protocols to sort out. 1754 * to pass a user mode parameter for the protocols to sort out.
1806 */ 1755 */
1807 1756
1808 SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname, 1757 SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1809 char __user *, optval, int __user *, optlen) 1758 char __user *, optval, int __user *, optlen)
1810 { 1759 {
1811 int err, fput_needed; 1760 int err, fput_needed;
1812 struct socket *sock; 1761 struct socket *sock;
1813 1762
1814 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1763 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1815 if (sock != NULL) { 1764 if (sock != NULL) {
1816 err = security_socket_getsockopt(sock, level, optname); 1765 err = security_socket_getsockopt(sock, level, optname);
1817 if (err) 1766 if (err)
1818 goto out_put; 1767 goto out_put;
1819 1768
1820 if (level == SOL_SOCKET) 1769 if (level == SOL_SOCKET)
1821 err = 1770 err =
1822 sock_getsockopt(sock, level, optname, optval, 1771 sock_getsockopt(sock, level, optname, optval,
1823 optlen); 1772 optlen);
1824 else 1773 else
1825 err = 1774 err =
1826 sock->ops->getsockopt(sock, level, optname, optval, 1775 sock->ops->getsockopt(sock, level, optname, optval,
1827 optlen); 1776 optlen);
1828 out_put: 1777 out_put:
1829 fput_light(sock->file, fput_needed); 1778 fput_light(sock->file, fput_needed);
1830 } 1779 }
1831 return err; 1780 return err;
1832 } 1781 }
1833 1782
1834 /* 1783 /*
1835 * Shutdown a socket. 1784 * Shutdown a socket.
1836 */ 1785 */
1837 1786
1838 SYSCALL_DEFINE2(shutdown, int, fd, int, how) 1787 SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1839 { 1788 {
1840 int err, fput_needed; 1789 int err, fput_needed;
1841 struct socket *sock; 1790 struct socket *sock;
1842 1791
1843 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1792 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1844 if (sock != NULL) { 1793 if (sock != NULL) {
1845 err = security_socket_shutdown(sock, how); 1794 err = security_socket_shutdown(sock, how);
1846 if (!err) 1795 if (!err)
1847 err = sock->ops->shutdown(sock, how); 1796 err = sock->ops->shutdown(sock, how);
1848 fput_light(sock->file, fput_needed); 1797 fput_light(sock->file, fput_needed);
1849 } 1798 }
1850 return err; 1799 return err;
1851 } 1800 }
1852 1801
1853 /* A couple of helpful macros for getting the address of the 32/64 bit 1802 /* A couple of helpful macros for getting the address of the 32/64 bit
1854 * fields which are the same type (int / unsigned) on our platforms. 1803 * fields which are the same type (int / unsigned) on our platforms.
1855 */ 1804 */
1856 #define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member) 1805 #define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
1857 #define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) 1806 #define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen)
1858 #define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) 1807 #define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags)
1859 1808
1860 /* 1809 /*
1861 * BSD sendmsg interface 1810 * BSD sendmsg interface
1862 */ 1811 */
1863 1812
1864 SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) 1813 SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
1865 { 1814 {
1866 struct compat_msghdr __user *msg_compat = 1815 struct compat_msghdr __user *msg_compat =
1867 (struct compat_msghdr __user *)msg; 1816 (struct compat_msghdr __user *)msg;
1868 struct socket *sock; 1817 struct socket *sock;
1869 struct sockaddr_storage address; 1818 struct sockaddr_storage address;
1870 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; 1819 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
1871 unsigned char ctl[sizeof(struct cmsghdr) + 20] 1820 unsigned char ctl[sizeof(struct cmsghdr) + 20]
1872 __attribute__ ((aligned(sizeof(__kernel_size_t)))); 1821 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1873 /* 20 is size of ipv6_pktinfo */ 1822 /* 20 is size of ipv6_pktinfo */
1874 unsigned char *ctl_buf = ctl; 1823 unsigned char *ctl_buf = ctl;
1875 struct msghdr msg_sys; 1824 struct msghdr msg_sys;
1876 int err, ctl_len, iov_size, total_len; 1825 int err, ctl_len, iov_size, total_len;
1877 int fput_needed; 1826 int fput_needed;
1878 1827
1879 err = -EFAULT; 1828 err = -EFAULT;
1880 if (MSG_CMSG_COMPAT & flags) { 1829 if (MSG_CMSG_COMPAT & flags) {
1881 if (get_compat_msghdr(&msg_sys, msg_compat)) 1830 if (get_compat_msghdr(&msg_sys, msg_compat))
1882 return -EFAULT; 1831 return -EFAULT;
1883 } 1832 }
1884 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) 1833 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1885 return -EFAULT; 1834 return -EFAULT;
1886 1835
1887 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1836 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1888 if (!sock) 1837 if (!sock)
1889 goto out; 1838 goto out;
1890 1839
1891 /* do not move before msg_sys is valid */ 1840 /* do not move before msg_sys is valid */
1892 err = -EMSGSIZE; 1841 err = -EMSGSIZE;
1893 if (msg_sys.msg_iovlen > UIO_MAXIOV) 1842 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1894 goto out_put; 1843 goto out_put;
1895 1844
1896 /* Check whether to allocate the iovec area */ 1845 /* Check whether to allocate the iovec area */
1897 err = -ENOMEM; 1846 err = -ENOMEM;
1898 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); 1847 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1899 if (msg_sys.msg_iovlen > UIO_FASTIOV) { 1848 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1900 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); 1849 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1901 if (!iov) 1850 if (!iov)
1902 goto out_put; 1851 goto out_put;
1903 } 1852 }
1904 1853
1905 /* This will also move the address data into kernel space */ 1854 /* This will also move the address data into kernel space */
1906 if (MSG_CMSG_COMPAT & flags) { 1855 if (MSG_CMSG_COMPAT & flags) {
1907 err = verify_compat_iovec(&msg_sys, iov, 1856 err = verify_compat_iovec(&msg_sys, iov,
1908 (struct sockaddr *)&address, 1857 (struct sockaddr *)&address,
1909 VERIFY_READ); 1858 VERIFY_READ);
1910 } else 1859 } else
1911 err = verify_iovec(&msg_sys, iov, 1860 err = verify_iovec(&msg_sys, iov,
1912 (struct sockaddr *)&address, 1861 (struct sockaddr *)&address,
1913 VERIFY_READ); 1862 VERIFY_READ);
1914 if (err < 0) 1863 if (err < 0)
1915 goto out_freeiov; 1864 goto out_freeiov;
1916 total_len = err; 1865 total_len = err;
1917 1866
1918 err = -ENOBUFS; 1867 err = -ENOBUFS;
1919 1868
1920 if (msg_sys.msg_controllen > INT_MAX) 1869 if (msg_sys.msg_controllen > INT_MAX)
1921 goto out_freeiov; 1870 goto out_freeiov;
1922 ctl_len = msg_sys.msg_controllen; 1871 ctl_len = msg_sys.msg_controllen;
1923 if ((MSG_CMSG_COMPAT & flags) && ctl_len) { 1872 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
1924 err = 1873 err =
1925 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, 1874 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1926 sizeof(ctl)); 1875 sizeof(ctl));
1927 if (err) 1876 if (err)
1928 goto out_freeiov; 1877 goto out_freeiov;
1929 ctl_buf = msg_sys.msg_control; 1878 ctl_buf = msg_sys.msg_control;
1930 ctl_len = msg_sys.msg_controllen; 1879 ctl_len = msg_sys.msg_controllen;
1931 } else if (ctl_len) { 1880 } else if (ctl_len) {
1932 if (ctl_len > sizeof(ctl)) { 1881 if (ctl_len > sizeof(ctl)) {
1933 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); 1882 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
1934 if (ctl_buf == NULL) 1883 if (ctl_buf == NULL)
1935 goto out_freeiov; 1884 goto out_freeiov;
1936 } 1885 }
1937 err = -EFAULT; 1886 err = -EFAULT;
1938 /* 1887 /*
1939 * Careful! Before this, msg_sys.msg_control contains a user pointer. 1888 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1940 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted 1889 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1941 * checking falls down on this. 1890 * checking falls down on this.
1942 */ 1891 */
1943 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control, 1892 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1944 ctl_len)) 1893 ctl_len))
1945 goto out_freectl; 1894 goto out_freectl;
1946 msg_sys.msg_control = ctl_buf; 1895 msg_sys.msg_control = ctl_buf;
1947 } 1896 }
1948 msg_sys.msg_flags = flags; 1897 msg_sys.msg_flags = flags;
1949 1898
1950 if (sock->file->f_flags & O_NONBLOCK) 1899 if (sock->file->f_flags & O_NONBLOCK)
1951 msg_sys.msg_flags |= MSG_DONTWAIT; 1900 msg_sys.msg_flags |= MSG_DONTWAIT;
1952 err = sock_sendmsg(sock, &msg_sys, total_len); 1901 err = sock_sendmsg(sock, &msg_sys, total_len);
1953 1902
1954 out_freectl: 1903 out_freectl:
1955 if (ctl_buf != ctl) 1904 if (ctl_buf != ctl)
1956 sock_kfree_s(sock->sk, ctl_buf, ctl_len); 1905 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1957 out_freeiov: 1906 out_freeiov:
1958 if (iov != iovstack) 1907 if (iov != iovstack)
1959 sock_kfree_s(sock->sk, iov, iov_size); 1908 sock_kfree_s(sock->sk, iov, iov_size);
1960 out_put: 1909 out_put:
1961 fput_light(sock->file, fput_needed); 1910 fput_light(sock->file, fput_needed);
1962 out: 1911 out:
1963 return err; 1912 return err;
1964 } 1913 }
1965 1914
1966 static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, 1915 static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
1967 struct msghdr *msg_sys, unsigned flags, int nosec) 1916 struct msghdr *msg_sys, unsigned flags, int nosec)
1968 { 1917 {
1969 struct compat_msghdr __user *msg_compat = 1918 struct compat_msghdr __user *msg_compat =
1970 (struct compat_msghdr __user *)msg; 1919 (struct compat_msghdr __user *)msg;
1971 struct iovec iovstack[UIO_FASTIOV]; 1920 struct iovec iovstack[UIO_FASTIOV];
1972 struct iovec *iov = iovstack; 1921 struct iovec *iov = iovstack;
1973 unsigned long cmsg_ptr; 1922 unsigned long cmsg_ptr;
1974 int err, iov_size, total_len, len; 1923 int err, iov_size, total_len, len;
1975 1924
1976 /* kernel mode address */ 1925 /* kernel mode address */
1977 struct sockaddr_storage addr; 1926 struct sockaddr_storage addr;
1978 1927
1979 /* user mode address pointers */ 1928 /* user mode address pointers */
1980 struct sockaddr __user *uaddr; 1929 struct sockaddr __user *uaddr;
1981 int __user *uaddr_len; 1930 int __user *uaddr_len;
1982 1931
1983 if (MSG_CMSG_COMPAT & flags) { 1932 if (MSG_CMSG_COMPAT & flags) {
1984 if (get_compat_msghdr(msg_sys, msg_compat)) 1933 if (get_compat_msghdr(msg_sys, msg_compat))
1985 return -EFAULT; 1934 return -EFAULT;
1986 } 1935 }
1987 else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) 1936 else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
1988 return -EFAULT; 1937 return -EFAULT;
1989 1938
1990 err = -EMSGSIZE; 1939 err = -EMSGSIZE;
1991 if (msg_sys->msg_iovlen > UIO_MAXIOV) 1940 if (msg_sys->msg_iovlen > UIO_MAXIOV)
1992 goto out; 1941 goto out;
1993 1942
1994 /* Check whether to allocate the iovec area */ 1943 /* Check whether to allocate the iovec area */
1995 err = -ENOMEM; 1944 err = -ENOMEM;
1996 iov_size = msg_sys->msg_iovlen * sizeof(struct iovec); 1945 iov_size = msg_sys->msg_iovlen * sizeof(struct iovec);
1997 if (msg_sys->msg_iovlen > UIO_FASTIOV) { 1946 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
1998 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); 1947 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1999 if (!iov) 1948 if (!iov)
2000 goto out; 1949 goto out;
2001 } 1950 }
2002 1951
2003 /* 1952 /*
2004 * Save the user-mode address (verify_iovec will change the 1953 * Save the user-mode address (verify_iovec will change the
2005 * kernel msghdr to use the kernel address space) 1954 * kernel msghdr to use the kernel address space)
2006 */ 1955 */
2007 1956
2008 uaddr = (__force void __user *)msg_sys->msg_name; 1957 uaddr = (__force void __user *)msg_sys->msg_name;
2009 uaddr_len = COMPAT_NAMELEN(msg); 1958 uaddr_len = COMPAT_NAMELEN(msg);
2010 if (MSG_CMSG_COMPAT & flags) { 1959 if (MSG_CMSG_COMPAT & flags) {
2011 err = verify_compat_iovec(msg_sys, iov, 1960 err = verify_compat_iovec(msg_sys, iov,
2012 (struct sockaddr *)&addr, 1961 (struct sockaddr *)&addr,
2013 VERIFY_WRITE); 1962 VERIFY_WRITE);
2014 } else 1963 } else
2015 err = verify_iovec(msg_sys, iov, 1964 err = verify_iovec(msg_sys, iov,
2016 (struct sockaddr *)&addr, 1965 (struct sockaddr *)&addr,
2017 VERIFY_WRITE); 1966 VERIFY_WRITE);
2018 if (err < 0) 1967 if (err < 0)
2019 goto out_freeiov; 1968 goto out_freeiov;
2020 total_len = err; 1969 total_len = err;
2021 1970
2022 cmsg_ptr = (unsigned long)msg_sys->msg_control; 1971 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2023 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); 1972 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
2024 1973
2025 if (sock->file->f_flags & O_NONBLOCK) 1974 if (sock->file->f_flags & O_NONBLOCK)
2026 flags |= MSG_DONTWAIT; 1975 flags |= MSG_DONTWAIT;
2027 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, 1976 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2028 total_len, flags); 1977 total_len, flags);
2029 if (err < 0) 1978 if (err < 0)
2030 goto out_freeiov; 1979 goto out_freeiov;
2031 len = err; 1980 len = err;
2032 1981
2033 if (uaddr != NULL) { 1982 if (uaddr != NULL) {
2034 err = move_addr_to_user((struct sockaddr *)&addr, 1983 err = move_addr_to_user((struct sockaddr *)&addr,
2035 msg_sys->msg_namelen, uaddr, 1984 msg_sys->msg_namelen, uaddr,
2036 uaddr_len); 1985 uaddr_len);
2037 if (err < 0) 1986 if (err < 0)
2038 goto out_freeiov; 1987 goto out_freeiov;
2039 } 1988 }
2040 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT), 1989 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
2041 COMPAT_FLAGS(msg)); 1990 COMPAT_FLAGS(msg));
2042 if (err) 1991 if (err)
2043 goto out_freeiov; 1992 goto out_freeiov;
2044 if (MSG_CMSG_COMPAT & flags) 1993 if (MSG_CMSG_COMPAT & flags)
2045 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr, 1994 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
2046 &msg_compat->msg_controllen); 1995 &msg_compat->msg_controllen);
2047 else 1996 else
2048 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr, 1997 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
2049 &msg->msg_controllen); 1998 &msg->msg_controllen);
2050 if (err) 1999 if (err)
2051 goto out_freeiov; 2000 goto out_freeiov;
2052 err = len; 2001 err = len;
2053 2002
2054 out_freeiov: 2003 out_freeiov:
2055 if (iov != iovstack) 2004 if (iov != iovstack)
2056 sock_kfree_s(sock->sk, iov, iov_size); 2005 sock_kfree_s(sock->sk, iov, iov_size);
2057 out: 2006 out:
2058 return err; 2007 return err;
2059 } 2008 }
2060 2009
2061 /* 2010 /*
2062 * BSD recvmsg interface 2011 * BSD recvmsg interface
2063 */ 2012 */
2064 2013
2065 SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, 2014 SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
2066 unsigned int, flags) 2015 unsigned int, flags)
2067 { 2016 {
2068 int fput_needed, err; 2017 int fput_needed, err;
2069 struct msghdr msg_sys; 2018 struct msghdr msg_sys;
2070 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed); 2019 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
2071 2020
2072 if (!sock) 2021 if (!sock)
2073 goto out; 2022 goto out;
2074 2023
2075 err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0); 2024 err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0);
2076 2025
2077 fput_light(sock->file, fput_needed); 2026 fput_light(sock->file, fput_needed);
2078 out: 2027 out:
2079 return err; 2028 return err;
2080 } 2029 }
2081 2030
2082 /* 2031 /*
2083 * Linux recvmmsg interface 2032 * Linux recvmmsg interface
2084 */ 2033 */
2085 2034
2086 int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, 2035 int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2087 unsigned int flags, struct timespec *timeout) 2036 unsigned int flags, struct timespec *timeout)
2088 { 2037 {
2089 int fput_needed, err, datagrams; 2038 int fput_needed, err, datagrams;
2090 struct socket *sock; 2039 struct socket *sock;
2091 struct mmsghdr __user *entry; 2040 struct mmsghdr __user *entry;
2092 struct compat_mmsghdr __user *compat_entry; 2041 struct compat_mmsghdr __user *compat_entry;
2093 struct msghdr msg_sys; 2042 struct msghdr msg_sys;
2094 struct timespec end_time; 2043 struct timespec end_time;
2095 2044
2096 if (timeout && 2045 if (timeout &&
2097 poll_select_set_timeout(&end_time, timeout->tv_sec, 2046 poll_select_set_timeout(&end_time, timeout->tv_sec,
2098 timeout->tv_nsec)) 2047 timeout->tv_nsec))
2099 return -EINVAL; 2048 return -EINVAL;
2100 2049
2101 datagrams = 0; 2050 datagrams = 0;
2102 2051
2103 sock = sockfd_lookup_light(fd, &err, &fput_needed); 2052 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2104 if (!sock) 2053 if (!sock)
2105 return err; 2054 return err;
2106 2055
2107 err = sock_error(sock->sk); 2056 err = sock_error(sock->sk);
2108 if (err) 2057 if (err)
2109 goto out_put; 2058 goto out_put;
2110 2059
2111 entry = mmsg; 2060 entry = mmsg;
2112 compat_entry = (struct compat_mmsghdr __user *)mmsg; 2061 compat_entry = (struct compat_mmsghdr __user *)mmsg;
2113 2062
2114 while (datagrams < vlen) { 2063 while (datagrams < vlen) {
2115 /* 2064 /*
2116 * No need to ask LSM for more than the first datagram. 2065 * No need to ask LSM for more than the first datagram.
2117 */ 2066 */
2118 if (MSG_CMSG_COMPAT & flags) { 2067 if (MSG_CMSG_COMPAT & flags) {
2119 err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry, 2068 err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
2120 &msg_sys, flags, datagrams); 2069 &msg_sys, flags, datagrams);
2121 if (err < 0) 2070 if (err < 0)
2122 break; 2071 break;
2123 err = __put_user(err, &compat_entry->msg_len); 2072 err = __put_user(err, &compat_entry->msg_len);
2124 ++compat_entry; 2073 ++compat_entry;
2125 } else { 2074 } else {
2126 err = __sys_recvmsg(sock, (struct msghdr __user *)entry, 2075 err = __sys_recvmsg(sock, (struct msghdr __user *)entry,
2127 &msg_sys, flags, datagrams); 2076 &msg_sys, flags, datagrams);
2128 if (err < 0) 2077 if (err < 0)
2129 break; 2078 break;
2130 err = put_user(err, &entry->msg_len); 2079 err = put_user(err, &entry->msg_len);
2131 ++entry; 2080 ++entry;
2132 } 2081 }
2133 2082
2134 if (err) 2083 if (err)
2135 break; 2084 break;
2136 ++datagrams; 2085 ++datagrams;
2137 2086
2138 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */ 2087 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2139 if (flags & MSG_WAITFORONE) 2088 if (flags & MSG_WAITFORONE)
2140 flags |= MSG_DONTWAIT; 2089 flags |= MSG_DONTWAIT;
2141 2090
2142 if (timeout) { 2091 if (timeout) {
2143 ktime_get_ts(timeout); 2092 ktime_get_ts(timeout);
2144 *timeout = timespec_sub(end_time, *timeout); 2093 *timeout = timespec_sub(end_time, *timeout);
2145 if (timeout->tv_sec < 0) { 2094 if (timeout->tv_sec < 0) {
2146 timeout->tv_sec = timeout->tv_nsec = 0; 2095 timeout->tv_sec = timeout->tv_nsec = 0;
2147 break; 2096 break;
2148 } 2097 }
2149 2098
2150 /* Timeout, return less than vlen datagrams */ 2099 /* Timeout, return less than vlen datagrams */
2151 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0) 2100 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2152 break; 2101 break;
2153 } 2102 }
2154 2103
2155 /* Out of band data, return right away */ 2104 /* Out of band data, return right away */
2156 if (msg_sys.msg_flags & MSG_OOB) 2105 if (msg_sys.msg_flags & MSG_OOB)
2157 break; 2106 break;
2158 } 2107 }
2159 2108
2160 out_put: 2109 out_put:
2161 fput_light(sock->file, fput_needed); 2110 fput_light(sock->file, fput_needed);
2162 2111
2163 if (err == 0) 2112 if (err == 0)
2164 return datagrams; 2113 return datagrams;
2165 2114
2166 if (datagrams != 0) { 2115 if (datagrams != 0) {
2167 /* 2116 /*
2168 * We may return less entries than requested (vlen) if the 2117 * We may return less entries than requested (vlen) if the
2169 * sock is non block and there aren't enough datagrams... 2118 * sock is non block and there aren't enough datagrams...
2170 */ 2119 */
2171 if (err != -EAGAIN) { 2120 if (err != -EAGAIN) {
2172 /* 2121 /*
2173 * ... or if recvmsg returns an error after we 2122 * ... or if recvmsg returns an error after we
2174 * received some datagrams, where we record the 2123 * received some datagrams, where we record the
2175 * error to return on the next call or if the 2124 * error to return on the next call or if the
2176 * app asks about it using getsockopt(SO_ERROR). 2125 * app asks about it using getsockopt(SO_ERROR).
2177 */ 2126 */
2178 sock->sk->sk_err = -err; 2127 sock->sk->sk_err = -err;
2179 } 2128 }
2180 2129
2181 return datagrams; 2130 return datagrams;
2182 } 2131 }
2183 2132
2184 return err; 2133 return err;
2185 } 2134 }
2186 2135
2187 SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, 2136 SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2188 unsigned int, vlen, unsigned int, flags, 2137 unsigned int, vlen, unsigned int, flags,
2189 struct timespec __user *, timeout) 2138 struct timespec __user *, timeout)
2190 { 2139 {
2191 int datagrams; 2140 int datagrams;
2192 struct timespec timeout_sys; 2141 struct timespec timeout_sys;
2193 2142
2194 if (!timeout) 2143 if (!timeout)
2195 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL); 2144 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2196 2145
2197 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys))) 2146 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2198 return -EFAULT; 2147 return -EFAULT;
2199 2148
2200 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys); 2149 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2201 2150
2202 if (datagrams > 0 && 2151 if (datagrams > 0 &&
2203 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys))) 2152 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2204 datagrams = -EFAULT; 2153 datagrams = -EFAULT;
2205 2154
2206 return datagrams; 2155 return datagrams;
2207 } 2156 }
2208 2157
2209 #ifdef __ARCH_WANT_SYS_SOCKETCALL 2158 #ifdef __ARCH_WANT_SYS_SOCKETCALL
2210 /* Argument list sizes for sys_socketcall */ 2159 /* Argument list sizes for sys_socketcall */
2211 #define AL(x) ((x) * sizeof(unsigned long)) 2160 #define AL(x) ((x) * sizeof(unsigned long))
2212 static const unsigned char nargs[20] = { 2161 static const unsigned char nargs[20] = {
2213 AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), 2162 AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
2214 AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), 2163 AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
2215 AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), 2164 AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
2216 AL(4),AL(5) 2165 AL(4),AL(5)
2217 }; 2166 };
2218 2167
2219 #undef AL 2168 #undef AL
2220 2169
2221 /* 2170 /*
2222 * System call vectors. 2171 * System call vectors.
2223 * 2172 *
2224 * Argument checking cleaned up. Saved 20% in size. 2173 * Argument checking cleaned up. Saved 20% in size.
2225 * This function doesn't need to set the kernel lock because 2174 * This function doesn't need to set the kernel lock because
2226 * it is set by the callees. 2175 * it is set by the callees.
2227 */ 2176 */
2228 2177
2229 SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) 2178 SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
2230 { 2179 {
2231 unsigned long a[6]; 2180 unsigned long a[6];
2232 unsigned long a0, a1; 2181 unsigned long a0, a1;
2233 int err; 2182 int err;
2234 unsigned int len; 2183 unsigned int len;
2235 2184
2236 if (call < 1 || call > SYS_RECVMMSG) 2185 if (call < 1 || call > SYS_RECVMMSG)
2237 return -EINVAL; 2186 return -EINVAL;
2238 2187
2239 len = nargs[call]; 2188 len = nargs[call];
2240 if (len > sizeof(a)) 2189 if (len > sizeof(a))
2241 return -EINVAL; 2190 return -EINVAL;
2242 2191
2243 /* copy_from_user should be SMP safe. */ 2192 /* copy_from_user should be SMP safe. */
2244 if (copy_from_user(a, args, len)) 2193 if (copy_from_user(a, args, len))
2245 return -EFAULT; 2194 return -EFAULT;
2246 2195
2247 audit_socketcall(nargs[call] / sizeof(unsigned long), a); 2196 audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2248 2197
2249 a0 = a[0]; 2198 a0 = a[0];
2250 a1 = a[1]; 2199 a1 = a[1];
2251 2200
2252 switch (call) { 2201 switch (call) {
2253 case SYS_SOCKET: 2202 case SYS_SOCKET:
2254 err = sys_socket(a0, a1, a[2]); 2203 err = sys_socket(a0, a1, a[2]);
2255 break; 2204 break;
2256 case SYS_BIND: 2205 case SYS_BIND:
2257 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]); 2206 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2258 break; 2207 break;
2259 case SYS_CONNECT: 2208 case SYS_CONNECT:
2260 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]); 2209 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2261 break; 2210 break;
2262 case SYS_LISTEN: 2211 case SYS_LISTEN:
2263 err = sys_listen(a0, a1); 2212 err = sys_listen(a0, a1);
2264 break; 2213 break;
2265 case SYS_ACCEPT: 2214 case SYS_ACCEPT:
2266 err = sys_accept4(a0, (struct sockaddr __user *)a1, 2215 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2267 (int __user *)a[2], 0); 2216 (int __user *)a[2], 0);
2268 break; 2217 break;
2269 case SYS_GETSOCKNAME: 2218 case SYS_GETSOCKNAME:
2270 err = 2219 err =
2271 sys_getsockname(a0, (struct sockaddr __user *)a1, 2220 sys_getsockname(a0, (struct sockaddr __user *)a1,
2272 (int __user *)a[2]); 2221 (int __user *)a[2]);
2273 break; 2222 break;
2274 case SYS_GETPEERNAME: 2223 case SYS_GETPEERNAME:
2275 err = 2224 err =
2276 sys_getpeername(a0, (struct sockaddr __user *)a1, 2225 sys_getpeername(a0, (struct sockaddr __user *)a1,
2277 (int __user *)a[2]); 2226 (int __user *)a[2]);
2278 break; 2227 break;
2279 case SYS_SOCKETPAIR: 2228 case SYS_SOCKETPAIR:
2280 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]); 2229 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2281 break; 2230 break;
2282 case SYS_SEND: 2231 case SYS_SEND:
2283 err = sys_send(a0, (void __user *)a1, a[2], a[3]); 2232 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2284 break; 2233 break;
2285 case SYS_SENDTO: 2234 case SYS_SENDTO:
2286 err = sys_sendto(a0, (void __user *)a1, a[2], a[3], 2235 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2287 (struct sockaddr __user *)a[4], a[5]); 2236 (struct sockaddr __user *)a[4], a[5]);
2288 break; 2237 break;
2289 case SYS_RECV: 2238 case SYS_RECV:
2290 err = sys_recv(a0, (void __user *)a1, a[2], a[3]); 2239 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2291 break; 2240 break;
2292 case SYS_RECVFROM: 2241 case SYS_RECVFROM:
2293 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3], 2242 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2294 (struct sockaddr __user *)a[4], 2243 (struct sockaddr __user *)a[4],
2295 (int __user *)a[5]); 2244 (int __user *)a[5]);
2296 break; 2245 break;
2297 case SYS_SHUTDOWN: 2246 case SYS_SHUTDOWN:
2298 err = sys_shutdown(a0, a1); 2247 err = sys_shutdown(a0, a1);
2299 break; 2248 break;
2300 case SYS_SETSOCKOPT: 2249 case SYS_SETSOCKOPT:
2301 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]); 2250 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2302 break; 2251 break;
2303 case SYS_GETSOCKOPT: 2252 case SYS_GETSOCKOPT:
2304 err = 2253 err =
2305 sys_getsockopt(a0, a1, a[2], (char __user *)a[3], 2254 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2306 (int __user *)a[4]); 2255 (int __user *)a[4]);
2307 break; 2256 break;
2308 case SYS_SENDMSG: 2257 case SYS_SENDMSG:
2309 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); 2258 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2310 break; 2259 break;
2311 case SYS_RECVMSG: 2260 case SYS_RECVMSG:
2312 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); 2261 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2313 break; 2262 break;
2314 case SYS_RECVMMSG: 2263 case SYS_RECVMMSG:
2315 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3], 2264 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2316 (struct timespec __user *)a[4]); 2265 (struct timespec __user *)a[4]);
2317 break; 2266 break;
2318 case SYS_ACCEPT4: 2267 case SYS_ACCEPT4:
2319 err = sys_accept4(a0, (struct sockaddr __user *)a1, 2268 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2320 (int __user *)a[2], a[3]); 2269 (int __user *)a[2], a[3]);
2321 break; 2270 break;
2322 default: 2271 default:
2323 err = -EINVAL; 2272 err = -EINVAL;
2324 break; 2273 break;
2325 } 2274 }
2326 return err; 2275 return err;
2327 } 2276 }
2328 2277
2329 #endif /* __ARCH_WANT_SYS_SOCKETCALL */ 2278 #endif /* __ARCH_WANT_SYS_SOCKETCALL */
2330 2279
2331 /** 2280 /**
2332 * sock_register - add a socket protocol handler 2281 * sock_register - add a socket protocol handler
2333 * @ops: description of protocol 2282 * @ops: description of protocol
2334 * 2283 *
2335 * This function is called by a protocol handler that wants to 2284 * This function is called by a protocol handler that wants to
2336 * advertise its address family, and have it linked into the 2285 * advertise its address family, and have it linked into the
2337 * socket interface. The value ops->family coresponds to the 2286 * socket interface. The value ops->family coresponds to the
2338 * socket system call protocol family. 2287 * socket system call protocol family.
2339 */ 2288 */
2340 int sock_register(const struct net_proto_family *ops) 2289 int sock_register(const struct net_proto_family *ops)
2341 { 2290 {
2342 int err; 2291 int err;
2343 2292
2344 if (ops->family >= NPROTO) { 2293 if (ops->family >= NPROTO) {
2345 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, 2294 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2346 NPROTO); 2295 NPROTO);
2347 return -ENOBUFS; 2296 return -ENOBUFS;
2348 } 2297 }
2349 2298
2350 spin_lock(&net_family_lock); 2299 spin_lock(&net_family_lock);
2351 if (net_families[ops->family]) 2300 if (net_families[ops->family])
2352 err = -EEXIST; 2301 err = -EEXIST;
2353 else { 2302 else {
2354 net_families[ops->family] = ops; 2303 net_families[ops->family] = ops;
2355 err = 0; 2304 err = 0;
2356 } 2305 }
2357 spin_unlock(&net_family_lock); 2306 spin_unlock(&net_family_lock);
2358 2307
2359 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family); 2308 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
2360 return err; 2309 return err;
2361 } 2310 }
2362 2311
2363 /** 2312 /**
2364 * sock_unregister - remove a protocol handler 2313 * sock_unregister - remove a protocol handler
2365 * @family: protocol family to remove 2314 * @family: protocol family to remove
2366 * 2315 *
2367 * This function is called by a protocol handler that wants to 2316 * This function is called by a protocol handler that wants to
2368 * remove its address family, and have it unlinked from the 2317 * remove its address family, and have it unlinked from the
2369 * new socket creation. 2318 * new socket creation.
2370 * 2319 *
2371 * If protocol handler is a module, then it can use module reference 2320 * If protocol handler is a module, then it can use module reference
2372 * counts to protect against new references. If protocol handler is not 2321 * counts to protect against new references. If protocol handler is not
2373 * a module then it needs to provide its own protection in 2322 * a module then it needs to provide its own protection in
2374 * the ops->create routine. 2323 * the ops->create routine.
2375 */ 2324 */
2376 void sock_unregister(int family) 2325 void sock_unregister(int family)
2377 { 2326 {
2378 BUG_ON(family < 0 || family >= NPROTO); 2327 BUG_ON(family < 0 || family >= NPROTO);
2379 2328
2380 spin_lock(&net_family_lock); 2329 spin_lock(&net_family_lock);
2381 net_families[family] = NULL; 2330 net_families[family] = NULL;
2382 spin_unlock(&net_family_lock); 2331 spin_unlock(&net_family_lock);
2383 2332
2384 synchronize_rcu(); 2333 synchronize_rcu();
2385 2334
2386 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); 2335 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
2387 } 2336 }
2388 2337
2389 static int __init sock_init(void) 2338 static int __init sock_init(void)
2390 { 2339 {
2391 /* 2340 /*
2392 * Initialize sock SLAB cache. 2341 * Initialize sock SLAB cache.
2393 */ 2342 */
2394 2343
2395 sk_init(); 2344 sk_init();
2396 2345
2397 /* 2346 /*
2398 * Initialize skbuff SLAB cache 2347 * Initialize skbuff SLAB cache
2399 */ 2348 */
2400 skb_init(); 2349 skb_init();
2401 2350
2402 /* 2351 /*
2403 * Initialize the protocols module. 2352 * Initialize the protocols module.
2404 */ 2353 */
2405 2354
2406 init_inodecache(); 2355 init_inodecache();
2407 register_filesystem(&sock_fs_type); 2356 register_filesystem(&sock_fs_type);
2408 sock_mnt = kern_mount(&sock_fs_type); 2357 sock_mnt = kern_mount(&sock_fs_type);
2409 2358
2410 /* The real protocol initialization is performed in later initcalls. 2359 /* The real protocol initialization is performed in later initcalls.
2411 */ 2360 */
2412 2361
2413 #ifdef CONFIG_NETFILTER 2362 #ifdef CONFIG_NETFILTER
2414 netfilter_init(); 2363 netfilter_init();
2415 #endif 2364 #endif
2416 2365
2417 return 0; 2366 return 0;
2418 } 2367 }
2419 2368
2420 core_initcall(sock_init); /* early initcall */ 2369 core_initcall(sock_init); /* early initcall */
2421 2370
#ifdef CONFIG_PROC_FS
/*
 * Emit a "sockets: used N" line on @seq, where N is the sum of the
 * per-CPU sockets_in_use counters over every possible CPU.
 */
void socket_seq_show(struct seq_file *seq)
{
	int total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += per_cpu(sockets_in_use, cpu);

	/* The sum can come out negative; report zero in that case. */
	seq_printf(seq, "sockets: used %d\n", total < 0 ? 0 : total);
}
#endif /* CONFIG_PROC_FS */
2438 2387
2439 #ifdef CONFIG_COMPAT 2388 #ifdef CONFIG_COMPAT
2440 static int do_siocgstamp(struct net *net, struct socket *sock, 2389 static int do_siocgstamp(struct net *net, struct socket *sock,
2441 unsigned int cmd, struct compat_timeval __user *up) 2390 unsigned int cmd, struct compat_timeval __user *up)
2442 { 2391 {
2443 mm_segment_t old_fs = get_fs(); 2392 mm_segment_t old_fs = get_fs();
2444 struct timeval ktv; 2393 struct timeval ktv;
2445 int err; 2394 int err;
2446 2395
2447 set_fs(KERNEL_DS); 2396 set_fs(KERNEL_DS);
2448 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv); 2397 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
2449 set_fs(old_fs); 2398 set_fs(old_fs);
2450 if (!err) { 2399 if (!err) {
2451 err = put_user(ktv.tv_sec, &up->tv_sec); 2400 err = put_user(ktv.tv_sec, &up->tv_sec);
2452 err |= __put_user(ktv.tv_usec, &up->tv_usec); 2401 err |= __put_user(ktv.tv_usec, &up->tv_usec);
2453 } 2402 }
2454 return err; 2403 return err;
2455 } 2404 }
2456 2405
2457 static int do_siocgstampns(struct net *net, struct socket *sock, 2406 static int do_siocgstampns(struct net *net, struct socket *sock,
2458 unsigned int cmd, struct compat_timespec __user *up) 2407 unsigned int cmd, struct compat_timespec __user *up)
2459 { 2408 {
2460 mm_segment_t old_fs = get_fs(); 2409 mm_segment_t old_fs = get_fs();
2461 struct timespec kts; 2410 struct timespec kts;
2462 int err; 2411 int err;
2463 2412
2464 set_fs(KERNEL_DS); 2413 set_fs(KERNEL_DS);
2465 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts); 2414 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
2466 set_fs(old_fs); 2415 set_fs(old_fs);
2467 if (!err) { 2416 if (!err) {
2468 err = put_user(kts.tv_sec, &up->tv_sec); 2417 err = put_user(kts.tv_sec, &up->tv_sec);
2469 err |= __put_user(kts.tv_nsec, &up->tv_nsec); 2418 err |= __put_user(kts.tv_nsec, &up->tv_nsec);
2470 } 2419 }
2471 return err; 2420 return err;
2472 } 2421 }
2473 2422
2474 static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32) 2423 static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
2475 { 2424 {
2476 struct ifreq __user *uifr; 2425 struct ifreq __user *uifr;
2477 int err; 2426 int err;
2478 2427
2479 uifr = compat_alloc_user_space(sizeof(struct ifreq)); 2428 uifr = compat_alloc_user_space(sizeof(struct ifreq));
2480 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) 2429 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2481 return -EFAULT; 2430 return -EFAULT;
2482 2431
2483 err = dev_ioctl(net, SIOCGIFNAME, uifr); 2432 err = dev_ioctl(net, SIOCGIFNAME, uifr);
2484 if (err) 2433 if (err)
2485 return err; 2434 return err;
2486 2435
2487 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq))) 2436 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
2488 return -EFAULT; 2437 return -EFAULT;
2489 2438
2490 return 0; 2439 return 0;
2491 } 2440 }
2492 2441
2493 static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) 2442 static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
2494 { 2443 {
2495 struct compat_ifconf ifc32; 2444 struct compat_ifconf ifc32;
2496 struct ifconf ifc; 2445 struct ifconf ifc;
2497 struct ifconf __user *uifc; 2446 struct ifconf __user *uifc;
2498 struct compat_ifreq __user *ifr32; 2447 struct compat_ifreq __user *ifr32;
2499 struct ifreq __user *ifr; 2448 struct ifreq __user *ifr;
2500 unsigned int i, j; 2449 unsigned int i, j;
2501 int err; 2450 int err;
2502 2451
2503 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf))) 2452 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
2504 return -EFAULT; 2453 return -EFAULT;
2505 2454
2506 if (ifc32.ifcbuf == 0) { 2455 if (ifc32.ifcbuf == 0) {
2507 ifc32.ifc_len = 0; 2456 ifc32.ifc_len = 0;
2508 ifc.ifc_len = 0; 2457 ifc.ifc_len = 0;
2509 ifc.ifc_req = NULL; 2458 ifc.ifc_req = NULL;
2510 uifc = compat_alloc_user_space(sizeof(struct ifconf)); 2459 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2511 } else { 2460 } else {
2512 size_t len =((ifc32.ifc_len / sizeof (struct compat_ifreq)) + 1) * 2461 size_t len =((ifc32.ifc_len / sizeof (struct compat_ifreq)) + 1) *
2513 sizeof (struct ifreq); 2462 sizeof (struct ifreq);
2514 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len); 2463 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2515 ifc.ifc_len = len; 2464 ifc.ifc_len = len;
2516 ifr = ifc.ifc_req = (void __user *)(uifc + 1); 2465 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2517 ifr32 = compat_ptr(ifc32.ifcbuf); 2466 ifr32 = compat_ptr(ifc32.ifcbuf);
2518 for (i = 0; i < ifc32.ifc_len; i += sizeof (struct compat_ifreq)) { 2467 for (i = 0; i < ifc32.ifc_len; i += sizeof (struct compat_ifreq)) {
2519 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq))) 2468 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
2520 return -EFAULT; 2469 return -EFAULT;
2521 ifr++; 2470 ifr++;
2522 ifr32++; 2471 ifr32++;
2523 } 2472 }
2524 } 2473 }
2525 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf))) 2474 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2526 return -EFAULT; 2475 return -EFAULT;
2527 2476
2528 err = dev_ioctl(net, SIOCGIFCONF, uifc); 2477 err = dev_ioctl(net, SIOCGIFCONF, uifc);
2529 if (err) 2478 if (err)
2530 return err; 2479 return err;
2531 2480
2532 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf))) 2481 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2533 return -EFAULT; 2482 return -EFAULT;
2534 2483
2535 ifr = ifc.ifc_req; 2484 ifr = ifc.ifc_req;
2536 ifr32 = compat_ptr(ifc32.ifcbuf); 2485 ifr32 = compat_ptr(ifc32.ifcbuf);
2537 for (i = 0, j = 0; 2486 for (i = 0, j = 0;
2538 i + sizeof (struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len; 2487 i + sizeof (struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2539 i += sizeof (struct compat_ifreq), j += sizeof (struct ifreq)) { 2488 i += sizeof (struct compat_ifreq), j += sizeof (struct ifreq)) {
2540 if (copy_in_user(ifr32, ifr, sizeof (struct compat_ifreq))) 2489 if (copy_in_user(ifr32, ifr, sizeof (struct compat_ifreq)))
2541 return -EFAULT; 2490 return -EFAULT;
2542 ifr32++; 2491 ifr32++;
2543 ifr++; 2492 ifr++;
2544 } 2493 }
2545 2494
2546 if (ifc32.ifcbuf == 0) { 2495 if (ifc32.ifcbuf == 0) {
2547 /* Translate from 64-bit structure multiple to 2496 /* Translate from 64-bit structure multiple to
2548 * a 32-bit one. 2497 * a 32-bit one.
2549 */ 2498 */
2550 i = ifc.ifc_len; 2499 i = ifc.ifc_len;
2551 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq)); 2500 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
2552 ifc32.ifc_len = i; 2501 ifc32.ifc_len = i;
2553 } else { 2502 } else {
2554 ifc32.ifc_len = i; 2503 ifc32.ifc_len = i;
2555 } 2504 }
2556 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf))) 2505 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
2557 return -EFAULT; 2506 return -EFAULT;
2558 2507
2559 return 0; 2508 return 0;
2560 } 2509 }
2561 2510
2562 static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) 2511 static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
2563 { 2512 {
2564 struct ifreq __user *ifr; 2513 struct ifreq __user *ifr;
2565 u32 data; 2514 u32 data;
2566 void __user *datap; 2515 void __user *datap;
2567 2516
2568 ifr = compat_alloc_user_space(sizeof(*ifr)); 2517 ifr = compat_alloc_user_space(sizeof(*ifr));
2569 2518
2570 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) 2519 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
2571 return -EFAULT; 2520 return -EFAULT;
2572 2521
2573 if (get_user(data, &ifr32->ifr_ifru.ifru_data)) 2522 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2574 return -EFAULT; 2523 return -EFAULT;
2575 2524
2576 datap = compat_ptr(data); 2525 datap = compat_ptr(data);
2577 if (put_user(datap, &ifr->ifr_ifru.ifru_data)) 2526 if (put_user(datap, &ifr->ifr_ifru.ifru_data))
2578 return -EFAULT; 2527 return -EFAULT;
2579 2528
2580 return dev_ioctl(net, SIOCETHTOOL, ifr); 2529 return dev_ioctl(net, SIOCETHTOOL, ifr);
2581 } 2530 }
2582 2531
2583 static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32) 2532 static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2584 { 2533 {
2585 void __user *uptr; 2534 void __user *uptr;
2586 compat_uptr_t uptr32; 2535 compat_uptr_t uptr32;
2587 struct ifreq __user *uifr; 2536 struct ifreq __user *uifr;
2588 2537
2589 uifr = compat_alloc_user_space(sizeof (*uifr)); 2538 uifr = compat_alloc_user_space(sizeof (*uifr));
2590 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) 2539 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2591 return -EFAULT; 2540 return -EFAULT;
2592 2541
2593 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu)) 2542 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2594 return -EFAULT; 2543 return -EFAULT;
2595 2544
2596 uptr = compat_ptr(uptr32); 2545 uptr = compat_ptr(uptr32);
2597 2546
2598 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc)) 2547 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2599 return -EFAULT; 2548 return -EFAULT;
2600 2549
2601 return dev_ioctl(net, SIOCWANDEV, uifr); 2550 return dev_ioctl(net, SIOCWANDEV, uifr);
2602 } 2551 }
2603 2552
2604 static int bond_ioctl(struct net *net, unsigned int cmd, 2553 static int bond_ioctl(struct net *net, unsigned int cmd,
2605 struct compat_ifreq __user *ifr32) 2554 struct compat_ifreq __user *ifr32)
2606 { 2555 {
2607 struct ifreq kifr; 2556 struct ifreq kifr;
2608 struct ifreq __user *uifr; 2557 struct ifreq __user *uifr;
2609 mm_segment_t old_fs; 2558 mm_segment_t old_fs;
2610 int err; 2559 int err;
2611 u32 data; 2560 u32 data;
2612 void __user *datap; 2561 void __user *datap;
2613 2562
2614 switch (cmd) { 2563 switch (cmd) {
2615 case SIOCBONDENSLAVE: 2564 case SIOCBONDENSLAVE:
2616 case SIOCBONDRELEASE: 2565 case SIOCBONDRELEASE:
2617 case SIOCBONDSETHWADDR: 2566 case SIOCBONDSETHWADDR:
2618 case SIOCBONDCHANGEACTIVE: 2567 case SIOCBONDCHANGEACTIVE:
2619 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq))) 2568 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
2620 return -EFAULT; 2569 return -EFAULT;
2621 2570
2622 old_fs = get_fs(); 2571 old_fs = get_fs();
2623 set_fs (KERNEL_DS); 2572 set_fs (KERNEL_DS);
2624 err = dev_ioctl(net, cmd, &kifr); 2573 err = dev_ioctl(net, cmd, &kifr);
2625 set_fs (old_fs); 2574 set_fs (old_fs);
2626 2575
2627 return err; 2576 return err;
2628 case SIOCBONDSLAVEINFOQUERY: 2577 case SIOCBONDSLAVEINFOQUERY:
2629 case SIOCBONDINFOQUERY: 2578 case SIOCBONDINFOQUERY:
2630 uifr = compat_alloc_user_space(sizeof(*uifr)); 2579 uifr = compat_alloc_user_space(sizeof(*uifr));
2631 if (copy_in_user(&uifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) 2580 if (copy_in_user(&uifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
2632 return -EFAULT; 2581 return -EFAULT;
2633 2582
2634 if (get_user(data, &ifr32->ifr_ifru.ifru_data)) 2583 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2635 return -EFAULT; 2584 return -EFAULT;
2636 2585
2637 datap = compat_ptr(data); 2586 datap = compat_ptr(data);
2638 if (put_user(datap, &uifr->ifr_ifru.ifru_data)) 2587 if (put_user(datap, &uifr->ifr_ifru.ifru_data))
2639 return -EFAULT; 2588 return -EFAULT;
2640 2589
2641 return dev_ioctl(net, cmd, uifr); 2590 return dev_ioctl(net, cmd, uifr);
2642 default: 2591 default:
2643 return -EINVAL; 2592 return -EINVAL;
2644 }; 2593 };
2645 } 2594 }
2646 2595
2647 static int siocdevprivate_ioctl(struct net *net, unsigned int cmd, 2596 static int siocdevprivate_ioctl(struct net *net, unsigned int cmd,
2648 struct compat_ifreq __user *u_ifreq32) 2597 struct compat_ifreq __user *u_ifreq32)
2649 { 2598 {
2650 struct ifreq __user *u_ifreq64; 2599 struct ifreq __user *u_ifreq64;
2651 char tmp_buf[IFNAMSIZ]; 2600 char tmp_buf[IFNAMSIZ];
2652 void __user *data64; 2601 void __user *data64;
2653 u32 data32; 2602 u32 data32;
2654 2603
2655 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]), 2604 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2656 IFNAMSIZ)) 2605 IFNAMSIZ))
2657 return -EFAULT; 2606 return -EFAULT;
2658 if (__get_user(data32, &u_ifreq32->ifr_ifru.ifru_data)) 2607 if (__get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
2659 return -EFAULT; 2608 return -EFAULT;
2660 data64 = compat_ptr(data32); 2609 data64 = compat_ptr(data32);
2661 2610
2662 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64)); 2611 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2663 2612
2664 /* Don't check these user accesses, just let that get trapped 2613 /* Don't check these user accesses, just let that get trapped
2665 * in the ioctl handler instead. 2614 * in the ioctl handler instead.
2666 */ 2615 */
2667 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0], 2616 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2668 IFNAMSIZ)) 2617 IFNAMSIZ))
2669 return -EFAULT; 2618 return -EFAULT;
2670 if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data)) 2619 if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
2671 return -EFAULT; 2620 return -EFAULT;
2672 2621
2673 return dev_ioctl(net, cmd, u_ifreq64); 2622 return dev_ioctl(net, cmd, u_ifreq64);
2674 } 2623 }
2675 2624
2676 static int dev_ifsioc(struct net *net, struct socket *sock, 2625 static int dev_ifsioc(struct net *net, struct socket *sock,
2677 unsigned int cmd, struct compat_ifreq __user *uifr32) 2626 unsigned int cmd, struct compat_ifreq __user *uifr32)
2678 { 2627 {
2679 struct ifreq __user *uifr; 2628 struct ifreq __user *uifr;
2680 int err; 2629 int err;
2681 2630
2682 uifr = compat_alloc_user_space(sizeof(*uifr)); 2631 uifr = compat_alloc_user_space(sizeof(*uifr));
2683 if (copy_in_user(uifr, uifr32, sizeof(*uifr32))) 2632 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2684 return -EFAULT; 2633 return -EFAULT;
2685 2634
2686 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr); 2635 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2687 2636
2688 if (!err) { 2637 if (!err) {
2689 switch (cmd) { 2638 switch (cmd) {
2690 case SIOCGIFFLAGS: 2639 case SIOCGIFFLAGS:
2691 case SIOCGIFMETRIC: 2640 case SIOCGIFMETRIC:
2692 case SIOCGIFMTU: 2641 case SIOCGIFMTU:
2693 case SIOCGIFMEM: 2642 case SIOCGIFMEM:
2694 case SIOCGIFHWADDR: 2643 case SIOCGIFHWADDR:
2695 case SIOCGIFINDEX: 2644 case SIOCGIFINDEX:
2696 case SIOCGIFADDR: 2645 case SIOCGIFADDR:
2697 case SIOCGIFBRDADDR: 2646 case SIOCGIFBRDADDR:
2698 case SIOCGIFDSTADDR: 2647 case SIOCGIFDSTADDR:
2699 case SIOCGIFNETMASK: 2648 case SIOCGIFNETMASK:
2700 case SIOCGIFPFLAGS: 2649 case SIOCGIFPFLAGS:
2701 case SIOCGIFTXQLEN: 2650 case SIOCGIFTXQLEN:
2702 case SIOCGMIIPHY: 2651 case SIOCGMIIPHY:
2703 case SIOCGMIIREG: 2652 case SIOCGMIIREG:
2704 if (copy_in_user(uifr32, uifr, sizeof(*uifr32))) 2653 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
2705 err = -EFAULT; 2654 err = -EFAULT;
2706 break; 2655 break;
2707 } 2656 }
2708 } 2657 }
2709 return err; 2658 return err;
2710 } 2659 }
2711 2660
2712 static int compat_sioc_ifmap(struct net *net, unsigned int cmd, 2661 static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2713 struct compat_ifreq __user *uifr32) 2662 struct compat_ifreq __user *uifr32)
2714 { 2663 {
2715 struct ifreq ifr; 2664 struct ifreq ifr;
2716 struct compat_ifmap __user *uifmap32; 2665 struct compat_ifmap __user *uifmap32;
2717 mm_segment_t old_fs; 2666 mm_segment_t old_fs;
2718 int err; 2667 int err;
2719 2668
2720 uifmap32 = &uifr32->ifr_ifru.ifru_map; 2669 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2721 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name)); 2670 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
2722 err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); 2671 err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2723 err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); 2672 err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2724 err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); 2673 err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2725 err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq); 2674 err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq);
2726 err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma); 2675 err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma);
2727 err |= __get_user(ifr.ifr_map.port, &uifmap32->port); 2676 err |= __get_user(ifr.ifr_map.port, &uifmap32->port);
2728 if (err) 2677 if (err)
2729 return -EFAULT; 2678 return -EFAULT;
2730 2679
2731 old_fs = get_fs(); 2680 old_fs = get_fs();
2732 set_fs (KERNEL_DS); 2681 set_fs (KERNEL_DS);
2733 err = dev_ioctl(net, cmd, (void __user *)&ifr); 2682 err = dev_ioctl(net, cmd, (void __user *)&ifr);
2734 set_fs (old_fs); 2683 set_fs (old_fs);
2735 2684
2736 if (cmd == SIOCGIFMAP && !err) { 2685 if (cmd == SIOCGIFMAP && !err) {
2737 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name)); 2686 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
2738 err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); 2687 err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2739 err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); 2688 err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2740 err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); 2689 err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2741 err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq); 2690 err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq);
2742 err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma); 2691 err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma);
2743 err |= __put_user(ifr.ifr_map.port, &uifmap32->port); 2692 err |= __put_user(ifr.ifr_map.port, &uifmap32->port);
2744 if (err) 2693 if (err)
2745 err = -EFAULT; 2694 err = -EFAULT;
2746 } 2695 }
2747 return err; 2696 return err;
2748 } 2697 }
2749 2698
2750 static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uifr32) 2699 static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uifr32)
2751 { 2700 {
2752 void __user *uptr; 2701 void __user *uptr;
2753 compat_uptr_t uptr32; 2702 compat_uptr_t uptr32;
2754 struct ifreq __user *uifr; 2703 struct ifreq __user *uifr;
2755 2704
2756 uifr = compat_alloc_user_space(sizeof (*uifr)); 2705 uifr = compat_alloc_user_space(sizeof (*uifr));
2757 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) 2706 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2758 return -EFAULT; 2707 return -EFAULT;
2759 2708
2760 if (get_user(uptr32, &uifr32->ifr_data)) 2709 if (get_user(uptr32, &uifr32->ifr_data))
2761 return -EFAULT; 2710 return -EFAULT;
2762 2711
2763 uptr = compat_ptr(uptr32); 2712 uptr = compat_ptr(uptr32);
2764 2713
2765 if (put_user(uptr, &uifr->ifr_data)) 2714 if (put_user(uptr, &uifr->ifr_data))
2766 return -EFAULT; 2715 return -EFAULT;
2767 2716
2768 return dev_ioctl(net, SIOCSHWTSTAMP, uifr); 2717 return dev_ioctl(net, SIOCSHWTSTAMP, uifr);
2769 } 2718 }
2770 2719
2771 struct rtentry32 { 2720 struct rtentry32 {
2772 u32 rt_pad1; 2721 u32 rt_pad1;
2773 struct sockaddr rt_dst; /* target address */ 2722 struct sockaddr rt_dst; /* target address */
2774 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */ 2723 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
2775 struct sockaddr rt_genmask; /* target network mask (IP) */ 2724 struct sockaddr rt_genmask; /* target network mask (IP) */
2776 unsigned short rt_flags; 2725 unsigned short rt_flags;
2777 short rt_pad2; 2726 short rt_pad2;
2778 u32 rt_pad3; 2727 u32 rt_pad3;
2779 unsigned char rt_tos; 2728 unsigned char rt_tos;
2780 unsigned char rt_class; 2729 unsigned char rt_class;
2781 short rt_pad4; 2730 short rt_pad4;
2782 short rt_metric; /* +1 for binary compatibility! */ 2731 short rt_metric; /* +1 for binary compatibility! */
2783 /* char * */ u32 rt_dev; /* forcing the device at add */ 2732 /* char * */ u32 rt_dev; /* forcing the device at add */
2784 u32 rt_mtu; /* per route MTU/Window */ 2733 u32 rt_mtu; /* per route MTU/Window */
2785 u32 rt_window; /* Window clamping */ 2734 u32 rt_window; /* Window clamping */
2786 unsigned short rt_irtt; /* Initial RTT */ 2735 unsigned short rt_irtt; /* Initial RTT */
2787 }; 2736 };
2788 2737
2789 struct in6_rtmsg32 { 2738 struct in6_rtmsg32 {
2790 struct in6_addr rtmsg_dst; 2739 struct in6_addr rtmsg_dst;
2791 struct in6_addr rtmsg_src; 2740 struct in6_addr rtmsg_src;
2792 struct in6_addr rtmsg_gateway; 2741 struct in6_addr rtmsg_gateway;
2793 u32 rtmsg_type; 2742 u32 rtmsg_type;
2794 u16 rtmsg_dst_len; 2743 u16 rtmsg_dst_len;
2795 u16 rtmsg_src_len; 2744 u16 rtmsg_src_len;
2796 u32 rtmsg_metric; 2745 u32 rtmsg_metric;
2797 u32 rtmsg_info; 2746 u32 rtmsg_info;
2798 u32 rtmsg_flags; 2747 u32 rtmsg_flags;
2799 s32 rtmsg_ifindex; 2748 s32 rtmsg_ifindex;
2800 }; 2749 };
2801 2750
2802 static int routing_ioctl(struct net *net, struct socket *sock, 2751 static int routing_ioctl(struct net *net, struct socket *sock,
2803 unsigned int cmd, void __user *argp) 2752 unsigned int cmd, void __user *argp)
2804 { 2753 {
2805 int ret; 2754 int ret;
2806 void *r = NULL; 2755 void *r = NULL;
2807 struct in6_rtmsg r6; 2756 struct in6_rtmsg r6;
2808 struct rtentry r4; 2757 struct rtentry r4;
2809 char devname[16]; 2758 char devname[16];
2810 u32 rtdev; 2759 u32 rtdev;
2811 mm_segment_t old_fs = get_fs(); 2760 mm_segment_t old_fs = get_fs();
2812 2761
2813 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */ 2762 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
2814 struct in6_rtmsg32 __user *ur6 = argp; 2763 struct in6_rtmsg32 __user *ur6 = argp;
2815 ret = copy_from_user (&r6.rtmsg_dst, &(ur6->rtmsg_dst), 2764 ret = copy_from_user (&r6.rtmsg_dst, &(ur6->rtmsg_dst),
2816 3 * sizeof(struct in6_addr)); 2765 3 * sizeof(struct in6_addr));
2817 ret |= __get_user (r6.rtmsg_type, &(ur6->rtmsg_type)); 2766 ret |= __get_user (r6.rtmsg_type, &(ur6->rtmsg_type));
2818 ret |= __get_user (r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len)); 2767 ret |= __get_user (r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
2819 ret |= __get_user (r6.rtmsg_src_len, &(ur6->rtmsg_src_len)); 2768 ret |= __get_user (r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
2820 ret |= __get_user (r6.rtmsg_metric, &(ur6->rtmsg_metric)); 2769 ret |= __get_user (r6.rtmsg_metric, &(ur6->rtmsg_metric));
2821 ret |= __get_user (r6.rtmsg_info, &(ur6->rtmsg_info)); 2770 ret |= __get_user (r6.rtmsg_info, &(ur6->rtmsg_info));
2822 ret |= __get_user (r6.rtmsg_flags, &(ur6->rtmsg_flags)); 2771 ret |= __get_user (r6.rtmsg_flags, &(ur6->rtmsg_flags));
2823 ret |= __get_user (r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex)); 2772 ret |= __get_user (r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
2824 2773
2825 r = (void *) &r6; 2774 r = (void *) &r6;
2826 } else { /* ipv4 */ 2775 } else { /* ipv4 */
2827 struct rtentry32 __user *ur4 = argp; 2776 struct rtentry32 __user *ur4 = argp;
2828 ret = copy_from_user (&r4.rt_dst, &(ur4->rt_dst), 2777 ret = copy_from_user (&r4.rt_dst, &(ur4->rt_dst),
2829 3 * sizeof(struct sockaddr)); 2778 3 * sizeof(struct sockaddr));
2830 ret |= __get_user (r4.rt_flags, &(ur4->rt_flags)); 2779 ret |= __get_user (r4.rt_flags, &(ur4->rt_flags));
2831 ret |= __get_user (r4.rt_metric, &(ur4->rt_metric)); 2780 ret |= __get_user (r4.rt_metric, &(ur4->rt_metric));
2832 ret |= __get_user (r4.rt_mtu, &(ur4->rt_mtu)); 2781 ret |= __get_user (r4.rt_mtu, &(ur4->rt_mtu));
2833 ret |= __get_user (r4.rt_window, &(ur4->rt_window)); 2782 ret |= __get_user (r4.rt_window, &(ur4->rt_window));
2834 ret |= __get_user (r4.rt_irtt, &(ur4->rt_irtt)); 2783 ret |= __get_user (r4.rt_irtt, &(ur4->rt_irtt));
2835 ret |= __get_user (rtdev, &(ur4->rt_dev)); 2784 ret |= __get_user (rtdev, &(ur4->rt_dev));
2836 if (rtdev) { 2785 if (rtdev) {
2837 ret |= copy_from_user (devname, compat_ptr(rtdev), 15); 2786 ret |= copy_from_user (devname, compat_ptr(rtdev), 15);
2838 r4.rt_dev = devname; devname[15] = 0; 2787 r4.rt_dev = devname; devname[15] = 0;
2839 } else 2788 } else
2840 r4.rt_dev = NULL; 2789 r4.rt_dev = NULL;
2841 2790
2842 r = (void *) &r4; 2791 r = (void *) &r4;
2843 } 2792 }
2844 2793
2845 if (ret) { 2794 if (ret) {
2846 ret = -EFAULT; 2795 ret = -EFAULT;
2847 goto out; 2796 goto out;
2848 } 2797 }
2849 2798
2850 set_fs (KERNEL_DS); 2799 set_fs (KERNEL_DS);
2851 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r); 2800 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
2852 set_fs (old_fs); 2801 set_fs (old_fs);
2853 2802
2854 out: 2803 out:
2855 return ret; 2804 return ret;
2856 } 2805 }
2857 2806
2858 /* Since old style bridge ioctl's endup using SIOCDEVPRIVATE 2807 /* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
2859 * for some operations; this forces use of the newer bridge-utils that 2808 * for some operations; this forces use of the newer bridge-utils that
2860 * use compatiable ioctls 2809 * use compatiable ioctls
2861 */ 2810 */
2862 static int old_bridge_ioctl(compat_ulong_t __user *argp) 2811 static int old_bridge_ioctl(compat_ulong_t __user *argp)
2863 { 2812 {
2864 compat_ulong_t tmp; 2813 compat_ulong_t tmp;
2865 2814
2866 if (get_user(tmp, argp)) 2815 if (get_user(tmp, argp))
2867 return -EFAULT; 2816 return -EFAULT;
2868 if (tmp == BRCTL_GET_VERSION) 2817 if (tmp == BRCTL_GET_VERSION)
2869 return BRCTL_VERSION + 1; 2818 return BRCTL_VERSION + 1;
2870 return -EINVAL; 2819 return -EINVAL;
2871 } 2820 }
2872 2821
2873 static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, 2822 static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
2874 unsigned int cmd, unsigned long arg) 2823 unsigned int cmd, unsigned long arg)
2875 { 2824 {
2876 void __user *argp = compat_ptr(arg); 2825 void __user *argp = compat_ptr(arg);
2877 struct sock *sk = sock->sk; 2826 struct sock *sk = sock->sk;
2878 struct net *net = sock_net(sk); 2827 struct net *net = sock_net(sk);
2879 2828
2880 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) 2829 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
2881 return siocdevprivate_ioctl(net, cmd, argp); 2830 return siocdevprivate_ioctl(net, cmd, argp);
2882 2831
2883 switch (cmd) { 2832 switch (cmd) {
2884 case SIOCSIFBR: 2833 case SIOCSIFBR:
2885 case SIOCGIFBR: 2834 case SIOCGIFBR:
2886 return old_bridge_ioctl(argp); 2835 return old_bridge_ioctl(argp);
2887 case SIOCGIFNAME: 2836 case SIOCGIFNAME:
2888 return dev_ifname32(net, argp); 2837 return dev_ifname32(net, argp);
2889 case SIOCGIFCONF: 2838 case SIOCGIFCONF:
2890 return dev_ifconf(net, argp); 2839 return dev_ifconf(net, argp);
2891 case SIOCETHTOOL: 2840 case SIOCETHTOOL:
2892 return ethtool_ioctl(net, argp); 2841 return ethtool_ioctl(net, argp);
2893 case SIOCWANDEV: 2842 case SIOCWANDEV:
2894 return compat_siocwandev(net, argp); 2843 return compat_siocwandev(net, argp);
2895 case SIOCGIFMAP: 2844 case SIOCGIFMAP:
2896 case SIOCSIFMAP: 2845 case SIOCSIFMAP:
2897 return compat_sioc_ifmap(net, cmd, argp); 2846 return compat_sioc_ifmap(net, cmd, argp);
2898 case SIOCBONDENSLAVE: 2847 case SIOCBONDENSLAVE:
2899 case SIOCBONDRELEASE: 2848 case SIOCBONDRELEASE:
2900 case SIOCBONDSETHWADDR: 2849 case SIOCBONDSETHWADDR:
2901 case SIOCBONDSLAVEINFOQUERY: 2850 case SIOCBONDSLAVEINFOQUERY:
2902 case SIOCBONDINFOQUERY: 2851 case SIOCBONDINFOQUERY:
2903 case SIOCBONDCHANGEACTIVE: 2852 case SIOCBONDCHANGEACTIVE:
2904 return bond_ioctl(net, cmd, argp); 2853 return bond_ioctl(net, cmd, argp);
2905 case SIOCADDRT: 2854 case SIOCADDRT:
2906 case SIOCDELRT: 2855 case SIOCDELRT:
2907 return routing_ioctl(net, sock, cmd, argp); 2856 return routing_ioctl(net, sock, cmd, argp);
2908 case SIOCGSTAMP: 2857 case SIOCGSTAMP:
2909 return do_siocgstamp(net, sock, cmd, argp); 2858 return do_siocgstamp(net, sock, cmd, argp);
2910 case SIOCGSTAMPNS: 2859 case SIOCGSTAMPNS:
2911 return do_siocgstampns(net, sock, cmd, argp); 2860 return do_siocgstampns(net, sock, cmd, argp);
2912 case SIOCSHWTSTAMP: 2861 case SIOCSHWTSTAMP:
2913 return compat_siocshwtstamp(net, argp); 2862 return compat_siocshwtstamp(net, argp);
2914 2863
2915 case FIOSETOWN: 2864 case FIOSETOWN:
2916 case SIOCSPGRP: 2865 case SIOCSPGRP:
2917 case FIOGETOWN: 2866 case FIOGETOWN:
2918 case SIOCGPGRP: 2867 case SIOCGPGRP:
2919 case SIOCBRADDBR: 2868 case SIOCBRADDBR:
2920 case SIOCBRDELBR: 2869 case SIOCBRDELBR:
2921 case SIOCGIFVLAN: 2870 case SIOCGIFVLAN:
2922 case SIOCSIFVLAN: 2871 case SIOCSIFVLAN:
2923 case SIOCADDDLCI: 2872 case SIOCADDDLCI:
2924 case SIOCDELDLCI: 2873 case SIOCDELDLCI:
2925 return sock_ioctl(file, cmd, arg); 2874 return sock_ioctl(file, cmd, arg);
2926 2875
2927 case SIOCGIFFLAGS: 2876 case SIOCGIFFLAGS:
2928 case SIOCSIFFLAGS: 2877 case SIOCSIFFLAGS:
2929 case SIOCGIFMETRIC: 2878 case SIOCGIFMETRIC:
2930 case SIOCSIFMETRIC: 2879 case SIOCSIFMETRIC:
2931 case SIOCGIFMTU: 2880 case SIOCGIFMTU:
2932 case SIOCSIFMTU: 2881 case SIOCSIFMTU:
2933 case SIOCGIFMEM: 2882 case SIOCGIFMEM:
2934 case SIOCSIFMEM: 2883 case SIOCSIFMEM:
2935 case SIOCGIFHWADDR: 2884 case SIOCGIFHWADDR:
2936 case SIOCSIFHWADDR: 2885 case SIOCSIFHWADDR:
2937 case SIOCADDMULTI: 2886 case SIOCADDMULTI:
2938 case SIOCDELMULTI: 2887 case SIOCDELMULTI:
2939 case SIOCGIFINDEX: 2888 case SIOCGIFINDEX:
2940 case SIOCGIFADDR: 2889 case SIOCGIFADDR:
2941 case SIOCSIFADDR: 2890 case SIOCSIFADDR:
2942 case SIOCSIFHWBROADCAST: 2891 case SIOCSIFHWBROADCAST:
2943 case SIOCDIFADDR: 2892 case SIOCDIFADDR:
2944 case SIOCGIFBRDADDR: 2893 case SIOCGIFBRDADDR:
2945 case SIOCSIFBRDADDR: 2894 case SIOCSIFBRDADDR:
2946 case SIOCGIFDSTADDR: 2895 case SIOCGIFDSTADDR:
2947 case SIOCSIFDSTADDR: 2896 case SIOCSIFDSTADDR:
2948 case SIOCGIFNETMASK: 2897 case SIOCGIFNETMASK:
2949 case SIOCSIFNETMASK: 2898 case SIOCSIFNETMASK:
2950 case SIOCSIFPFLAGS: 2899 case SIOCSIFPFLAGS:
2951 case SIOCGIFPFLAGS: 2900 case SIOCGIFPFLAGS:
2952 case SIOCGIFTXQLEN: 2901 case SIOCGIFTXQLEN:
2953 case SIOCSIFTXQLEN: 2902 case SIOCSIFTXQLEN:
2954 case SIOCBRADDIF: 2903 case SIOCBRADDIF:
2955 case SIOCBRDELIF: 2904 case SIOCBRDELIF:
2956 case SIOCSIFNAME: 2905 case SIOCSIFNAME:
2957 case SIOCGMIIPHY: 2906 case SIOCGMIIPHY:
2958 case SIOCGMIIREG: 2907 case SIOCGMIIREG:
2959 case SIOCSMIIREG: 2908 case SIOCSMIIREG:
2960 return dev_ifsioc(net, sock, cmd, argp); 2909 return dev_ifsioc(net, sock, cmd, argp);
2961 2910
2962 case SIOCSARP: 2911 case SIOCSARP:
2963 case SIOCGARP: 2912 case SIOCGARP:
2964 case SIOCDARP: 2913 case SIOCDARP:
2965 case SIOCATMARK: 2914 case SIOCATMARK:
2966 return sock_do_ioctl(net, sock, cmd, arg); 2915 return sock_do_ioctl(net, sock, cmd, arg);
2967 } 2916 }
2968 2917
2969 /* Prevent warning from compat_sys_ioctl, these always 2918 /* Prevent warning from compat_sys_ioctl, these always
2970 * result in -EINVAL in the native case anyway. */ 2919 * result in -EINVAL in the native case anyway. */
2971 switch (cmd) { 2920 switch (cmd) {
2972 case SIOCRTMSG: 2921 case SIOCRTMSG:
2973 case SIOCGIFCOUNT: 2922 case SIOCGIFCOUNT:
2974 case SIOCSRARP: 2923 case SIOCSRARP:
2975 case SIOCGRARP: 2924 case SIOCGRARP:
2976 case SIOCDRARP: 2925 case SIOCDRARP:
2977 case SIOCSIFLINK: 2926 case SIOCSIFLINK:
2978 case SIOCGIFSLAVE: 2927 case SIOCGIFSLAVE:
2979 case SIOCSIFSLAVE: 2928 case SIOCSIFSLAVE:
2980 return -EINVAL; 2929 return -EINVAL;
2981 } 2930 }
2982 2931
2983 return -ENOIOCTLCMD; 2932 return -ENOIOCTLCMD;
2984 } 2933 }
2985 2934
2986 static long compat_sock_ioctl(struct file *file, unsigned cmd, 2935 static long compat_sock_ioctl(struct file *file, unsigned cmd,
2987 unsigned long arg) 2936 unsigned long arg)
2988 { 2937 {
2989 struct socket *sock = file->private_data; 2938 struct socket *sock = file->private_data;
2990 int ret = -ENOIOCTLCMD; 2939 int ret = -ENOIOCTLCMD;
2991 struct sock *sk; 2940 struct sock *sk;
2992 struct net *net; 2941 struct net *net;
2993 2942
2994 sk = sock->sk; 2943 sk = sock->sk;
2995 net = sock_net(sk); 2944 net = sock_net(sk);
2996 2945
2997 if (sock->ops->compat_ioctl) 2946 if (sock->ops->compat_ioctl)
2998 ret = sock->ops->compat_ioctl(sock, cmd, arg); 2947 ret = sock->ops->compat_ioctl(sock, cmd, arg);
2999 2948
3000 if (ret == -ENOIOCTLCMD && 2949 if (ret == -ENOIOCTLCMD &&
3001 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)) 2950 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3002 ret = compat_wext_handle_ioctl(net, cmd, arg); 2951 ret = compat_wext_handle_ioctl(net, cmd, arg);
3003 2952
3004 if (ret == -ENOIOCTLCMD) 2953 if (ret == -ENOIOCTLCMD)
3005 ret = compat_sock_ioctl_trans(file, sock, cmd, arg); 2954 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3006 2955
3007 return ret; 2956 return ret;
3008 } 2957 }
3009 #endif 2958 #endif
3010 2959
3011 int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) 2960 int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3012 { 2961 {
3013 return sock->ops->bind(sock, addr, addrlen); 2962 return sock->ops->bind(sock, addr, addrlen);
3014 } 2963 }
3015 2964
3016 int kernel_listen(struct socket *sock, int backlog) 2965 int kernel_listen(struct socket *sock, int backlog)
3017 { 2966 {
3018 return sock->ops->listen(sock, backlog); 2967 return sock->ops->listen(sock, backlog);
3019 } 2968 }
3020 2969
3021 int kernel_accept(struct socket *sock, struct socket **newsock, int flags) 2970 int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3022 { 2971 {
3023 struct sock *sk = sock->sk; 2972 struct sock *sk = sock->sk;
3024 int err; 2973 int err;
3025 2974
3026 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol, 2975 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3027 newsock); 2976 newsock);
3028 if (err < 0) 2977 if (err < 0)
3029 goto done; 2978 goto done;
3030 2979
3031 err = sock->ops->accept(sock, *newsock, flags); 2980 err = sock->ops->accept(sock, *newsock, flags);
3032 if (err < 0) { 2981 if (err < 0) {
3033 sock_release(*newsock); 2982 sock_release(*newsock);
3034 *newsock = NULL; 2983 *newsock = NULL;
3035 goto done; 2984 goto done;
3036 } 2985 }
3037 2986
3038 (*newsock)->ops = sock->ops; 2987 (*newsock)->ops = sock->ops;
3039 __module_get((*newsock)->ops->owner); 2988 __module_get((*newsock)->ops->owner);
3040 2989
3041 done: 2990 done:
3042 return err; 2991 return err;
3043 } 2992 }
3044 2993
3045 int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, 2994 int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
3046 int flags) 2995 int flags)
3047 { 2996 {
3048 return sock->ops->connect(sock, addr, addrlen, flags); 2997 return sock->ops->connect(sock, addr, addrlen, flags);
3049 } 2998 }
3050 2999
3051 int kernel_getsockname(struct socket *sock, struct sockaddr *addr, 3000 int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3052 int *addrlen) 3001 int *addrlen)
3053 { 3002 {
3054 return sock->ops->getname(sock, addr, addrlen, 0); 3003 return sock->ops->getname(sock, addr, addrlen, 0);
3055 } 3004 }
3056 3005
3057 int kernel_getpeername(struct socket *sock, struct sockaddr *addr, 3006 int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3058 int *addrlen) 3007 int *addrlen)
3059 { 3008 {
3060 return sock->ops->getname(sock, addr, addrlen, 1); 3009 return sock->ops->getname(sock, addr, addrlen, 1);
3061 } 3010 }
3062 3011
3063 int kernel_getsockopt(struct socket *sock, int level, int optname, 3012 int kernel_getsockopt(struct socket *sock, int level, int optname,
3064 char *optval, int *optlen) 3013 char *optval, int *optlen)
3065 { 3014 {
3066 mm_segment_t oldfs = get_fs(); 3015 mm_segment_t oldfs = get_fs();
3067 int err; 3016 int err;
3068 3017
3069 set_fs(KERNEL_DS); 3018 set_fs(KERNEL_DS);
3070 if (level == SOL_SOCKET) 3019 if (level == SOL_SOCKET)
3071 err = sock_getsockopt(sock, level, optname, optval, optlen); 3020 err = sock_getsockopt(sock, level, optname, optval, optlen);
3072 else 3021 else
3073 err = sock->ops->getsockopt(sock, level, optname, optval, 3022 err = sock->ops->getsockopt(sock, level, optname, optval,
3074 optlen); 3023 optlen);
3075 set_fs(oldfs); 3024 set_fs(oldfs);
3076 return err; 3025 return err;
3077 } 3026 }
3078 3027
3079 int kernel_setsockopt(struct socket *sock, int level, int optname, 3028 int kernel_setsockopt(struct socket *sock, int level, int optname,
3080 char *optval, unsigned int optlen) 3029 char *optval, unsigned int optlen)
3081 { 3030 {
3082 mm_segment_t oldfs = get_fs(); 3031 mm_segment_t oldfs = get_fs();
3083 int err; 3032 int err;
3084 3033
3085 set_fs(KERNEL_DS); 3034 set_fs(KERNEL_DS);
3086 if (level == SOL_SOCKET) 3035 if (level == SOL_SOCKET)
3087 err = sock_setsockopt(sock, level, optname, optval, optlen); 3036 err = sock_setsockopt(sock, level, optname, optval, optlen);
3088 else 3037 else
3089 err = sock->ops->setsockopt(sock, level, optname, optval, 3038 err = sock->ops->setsockopt(sock, level, optname, optval,
3090 optlen); 3039 optlen);
3091 set_fs(oldfs); 3040 set_fs(oldfs);
3092 return err; 3041 return err;
3093 } 3042 }
3094 3043
3095 int kernel_sendpage(struct socket *sock, struct page *page, int offset, 3044 int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3096 size_t size, int flags) 3045 size_t size, int flags)
3097 { 3046 {
3098 if (sock->ops->sendpage) 3047 if (sock->ops->sendpage)
3099 return sock->ops->sendpage(sock, page, offset, size, flags); 3048 return sock->ops->sendpage(sock, page, offset, size, flags);
3100 3049
3101 return sock_no_sendpage(sock, page, offset, size, flags); 3050 return sock_no_sendpage(sock, page, offset, size, flags);
3102 } 3051 }
3103 3052
3104 int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) 3053 int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3105 { 3054 {
3106 mm_segment_t oldfs = get_fs(); 3055 mm_segment_t oldfs = get_fs();
3107 int err; 3056 int err;
3108 3057
3109 set_fs(KERNEL_DS); 3058 set_fs(KERNEL_DS);
3110 err = sock->ops->ioctl(sock, cmd, arg); 3059 err = sock->ops->ioctl(sock, cmd, arg);
3111 set_fs(oldfs); 3060 set_fs(oldfs);
3112 3061
3113 return err; 3062 return err;
3114 } 3063 }
3115 3064
3116 int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) 3065 int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3117 { 3066 {
3118 return sock->ops->shutdown(sock, how); 3067 return sock->ops->shutdown(sock, how);
3119 } 3068 }
3120 3069
/* Symbols exported for sock_* and sockfd_* functions. */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* Symbols exported for the kernel_* wrappers. */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);
EXPORT_SYMBOL(kernel_sock_shutdown);
3145 3094