Commit 989a2979205dd34269382b357e6d4b4b6956b889
Committed by
David S. Miller
1 parent
e5700aff14
Exists in
master
and in
7 other branches
fasync: RCU and fine grained locking
kill_fasync() uses a central rwlock, which is a candidate for RCU conversion to avoid cache line ping-pongs on SMP. fasync_remove_entry() and fasync_add_entry() can disable IRQs for a short section instead of during the whole list scan. Use a spinlock per fasync_struct to synchronize kill_fasync_rcu() and fasync_{remove|add}_entry(). This spinlock is IRQ safe, so sock_fasync() doesn't need its own implementation and can use fasync_helper(), reducing code size and complexity. We can remove the direct use of __kill_fasync() in net/socket.c, and rename it to kill_fasync_rcu(). Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: Lai Jiangshan <laijs@cn.fujitsu.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 3 changed files with 59 additions and 92 deletions Inline Diff
fs/fcntl.c
1 | /* | 1 | /* |
2 | * linux/fs/fcntl.c | 2 | * linux/fs/fcntl.c |
3 | * | 3 | * |
4 | * Copyright (C) 1991, 1992 Linus Torvalds | 4 | * Copyright (C) 1991, 1992 Linus Torvalds |
5 | */ | 5 | */ |
6 | 6 | ||
7 | #include <linux/syscalls.h> | 7 | #include <linux/syscalls.h> |
8 | #include <linux/init.h> | 8 | #include <linux/init.h> |
9 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
10 | #include <linux/fs.h> | 10 | #include <linux/fs.h> |
11 | #include <linux/file.h> | 11 | #include <linux/file.h> |
12 | #include <linux/fdtable.h> | 12 | #include <linux/fdtable.h> |
13 | #include <linux/capability.h> | 13 | #include <linux/capability.h> |
14 | #include <linux/dnotify.h> | 14 | #include <linux/dnotify.h> |
15 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/security.h> | 17 | #include <linux/security.h> |
18 | #include <linux/ptrace.h> | 18 | #include <linux/ptrace.h> |
19 | #include <linux/signal.h> | 19 | #include <linux/signal.h> |
20 | #include <linux/rcupdate.h> | 20 | #include <linux/rcupdate.h> |
21 | #include <linux/pid_namespace.h> | 21 | #include <linux/pid_namespace.h> |
22 | 22 | ||
23 | #include <asm/poll.h> | 23 | #include <asm/poll.h> |
24 | #include <asm/siginfo.h> | 24 | #include <asm/siginfo.h> |
25 | #include <asm/uaccess.h> | 25 | #include <asm/uaccess.h> |
26 | 26 | ||
27 | void set_close_on_exec(unsigned int fd, int flag) | 27 | void set_close_on_exec(unsigned int fd, int flag) |
28 | { | 28 | { |
29 | struct files_struct *files = current->files; | 29 | struct files_struct *files = current->files; |
30 | struct fdtable *fdt; | 30 | struct fdtable *fdt; |
31 | spin_lock(&files->file_lock); | 31 | spin_lock(&files->file_lock); |
32 | fdt = files_fdtable(files); | 32 | fdt = files_fdtable(files); |
33 | if (flag) | 33 | if (flag) |
34 | FD_SET(fd, fdt->close_on_exec); | 34 | FD_SET(fd, fdt->close_on_exec); |
35 | else | 35 | else |
36 | FD_CLR(fd, fdt->close_on_exec); | 36 | FD_CLR(fd, fdt->close_on_exec); |
37 | spin_unlock(&files->file_lock); | 37 | spin_unlock(&files->file_lock); |
38 | } | 38 | } |
39 | 39 | ||
40 | static int get_close_on_exec(unsigned int fd) | 40 | static int get_close_on_exec(unsigned int fd) |
41 | { | 41 | { |
42 | struct files_struct *files = current->files; | 42 | struct files_struct *files = current->files; |
43 | struct fdtable *fdt; | 43 | struct fdtable *fdt; |
44 | int res; | 44 | int res; |
45 | rcu_read_lock(); | 45 | rcu_read_lock(); |
46 | fdt = files_fdtable(files); | 46 | fdt = files_fdtable(files); |
47 | res = FD_ISSET(fd, fdt->close_on_exec); | 47 | res = FD_ISSET(fd, fdt->close_on_exec); |
48 | rcu_read_unlock(); | 48 | rcu_read_unlock(); |
49 | return res; | 49 | return res; |
50 | } | 50 | } |
51 | 51 | ||
52 | SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) | 52 | SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) |
53 | { | 53 | { |
54 | int err = -EBADF; | 54 | int err = -EBADF; |
55 | struct file * file, *tofree; | 55 | struct file * file, *tofree; |
56 | struct files_struct * files = current->files; | 56 | struct files_struct * files = current->files; |
57 | struct fdtable *fdt; | 57 | struct fdtable *fdt; |
58 | 58 | ||
59 | if ((flags & ~O_CLOEXEC) != 0) | 59 | if ((flags & ~O_CLOEXEC) != 0) |
60 | return -EINVAL; | 60 | return -EINVAL; |
61 | 61 | ||
62 | if (unlikely(oldfd == newfd)) | 62 | if (unlikely(oldfd == newfd)) |
63 | return -EINVAL; | 63 | return -EINVAL; |
64 | 64 | ||
65 | spin_lock(&files->file_lock); | 65 | spin_lock(&files->file_lock); |
66 | err = expand_files(files, newfd); | 66 | err = expand_files(files, newfd); |
67 | file = fcheck(oldfd); | 67 | file = fcheck(oldfd); |
68 | if (unlikely(!file)) | 68 | if (unlikely(!file)) |
69 | goto Ebadf; | 69 | goto Ebadf; |
70 | if (unlikely(err < 0)) { | 70 | if (unlikely(err < 0)) { |
71 | if (err == -EMFILE) | 71 | if (err == -EMFILE) |
72 | goto Ebadf; | 72 | goto Ebadf; |
73 | goto out_unlock; | 73 | goto out_unlock; |
74 | } | 74 | } |
75 | /* | 75 | /* |
76 | * We need to detect attempts to do dup2() over allocated but still | 76 | * We need to detect attempts to do dup2() over allocated but still |
77 | * not finished descriptor. NB: OpenBSD avoids that at the price of | 77 | * not finished descriptor. NB: OpenBSD avoids that at the price of |
78 | * extra work in their equivalent of fget() - they insert struct | 78 | * extra work in their equivalent of fget() - they insert struct |
79 | * file immediately after grabbing descriptor, mark it larval if | 79 | * file immediately after grabbing descriptor, mark it larval if |
80 | * more work (e.g. actual opening) is needed and make sure that | 80 | * more work (e.g. actual opening) is needed and make sure that |
81 | * fget() treats larval files as absent. Potentially interesting, | 81 | * fget() treats larval files as absent. Potentially interesting, |
82 | * but while extra work in fget() is trivial, locking implications | 82 | * but while extra work in fget() is trivial, locking implications |
83 | * and amount of surgery on open()-related paths in VFS are not. | 83 | * and amount of surgery on open()-related paths in VFS are not. |
84 | * FreeBSD fails with -EBADF in the same situation, NetBSD "solution" | 84 | * FreeBSD fails with -EBADF in the same situation, NetBSD "solution" |
85 | * deadlocks in rather amusing ways, AFAICS. All of that is out of | 85 | * deadlocks in rather amusing ways, AFAICS. All of that is out of |
86 | * scope of POSIX or SUS, since neither considers shared descriptor | 86 | * scope of POSIX or SUS, since neither considers shared descriptor |
87 | * tables and this condition does not arise without those. | 87 | * tables and this condition does not arise without those. |
88 | */ | 88 | */ |
89 | err = -EBUSY; | 89 | err = -EBUSY; |
90 | fdt = files_fdtable(files); | 90 | fdt = files_fdtable(files); |
91 | tofree = fdt->fd[newfd]; | 91 | tofree = fdt->fd[newfd]; |
92 | if (!tofree && FD_ISSET(newfd, fdt->open_fds)) | 92 | if (!tofree && FD_ISSET(newfd, fdt->open_fds)) |
93 | goto out_unlock; | 93 | goto out_unlock; |
94 | get_file(file); | 94 | get_file(file); |
95 | rcu_assign_pointer(fdt->fd[newfd], file); | 95 | rcu_assign_pointer(fdt->fd[newfd], file); |
96 | FD_SET(newfd, fdt->open_fds); | 96 | FD_SET(newfd, fdt->open_fds); |
97 | if (flags & O_CLOEXEC) | 97 | if (flags & O_CLOEXEC) |
98 | FD_SET(newfd, fdt->close_on_exec); | 98 | FD_SET(newfd, fdt->close_on_exec); |
99 | else | 99 | else |
100 | FD_CLR(newfd, fdt->close_on_exec); | 100 | FD_CLR(newfd, fdt->close_on_exec); |
101 | spin_unlock(&files->file_lock); | 101 | spin_unlock(&files->file_lock); |
102 | 102 | ||
103 | if (tofree) | 103 | if (tofree) |
104 | filp_close(tofree, files); | 104 | filp_close(tofree, files); |
105 | 105 | ||
106 | return newfd; | 106 | return newfd; |
107 | 107 | ||
108 | Ebadf: | 108 | Ebadf: |
109 | err = -EBADF; | 109 | err = -EBADF; |
110 | out_unlock: | 110 | out_unlock: |
111 | spin_unlock(&files->file_lock); | 111 | spin_unlock(&files->file_lock); |
112 | return err; | 112 | return err; |
113 | } | 113 | } |
114 | 114 | ||
115 | SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) | 115 | SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) |
116 | { | 116 | { |
117 | if (unlikely(newfd == oldfd)) { /* corner case */ | 117 | if (unlikely(newfd == oldfd)) { /* corner case */ |
118 | struct files_struct *files = current->files; | 118 | struct files_struct *files = current->files; |
119 | int retval = oldfd; | 119 | int retval = oldfd; |
120 | 120 | ||
121 | rcu_read_lock(); | 121 | rcu_read_lock(); |
122 | if (!fcheck_files(files, oldfd)) | 122 | if (!fcheck_files(files, oldfd)) |
123 | retval = -EBADF; | 123 | retval = -EBADF; |
124 | rcu_read_unlock(); | 124 | rcu_read_unlock(); |
125 | return retval; | 125 | return retval; |
126 | } | 126 | } |
127 | return sys_dup3(oldfd, newfd, 0); | 127 | return sys_dup3(oldfd, newfd, 0); |
128 | } | 128 | } |
129 | 129 | ||
130 | SYSCALL_DEFINE1(dup, unsigned int, fildes) | 130 | SYSCALL_DEFINE1(dup, unsigned int, fildes) |
131 | { | 131 | { |
132 | int ret = -EBADF; | 132 | int ret = -EBADF; |
133 | struct file *file = fget(fildes); | 133 | struct file *file = fget(fildes); |
134 | 134 | ||
135 | if (file) { | 135 | if (file) { |
136 | ret = get_unused_fd(); | 136 | ret = get_unused_fd(); |
137 | if (ret >= 0) | 137 | if (ret >= 0) |
138 | fd_install(ret, file); | 138 | fd_install(ret, file); |
139 | else | 139 | else |
140 | fput(file); | 140 | fput(file); |
141 | } | 141 | } |
142 | return ret; | 142 | return ret; |
143 | } | 143 | } |
144 | 144 | ||
145 | #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME) | 145 | #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME) |
146 | 146 | ||
147 | static int setfl(int fd, struct file * filp, unsigned long arg) | 147 | static int setfl(int fd, struct file * filp, unsigned long arg) |
148 | { | 148 | { |
149 | struct inode * inode = filp->f_path.dentry->d_inode; | 149 | struct inode * inode = filp->f_path.dentry->d_inode; |
150 | int error = 0; | 150 | int error = 0; |
151 | 151 | ||
152 | /* | 152 | /* |
153 | * O_APPEND cannot be cleared if the file is marked as append-only | 153 | * O_APPEND cannot be cleared if the file is marked as append-only |
154 | * and the file is open for write. | 154 | * and the file is open for write. |
155 | */ | 155 | */ |
156 | if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode)) | 156 | if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode)) |
157 | return -EPERM; | 157 | return -EPERM; |
158 | 158 | ||
159 | /* O_NOATIME can only be set by the owner or superuser */ | 159 | /* O_NOATIME can only be set by the owner or superuser */ |
160 | if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME)) | 160 | if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME)) |
161 | if (!is_owner_or_cap(inode)) | 161 | if (!is_owner_or_cap(inode)) |
162 | return -EPERM; | 162 | return -EPERM; |
163 | 163 | ||
164 | /* required for strict SunOS emulation */ | 164 | /* required for strict SunOS emulation */ |
165 | if (O_NONBLOCK != O_NDELAY) | 165 | if (O_NONBLOCK != O_NDELAY) |
166 | if (arg & O_NDELAY) | 166 | if (arg & O_NDELAY) |
167 | arg |= O_NONBLOCK; | 167 | arg |= O_NONBLOCK; |
168 | 168 | ||
169 | if (arg & O_DIRECT) { | 169 | if (arg & O_DIRECT) { |
170 | if (!filp->f_mapping || !filp->f_mapping->a_ops || | 170 | if (!filp->f_mapping || !filp->f_mapping->a_ops || |
171 | !filp->f_mapping->a_ops->direct_IO) | 171 | !filp->f_mapping->a_ops->direct_IO) |
172 | return -EINVAL; | 172 | return -EINVAL; |
173 | } | 173 | } |
174 | 174 | ||
175 | if (filp->f_op && filp->f_op->check_flags) | 175 | if (filp->f_op && filp->f_op->check_flags) |
176 | error = filp->f_op->check_flags(arg); | 176 | error = filp->f_op->check_flags(arg); |
177 | if (error) | 177 | if (error) |
178 | return error; | 178 | return error; |
179 | 179 | ||
180 | /* | 180 | /* |
181 | * ->fasync() is responsible for setting the FASYNC bit. | 181 | * ->fasync() is responsible for setting the FASYNC bit. |
182 | */ | 182 | */ |
183 | if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op && | 183 | if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op && |
184 | filp->f_op->fasync) { | 184 | filp->f_op->fasync) { |
185 | error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); | 185 | error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); |
186 | if (error < 0) | 186 | if (error < 0) |
187 | goto out; | 187 | goto out; |
188 | if (error > 0) | 188 | if (error > 0) |
189 | error = 0; | 189 | error = 0; |
190 | } | 190 | } |
191 | spin_lock(&filp->f_lock); | 191 | spin_lock(&filp->f_lock); |
192 | filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK); | 192 | filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK); |
193 | spin_unlock(&filp->f_lock); | 193 | spin_unlock(&filp->f_lock); |
194 | 194 | ||
195 | out: | 195 | out: |
196 | return error; | 196 | return error; |
197 | } | 197 | } |
198 | 198 | ||
199 | static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, | 199 | static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, |
200 | int force) | 200 | int force) |
201 | { | 201 | { |
202 | write_lock_irq(&filp->f_owner.lock); | 202 | write_lock_irq(&filp->f_owner.lock); |
203 | if (force || !filp->f_owner.pid) { | 203 | if (force || !filp->f_owner.pid) { |
204 | put_pid(filp->f_owner.pid); | 204 | put_pid(filp->f_owner.pid); |
205 | filp->f_owner.pid = get_pid(pid); | 205 | filp->f_owner.pid = get_pid(pid); |
206 | filp->f_owner.pid_type = type; | 206 | filp->f_owner.pid_type = type; |
207 | 207 | ||
208 | if (pid) { | 208 | if (pid) { |
209 | const struct cred *cred = current_cred(); | 209 | const struct cred *cred = current_cred(); |
210 | filp->f_owner.uid = cred->uid; | 210 | filp->f_owner.uid = cred->uid; |
211 | filp->f_owner.euid = cred->euid; | 211 | filp->f_owner.euid = cred->euid; |
212 | } | 212 | } |
213 | } | 213 | } |
214 | write_unlock_irq(&filp->f_owner.lock); | 214 | write_unlock_irq(&filp->f_owner.lock); |
215 | } | 215 | } |
216 | 216 | ||
217 | int __f_setown(struct file *filp, struct pid *pid, enum pid_type type, | 217 | int __f_setown(struct file *filp, struct pid *pid, enum pid_type type, |
218 | int force) | 218 | int force) |
219 | { | 219 | { |
220 | int err; | 220 | int err; |
221 | 221 | ||
222 | err = security_file_set_fowner(filp); | 222 | err = security_file_set_fowner(filp); |
223 | if (err) | 223 | if (err) |
224 | return err; | 224 | return err; |
225 | 225 | ||
226 | f_modown(filp, pid, type, force); | 226 | f_modown(filp, pid, type, force); |
227 | return 0; | 227 | return 0; |
228 | } | 228 | } |
229 | EXPORT_SYMBOL(__f_setown); | 229 | EXPORT_SYMBOL(__f_setown); |
230 | 230 | ||
231 | int f_setown(struct file *filp, unsigned long arg, int force) | 231 | int f_setown(struct file *filp, unsigned long arg, int force) |
232 | { | 232 | { |
233 | enum pid_type type; | 233 | enum pid_type type; |
234 | struct pid *pid; | 234 | struct pid *pid; |
235 | int who = arg; | 235 | int who = arg; |
236 | int result; | 236 | int result; |
237 | type = PIDTYPE_PID; | 237 | type = PIDTYPE_PID; |
238 | if (who < 0) { | 238 | if (who < 0) { |
239 | type = PIDTYPE_PGID; | 239 | type = PIDTYPE_PGID; |
240 | who = -who; | 240 | who = -who; |
241 | } | 241 | } |
242 | rcu_read_lock(); | 242 | rcu_read_lock(); |
243 | pid = find_vpid(who); | 243 | pid = find_vpid(who); |
244 | result = __f_setown(filp, pid, type, force); | 244 | result = __f_setown(filp, pid, type, force); |
245 | rcu_read_unlock(); | 245 | rcu_read_unlock(); |
246 | return result; | 246 | return result; |
247 | } | 247 | } |
248 | EXPORT_SYMBOL(f_setown); | 248 | EXPORT_SYMBOL(f_setown); |
249 | 249 | ||
250 | void f_delown(struct file *filp) | 250 | void f_delown(struct file *filp) |
251 | { | 251 | { |
252 | f_modown(filp, NULL, PIDTYPE_PID, 1); | 252 | f_modown(filp, NULL, PIDTYPE_PID, 1); |
253 | } | 253 | } |
254 | 254 | ||
255 | pid_t f_getown(struct file *filp) | 255 | pid_t f_getown(struct file *filp) |
256 | { | 256 | { |
257 | pid_t pid; | 257 | pid_t pid; |
258 | read_lock(&filp->f_owner.lock); | 258 | read_lock(&filp->f_owner.lock); |
259 | pid = pid_vnr(filp->f_owner.pid); | 259 | pid = pid_vnr(filp->f_owner.pid); |
260 | if (filp->f_owner.pid_type == PIDTYPE_PGID) | 260 | if (filp->f_owner.pid_type == PIDTYPE_PGID) |
261 | pid = -pid; | 261 | pid = -pid; |
262 | read_unlock(&filp->f_owner.lock); | 262 | read_unlock(&filp->f_owner.lock); |
263 | return pid; | 263 | return pid; |
264 | } | 264 | } |
265 | 265 | ||
266 | static int f_setown_ex(struct file *filp, unsigned long arg) | 266 | static int f_setown_ex(struct file *filp, unsigned long arg) |
267 | { | 267 | { |
268 | struct f_owner_ex * __user owner_p = (void * __user)arg; | 268 | struct f_owner_ex * __user owner_p = (void * __user)arg; |
269 | struct f_owner_ex owner; | 269 | struct f_owner_ex owner; |
270 | struct pid *pid; | 270 | struct pid *pid; |
271 | int type; | 271 | int type; |
272 | int ret; | 272 | int ret; |
273 | 273 | ||
274 | ret = copy_from_user(&owner, owner_p, sizeof(owner)); | 274 | ret = copy_from_user(&owner, owner_p, sizeof(owner)); |
275 | if (ret) | 275 | if (ret) |
276 | return ret; | 276 | return ret; |
277 | 277 | ||
278 | switch (owner.type) { | 278 | switch (owner.type) { |
279 | case F_OWNER_TID: | 279 | case F_OWNER_TID: |
280 | type = PIDTYPE_MAX; | 280 | type = PIDTYPE_MAX; |
281 | break; | 281 | break; |
282 | 282 | ||
283 | case F_OWNER_PID: | 283 | case F_OWNER_PID: |
284 | type = PIDTYPE_PID; | 284 | type = PIDTYPE_PID; |
285 | break; | 285 | break; |
286 | 286 | ||
287 | case F_OWNER_PGRP: | 287 | case F_OWNER_PGRP: |
288 | type = PIDTYPE_PGID; | 288 | type = PIDTYPE_PGID; |
289 | break; | 289 | break; |
290 | 290 | ||
291 | default: | 291 | default: |
292 | return -EINVAL; | 292 | return -EINVAL; |
293 | } | 293 | } |
294 | 294 | ||
295 | rcu_read_lock(); | 295 | rcu_read_lock(); |
296 | pid = find_vpid(owner.pid); | 296 | pid = find_vpid(owner.pid); |
297 | if (owner.pid && !pid) | 297 | if (owner.pid && !pid) |
298 | ret = -ESRCH; | 298 | ret = -ESRCH; |
299 | else | 299 | else |
300 | ret = __f_setown(filp, pid, type, 1); | 300 | ret = __f_setown(filp, pid, type, 1); |
301 | rcu_read_unlock(); | 301 | rcu_read_unlock(); |
302 | 302 | ||
303 | return ret; | 303 | return ret; |
304 | } | 304 | } |
305 | 305 | ||
306 | static int f_getown_ex(struct file *filp, unsigned long arg) | 306 | static int f_getown_ex(struct file *filp, unsigned long arg) |
307 | { | 307 | { |
308 | struct f_owner_ex * __user owner_p = (void * __user)arg; | 308 | struct f_owner_ex * __user owner_p = (void * __user)arg; |
309 | struct f_owner_ex owner; | 309 | struct f_owner_ex owner; |
310 | int ret = 0; | 310 | int ret = 0; |
311 | 311 | ||
312 | read_lock(&filp->f_owner.lock); | 312 | read_lock(&filp->f_owner.lock); |
313 | owner.pid = pid_vnr(filp->f_owner.pid); | 313 | owner.pid = pid_vnr(filp->f_owner.pid); |
314 | switch (filp->f_owner.pid_type) { | 314 | switch (filp->f_owner.pid_type) { |
315 | case PIDTYPE_MAX: | 315 | case PIDTYPE_MAX: |
316 | owner.type = F_OWNER_TID; | 316 | owner.type = F_OWNER_TID; |
317 | break; | 317 | break; |
318 | 318 | ||
319 | case PIDTYPE_PID: | 319 | case PIDTYPE_PID: |
320 | owner.type = F_OWNER_PID; | 320 | owner.type = F_OWNER_PID; |
321 | break; | 321 | break; |
322 | 322 | ||
323 | case PIDTYPE_PGID: | 323 | case PIDTYPE_PGID: |
324 | owner.type = F_OWNER_PGRP; | 324 | owner.type = F_OWNER_PGRP; |
325 | break; | 325 | break; |
326 | 326 | ||
327 | default: | 327 | default: |
328 | WARN_ON(1); | 328 | WARN_ON(1); |
329 | ret = -EINVAL; | 329 | ret = -EINVAL; |
330 | break; | 330 | break; |
331 | } | 331 | } |
332 | read_unlock(&filp->f_owner.lock); | 332 | read_unlock(&filp->f_owner.lock); |
333 | 333 | ||
334 | if (!ret) | 334 | if (!ret) |
335 | ret = copy_to_user(owner_p, &owner, sizeof(owner)); | 335 | ret = copy_to_user(owner_p, &owner, sizeof(owner)); |
336 | return ret; | 336 | return ret; |
337 | } | 337 | } |
338 | 338 | ||
339 | static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, | 339 | static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, |
340 | struct file *filp) | 340 | struct file *filp) |
341 | { | 341 | { |
342 | long err = -EINVAL; | 342 | long err = -EINVAL; |
343 | 343 | ||
344 | switch (cmd) { | 344 | switch (cmd) { |
345 | case F_DUPFD: | 345 | case F_DUPFD: |
346 | case F_DUPFD_CLOEXEC: | 346 | case F_DUPFD_CLOEXEC: |
347 | if (arg >= rlimit(RLIMIT_NOFILE)) | 347 | if (arg >= rlimit(RLIMIT_NOFILE)) |
348 | break; | 348 | break; |
349 | err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0); | 349 | err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0); |
350 | if (err >= 0) { | 350 | if (err >= 0) { |
351 | get_file(filp); | 351 | get_file(filp); |
352 | fd_install(err, filp); | 352 | fd_install(err, filp); |
353 | } | 353 | } |
354 | break; | 354 | break; |
355 | case F_GETFD: | 355 | case F_GETFD: |
356 | err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; | 356 | err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; |
357 | break; | 357 | break; |
358 | case F_SETFD: | 358 | case F_SETFD: |
359 | err = 0; | 359 | err = 0; |
360 | set_close_on_exec(fd, arg & FD_CLOEXEC); | 360 | set_close_on_exec(fd, arg & FD_CLOEXEC); |
361 | break; | 361 | break; |
362 | case F_GETFL: | 362 | case F_GETFL: |
363 | err = filp->f_flags; | 363 | err = filp->f_flags; |
364 | break; | 364 | break; |
365 | case F_SETFL: | 365 | case F_SETFL: |
366 | err = setfl(fd, filp, arg); | 366 | err = setfl(fd, filp, arg); |
367 | break; | 367 | break; |
368 | case F_GETLK: | 368 | case F_GETLK: |
369 | err = fcntl_getlk(filp, (struct flock __user *) arg); | 369 | err = fcntl_getlk(filp, (struct flock __user *) arg); |
370 | break; | 370 | break; |
371 | case F_SETLK: | 371 | case F_SETLK: |
372 | case F_SETLKW: | 372 | case F_SETLKW: |
373 | err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg); | 373 | err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg); |
374 | break; | 374 | break; |
375 | case F_GETOWN: | 375 | case F_GETOWN: |
376 | /* | 376 | /* |
377 | * XXX If f_owner is a process group, the | 377 | * XXX If f_owner is a process group, the |
378 | * negative return value will get converted | 378 | * negative return value will get converted |
379 | * into an error. Oops. If we keep the | 379 | * into an error. Oops. If we keep the |
380 | * current syscall conventions, the only way | 380 | * current syscall conventions, the only way |
381 | * to fix this will be in libc. | 381 | * to fix this will be in libc. |
382 | */ | 382 | */ |
383 | err = f_getown(filp); | 383 | err = f_getown(filp); |
384 | force_successful_syscall_return(); | 384 | force_successful_syscall_return(); |
385 | break; | 385 | break; |
386 | case F_SETOWN: | 386 | case F_SETOWN: |
387 | err = f_setown(filp, arg, 1); | 387 | err = f_setown(filp, arg, 1); |
388 | break; | 388 | break; |
389 | case F_GETOWN_EX: | 389 | case F_GETOWN_EX: |
390 | err = f_getown_ex(filp, arg); | 390 | err = f_getown_ex(filp, arg); |
391 | break; | 391 | break; |
392 | case F_SETOWN_EX: | 392 | case F_SETOWN_EX: |
393 | err = f_setown_ex(filp, arg); | 393 | err = f_setown_ex(filp, arg); |
394 | break; | 394 | break; |
395 | case F_GETSIG: | 395 | case F_GETSIG: |
396 | err = filp->f_owner.signum; | 396 | err = filp->f_owner.signum; |
397 | break; | 397 | break; |
398 | case F_SETSIG: | 398 | case F_SETSIG: |
399 | /* arg == 0 restores default behaviour. */ | 399 | /* arg == 0 restores default behaviour. */ |
400 | if (!valid_signal(arg)) { | 400 | if (!valid_signal(arg)) { |
401 | break; | 401 | break; |
402 | } | 402 | } |
403 | err = 0; | 403 | err = 0; |
404 | filp->f_owner.signum = arg; | 404 | filp->f_owner.signum = arg; |
405 | break; | 405 | break; |
406 | case F_GETLEASE: | 406 | case F_GETLEASE: |
407 | err = fcntl_getlease(filp); | 407 | err = fcntl_getlease(filp); |
408 | break; | 408 | break; |
409 | case F_SETLEASE: | 409 | case F_SETLEASE: |
410 | err = fcntl_setlease(fd, filp, arg); | 410 | err = fcntl_setlease(fd, filp, arg); |
411 | break; | 411 | break; |
412 | case F_NOTIFY: | 412 | case F_NOTIFY: |
413 | err = fcntl_dirnotify(fd, filp, arg); | 413 | err = fcntl_dirnotify(fd, filp, arg); |
414 | break; | 414 | break; |
415 | default: | 415 | default: |
416 | break; | 416 | break; |
417 | } | 417 | } |
418 | return err; | 418 | return err; |
419 | } | 419 | } |
420 | 420 | ||
421 | SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) | 421 | SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) |
422 | { | 422 | { |
423 | struct file *filp; | 423 | struct file *filp; |
424 | long err = -EBADF; | 424 | long err = -EBADF; |
425 | 425 | ||
426 | filp = fget(fd); | 426 | filp = fget(fd); |
427 | if (!filp) | 427 | if (!filp) |
428 | goto out; | 428 | goto out; |
429 | 429 | ||
430 | err = security_file_fcntl(filp, cmd, arg); | 430 | err = security_file_fcntl(filp, cmd, arg); |
431 | if (err) { | 431 | if (err) { |
432 | fput(filp); | 432 | fput(filp); |
433 | return err; | 433 | return err; |
434 | } | 434 | } |
435 | 435 | ||
436 | err = do_fcntl(fd, cmd, arg, filp); | 436 | err = do_fcntl(fd, cmd, arg, filp); |
437 | 437 | ||
438 | fput(filp); | 438 | fput(filp); |
439 | out: | 439 | out: |
440 | return err; | 440 | return err; |
441 | } | 441 | } |
442 | 442 | ||
#if BITS_PER_LONG == 32
/*
 * fcntl64(): 32-bit entry point.  Handles the 64-bit record-lock
 * commands itself and forwards every other command to do_fcntl().
 * Returns -EBADF if @fd is not open, the security hook's error if it
 * refuses, otherwise the handler's result.
 */
SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
		unsigned long, arg)
{
	struct file *filp = fget(fd);
	long err;

	if (!filp)
		return -EBADF;

	err = security_file_fcntl(filp, cmd, arg);
	if (err)
		goto out_put;

	switch (cmd) {
	case F_GETLK64:
		err = fcntl_getlk64(filp, (struct flock64 __user *) arg);
		break;
	case F_SETLK64:
	case F_SETLKW64:
		err = fcntl_setlk64(fd, filp, cmd,
				(struct flock64 __user *) arg);
		break;
	default:
		err = do_fcntl(fd, cmd, arg, filp);
		break;
	}

out_put:
	fput(filp);
	return err;
}
#endif
480 | 480 | ||
481 | /* Table to convert sigio signal codes into poll band bitmaps */ | 481 | /* Table to convert sigio signal codes into poll band bitmaps */ |
482 | 482 | ||
483 | static const long band_table[NSIGPOLL] = { | 483 | static const long band_table[NSIGPOLL] = { |
484 | POLLIN | POLLRDNORM, /* POLL_IN */ | 484 | POLLIN | POLLRDNORM, /* POLL_IN */ |
485 | POLLOUT | POLLWRNORM | POLLWRBAND, /* POLL_OUT */ | 485 | POLLOUT | POLLWRNORM | POLLWRBAND, /* POLL_OUT */ |
486 | POLLIN | POLLRDNORM | POLLMSG, /* POLL_MSG */ | 486 | POLLIN | POLLRDNORM | POLLMSG, /* POLL_MSG */ |
487 | POLLERR, /* POLL_ERR */ | 487 | POLLERR, /* POLL_ERR */ |
488 | POLLPRI | POLLRDBAND, /* POLL_PRI */ | 488 | POLLPRI | POLLRDBAND, /* POLL_PRI */ |
489 | POLLHUP | POLLERR /* POLL_HUP */ | 489 | POLLHUP | POLLERR /* POLL_HUP */ |
490 | }; | 490 | }; |
491 | 491 | ||
492 | static inline int sigio_perm(struct task_struct *p, | 492 | static inline int sigio_perm(struct task_struct *p, |
493 | struct fown_struct *fown, int sig) | 493 | struct fown_struct *fown, int sig) |
494 | { | 494 | { |
495 | const struct cred *cred; | 495 | const struct cred *cred; |
496 | int ret; | 496 | int ret; |
497 | 497 | ||
498 | rcu_read_lock(); | 498 | rcu_read_lock(); |
499 | cred = __task_cred(p); | 499 | cred = __task_cred(p); |
500 | ret = ((fown->euid == 0 || | 500 | ret = ((fown->euid == 0 || |
501 | fown->euid == cred->suid || fown->euid == cred->uid || | 501 | fown->euid == cred->suid || fown->euid == cred->uid || |
502 | fown->uid == cred->suid || fown->uid == cred->uid) && | 502 | fown->uid == cred->suid || fown->uid == cred->uid) && |
503 | !security_file_send_sigiotask(p, fown, sig)); | 503 | !security_file_send_sigiotask(p, fown, sig)); |
504 | rcu_read_unlock(); | 504 | rcu_read_unlock(); |
505 | return ret; | 505 | return ret; |
506 | } | 506 | } |
507 | 507 | ||
508 | static void send_sigio_to_task(struct task_struct *p, | 508 | static void send_sigio_to_task(struct task_struct *p, |
509 | struct fown_struct *fown, | 509 | struct fown_struct *fown, |
510 | int fd, int reason, int group) | 510 | int fd, int reason, int group) |
511 | { | 511 | { |
512 | /* | 512 | /* |
513 | * F_SETSIG can change ->signum lockless in parallel, make | 513 | * F_SETSIG can change ->signum lockless in parallel, make |
514 | * sure we read it once and use the same value throughout. | 514 | * sure we read it once and use the same value throughout. |
515 | */ | 515 | */ |
516 | int signum = ACCESS_ONCE(fown->signum); | 516 | int signum = ACCESS_ONCE(fown->signum); |
517 | 517 | ||
518 | if (!sigio_perm(p, fown, signum)) | 518 | if (!sigio_perm(p, fown, signum)) |
519 | return; | 519 | return; |
520 | 520 | ||
521 | switch (signum) { | 521 | switch (signum) { |
522 | siginfo_t si; | 522 | siginfo_t si; |
523 | default: | 523 | default: |
524 | /* Queue a rt signal with the appropriate fd as its | 524 | /* Queue a rt signal with the appropriate fd as its |
525 | value. We use SI_SIGIO as the source, not | 525 | value. We use SI_SIGIO as the source, not |
526 | SI_KERNEL, since kernel signals always get | 526 | SI_KERNEL, since kernel signals always get |
527 | delivered even if we can't queue. Failure to | 527 | delivered even if we can't queue. Failure to |
528 | queue in this case _should_ be reported; we fall | 528 | queue in this case _should_ be reported; we fall |
529 | back to SIGIO in that case. --sct */ | 529 | back to SIGIO in that case. --sct */ |
530 | si.si_signo = signum; | 530 | si.si_signo = signum; |
531 | si.si_errno = 0; | 531 | si.si_errno = 0; |
532 | si.si_code = reason; | 532 | si.si_code = reason; |
533 | /* Make sure we are called with one of the POLL_* | 533 | /* Make sure we are called with one of the POLL_* |
534 | reasons, otherwise we could leak kernel stack into | 534 | reasons, otherwise we could leak kernel stack into |
535 | userspace. */ | 535 | userspace. */ |
536 | BUG_ON((reason & __SI_MASK) != __SI_POLL); | 536 | BUG_ON((reason & __SI_MASK) != __SI_POLL); |
537 | if (reason - POLL_IN >= NSIGPOLL) | 537 | if (reason - POLL_IN >= NSIGPOLL) |
538 | si.si_band = ~0L; | 538 | si.si_band = ~0L; |
539 | else | 539 | else |
540 | si.si_band = band_table[reason - POLL_IN]; | 540 | si.si_band = band_table[reason - POLL_IN]; |
541 | si.si_fd = fd; | 541 | si.si_fd = fd; |
542 | if (!do_send_sig_info(signum, &si, p, group)) | 542 | if (!do_send_sig_info(signum, &si, p, group)) |
543 | break; | 543 | break; |
544 | /* fall-through: fall back on the old plain SIGIO signal */ | 544 | /* fall-through: fall back on the old plain SIGIO signal */ |
545 | case 0: | 545 | case 0: |
546 | do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, group); | 546 | do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, group); |
547 | } | 547 | } |
548 | } | 548 | } |
549 | 549 | ||
550 | void send_sigio(struct fown_struct *fown, int fd, int band) | 550 | void send_sigio(struct fown_struct *fown, int fd, int band) |
551 | { | 551 | { |
552 | struct task_struct *p; | 552 | struct task_struct *p; |
553 | enum pid_type type; | 553 | enum pid_type type; |
554 | struct pid *pid; | 554 | struct pid *pid; |
555 | int group = 1; | 555 | int group = 1; |
556 | 556 | ||
557 | read_lock(&fown->lock); | 557 | read_lock(&fown->lock); |
558 | 558 | ||
559 | type = fown->pid_type; | 559 | type = fown->pid_type; |
560 | if (type == PIDTYPE_MAX) { | 560 | if (type == PIDTYPE_MAX) { |
561 | group = 0; | 561 | group = 0; |
562 | type = PIDTYPE_PID; | 562 | type = PIDTYPE_PID; |
563 | } | 563 | } |
564 | 564 | ||
565 | pid = fown->pid; | 565 | pid = fown->pid; |
566 | if (!pid) | 566 | if (!pid) |
567 | goto out_unlock_fown; | 567 | goto out_unlock_fown; |
568 | 568 | ||
569 | read_lock(&tasklist_lock); | 569 | read_lock(&tasklist_lock); |
570 | do_each_pid_task(pid, type, p) { | 570 | do_each_pid_task(pid, type, p) { |
571 | send_sigio_to_task(p, fown, fd, band, group); | 571 | send_sigio_to_task(p, fown, fd, band, group); |
572 | } while_each_pid_task(pid, type, p); | 572 | } while_each_pid_task(pid, type, p); |
573 | read_unlock(&tasklist_lock); | 573 | read_unlock(&tasklist_lock); |
574 | out_unlock_fown: | 574 | out_unlock_fown: |
575 | read_unlock(&fown->lock); | 575 | read_unlock(&fown->lock); |
576 | } | 576 | } |
577 | 577 | ||
578 | static void send_sigurg_to_task(struct task_struct *p, | 578 | static void send_sigurg_to_task(struct task_struct *p, |
579 | struct fown_struct *fown, int group) | 579 | struct fown_struct *fown, int group) |
580 | { | 580 | { |
581 | if (sigio_perm(p, fown, SIGURG)) | 581 | if (sigio_perm(p, fown, SIGURG)) |
582 | do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, group); | 582 | do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, group); |
583 | } | 583 | } |
584 | 584 | ||
585 | int send_sigurg(struct fown_struct *fown) | 585 | int send_sigurg(struct fown_struct *fown) |
586 | { | 586 | { |
587 | struct task_struct *p; | 587 | struct task_struct *p; |
588 | enum pid_type type; | 588 | enum pid_type type; |
589 | struct pid *pid; | 589 | struct pid *pid; |
590 | int group = 1; | 590 | int group = 1; |
591 | int ret = 0; | 591 | int ret = 0; |
592 | 592 | ||
593 | read_lock(&fown->lock); | 593 | read_lock(&fown->lock); |
594 | 594 | ||
595 | type = fown->pid_type; | 595 | type = fown->pid_type; |
596 | if (type == PIDTYPE_MAX) { | 596 | if (type == PIDTYPE_MAX) { |
597 | group = 0; | 597 | group = 0; |
598 | type = PIDTYPE_PID; | 598 | type = PIDTYPE_PID; |
599 | } | 599 | } |
600 | 600 | ||
601 | pid = fown->pid; | 601 | pid = fown->pid; |
602 | if (!pid) | 602 | if (!pid) |
603 | goto out_unlock_fown; | 603 | goto out_unlock_fown; |
604 | 604 | ||
605 | ret = 1; | 605 | ret = 1; |
606 | 606 | ||
607 | read_lock(&tasklist_lock); | 607 | read_lock(&tasklist_lock); |
608 | do_each_pid_task(pid, type, p) { | 608 | do_each_pid_task(pid, type, p) { |
609 | send_sigurg_to_task(p, fown, group); | 609 | send_sigurg_to_task(p, fown, group); |
610 | } while_each_pid_task(pid, type, p); | 610 | } while_each_pid_task(pid, type, p); |
611 | read_unlock(&tasklist_lock); | 611 | read_unlock(&tasklist_lock); |
612 | out_unlock_fown: | 612 | out_unlock_fown: |
613 | read_unlock(&fown->lock); | 613 | read_unlock(&fown->lock); |
614 | return ret; | 614 | return ret; |
615 | } | 615 | } |
616 | 616 | ||
617 | static DEFINE_RWLOCK(fasync_lock); | 617 | static DEFINE_SPINLOCK(fasync_lock); |
618 | static struct kmem_cache *fasync_cache __read_mostly; | 618 | static struct kmem_cache *fasync_cache __read_mostly; |
619 | 619 | ||
620 | static void fasync_free_rcu(struct rcu_head *head) | ||
621 | { | ||
622 | kmem_cache_free(fasync_cache, | ||
623 | container_of(head, struct fasync_struct, fa_rcu)); | ||
624 | } | ||
625 | |||
620 | /* | 626 | /* |
621 | * Remove a fasync entry. If successfully removed, return | 627 | * Remove a fasync entry. If successfully removed, return |
622 | * positive and clear the FASYNC flag. If no entry exists, | 628 | * positive and clear the FASYNC flag. If no entry exists, |
623 | * do nothing and return 0. | 629 | * do nothing and return 0. |
624 | * | 630 | * |
625 | * NOTE! It is very important that the FASYNC flag always | 631 | * NOTE! It is very important that the FASYNC flag always |
626 | * match the state "is the filp on a fasync list". | 632 | * match the state "is the filp on a fasync list". |
627 | * | 633 | * |
628 | * We always take the 'filp->f_lock', in since fasync_lock | ||
629 | * needs to be irq-safe. | ||
630 | */ | 634 | */ |
631 | static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) | 635 | static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) |
632 | { | 636 | { |
633 | struct fasync_struct *fa, **fp; | 637 | struct fasync_struct *fa, **fp; |
634 | int result = 0; | 638 | int result = 0; |
635 | 639 | ||
636 | spin_lock(&filp->f_lock); | 640 | spin_lock(&filp->f_lock); |
637 | write_lock_irq(&fasync_lock); | 641 | spin_lock(&fasync_lock); |
638 | for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { | 642 | for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { |
639 | if (fa->fa_file != filp) | 643 | if (fa->fa_file != filp) |
640 | continue; | 644 | continue; |
645 | |||
646 | spin_lock_irq(&fa->fa_lock); | ||
647 | fa->fa_file = NULL; | ||
648 | spin_unlock_irq(&fa->fa_lock); | ||
649 | |||
641 | *fp = fa->fa_next; | 650 | *fp = fa->fa_next; |
642 | kmem_cache_free(fasync_cache, fa); | 651 | call_rcu(&fa->fa_rcu, fasync_free_rcu); |
643 | filp->f_flags &= ~FASYNC; | 652 | filp->f_flags &= ~FASYNC; |
644 | result = 1; | 653 | result = 1; |
645 | break; | 654 | break; |
646 | } | 655 | } |
647 | write_unlock_irq(&fasync_lock); | 656 | spin_unlock(&fasync_lock); |
648 | spin_unlock(&filp->f_lock); | 657 | spin_unlock(&filp->f_lock); |
649 | return result; | 658 | return result; |
650 | } | 659 | } |
651 | 660 | ||
652 | /* | 661 | /* |
653 | * Add a fasync entry. Return negative on error, positive if | 662 | * Add a fasync entry. Return negative on error, positive if |
654 | * added, and zero if did nothing but change an existing one. | 663 | * added, and zero if did nothing but change an existing one. |
655 | * | 664 | * |
656 | * NOTE! It is very important that the FASYNC flag always | 665 | * NOTE! It is very important that the FASYNC flag always |
657 | * match the state "is the filp on a fasync list". | 666 | * match the state "is the filp on a fasync list". |
658 | */ | 667 | */ |
659 | static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp) | 668 | static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp) |
660 | { | 669 | { |
661 | struct fasync_struct *new, *fa, **fp; | 670 | struct fasync_struct *new, *fa, **fp; |
662 | int result = 0; | 671 | int result = 0; |
663 | 672 | ||
664 | new = kmem_cache_alloc(fasync_cache, GFP_KERNEL); | 673 | new = kmem_cache_alloc(fasync_cache, GFP_KERNEL); |
665 | if (!new) | 674 | if (!new) |
666 | return -ENOMEM; | 675 | return -ENOMEM; |
667 | 676 | ||
668 | spin_lock(&filp->f_lock); | 677 | spin_lock(&filp->f_lock); |
669 | write_lock_irq(&fasync_lock); | 678 | spin_lock(&fasync_lock); |
670 | for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { | 679 | for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { |
671 | if (fa->fa_file != filp) | 680 | if (fa->fa_file != filp) |
672 | continue; | 681 | continue; |
682 | |||
683 | spin_lock_irq(&fa->fa_lock); | ||
673 | fa->fa_fd = fd; | 684 | fa->fa_fd = fd; |
685 | spin_unlock_irq(&fa->fa_lock); | ||
686 | |||
674 | kmem_cache_free(fasync_cache, new); | 687 | kmem_cache_free(fasync_cache, new); |
675 | goto out; | 688 | goto out; |
676 | } | 689 | } |
677 | 690 | ||
691 | spin_lock_init(&new->fa_lock); | ||
678 | new->magic = FASYNC_MAGIC; | 692 | new->magic = FASYNC_MAGIC; |
679 | new->fa_file = filp; | 693 | new->fa_file = filp; |
680 | new->fa_fd = fd; | 694 | new->fa_fd = fd; |
681 | new->fa_next = *fapp; | 695 | new->fa_next = *fapp; |
682 | *fapp = new; | 696 | rcu_assign_pointer(*fapp, new); |
683 | result = 1; | 697 | result = 1; |
684 | filp->f_flags |= FASYNC; | 698 | filp->f_flags |= FASYNC; |
685 | 699 | ||
686 | out: | 700 | out: |
687 | write_unlock_irq(&fasync_lock); | 701 | spin_unlock(&fasync_lock); |
688 | spin_unlock(&filp->f_lock); | 702 | spin_unlock(&filp->f_lock); |
689 | return result; | 703 | return result; |
690 | } | 704 | } |
691 | 705 | ||
692 | /* | 706 | /* |
693 | * fasync_helper() is used by almost all character device drivers | 707 | * fasync_helper() is used by almost all character device drivers |
694 | * to set up the fasync queue, and for regular files by the file | 708 | * to set up the fasync queue, and for regular files by the file |
695 | * lease code. It returns negative on error, 0 if it did no changes | 709 | * lease code. It returns negative on error, 0 if it did no changes |
696 | * and positive if it added/deleted the entry. | 710 | * and positive if it added/deleted the entry. |
697 | */ | 711 | */ |
698 | int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp) | 712 | int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp) |
699 | { | 713 | { |
700 | if (!on) | 714 | if (!on) |
701 | return fasync_remove_entry(filp, fapp); | 715 | return fasync_remove_entry(filp, fapp); |
702 | return fasync_add_entry(fd, filp, fapp); | 716 | return fasync_add_entry(fd, filp, fapp); |
703 | } | 717 | } |
704 | 718 | ||
705 | EXPORT_SYMBOL(fasync_helper); | 719 | EXPORT_SYMBOL(fasync_helper); |
706 | 720 | ||
707 | void __kill_fasync(struct fasync_struct *fa, int sig, int band) | 721 | /* |
722 | * rcu_read_lock() is held | ||
723 | */ | ||
724 | static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band) | ||
708 | { | 725 | { |
709 | while (fa) { | 726 | while (fa) { |
710 | struct fown_struct * fown; | 727 | struct fown_struct *fown; |
711 | if (fa->magic != FASYNC_MAGIC) { | 728 | if (fa->magic != FASYNC_MAGIC) { |
712 | printk(KERN_ERR "kill_fasync: bad magic number in " | 729 | printk(KERN_ERR "kill_fasync: bad magic number in " |
713 | "fasync_struct!\n"); | 730 | "fasync_struct!\n"); |
714 | return; | 731 | return; |
715 | } | 732 | } |
716 | fown = &fa->fa_file->f_owner; | 733 | spin_lock(&fa->fa_lock); |
717 | /* Don't send SIGURG to processes which have not set a | 734 | if (fa->fa_file) { |
718 | queued signum: SIGURG has its own default signalling | 735 | fown = &fa->fa_file->f_owner; |
719 | mechanism. */ | 736 | /* Don't send SIGURG to processes which have not set a |
720 | if (!(sig == SIGURG && fown->signum == 0)) | 737 | queued signum: SIGURG has its own default signalling |
721 | send_sigio(fown, fa->fa_fd, band); | 738 | mechanism. */ |
722 | fa = fa->fa_next; | 739 | if (!(sig == SIGURG && fown->signum == 0)) |
740 | send_sigio(fown, fa->fa_fd, band); | ||
741 | } | ||
742 | spin_unlock(&fa->fa_lock); | ||
743 | fa = rcu_dereference(fa->fa_next); | ||
723 | } | 744 | } |
724 | } | 745 | } |
725 | 746 | ||
726 | EXPORT_SYMBOL(__kill_fasync); | ||
727 | |||
728 | void kill_fasync(struct fasync_struct **fp, int sig, int band) | 747 | void kill_fasync(struct fasync_struct **fp, int sig, int band) |
729 | { | 748 | { |
730 | /* First a quick test without locking: usually | 749 | /* First a quick test without locking: usually |
731 | * the list is empty. | 750 | * the list is empty. |
732 | */ | 751 | */ |
733 | if (*fp) { | 752 | if (*fp) { |
734 | read_lock(&fasync_lock); | 753 | rcu_read_lock(); |
735 | /* reread *fp after obtaining the lock */ | 754 | kill_fasync_rcu(rcu_dereference(*fp), sig, band); |
736 | __kill_fasync(*fp, sig, band); | 755 | rcu_read_unlock(); |
737 | read_unlock(&fasync_lock); | ||
738 | } | 756 | } |
739 | } | 757 | } |
740 | EXPORT_SYMBOL(kill_fasync); | 758 | EXPORT_SYMBOL(kill_fasync); |
741 | 759 | ||
742 | static int __init fasync_init(void) | 760 | static int __init fasync_init(void) |
743 | { | 761 | { |
744 | fasync_cache = kmem_cache_create("fasync_cache", | 762 | fasync_cache = kmem_cache_create("fasync_cache", |
745 | sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL); | 763 | sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL); |
include/linux/fs.h
1 | #ifndef _LINUX_FS_H | 1 | #ifndef _LINUX_FS_H |
2 | #define _LINUX_FS_H | 2 | #define _LINUX_FS_H |
3 | 3 | ||
4 | /* | 4 | /* |
5 | * This file has definitions for some important file table | 5 | * This file has definitions for some important file table |
6 | * structures etc. | 6 | * structures etc. |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/limits.h> | 9 | #include <linux/limits.h> |
10 | #include <linux/ioctl.h> | 10 | #include <linux/ioctl.h> |
11 | 11 | ||
12 | /* | 12 | /* |
13 | * It's silly to have NR_OPEN bigger than NR_FILE, but you can change | 13 | * It's silly to have NR_OPEN bigger than NR_FILE, but you can change |
14 | * the file limit at runtime and only root can increase the per-process | 14 | * the file limit at runtime and only root can increase the per-process |
15 | * nr_file rlimit, so it's safe to set up a ridiculously high absolute | 15 | * nr_file rlimit, so it's safe to set up a ridiculously high absolute |
16 | * upper limit on files-per-process. | 16 | * upper limit on files-per-process. |
17 | * | 17 | * |
18 | * Some programs (notably those using select()) may have to be | 18 | * Some programs (notably those using select()) may have to be |
19 | * recompiled to take full advantage of the new limits.. | 19 | * recompiled to take full advantage of the new limits.. |
20 | */ | 20 | */ |
21 | 21 | ||
22 | /* Fixed constants first: */ | 22 | /* Fixed constants first: */ |
23 | #undef NR_OPEN | 23 | #undef NR_OPEN |
24 | #define INR_OPEN 1024 /* Initial setting for nfile rlimits */ | 24 | #define INR_OPEN 1024 /* Initial setting for nfile rlimits */ |
25 | 25 | ||
26 | #define BLOCK_SIZE_BITS 10 | 26 | #define BLOCK_SIZE_BITS 10 |
27 | #define BLOCK_SIZE (1<<BLOCK_SIZE_BITS) | 27 | #define BLOCK_SIZE (1<<BLOCK_SIZE_BITS) |
28 | 28 | ||
29 | #define SEEK_SET 0 /* seek relative to beginning of file */ | 29 | #define SEEK_SET 0 /* seek relative to beginning of file */ |
30 | #define SEEK_CUR 1 /* seek relative to current file position */ | 30 | #define SEEK_CUR 1 /* seek relative to current file position */ |
31 | #define SEEK_END 2 /* seek relative to end of file */ | 31 | #define SEEK_END 2 /* seek relative to end of file */ |
32 | #define SEEK_MAX SEEK_END | 32 | #define SEEK_MAX SEEK_END |
33 | 33 | ||
34 | /* And dynamically-tunable limits and defaults: */ | 34 | /* And dynamically-tunable limits and defaults: */ |
35 | struct files_stat_struct { | 35 | struct files_stat_struct { |
36 | int nr_files; /* read only */ | 36 | int nr_files; /* read only */ |
37 | int nr_free_files; /* read only */ | 37 | int nr_free_files; /* read only */ |
38 | int max_files; /* tunable */ | 38 | int max_files; /* tunable */ |
39 | }; | 39 | }; |
40 | 40 | ||
41 | struct inodes_stat_t { | 41 | struct inodes_stat_t { |
42 | int nr_inodes; | 42 | int nr_inodes; |
43 | int nr_unused; | 43 | int nr_unused; |
44 | int dummy[5]; /* padding for sysctl ABI compatibility */ | 44 | int dummy[5]; /* padding for sysctl ABI compatibility */ |
45 | }; | 45 | }; |
46 | 46 | ||
47 | 47 | ||
48 | #define NR_FILE 8192 /* this can well be larger on a larger system */ | 48 | #define NR_FILE 8192 /* this can well be larger on a larger system */ |
49 | 49 | ||
50 | #define MAY_EXEC 1 | 50 | #define MAY_EXEC 1 |
51 | #define MAY_WRITE 2 | 51 | #define MAY_WRITE 2 |
52 | #define MAY_READ 4 | 52 | #define MAY_READ 4 |
53 | #define MAY_APPEND 8 | 53 | #define MAY_APPEND 8 |
54 | #define MAY_ACCESS 16 | 54 | #define MAY_ACCESS 16 |
55 | #define MAY_OPEN 32 | 55 | #define MAY_OPEN 32 |
56 | 56 | ||
57 | /* | 57 | /* |
58 | * flags in file.f_mode. Note that FMODE_READ and FMODE_WRITE must correspond | 58 | * flags in file.f_mode. Note that FMODE_READ and FMODE_WRITE must correspond |
59 | * to O_WRONLY and O_RDWR via the strange trick in __dentry_open() | 59 | * to O_WRONLY and O_RDWR via the strange trick in __dentry_open() |
60 | */ | 60 | */ |
61 | 61 | ||
62 | /* file is open for reading */ | 62 | /* file is open for reading */ |
63 | #define FMODE_READ ((__force fmode_t)0x1) | 63 | #define FMODE_READ ((__force fmode_t)0x1) |
64 | /* file is open for writing */ | 64 | /* file is open for writing */ |
65 | #define FMODE_WRITE ((__force fmode_t)0x2) | 65 | #define FMODE_WRITE ((__force fmode_t)0x2) |
66 | /* file is seekable */ | 66 | /* file is seekable */ |
67 | #define FMODE_LSEEK ((__force fmode_t)0x4) | 67 | #define FMODE_LSEEK ((__force fmode_t)0x4) |
68 | /* file can be accessed using pread */ | 68 | /* file can be accessed using pread */ |
69 | #define FMODE_PREAD ((__force fmode_t)0x8) | 69 | #define FMODE_PREAD ((__force fmode_t)0x8) |
70 | /* file can be accessed using pwrite */ | 70 | /* file can be accessed using pwrite */ |
71 | #define FMODE_PWRITE ((__force fmode_t)0x10) | 71 | #define FMODE_PWRITE ((__force fmode_t)0x10) |
72 | /* File is opened for execution with sys_execve / sys_uselib */ | 72 | /* File is opened for execution with sys_execve / sys_uselib */ |
73 | #define FMODE_EXEC ((__force fmode_t)0x20) | 73 | #define FMODE_EXEC ((__force fmode_t)0x20) |
74 | /* File is opened with O_NDELAY (only set for block devices) */ | 74 | /* File is opened with O_NDELAY (only set for block devices) */ |
75 | #define FMODE_NDELAY ((__force fmode_t)0x40) | 75 | #define FMODE_NDELAY ((__force fmode_t)0x40) |
76 | /* File is opened with O_EXCL (only set for block devices) */ | 76 | /* File is opened with O_EXCL (only set for block devices) */ |
77 | #define FMODE_EXCL ((__force fmode_t)0x80) | 77 | #define FMODE_EXCL ((__force fmode_t)0x80) |
78 | /* File is opened using open(.., 3, ..) and is writeable only for ioctls | 78 | /* File is opened using open(.., 3, ..) and is writeable only for ioctls |
79 | (specialy hack for floppy.c) */ | 79 | (specialy hack for floppy.c) */ |
80 | #define FMODE_WRITE_IOCTL ((__force fmode_t)0x100) | 80 | #define FMODE_WRITE_IOCTL ((__force fmode_t)0x100) |
81 | 81 | ||
82 | /* | 82 | /* |
83 | * Don't update ctime and mtime. | 83 | * Don't update ctime and mtime. |
84 | * | 84 | * |
85 | * Currently a special hack for the XFS open_by_handle ioctl, but we'll | 85 | * Currently a special hack for the XFS open_by_handle ioctl, but we'll |
86 | * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon. | 86 | * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon. |
87 | */ | 87 | */ |
88 | #define FMODE_NOCMTIME ((__force fmode_t)0x800) | 88 | #define FMODE_NOCMTIME ((__force fmode_t)0x800) |
89 | 89 | ||
90 | /* Expect random access pattern */ | 90 | /* Expect random access pattern */ |
91 | #define FMODE_RANDOM ((__force fmode_t)0x1000) | 91 | #define FMODE_RANDOM ((__force fmode_t)0x1000) |
92 | 92 | ||
93 | /* | 93 | /* |
94 | * The below are the various read and write types that we support. Some of | 94 | * The below are the various read and write types that we support. Some of |
95 | * them include behavioral modifiers that send information down to the | 95 | * them include behavioral modifiers that send information down to the |
96 | * block layer and IO scheduler. Terminology: | 96 | * block layer and IO scheduler. Terminology: |
97 | * | 97 | * |
98 | * The block layer uses device plugging to defer IO a little bit, in | 98 | * The block layer uses device plugging to defer IO a little bit, in |
99 | * the hope that we will see more IO very shortly. This increases | 99 | * the hope that we will see more IO very shortly. This increases |
100 | * coalescing of adjacent IO and thus reduces the number of IOs we | 100 | * coalescing of adjacent IO and thus reduces the number of IOs we |
101 | * have to send to the device. It also allows for better queuing, | 101 | * have to send to the device. It also allows for better queuing, |
102 | * if the IO isn't mergeable. If the caller is going to be waiting | 102 | * if the IO isn't mergeable. If the caller is going to be waiting |
103 | * for the IO, then he must ensure that the device is unplugged so | 103 | * for the IO, then he must ensure that the device is unplugged so |
104 | * that the IO is dispatched to the driver. | 104 | * that the IO is dispatched to the driver. |
105 | * | 105 | * |
106 | * All IO is handled async in Linux. This is fine for background | 106 | * All IO is handled async in Linux. This is fine for background |
107 | * writes, but for reads or writes that someone waits for completion | 107 | * writes, but for reads or writes that someone waits for completion |
108 | * on, we want to notify the block layer and IO scheduler so that they | 108 | * on, we want to notify the block layer and IO scheduler so that they |
109 | * know about it. That allows them to make better scheduling | 109 | * know about it. That allows them to make better scheduling |
110 | * decisions. So when the below references 'sync' and 'async', it | 110 | * decisions. So when the below references 'sync' and 'async', it |
111 | * is referencing this priority hint. | 111 | * is referencing this priority hint. |
112 | * | 112 | * |
113 | * With that in mind, the available types are: | 113 | * With that in mind, the available types are: |
114 | * | 114 | * |
115 | * READ A normal read operation. Device will be plugged. | 115 | * READ A normal read operation. Device will be plugged. |
116 | * READ_SYNC A synchronous read. Device is not plugged, caller can | 116 | * READ_SYNC A synchronous read. Device is not plugged, caller can |
117 | * immediately wait on this read without caring about | 117 | * immediately wait on this read without caring about |
118 | * unplugging. | 118 | * unplugging. |
119 | * READA Used for read-ahead operations. Lower priority, and the | 119 | * READA Used for read-ahead operations. Lower priority, and the |
120 | * block layer could (in theory) choose to ignore this | 120 | * block layer could (in theory) choose to ignore this |
121 | * request if it runs into resource problems. | 121 | * request if it runs into resource problems. |
122 | * WRITE A normal async write. Device will be plugged. | 122 | * WRITE A normal async write. Device will be plugged. |
123 | * SWRITE Like WRITE, but a special case for ll_rw_block() that | 123 | * SWRITE Like WRITE, but a special case for ll_rw_block() that |
124 | * tells it to lock the buffer first. Normally a buffer | 124 | * tells it to lock the buffer first. Normally a buffer |
125 | * must be locked before doing IO. | 125 | * must be locked before doing IO. |
126 | * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down | 126 | * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down |
127 | * the hint that someone will be waiting on this IO | 127 | * the hint that someone will be waiting on this IO |
128 | * shortly. The device must still be unplugged explicitly, | 128 | * shortly. The device must still be unplugged explicitly, |
129 | * WRITE_SYNC_PLUG does not do this as we could be | 129 | * WRITE_SYNC_PLUG does not do this as we could be |
130 | * submitting more writes before we actually wait on any | 130 | * submitting more writes before we actually wait on any |
131 | * of them. | 131 | * of them. |
132 | * WRITE_SYNC Like WRITE_SYNC_PLUG, but also unplugs the device | 132 | * WRITE_SYNC Like WRITE_SYNC_PLUG, but also unplugs the device |
133 | * immediately after submission. The write equivalent | 133 | * immediately after submission. The write equivalent |
134 | * of READ_SYNC. | 134 | * of READ_SYNC. |
135 | * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. | 135 | * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. |
136 | * SWRITE_SYNC | 136 | * SWRITE_SYNC |
137 | * SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer. | 137 | * SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer. |
138 | * See SWRITE. | 138 | * See SWRITE. |
139 | * WRITE_BARRIER Like WRITE, but tells the block layer that all | 139 | * WRITE_BARRIER Like WRITE, but tells the block layer that all |
140 | * previously submitted writes must be safely on storage | 140 | * previously submitted writes must be safely on storage |
141 | * before this one is started. Also guarantees that when | 141 | * before this one is started. Also guarantees that when |
142 | * this write is complete, it itself is also safely on | 142 | * this write is complete, it itself is also safely on |
143 | * storage. Prevents reordering of writes on both sides | 143 | * storage. Prevents reordering of writes on both sides |
144 | * of this IO. | 144 | * of this IO. |
145 | * | 145 | * |
146 | */ | 146 | */ |
147 | #define RW_MASK 1 | 147 | #define RW_MASK 1 |
148 | #define RWA_MASK 2 | 148 | #define RWA_MASK 2 |
149 | #define READ 0 | 149 | #define READ 0 |
150 | #define WRITE 1 | 150 | #define WRITE 1 |
151 | #define READA 2 /* read-ahead - don't block if no resources */ | 151 | #define READA 2 /* read-ahead - don't block if no resources */ |
152 | #define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */ | 152 | #define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */ |
153 | #define READ_SYNC (READ | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG)) | 153 | #define READ_SYNC (READ | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG)) |
154 | #define READ_META (READ | (1 << BIO_RW_META)) | 154 | #define READ_META (READ | (1 << BIO_RW_META)) |
155 | #define WRITE_SYNC_PLUG (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) | 155 | #define WRITE_SYNC_PLUG (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) |
156 | #define WRITE_SYNC (WRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) | 156 | #define WRITE_SYNC (WRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) |
157 | #define WRITE_ODIRECT_PLUG (WRITE | (1 << BIO_RW_SYNCIO)) | 157 | #define WRITE_ODIRECT_PLUG (WRITE | (1 << BIO_RW_SYNCIO)) |
158 | #define WRITE_META (WRITE | (1 << BIO_RW_META)) | 158 | #define WRITE_META (WRITE | (1 << BIO_RW_META)) |
159 | #define SWRITE_SYNC_PLUG \ | 159 | #define SWRITE_SYNC_PLUG \ |
160 | (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) | 160 | (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) |
161 | #define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) | 161 | #define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) |
162 | #define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER)) | 162 | #define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER)) |
163 | 163 | ||
164 | /* | 164 | /* |
165 | * These aren't really reads or writes, they pass down information about | 165 | * These aren't really reads or writes, they pass down information about |
166 | * parts of device that are now unused by the file system. | 166 | * parts of device that are now unused by the file system. |
167 | */ | 167 | */ |
168 | #define DISCARD_NOBARRIER (WRITE | (1 << BIO_RW_DISCARD)) | 168 | #define DISCARD_NOBARRIER (WRITE | (1 << BIO_RW_DISCARD)) |
169 | #define DISCARD_BARRIER (DISCARD_NOBARRIER | (1 << BIO_RW_BARRIER)) | 169 | #define DISCARD_BARRIER (DISCARD_NOBARRIER | (1 << BIO_RW_BARRIER)) |
170 | 170 | ||
171 | #define SEL_IN 1 | 171 | #define SEL_IN 1 |
172 | #define SEL_OUT 2 | 172 | #define SEL_OUT 2 |
173 | #define SEL_EX 4 | 173 | #define SEL_EX 4 |
174 | 174 | ||
175 | /* public flags for file_system_type */ | 175 | /* public flags for file_system_type */ |
176 | #define FS_REQUIRES_DEV 1 | 176 | #define FS_REQUIRES_DEV 1 |
177 | #define FS_BINARY_MOUNTDATA 2 | 177 | #define FS_BINARY_MOUNTDATA 2 |
178 | #define FS_HAS_SUBTYPE 4 | 178 | #define FS_HAS_SUBTYPE 4 |
179 | #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ | 179 | #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ |
180 | #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() | 180 | #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() |
181 | * during rename() internally. | 181 | * during rename() internally. |
182 | */ | 182 | */ |
183 | 183 | ||
184 | /* | 184 | /* |
185 | * These are the fs-independent mount-flags: up to 32 flags are supported | 185 | * These are the fs-independent mount-flags: up to 32 flags are supported |
186 | */ | 186 | */ |
187 | #define MS_RDONLY 1 /* Mount read-only */ | 187 | #define MS_RDONLY 1 /* Mount read-only */ |
188 | #define MS_NOSUID 2 /* Ignore suid and sgid bits */ | 188 | #define MS_NOSUID 2 /* Ignore suid and sgid bits */ |
189 | #define MS_NODEV 4 /* Disallow access to device special files */ | 189 | #define MS_NODEV 4 /* Disallow access to device special files */ |
190 | #define MS_NOEXEC 8 /* Disallow program execution */ | 190 | #define MS_NOEXEC 8 /* Disallow program execution */ |
191 | #define MS_SYNCHRONOUS 16 /* Writes are synced at once */ | 191 | #define MS_SYNCHRONOUS 16 /* Writes are synced at once */ |
192 | #define MS_REMOUNT 32 /* Alter flags of a mounted FS */ | 192 | #define MS_REMOUNT 32 /* Alter flags of a mounted FS */ |
193 | #define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */ | 193 | #define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */ |
194 | #define MS_DIRSYNC 128 /* Directory modifications are synchronous */ | 194 | #define MS_DIRSYNC 128 /* Directory modifications are synchronous */ |
195 | #define MS_NOATIME 1024 /* Do not update access times. */ | 195 | #define MS_NOATIME 1024 /* Do not update access times. */ |
196 | #define MS_NODIRATIME 2048 /* Do not update directory access times */ | 196 | #define MS_NODIRATIME 2048 /* Do not update directory access times */ |
197 | #define MS_BIND 4096 | 197 | #define MS_BIND 4096 |
198 | #define MS_MOVE 8192 | 198 | #define MS_MOVE 8192 |
199 | #define MS_REC 16384 | 199 | #define MS_REC 16384 |
200 | #define MS_VERBOSE 32768 /* War is peace. Verbosity is silence. | 200 | #define MS_VERBOSE 32768 /* War is peace. Verbosity is silence. |
201 | MS_VERBOSE is deprecated. */ | 201 | MS_VERBOSE is deprecated. */ |
202 | #define MS_SILENT 32768 | 202 | #define MS_SILENT 32768 |
203 | #define MS_POSIXACL (1<<16) /* VFS does not apply the umask */ | 203 | #define MS_POSIXACL (1<<16) /* VFS does not apply the umask */ |
204 | #define MS_UNBINDABLE (1<<17) /* change to unbindable */ | 204 | #define MS_UNBINDABLE (1<<17) /* change to unbindable */ |
205 | #define MS_PRIVATE (1<<18) /* change to private */ | 205 | #define MS_PRIVATE (1<<18) /* change to private */ |
206 | #define MS_SLAVE (1<<19) /* change to slave */ | 206 | #define MS_SLAVE (1<<19) /* change to slave */ |
207 | #define MS_SHARED (1<<20) /* change to shared */ | 207 | #define MS_SHARED (1<<20) /* change to shared */ |
208 | #define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */ | 208 | #define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */ |
209 | #define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ | 209 | #define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ |
210 | #define MS_I_VERSION (1<<23) /* Update inode I_version field */ | 210 | #define MS_I_VERSION (1<<23) /* Update inode I_version field */ |
211 | #define MS_STRICTATIME (1<<24) /* Always perform atime updates */ | 211 | #define MS_STRICTATIME (1<<24) /* Always perform atime updates */ |
212 | #define MS_ACTIVE (1<<30) | 212 | #define MS_ACTIVE (1<<30) |
213 | #define MS_NOUSER (1<<31) | 213 | #define MS_NOUSER (1<<31) |
214 | 214 | ||
215 | /* | 215 | /* |
216 | * Superblock flags that can be altered by MS_REMOUNT | 216 | * Superblock flags that can be altered by MS_REMOUNT |
217 | */ | 217 | */ |
218 | #define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION) | 218 | #define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION) |
219 | 219 | ||
220 | /* | 220 | /* |
221 | * Old magic mount flag and mask | 221 | * Old magic mount flag and mask |
222 | */ | 222 | */ |
223 | #define MS_MGC_VAL 0xC0ED0000 | 223 | #define MS_MGC_VAL 0xC0ED0000 |
224 | #define MS_MGC_MSK 0xffff0000 | 224 | #define MS_MGC_MSK 0xffff0000 |
225 | 225 | ||
226 | /* Inode flags - they have nothing to do with superblock flags now */ | 226 | /* Inode flags - they have nothing to do with superblock flags now */ |
227 | 227 | ||
228 | #define S_SYNC 1 /* Writes are synced at once */ | 228 | #define S_SYNC 1 /* Writes are synced at once */ |
229 | #define S_NOATIME 2 /* Do not update access times */ | 229 | #define S_NOATIME 2 /* Do not update access times */ |
230 | #define S_APPEND 4 /* Append-only file */ | 230 | #define S_APPEND 4 /* Append-only file */ |
231 | #define S_IMMUTABLE 8 /* Immutable file */ | 231 | #define S_IMMUTABLE 8 /* Immutable file */ |
232 | #define S_DEAD 16 /* removed, but still open directory */ | 232 | #define S_DEAD 16 /* removed, but still open directory */ |
233 | #define S_NOQUOTA 32 /* Inode is not counted to quota */ | 233 | #define S_NOQUOTA 32 /* Inode is not counted to quota */ |
234 | #define S_DIRSYNC 64 /* Directory modifications are synchronous */ | 234 | #define S_DIRSYNC 64 /* Directory modifications are synchronous */ |
235 | #define S_NOCMTIME 128 /* Do not update file c/mtime */ | 235 | #define S_NOCMTIME 128 /* Do not update file c/mtime */ |
236 | #define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */ | 236 | #define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */ |
237 | #define S_PRIVATE 512 /* Inode is fs-internal */ | 237 | #define S_PRIVATE 512 /* Inode is fs-internal */ |
238 | 238 | ||
239 | /* | 239 | /* |
240 | * Note that nosuid etc flags are inode-specific: setting some file-system | 240 | * Note that nosuid etc flags are inode-specific: setting some file-system |
241 | * flags just means all the inodes inherit those flags by default. It might be | 241 | * flags just means all the inodes inherit those flags by default. It might be |
242 | * possible to override it selectively if you really wanted to with some | 242 | * possible to override it selectively if you really wanted to with some |
243 | * ioctl() that is not currently implemented. | 243 | * ioctl() that is not currently implemented. |
244 | * | 244 | * |
245 | * Exception: MS_RDONLY is always applied to the entire file system. | 245 | * Exception: MS_RDONLY is always applied to the entire file system. |
246 | * | 246 | * |
247 | * Unfortunately, it is possible to change a filesystem's flags with it mounted | 247 | * Unfortunately, it is possible to change a filesystem's flags with it mounted |
248 | * with files in use. This means that all of the inodes will not have their | 248 | * with files in use. This means that all of the inodes will not have their |
249 | * i_flags updated. Hence, i_flags no longer inherit the superblock mount | 249 | * i_flags updated. Hence, i_flags no longer inherit the superblock mount |
250 | * flags, so these have to be checked separately. -- rmk@arm.uk.linux.org | 250 | * flags, so these have to be checked separately. -- rmk@arm.uk.linux.org |
251 | */ | 251 | */ |
252 | #define __IS_FLG(inode,flg) ((inode)->i_sb->s_flags & (flg)) | 252 | #define __IS_FLG(inode,flg) ((inode)->i_sb->s_flags & (flg)) |
253 | 253 | ||
254 | #define IS_RDONLY(inode) ((inode)->i_sb->s_flags & MS_RDONLY) | 254 | #define IS_RDONLY(inode) ((inode)->i_sb->s_flags & MS_RDONLY) |
255 | #define IS_SYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS) || \ | 255 | #define IS_SYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS) || \ |
256 | ((inode)->i_flags & S_SYNC)) | 256 | ((inode)->i_flags & S_SYNC)) |
257 | #define IS_DIRSYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \ | 257 | #define IS_DIRSYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \ |
258 | ((inode)->i_flags & (S_SYNC|S_DIRSYNC))) | 258 | ((inode)->i_flags & (S_SYNC|S_DIRSYNC))) |
259 | #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK) | 259 | #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK) |
260 | #define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME) | 260 | #define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME) |
261 | #define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION) | 261 | #define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION) |
262 | 262 | ||
263 | #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) | 263 | #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) |
264 | #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) | 264 | #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) |
265 | #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) | 265 | #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) |
266 | #define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL) | 266 | #define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL) |
267 | 267 | ||
268 | #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) | 268 | #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) |
269 | #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME) | 269 | #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME) |
270 | #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE) | 270 | #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE) |
271 | #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE) | 271 | #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE) |
272 | 272 | ||
273 | /* the read-only stuff doesn't really belong here, but any other place is | 273 | /* the read-only stuff doesn't really belong here, but any other place is |
274 | probably as bad and I don't want to create yet another include file. */ | 274 | probably as bad and I don't want to create yet another include file. */ |
275 | 275 | ||
276 | #define BLKROSET _IO(0x12,93) /* set device read-only (0 = read-write) */ | 276 | #define BLKROSET _IO(0x12,93) /* set device read-only (0 = read-write) */ |
277 | #define BLKROGET _IO(0x12,94) /* get read-only status (0 = read-write) */ | 277 | #define BLKROGET _IO(0x12,94) /* get read-only status (0 = read-write) */ |
278 | #define BLKRRPART _IO(0x12,95) /* re-read partition table */ | 278 | #define BLKRRPART _IO(0x12,95) /* re-read partition table */ |
279 | #define BLKGETSIZE _IO(0x12,96) /* return device size /512 (long *arg) */ | 279 | #define BLKGETSIZE _IO(0x12,96) /* return device size /512 (long *arg) */ |
280 | #define BLKFLSBUF _IO(0x12,97) /* flush buffer cache */ | 280 | #define BLKFLSBUF _IO(0x12,97) /* flush buffer cache */ |
281 | #define BLKRASET _IO(0x12,98) /* set read ahead for block device */ | 281 | #define BLKRASET _IO(0x12,98) /* set read ahead for block device */ |
282 | #define BLKRAGET _IO(0x12,99) /* get current read ahead setting */ | 282 | #define BLKRAGET _IO(0x12,99) /* get current read ahead setting */ |
283 | #define BLKFRASET _IO(0x12,100)/* set filesystem (mm/filemap.c) read-ahead */ | 283 | #define BLKFRASET _IO(0x12,100)/* set filesystem (mm/filemap.c) read-ahead */ |
284 | #define BLKFRAGET _IO(0x12,101)/* get filesystem (mm/filemap.c) read-ahead */ | 284 | #define BLKFRAGET _IO(0x12,101)/* get filesystem (mm/filemap.c) read-ahead */ |
285 | #define BLKSECTSET _IO(0x12,102)/* set max sectors per request (ll_rw_blk.c) */ | 285 | #define BLKSECTSET _IO(0x12,102)/* set max sectors per request (ll_rw_blk.c) */ |
286 | #define BLKSECTGET _IO(0x12,103)/* get max sectors per request (ll_rw_blk.c) */ | 286 | #define BLKSECTGET _IO(0x12,103)/* get max sectors per request (ll_rw_blk.c) */ |
287 | #define BLKSSZGET _IO(0x12,104)/* get block device sector size */ | 287 | #define BLKSSZGET _IO(0x12,104)/* get block device sector size */ |
288 | #if 0 | 288 | #if 0 |
289 | #define BLKPG _IO(0x12,105)/* See blkpg.h */ | 289 | #define BLKPG _IO(0x12,105)/* See blkpg.h */ |
290 | 290 | ||
291 | /* Some people are morons. Do not use sizeof! */ | 291 | /* Some people are morons. Do not use sizeof! */ |
292 | 292 | ||
293 | #define BLKELVGET _IOR(0x12,106,size_t)/* elevator get */ | 293 | #define BLKELVGET _IOR(0x12,106,size_t)/* elevator get */ |
294 | #define BLKELVSET _IOW(0x12,107,size_t)/* elevator set */ | 294 | #define BLKELVSET _IOW(0x12,107,size_t)/* elevator set */ |
295 | /* This was here just to show that the number is taken - | 295 | /* This was here just to show that the number is taken - |
296 | probably all these _IO(0x12,*) ioctls should be moved to blkpg.h. */ | 296 | probably all these _IO(0x12,*) ioctls should be moved to blkpg.h. */ |
297 | #endif | 297 | #endif |
298 | /* A jump here: 108-111 have been used for various private purposes. */ | 298 | /* A jump here: 108-111 have been used for various private purposes. */ |
299 | #define BLKBSZGET _IOR(0x12,112,size_t) | 299 | #define BLKBSZGET _IOR(0x12,112,size_t) |
300 | #define BLKBSZSET _IOW(0x12,113,size_t) | 300 | #define BLKBSZSET _IOW(0x12,113,size_t) |
301 | #define BLKGETSIZE64 _IOR(0x12,114,size_t) /* return device size in bytes (u64 *arg) */ | 301 | #define BLKGETSIZE64 _IOR(0x12,114,size_t) /* return device size in bytes (u64 *arg) */ |
302 | #define BLKTRACESETUP _IOWR(0x12,115,struct blk_user_trace_setup) | 302 | #define BLKTRACESETUP _IOWR(0x12,115,struct blk_user_trace_setup) |
303 | #define BLKTRACESTART _IO(0x12,116) | 303 | #define BLKTRACESTART _IO(0x12,116) |
304 | #define BLKTRACESTOP _IO(0x12,117) | 304 | #define BLKTRACESTOP _IO(0x12,117) |
305 | #define BLKTRACETEARDOWN _IO(0x12,118) | 305 | #define BLKTRACETEARDOWN _IO(0x12,118) |
306 | #define BLKDISCARD _IO(0x12,119) | 306 | #define BLKDISCARD _IO(0x12,119) |
307 | #define BLKIOMIN _IO(0x12,120) | 307 | #define BLKIOMIN _IO(0x12,120) |
308 | #define BLKIOOPT _IO(0x12,121) | 308 | #define BLKIOOPT _IO(0x12,121) |
309 | #define BLKALIGNOFF _IO(0x12,122) | 309 | #define BLKALIGNOFF _IO(0x12,122) |
310 | #define BLKPBSZGET _IO(0x12,123) | 310 | #define BLKPBSZGET _IO(0x12,123) |
311 | #define BLKDISCARDZEROES _IO(0x12,124) | 311 | #define BLKDISCARDZEROES _IO(0x12,124) |
312 | 312 | ||
313 | #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ | 313 | #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ |
314 | #define FIBMAP _IO(0x00,1) /* bmap access */ | 314 | #define FIBMAP _IO(0x00,1) /* bmap access */ |
315 | #define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */ | 315 | #define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */ |
316 | #define FIFREEZE _IOWR('X', 119, int) /* Freeze */ | 316 | #define FIFREEZE _IOWR('X', 119, int) /* Freeze */ |
317 | #define FITHAW _IOWR('X', 120, int) /* Thaw */ | 317 | #define FITHAW _IOWR('X', 120, int) /* Thaw */ |
318 | 318 | ||
319 | #define FS_IOC_GETFLAGS _IOR('f', 1, long) | 319 | #define FS_IOC_GETFLAGS _IOR('f', 1, long) |
320 | #define FS_IOC_SETFLAGS _IOW('f', 2, long) | 320 | #define FS_IOC_SETFLAGS _IOW('f', 2, long) |
321 | #define FS_IOC_GETVERSION _IOR('v', 1, long) | 321 | #define FS_IOC_GETVERSION _IOR('v', 1, long) |
322 | #define FS_IOC_SETVERSION _IOW('v', 2, long) | 322 | #define FS_IOC_SETVERSION _IOW('v', 2, long) |
323 | #define FS_IOC_FIEMAP _IOWR('f', 11, struct fiemap) | 323 | #define FS_IOC_FIEMAP _IOWR('f', 11, struct fiemap) |
324 | #define FS_IOC32_GETFLAGS _IOR('f', 1, int) | 324 | #define FS_IOC32_GETFLAGS _IOR('f', 1, int) |
325 | #define FS_IOC32_SETFLAGS _IOW('f', 2, int) | 325 | #define FS_IOC32_SETFLAGS _IOW('f', 2, int) |
326 | #define FS_IOC32_GETVERSION _IOR('v', 1, int) | 326 | #define FS_IOC32_GETVERSION _IOR('v', 1, int) |
327 | #define FS_IOC32_SETVERSION _IOW('v', 2, int) | 327 | #define FS_IOC32_SETVERSION _IOW('v', 2, int) |
328 | 328 | ||
329 | /* | 329 | /* |
330 | * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS) | 330 | * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS) |
331 | */ | 331 | */ |
332 | #define FS_SECRM_FL 0x00000001 /* Secure deletion */ | 332 | #define FS_SECRM_FL 0x00000001 /* Secure deletion */ |
333 | #define FS_UNRM_FL 0x00000002 /* Undelete */ | 333 | #define FS_UNRM_FL 0x00000002 /* Undelete */ |
334 | #define FS_COMPR_FL 0x00000004 /* Compress file */ | 334 | #define FS_COMPR_FL 0x00000004 /* Compress file */ |
335 | #define FS_SYNC_FL 0x00000008 /* Synchronous updates */ | 335 | #define FS_SYNC_FL 0x00000008 /* Synchronous updates */ |
336 | #define FS_IMMUTABLE_FL 0x00000010 /* Immutable file */ | 336 | #define FS_IMMUTABLE_FL 0x00000010 /* Immutable file */ |
337 | #define FS_APPEND_FL 0x00000020 /* writes to file may only append */ | 337 | #define FS_APPEND_FL 0x00000020 /* writes to file may only append */ |
338 | #define FS_NODUMP_FL 0x00000040 /* do not dump file */ | 338 | #define FS_NODUMP_FL 0x00000040 /* do not dump file */ |
339 | #define FS_NOATIME_FL 0x00000080 /* do not update atime */ | 339 | #define FS_NOATIME_FL 0x00000080 /* do not update atime */ |
340 | /* Reserved for compression usage... */ | 340 | /* Reserved for compression usage... */ |
341 | #define FS_DIRTY_FL 0x00000100 | 341 | #define FS_DIRTY_FL 0x00000100 |
342 | #define FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ | 342 | #define FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ |
343 | #define FS_NOCOMP_FL 0x00000400 /* Don't compress */ | 343 | #define FS_NOCOMP_FL 0x00000400 /* Don't compress */ |
344 | #define FS_ECOMPR_FL 0x00000800 /* Compression error */ | 344 | #define FS_ECOMPR_FL 0x00000800 /* Compression error */ |
345 | /* End compression flags --- maybe not all used */ | 345 | /* End compression flags --- maybe not all used */ |
346 | #define FS_BTREE_FL 0x00001000 /* btree format dir */ | 346 | #define FS_BTREE_FL 0x00001000 /* btree format dir */ |
347 | #define FS_INDEX_FL 0x00001000 /* hash-indexed directory */ | 347 | #define FS_INDEX_FL 0x00001000 /* hash-indexed directory */ |
348 | #define FS_IMAGIC_FL 0x00002000 /* AFS directory */ | 348 | #define FS_IMAGIC_FL 0x00002000 /* AFS directory */ |
349 | #define FS_JOURNAL_DATA_FL 0x00004000 /* Reserved for ext3 */ | 349 | #define FS_JOURNAL_DATA_FL 0x00004000 /* Reserved for ext3 */ |
350 | #define FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */ | 350 | #define FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */ |
351 | #define FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ | 351 | #define FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ |
352 | #define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ | 352 | #define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ |
353 | #define FS_EXTENT_FL 0x00080000 /* Extents */ | 353 | #define FS_EXTENT_FL 0x00080000 /* Extents */ |
354 | #define FS_DIRECTIO_FL 0x00100000 /* Use direct i/o */ | 354 | #define FS_DIRECTIO_FL 0x00100000 /* Use direct i/o */ |
355 | #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ | 355 | #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ |
356 | 356 | ||
357 | #define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ | 357 | #define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ |
358 | #define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ | 358 | #define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ |
359 | 359 | ||
360 | 360 | ||
361 | #define SYNC_FILE_RANGE_WAIT_BEFORE 1 | 361 | #define SYNC_FILE_RANGE_WAIT_BEFORE 1 |
362 | #define SYNC_FILE_RANGE_WRITE 2 | 362 | #define SYNC_FILE_RANGE_WRITE 2 |
363 | #define SYNC_FILE_RANGE_WAIT_AFTER 4 | 363 | #define SYNC_FILE_RANGE_WAIT_AFTER 4 |
364 | 364 | ||
365 | #ifdef __KERNEL__ | 365 | #ifdef __KERNEL__ |
366 | 366 | ||
367 | #include <linux/linkage.h> | 367 | #include <linux/linkage.h> |
368 | #include <linux/wait.h> | 368 | #include <linux/wait.h> |
369 | #include <linux/types.h> | 369 | #include <linux/types.h> |
370 | #include <linux/kdev_t.h> | 370 | #include <linux/kdev_t.h> |
371 | #include <linux/dcache.h> | 371 | #include <linux/dcache.h> |
372 | #include <linux/path.h> | 372 | #include <linux/path.h> |
373 | #include <linux/stat.h> | 373 | #include <linux/stat.h> |
374 | #include <linux/cache.h> | 374 | #include <linux/cache.h> |
375 | #include <linux/kobject.h> | 375 | #include <linux/kobject.h> |
376 | #include <linux/list.h> | 376 | #include <linux/list.h> |
377 | #include <linux/radix-tree.h> | 377 | #include <linux/radix-tree.h> |
378 | #include <linux/prio_tree.h> | 378 | #include <linux/prio_tree.h> |
379 | #include <linux/init.h> | 379 | #include <linux/init.h> |
380 | #include <linux/pid.h> | 380 | #include <linux/pid.h> |
381 | #include <linux/mutex.h> | 381 | #include <linux/mutex.h> |
382 | #include <linux/capability.h> | 382 | #include <linux/capability.h> |
383 | #include <linux/semaphore.h> | 383 | #include <linux/semaphore.h> |
384 | #include <linux/fiemap.h> | 384 | #include <linux/fiemap.h> |
385 | 385 | ||
386 | #include <asm/atomic.h> | 386 | #include <asm/atomic.h> |
387 | #include <asm/byteorder.h> | 387 | #include <asm/byteorder.h> |
388 | 388 | ||
389 | struct export_operations; | 389 | struct export_operations; |
390 | struct hd_geometry; | 390 | struct hd_geometry; |
391 | struct iovec; | 391 | struct iovec; |
392 | struct nameidata; | 392 | struct nameidata; |
393 | struct kiocb; | 393 | struct kiocb; |
394 | struct pipe_inode_info; | 394 | struct pipe_inode_info; |
395 | struct poll_table_struct; | 395 | struct poll_table_struct; |
396 | struct kstatfs; | 396 | struct kstatfs; |
397 | struct vm_area_struct; | 397 | struct vm_area_struct; |
398 | struct vfsmount; | 398 | struct vfsmount; |
399 | struct cred; | 399 | struct cred; |
400 | 400 | ||
401 | extern void __init inode_init(void); | 401 | extern void __init inode_init(void); |
402 | extern void __init inode_init_early(void); | 402 | extern void __init inode_init_early(void); |
403 | extern void __init files_init(unsigned long); | 403 | extern void __init files_init(unsigned long); |
404 | 404 | ||
405 | extern struct files_stat_struct files_stat; | 405 | extern struct files_stat_struct files_stat; |
406 | extern int get_max_files(void); | 406 | extern int get_max_files(void); |
407 | extern int sysctl_nr_open; | 407 | extern int sysctl_nr_open; |
408 | extern struct inodes_stat_t inodes_stat; | 408 | extern struct inodes_stat_t inodes_stat; |
409 | extern int leases_enable, lease_break_time; | 409 | extern int leases_enable, lease_break_time; |
410 | #ifdef CONFIG_DNOTIFY | 410 | #ifdef CONFIG_DNOTIFY |
411 | extern int dir_notify_enable; | 411 | extern int dir_notify_enable; |
412 | #endif | 412 | #endif |
413 | 413 | ||
414 | struct buffer_head; | 414 | struct buffer_head; |
415 | typedef int (get_block_t)(struct inode *inode, sector_t iblock, | 415 | typedef int (get_block_t)(struct inode *inode, sector_t iblock, |
416 | struct buffer_head *bh_result, int create); | 416 | struct buffer_head *bh_result, int create); |
417 | typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, | 417 | typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, |
418 | ssize_t bytes, void *private); | 418 | ssize_t bytes, void *private); |
419 | 419 | ||
420 | /* | 420 | /* |
421 | * Attribute flags. These should be or-ed together to figure out what | 421 | * Attribute flags. These should be or-ed together to figure out what |
422 | * has been changed! | 422 | * has been changed! |
423 | */ | 423 | */ |
424 | #define ATTR_MODE (1 << 0) | 424 | #define ATTR_MODE (1 << 0) |
425 | #define ATTR_UID (1 << 1) | 425 | #define ATTR_UID (1 << 1) |
426 | #define ATTR_GID (1 << 2) | 426 | #define ATTR_GID (1 << 2) |
427 | #define ATTR_SIZE (1 << 3) | 427 | #define ATTR_SIZE (1 << 3) |
428 | #define ATTR_ATIME (1 << 4) | 428 | #define ATTR_ATIME (1 << 4) |
429 | #define ATTR_MTIME (1 << 5) | 429 | #define ATTR_MTIME (1 << 5) |
430 | #define ATTR_CTIME (1 << 6) | 430 | #define ATTR_CTIME (1 << 6) |
431 | #define ATTR_ATIME_SET (1 << 7) | 431 | #define ATTR_ATIME_SET (1 << 7) |
432 | #define ATTR_MTIME_SET (1 << 8) | 432 | #define ATTR_MTIME_SET (1 << 8) |
433 | #define ATTR_FORCE (1 << 9) /* Not a change, but a change it */ | 433 | #define ATTR_FORCE (1 << 9) /* Not a change, but a change it */ |
434 | #define ATTR_ATTR_FLAG (1 << 10) | 434 | #define ATTR_ATTR_FLAG (1 << 10) |
435 | #define ATTR_KILL_SUID (1 << 11) | 435 | #define ATTR_KILL_SUID (1 << 11) |
436 | #define ATTR_KILL_SGID (1 << 12) | 436 | #define ATTR_KILL_SGID (1 << 12) |
437 | #define ATTR_FILE (1 << 13) | 437 | #define ATTR_FILE (1 << 13) |
438 | #define ATTR_KILL_PRIV (1 << 14) | 438 | #define ATTR_KILL_PRIV (1 << 14) |
439 | #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */ | 439 | #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */ |
440 | #define ATTR_TIMES_SET (1 << 16) | 440 | #define ATTR_TIMES_SET (1 << 16) |
441 | 441 | ||
442 | /* | 442 | /* |
443 | * This is the Inode Attributes structure, used for notify_change(). It | 443 | * This is the Inode Attributes structure, used for notify_change(). It |
444 | * uses the above definitions as flags, to know which values have changed. | 444 | * uses the above definitions as flags, to know which values have changed. |
445 | * Also, in this manner, a Filesystem can look at only the values it cares | 445 | * Also, in this manner, a Filesystem can look at only the values it cares |
446 | * about. Basically, these are the attributes that the VFS layer can | 446 | * about. Basically, these are the attributes that the VFS layer can |
447 | * request to change from the FS layer. | 447 | * request to change from the FS layer. |
448 | * | 448 | * |
449 | * Derek Atkins <warlord@MIT.EDU> 94-10-20 | 449 | * Derek Atkins <warlord@MIT.EDU> 94-10-20 |
450 | */ | 450 | */ |
451 | struct iattr { | 451 | struct iattr { |
452 | unsigned int ia_valid; | 452 | unsigned int ia_valid; |
453 | umode_t ia_mode; | 453 | umode_t ia_mode; |
454 | uid_t ia_uid; | 454 | uid_t ia_uid; |
455 | gid_t ia_gid; | 455 | gid_t ia_gid; |
456 | loff_t ia_size; | 456 | loff_t ia_size; |
457 | struct timespec ia_atime; | 457 | struct timespec ia_atime; |
458 | struct timespec ia_mtime; | 458 | struct timespec ia_mtime; |
459 | struct timespec ia_ctime; | 459 | struct timespec ia_ctime; |
460 | 460 | ||
461 | /* | 461 | /* |
462 | * Not an attribute, but auxiliary info for filesystems wanting to | 462 | * Not an attribute, but auxiliary info for filesystems wanting to |
463 | * implement an ftruncate() like method. NOTE: filesystem should | 463 | * implement an ftruncate() like method. NOTE: filesystem should |
464 | * check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL). | 464 | * check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL). |
465 | */ | 465 | */ |
466 | struct file *ia_file; | 466 | struct file *ia_file; |
467 | }; | 467 | }; |
468 | 468 | ||
469 | /* | 469 | /* |
470 | * Includes for diskquotas. | 470 | * Includes for diskquotas. |
471 | */ | 471 | */ |
472 | #include <linux/quota.h> | 472 | #include <linux/quota.h> |
473 | 473 | ||
474 | /** | 474 | /** |
475 | * enum positive_aop_returns - aop return codes with specific semantics | 475 | * enum positive_aop_returns - aop return codes with specific semantics |
476 | * | 476 | * |
477 | * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has | 477 | * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has |
478 | * completed, that the page is still locked, and | 478 | * completed, that the page is still locked, and |
479 | * should be considered active. The VM uses this hint | 479 | * should be considered active. The VM uses this hint |
480 | * to return the page to the active list -- it won't | 480 | * to return the page to the active list -- it won't |
481 | * be a candidate for writeback again in the near | 481 | * be a candidate for writeback again in the near |
482 | * future. Other callers must be careful to unlock | 482 | * future. Other callers must be careful to unlock |
483 | * the page if they get this return. Returned by | 483 | * the page if they get this return. Returned by |
484 | * writepage(); | 484 | * writepage(); |
485 | * | 485 | * |
486 | * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has | 486 | * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has |
487 | * unlocked it and the page might have been truncated. | 487 | * unlocked it and the page might have been truncated. |
488 | * The caller should back up to acquiring a new page and | 488 | * The caller should back up to acquiring a new page and |
489 | * trying again. The aop will be taking reasonable | 489 | * trying again. The aop will be taking reasonable |
490 | * precautions not to livelock. If the caller held a page | 490 | * precautions not to livelock. If the caller held a page |
491 | * reference, it should drop it before retrying. Returned | 491 | * reference, it should drop it before retrying. Returned |
492 | * by readpage(). | 492 | * by readpage(). |
493 | * | 493 | * |
494 | * address_space_operation functions return these large constants to indicate | 494 | * address_space_operation functions return these large constants to indicate |
495 | * special semantics to the caller. These are much larger than the bytes in a | 495 | * special semantics to the caller. These are much larger than the bytes in a |
496 | * page to allow for functions that return the number of bytes operated on in a | 496 | * page to allow for functions that return the number of bytes operated on in a |
497 | * given page. | 497 | * given page. |
498 | */ | 498 | */ |
499 | 499 | ||
500 | enum positive_aop_returns { | 500 | enum positive_aop_returns { |
501 | AOP_WRITEPAGE_ACTIVATE = 0x80000, | 501 | AOP_WRITEPAGE_ACTIVATE = 0x80000, |
502 | AOP_TRUNCATED_PAGE = 0x80001, | 502 | AOP_TRUNCATED_PAGE = 0x80001, |
503 | }; | 503 | }; |
504 | 504 | ||
505 | #define AOP_FLAG_UNINTERRUPTIBLE 0x0001 /* will not do a short write */ | 505 | #define AOP_FLAG_UNINTERRUPTIBLE 0x0001 /* will not do a short write */ |
506 | #define AOP_FLAG_CONT_EXPAND 0x0002 /* called from cont_expand */ | 506 | #define AOP_FLAG_CONT_EXPAND 0x0002 /* called from cont_expand */ |
507 | #define AOP_FLAG_NOFS 0x0004 /* used by filesystem to direct | 507 | #define AOP_FLAG_NOFS 0x0004 /* used by filesystem to direct |
508 | * helper code (eg buffer layer) | 508 | * helper code (eg buffer layer) |
509 | * to clear GFP_FS from alloc */ | 509 | * to clear GFP_FS from alloc */ |
510 | 510 | ||
511 | /* | 511 | /* |
512 | * oh the beauties of C type declarations. | 512 | * oh the beauties of C type declarations. |
513 | */ | 513 | */ |
514 | struct page; | 514 | struct page; |
515 | struct address_space; | 515 | struct address_space; |
516 | struct writeback_control; | 516 | struct writeback_control; |
517 | 517 | ||
518 | struct iov_iter { | 518 | struct iov_iter { |
519 | const struct iovec *iov; | 519 | const struct iovec *iov; |
520 | unsigned long nr_segs; | 520 | unsigned long nr_segs; |
521 | size_t iov_offset; | 521 | size_t iov_offset; |
522 | size_t count; | 522 | size_t count; |
523 | }; | 523 | }; |
524 | 524 | ||
525 | size_t iov_iter_copy_from_user_atomic(struct page *page, | 525 | size_t iov_iter_copy_from_user_atomic(struct page *page, |
526 | struct iov_iter *i, unsigned long offset, size_t bytes); | 526 | struct iov_iter *i, unsigned long offset, size_t bytes); |
527 | size_t iov_iter_copy_from_user(struct page *page, | 527 | size_t iov_iter_copy_from_user(struct page *page, |
528 | struct iov_iter *i, unsigned long offset, size_t bytes); | 528 | struct iov_iter *i, unsigned long offset, size_t bytes); |
529 | void iov_iter_advance(struct iov_iter *i, size_t bytes); | 529 | void iov_iter_advance(struct iov_iter *i, size_t bytes); |
530 | int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); | 530 | int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); |
531 | size_t iov_iter_single_seg_count(struct iov_iter *i); | 531 | size_t iov_iter_single_seg_count(struct iov_iter *i); |
532 | 532 | ||
533 | static inline void iov_iter_init(struct iov_iter *i, | 533 | static inline void iov_iter_init(struct iov_iter *i, |
534 | const struct iovec *iov, unsigned long nr_segs, | 534 | const struct iovec *iov, unsigned long nr_segs, |
535 | size_t count, size_t written) | 535 | size_t count, size_t written) |
536 | { | 536 | { |
537 | i->iov = iov; | 537 | i->iov = iov; |
538 | i->nr_segs = nr_segs; | 538 | i->nr_segs = nr_segs; |
539 | i->iov_offset = 0; | 539 | i->iov_offset = 0; |
540 | i->count = count + written; | 540 | i->count = count + written; |
541 | 541 | ||
542 | iov_iter_advance(i, written); | 542 | iov_iter_advance(i, written); |
543 | } | 543 | } |
544 | 544 | ||
545 | static inline size_t iov_iter_count(struct iov_iter *i) | 545 | static inline size_t iov_iter_count(struct iov_iter *i) |
546 | { | 546 | { |
547 | return i->count; | 547 | return i->count; |
548 | } | 548 | } |
549 | 549 | ||
550 | /* | 550 | /* |
551 | * "descriptor" for what we're up to with a read. | 551 | * "descriptor" for what we're up to with a read. |
552 | * This allows us to use the same read code yet | 552 | * This allows us to use the same read code yet |
553 | * have multiple different users of the data that | 553 | * have multiple different users of the data that |
554 | * we read from a file. | 554 | * we read from a file. |
555 | * | 555 | * |
556 | * The simplest case just copies the data to user | 556 | * The simplest case just copies the data to user |
557 | * mode. | 557 | * mode. |
558 | */ | 558 | */ |
559 | typedef struct { | 559 | typedef struct { |
560 | size_t written; | 560 | size_t written; |
561 | size_t count; | 561 | size_t count; |
562 | union { | 562 | union { |
563 | char __user *buf; | 563 | char __user *buf; |
564 | void *data; | 564 | void *data; |
565 | } arg; | 565 | } arg; |
566 | int error; | 566 | int error; |
567 | } read_descriptor_t; | 567 | } read_descriptor_t; |
568 | 568 | ||
569 | typedef int (*read_actor_t)(read_descriptor_t *, struct page *, | 569 | typedef int (*read_actor_t)(read_descriptor_t *, struct page *, |
570 | unsigned long, unsigned long); | 570 | unsigned long, unsigned long); |
571 | 571 | ||
572 | struct address_space_operations { | 572 | struct address_space_operations { |
573 | int (*writepage)(struct page *page, struct writeback_control *wbc); | 573 | int (*writepage)(struct page *page, struct writeback_control *wbc); |
574 | int (*readpage)(struct file *, struct page *); | 574 | int (*readpage)(struct file *, struct page *); |
575 | void (*sync_page)(struct page *); | 575 | void (*sync_page)(struct page *); |
576 | 576 | ||
577 | /* Write back some dirty pages from this mapping. */ | 577 | /* Write back some dirty pages from this mapping. */ |
578 | int (*writepages)(struct address_space *, struct writeback_control *); | 578 | int (*writepages)(struct address_space *, struct writeback_control *); |
579 | 579 | ||
580 | /* Set a page dirty. Return true if this dirtied it */ | 580 | /* Set a page dirty. Return true if this dirtied it */ |
581 | int (*set_page_dirty)(struct page *page); | 581 | int (*set_page_dirty)(struct page *page); |
582 | 582 | ||
583 | int (*readpages)(struct file *filp, struct address_space *mapping, | 583 | int (*readpages)(struct file *filp, struct address_space *mapping, |
584 | struct list_head *pages, unsigned nr_pages); | 584 | struct list_head *pages, unsigned nr_pages); |
585 | 585 | ||
586 | int (*write_begin)(struct file *, struct address_space *mapping, | 586 | int (*write_begin)(struct file *, struct address_space *mapping, |
587 | loff_t pos, unsigned len, unsigned flags, | 587 | loff_t pos, unsigned len, unsigned flags, |
588 | struct page **pagep, void **fsdata); | 588 | struct page **pagep, void **fsdata); |
589 | int (*write_end)(struct file *, struct address_space *mapping, | 589 | int (*write_end)(struct file *, struct address_space *mapping, |
590 | loff_t pos, unsigned len, unsigned copied, | 590 | loff_t pos, unsigned len, unsigned copied, |
591 | struct page *page, void *fsdata); | 591 | struct page *page, void *fsdata); |
592 | 592 | ||
593 | /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ | 593 | /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ |
594 | sector_t (*bmap)(struct address_space *, sector_t); | 594 | sector_t (*bmap)(struct address_space *, sector_t); |
595 | void (*invalidatepage) (struct page *, unsigned long); | 595 | void (*invalidatepage) (struct page *, unsigned long); |
596 | int (*releasepage) (struct page *, gfp_t); | 596 | int (*releasepage) (struct page *, gfp_t); |
597 | ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, | 597 | ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, |
598 | loff_t offset, unsigned long nr_segs); | 598 | loff_t offset, unsigned long nr_segs); |
599 | int (*get_xip_mem)(struct address_space *, pgoff_t, int, | 599 | int (*get_xip_mem)(struct address_space *, pgoff_t, int, |
600 | void **, unsigned long *); | 600 | void **, unsigned long *); |
601 | /* migrate the contents of a page to the specified target */ | 601 | /* migrate the contents of a page to the specified target */ |
602 | int (*migratepage) (struct address_space *, | 602 | int (*migratepage) (struct address_space *, |
603 | struct page *, struct page *); | 603 | struct page *, struct page *); |
604 | int (*launder_page) (struct page *); | 604 | int (*launder_page) (struct page *); |
605 | int (*is_partially_uptodate) (struct page *, read_descriptor_t *, | 605 | int (*is_partially_uptodate) (struct page *, read_descriptor_t *, |
606 | unsigned long); | 606 | unsigned long); |
607 | int (*error_remove_page)(struct address_space *, struct page *); | 607 | int (*error_remove_page)(struct address_space *, struct page *); |
608 | }; | 608 | }; |
609 | 609 | ||
610 | /* | 610 | /* |
611 | * pagecache_write_begin/pagecache_write_end must be used by general code | 611 | * pagecache_write_begin/pagecache_write_end must be used by general code |
612 | * to write into the pagecache. | 612 | * to write into the pagecache. |
613 | */ | 613 | */ |
614 | int pagecache_write_begin(struct file *, struct address_space *mapping, | 614 | int pagecache_write_begin(struct file *, struct address_space *mapping, |
615 | loff_t pos, unsigned len, unsigned flags, | 615 | loff_t pos, unsigned len, unsigned flags, |
616 | struct page **pagep, void **fsdata); | 616 | struct page **pagep, void **fsdata); |
617 | 617 | ||
618 | int pagecache_write_end(struct file *, struct address_space *mapping, | 618 | int pagecache_write_end(struct file *, struct address_space *mapping, |
619 | loff_t pos, unsigned len, unsigned copied, | 619 | loff_t pos, unsigned len, unsigned copied, |
620 | struct page *page, void *fsdata); | 620 | struct page *page, void *fsdata); |
621 | 621 | ||
622 | struct backing_dev_info; | 622 | struct backing_dev_info; |
623 | struct address_space { | 623 | struct address_space { |
624 | struct inode *host; /* owner: inode, block_device */ | 624 | struct inode *host; /* owner: inode, block_device */ |
625 | struct radix_tree_root page_tree; /* radix tree of all pages */ | 625 | struct radix_tree_root page_tree; /* radix tree of all pages */ |
626 | spinlock_t tree_lock; /* and lock protecting it */ | 626 | spinlock_t tree_lock; /* and lock protecting it */ |
627 | unsigned int i_mmap_writable;/* count VM_SHARED mappings */ | 627 | unsigned int i_mmap_writable;/* count VM_SHARED mappings */ |
628 | struct prio_tree_root i_mmap; /* tree of private and shared mappings */ | 628 | struct prio_tree_root i_mmap; /* tree of private and shared mappings */ |
629 | struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ | 629 | struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ |
630 | spinlock_t i_mmap_lock; /* protect tree, count, list */ | 630 | spinlock_t i_mmap_lock; /* protect tree, count, list */ |
631 | unsigned int truncate_count; /* Cover race condition with truncate */ | 631 | unsigned int truncate_count; /* Cover race condition with truncate */ |
632 | unsigned long nrpages; /* number of total pages */ | 632 | unsigned long nrpages; /* number of total pages */ |
633 | pgoff_t writeback_index;/* writeback starts here */ | 633 | pgoff_t writeback_index;/* writeback starts here */ |
634 | const struct address_space_operations *a_ops; /* methods */ | 634 | const struct address_space_operations *a_ops; /* methods */ |
635 | unsigned long flags; /* error bits/gfp mask */ | 635 | unsigned long flags; /* error bits/gfp mask */ |
636 | struct backing_dev_info *backing_dev_info; /* device readahead, etc */ | 636 | struct backing_dev_info *backing_dev_info; /* device readahead, etc */ |
637 | spinlock_t private_lock; /* for use by the address_space */ | 637 | spinlock_t private_lock; /* for use by the address_space */ |
638 | struct list_head private_list; /* ditto */ | 638 | struct list_head private_list; /* ditto */ |
639 | struct address_space *assoc_mapping; /* ditto */ | 639 | struct address_space *assoc_mapping; /* ditto */ |
640 | } __attribute__((aligned(sizeof(long)))); | 640 | } __attribute__((aligned(sizeof(long)))); |
641 | /* | 641 | /* |
642 | * On most architectures that alignment is already the case; but | 642 | * On most architectures that alignment is already the case; but |
643 | * must be enforced here for CRIS, to let the least significant bit | 643 | * must be enforced here for CRIS, to let the least significant bit |
644 | * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON. | 644 | * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON. |
645 | */ | 645 | */ |
646 | 646 | ||
647 | struct block_device { | 647 | struct block_device { |
648 | dev_t bd_dev; /* not a kdev_t - it's a search key */ | 648 | dev_t bd_dev; /* not a kdev_t - it's a search key */ |
649 | struct inode * bd_inode; /* will die */ | 649 | struct inode * bd_inode; /* will die */ |
650 | struct super_block * bd_super; | 650 | struct super_block * bd_super; |
651 | int bd_openers; | 651 | int bd_openers; |
652 | struct mutex bd_mutex; /* open/close mutex */ | 652 | struct mutex bd_mutex; /* open/close mutex */ |
653 | struct list_head bd_inodes; | 653 | struct list_head bd_inodes; |
654 | void * bd_holder; | 654 | void * bd_holder; |
655 | int bd_holders; | 655 | int bd_holders; |
656 | #ifdef CONFIG_SYSFS | 656 | #ifdef CONFIG_SYSFS |
657 | struct list_head bd_holder_list; | 657 | struct list_head bd_holder_list; |
658 | #endif | 658 | #endif |
659 | struct block_device * bd_contains; | 659 | struct block_device * bd_contains; |
660 | unsigned bd_block_size; | 660 | unsigned bd_block_size; |
661 | struct hd_struct * bd_part; | 661 | struct hd_struct * bd_part; |
662 | /* number of times partitions within this device have been opened. */ | 662 | /* number of times partitions within this device have been opened. */ |
663 | unsigned bd_part_count; | 663 | unsigned bd_part_count; |
664 | int bd_invalidated; | 664 | int bd_invalidated; |
665 | struct gendisk * bd_disk; | 665 | struct gendisk * bd_disk; |
666 | struct list_head bd_list; | 666 | struct list_head bd_list; |
667 | /* | 667 | /* |
668 | * Private data. You must have bd_claim'ed the block_device | 668 | * Private data. You must have bd_claim'ed the block_device |
669 | * to use this. NOTE: bd_claim allows an owner to claim | 669 | * to use this. NOTE: bd_claim allows an owner to claim |
670 | * the same device multiple times, the owner must take special | 670 | * the same device multiple times, the owner must take special |
671 | * care to not mess up bd_private for that case. | 671 | * care to not mess up bd_private for that case. |
672 | */ | 672 | */ |
673 | unsigned long bd_private; | 673 | unsigned long bd_private; |
674 | 674 | ||
675 | /* The counter of freeze processes */ | 675 | /* The counter of freeze processes */ |
676 | int bd_fsfreeze_count; | 676 | int bd_fsfreeze_count; |
677 | /* Mutex for freeze */ | 677 | /* Mutex for freeze */ |
678 | struct mutex bd_fsfreeze_mutex; | 678 | struct mutex bd_fsfreeze_mutex; |
679 | }; | 679 | }; |
680 | 680 | ||
681 | /* | 681 | /* |
682 | * Radix-tree tags, for tagging dirty and writeback pages within the pagecache | 682 | * Radix-tree tags, for tagging dirty and writeback pages within the pagecache |
683 | * radix trees | 683 | * radix trees |
684 | */ | 684 | */ |
685 | #define PAGECACHE_TAG_DIRTY 0 | 685 | #define PAGECACHE_TAG_DIRTY 0 |
686 | #define PAGECACHE_TAG_WRITEBACK 1 | 686 | #define PAGECACHE_TAG_WRITEBACK 1 |
687 | 687 | ||
688 | int mapping_tagged(struct address_space *mapping, int tag); | 688 | int mapping_tagged(struct address_space *mapping, int tag); |
689 | 689 | ||
690 | /* | 690 | /* |
691 | * Might pages of this file be mapped into userspace? | 691 | * Might pages of this file be mapped into userspace? |
692 | */ | 692 | */ |
693 | static inline int mapping_mapped(struct address_space *mapping) | 693 | static inline int mapping_mapped(struct address_space *mapping) |
694 | { | 694 | { |
695 | return !prio_tree_empty(&mapping->i_mmap) || | 695 | return !prio_tree_empty(&mapping->i_mmap) || |
696 | !list_empty(&mapping->i_mmap_nonlinear); | 696 | !list_empty(&mapping->i_mmap_nonlinear); |
697 | } | 697 | } |
698 | 698 | ||
699 | /* | 699 | /* |
700 | * Might pages of this file have been modified in userspace? | 700 | * Might pages of this file have been modified in userspace? |
701 | * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff | 701 | * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff |
702 | * marks vma as VM_SHARED if it is shared, and the file was opened for | 702 | * marks vma as VM_SHARED if it is shared, and the file was opened for |
703 | * writing i.e. vma may be mprotected writable even if now readonly. | 703 | * writing i.e. vma may be mprotected writable even if now readonly. |
704 | */ | 704 | */ |
705 | static inline int mapping_writably_mapped(struct address_space *mapping) | 705 | static inline int mapping_writably_mapped(struct address_space *mapping) |
706 | { | 706 | { |
707 | return mapping->i_mmap_writable != 0; | 707 | return mapping->i_mmap_writable != 0; |
708 | } | 708 | } |
709 | 709 | ||
710 | /* | 710 | /* |
711 | * Use sequence counter to get consistent i_size on 32-bit processors. | 711 | * Use sequence counter to get consistent i_size on 32-bit processors. |
712 | */ | 712 | */ |
713 | #if BITS_PER_LONG==32 && defined(CONFIG_SMP) | 713 | #if BITS_PER_LONG==32 && defined(CONFIG_SMP) |
714 | #include <linux/seqlock.h> | 714 | #include <linux/seqlock.h> |
715 | #define __NEED_I_SIZE_ORDERED | 715 | #define __NEED_I_SIZE_ORDERED |
716 | #define i_size_ordered_init(inode) seqcount_init(&inode->i_size_seqcount) | 716 | #define i_size_ordered_init(inode) seqcount_init(&inode->i_size_seqcount) |
717 | #else | 717 | #else |
718 | #define i_size_ordered_init(inode) do { } while (0) | 718 | #define i_size_ordered_init(inode) do { } while (0) |
719 | #endif | 719 | #endif |
720 | 720 | ||
721 | struct posix_acl; | 721 | struct posix_acl; |
722 | #define ACL_NOT_CACHED ((void *)(-1)) | 722 | #define ACL_NOT_CACHED ((void *)(-1)) |
723 | 723 | ||
724 | struct inode { | 724 | struct inode { |
725 | struct hlist_node i_hash; | 725 | struct hlist_node i_hash; |
726 | struct list_head i_list; /* backing dev IO list */ | 726 | struct list_head i_list; /* backing dev IO list */ |
727 | struct list_head i_sb_list; | 727 | struct list_head i_sb_list; |
728 | struct list_head i_dentry; | 728 | struct list_head i_dentry; |
729 | unsigned long i_ino; | 729 | unsigned long i_ino; |
730 | atomic_t i_count; | 730 | atomic_t i_count; |
731 | unsigned int i_nlink; | 731 | unsigned int i_nlink; |
732 | uid_t i_uid; | 732 | uid_t i_uid; |
733 | gid_t i_gid; | 733 | gid_t i_gid; |
734 | dev_t i_rdev; | 734 | dev_t i_rdev; |
735 | unsigned int i_blkbits; | 735 | unsigned int i_blkbits; |
736 | u64 i_version; | 736 | u64 i_version; |
737 | loff_t i_size; | 737 | loff_t i_size; |
738 | #ifdef __NEED_I_SIZE_ORDERED | 738 | #ifdef __NEED_I_SIZE_ORDERED |
739 | seqcount_t i_size_seqcount; | 739 | seqcount_t i_size_seqcount; |
740 | #endif | 740 | #endif |
741 | struct timespec i_atime; | 741 | struct timespec i_atime; |
742 | struct timespec i_mtime; | 742 | struct timespec i_mtime; |
743 | struct timespec i_ctime; | 743 | struct timespec i_ctime; |
744 | blkcnt_t i_blocks; | 744 | blkcnt_t i_blocks; |
745 | unsigned short i_bytes; | 745 | unsigned short i_bytes; |
746 | umode_t i_mode; | 746 | umode_t i_mode; |
747 | spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ | 747 | spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ |
748 | struct mutex i_mutex; | 748 | struct mutex i_mutex; |
749 | struct rw_semaphore i_alloc_sem; | 749 | struct rw_semaphore i_alloc_sem; |
750 | const struct inode_operations *i_op; | 750 | const struct inode_operations *i_op; |
751 | const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ | 751 | const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ |
752 | struct super_block *i_sb; | 752 | struct super_block *i_sb; |
753 | struct file_lock *i_flock; | 753 | struct file_lock *i_flock; |
754 | struct address_space *i_mapping; | 754 | struct address_space *i_mapping; |
755 | struct address_space i_data; | 755 | struct address_space i_data; |
756 | #ifdef CONFIG_QUOTA | 756 | #ifdef CONFIG_QUOTA |
757 | struct dquot *i_dquot[MAXQUOTAS]; | 757 | struct dquot *i_dquot[MAXQUOTAS]; |
758 | #endif | 758 | #endif |
759 | struct list_head i_devices; | 759 | struct list_head i_devices; |
760 | union { | 760 | union { |
761 | struct pipe_inode_info *i_pipe; | 761 | struct pipe_inode_info *i_pipe; |
762 | struct block_device *i_bdev; | 762 | struct block_device *i_bdev; |
763 | struct cdev *i_cdev; | 763 | struct cdev *i_cdev; |
764 | }; | 764 | }; |
765 | 765 | ||
766 | __u32 i_generation; | 766 | __u32 i_generation; |
767 | 767 | ||
768 | #ifdef CONFIG_FSNOTIFY | 768 | #ifdef CONFIG_FSNOTIFY |
769 | __u32 i_fsnotify_mask; /* all events this inode cares about */ | 769 | __u32 i_fsnotify_mask; /* all events this inode cares about */ |
770 | struct hlist_head i_fsnotify_mark_entries; /* fsnotify mark entries */ | 770 | struct hlist_head i_fsnotify_mark_entries; /* fsnotify mark entries */ |
771 | #endif | 771 | #endif |
772 | 772 | ||
773 | #ifdef CONFIG_INOTIFY | 773 | #ifdef CONFIG_INOTIFY |
774 | struct list_head inotify_watches; /* watches on this inode */ | 774 | struct list_head inotify_watches; /* watches on this inode */ |
775 | struct mutex inotify_mutex; /* protects the watches list */ | 775 | struct mutex inotify_mutex; /* protects the watches list */ |
776 | #endif | 776 | #endif |
777 | 777 | ||
778 | unsigned long i_state; | 778 | unsigned long i_state; |
779 | unsigned long dirtied_when; /* jiffies of first dirtying */ | 779 | unsigned long dirtied_when; /* jiffies of first dirtying */ |
780 | 780 | ||
781 | unsigned int i_flags; | 781 | unsigned int i_flags; |
782 | 782 | ||
783 | atomic_t i_writecount; | 783 | atomic_t i_writecount; |
784 | #ifdef CONFIG_SECURITY | 784 | #ifdef CONFIG_SECURITY |
785 | void *i_security; | 785 | void *i_security; |
786 | #endif | 786 | #endif |
787 | #ifdef CONFIG_FS_POSIX_ACL | 787 | #ifdef CONFIG_FS_POSIX_ACL |
788 | struct posix_acl *i_acl; | 788 | struct posix_acl *i_acl; |
789 | struct posix_acl *i_default_acl; | 789 | struct posix_acl *i_default_acl; |
790 | #endif | 790 | #endif |
791 | void *i_private; /* fs or device private pointer */ | 791 | void *i_private; /* fs or device private pointer */ |
792 | }; | 792 | }; |
793 | 793 | ||
794 | /* | 794 | /* |
795 | * inode->i_mutex nesting subclasses for the lock validator: | 795 | * inode->i_mutex nesting subclasses for the lock validator: |
796 | * | 796 | * |
797 | * 0: the object of the current VFS operation | 797 | * 0: the object of the current VFS operation |
798 | * 1: parent | 798 | * 1: parent |
799 | * 2: child/target | 799 | * 2: child/target |
800 | * 3: quota file | 800 | * 3: quota file |
801 | * | 801 | * |
802 | * The locking order between these classes is | 802 | * The locking order between these classes is |
803 | * parent -> child -> normal -> xattr -> quota | 803 | * parent -> child -> normal -> xattr -> quota |
804 | */ | 804 | */ |
805 | enum inode_i_mutex_lock_class | 805 | enum inode_i_mutex_lock_class |
806 | { | 806 | { |
807 | I_MUTEX_NORMAL, | 807 | I_MUTEX_NORMAL, |
808 | I_MUTEX_PARENT, | 808 | I_MUTEX_PARENT, |
809 | I_MUTEX_CHILD, | 809 | I_MUTEX_CHILD, |
810 | I_MUTEX_XATTR, | 810 | I_MUTEX_XATTR, |
811 | I_MUTEX_QUOTA | 811 | I_MUTEX_QUOTA |
812 | }; | 812 | }; |
813 | 813 | ||
814 | /* | 814 | /* |
815 | * NOTE: in a 32bit arch with a preemptable kernel and | 815 | * NOTE: in a 32bit arch with a preemptable kernel and |
816 | * an UP compile the i_size_read/write must be atomic | 816 | * an UP compile the i_size_read/write must be atomic |
817 | * with respect to the local cpu (unlike with preempt disabled), | 817 | * with respect to the local cpu (unlike with preempt disabled), |
818 | * but they don't need to be atomic with respect to other cpus like in | 818 | * but they don't need to be atomic with respect to other cpus like in |
819 | * true SMP (so they need to either locally disable irq around | 819 | * true SMP (so they need to either locally disable irq around |
820 | * the read or for example on x86 they can be still implemented as a | 820 | * the read or for example on x86 they can be still implemented as a |
821 | * cmpxchg8b without the need of the lock prefix). For SMP compiles | 821 | * cmpxchg8b without the need of the lock prefix). For SMP compiles |
822 | * and 64bit archs it makes no difference if preempt is enabled or not. | 822 | * and 64bit archs it makes no difference if preempt is enabled or not. |
823 | */ | 823 | */ |
824 | static inline loff_t i_size_read(const struct inode *inode) | 824 | static inline loff_t i_size_read(const struct inode *inode) |
825 | { | 825 | { |
826 | #if BITS_PER_LONG==32 && defined(CONFIG_SMP) | 826 | #if BITS_PER_LONG==32 && defined(CONFIG_SMP) |
827 | loff_t i_size; | 827 | loff_t i_size; |
828 | unsigned int seq; | 828 | unsigned int seq; |
829 | 829 | ||
830 | do { | 830 | do { |
831 | seq = read_seqcount_begin(&inode->i_size_seqcount); | 831 | seq = read_seqcount_begin(&inode->i_size_seqcount); |
832 | i_size = inode->i_size; | 832 | i_size = inode->i_size; |
833 | } while (read_seqcount_retry(&inode->i_size_seqcount, seq)); | 833 | } while (read_seqcount_retry(&inode->i_size_seqcount, seq)); |
834 | return i_size; | 834 | return i_size; |
835 | #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) | 835 | #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) |
836 | loff_t i_size; | 836 | loff_t i_size; |
837 | 837 | ||
838 | preempt_disable(); | 838 | preempt_disable(); |
839 | i_size = inode->i_size; | 839 | i_size = inode->i_size; |
840 | preempt_enable(); | 840 | preempt_enable(); |
841 | return i_size; | 841 | return i_size; |
842 | #else | 842 | #else |
843 | return inode->i_size; | 843 | return inode->i_size; |
844 | #endif | 844 | #endif |
845 | } | 845 | } |
846 | 846 | ||
847 | /* | 847 | /* |
848 | * NOTE: unlike i_size_read(), i_size_write() does need locking around it | 848 | * NOTE: unlike i_size_read(), i_size_write() does need locking around it |
849 | * (normally i_mutex), otherwise on 32bit/SMP an update of i_size_seqcount | 849 | * (normally i_mutex), otherwise on 32bit/SMP an update of i_size_seqcount |
850 | * can be lost, resulting in subsequent i_size_read() calls spinning forever. | 850 | * can be lost, resulting in subsequent i_size_read() calls spinning forever. |
851 | */ | 851 | */ |
852 | static inline void i_size_write(struct inode *inode, loff_t i_size) | 852 | static inline void i_size_write(struct inode *inode, loff_t i_size) |
853 | { | 853 | { |
854 | #if BITS_PER_LONG==32 && defined(CONFIG_SMP) | 854 | #if BITS_PER_LONG==32 && defined(CONFIG_SMP) |
855 | write_seqcount_begin(&inode->i_size_seqcount); | 855 | write_seqcount_begin(&inode->i_size_seqcount); |
856 | inode->i_size = i_size; | 856 | inode->i_size = i_size; |
857 | write_seqcount_end(&inode->i_size_seqcount); | 857 | write_seqcount_end(&inode->i_size_seqcount); |
858 | #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) | 858 | #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) |
859 | preempt_disable(); | 859 | preempt_disable(); |
860 | inode->i_size = i_size; | 860 | inode->i_size = i_size; |
861 | preempt_enable(); | 861 | preempt_enable(); |
862 | #else | 862 | #else |
863 | inode->i_size = i_size; | 863 | inode->i_size = i_size; |
864 | #endif | 864 | #endif |
865 | } | 865 | } |
866 | 866 | ||
867 | static inline unsigned iminor(const struct inode *inode) | 867 | static inline unsigned iminor(const struct inode *inode) |
868 | { | 868 | { |
869 | return MINOR(inode->i_rdev); | 869 | return MINOR(inode->i_rdev); |
870 | } | 870 | } |
871 | 871 | ||
872 | static inline unsigned imajor(const struct inode *inode) | 872 | static inline unsigned imajor(const struct inode *inode) |
873 | { | 873 | { |
874 | return MAJOR(inode->i_rdev); | 874 | return MAJOR(inode->i_rdev); |
875 | } | 875 | } |
876 | 876 | ||
877 | extern struct block_device *I_BDEV(struct inode *inode); | 877 | extern struct block_device *I_BDEV(struct inode *inode); |
878 | 878 | ||
879 | struct fown_struct { | 879 | struct fown_struct { |
880 | rwlock_t lock; /* protects pid, uid, euid fields */ | 880 | rwlock_t lock; /* protects pid, uid, euid fields */ |
881 | struct pid *pid; /* pid or -pgrp where SIGIO should be sent */ | 881 | struct pid *pid; /* pid or -pgrp where SIGIO should be sent */ |
882 | enum pid_type pid_type; /* Kind of process group SIGIO should be sent to */ | 882 | enum pid_type pid_type; /* Kind of process group SIGIO should be sent to */ |
883 | uid_t uid, euid; /* uid/euid of process setting the owner */ | 883 | uid_t uid, euid; /* uid/euid of process setting the owner */ |
884 | int signum; /* posix.1b rt signal to be delivered on IO */ | 884 | int signum; /* posix.1b rt signal to be delivered on IO */ |
885 | }; | 885 | }; |
886 | 886 | ||
887 | /* | 887 | /* |
888 | * Track a single file's readahead state | 888 | * Track a single file's readahead state |
889 | */ | 889 | */ |
890 | struct file_ra_state { | 890 | struct file_ra_state { |
891 | pgoff_t start; /* where readahead started */ | 891 | pgoff_t start; /* where readahead started */ |
892 | unsigned int size; /* # of readahead pages */ | 892 | unsigned int size; /* # of readahead pages */ |
893 | unsigned int async_size; /* do asynchronous readahead when | 893 | unsigned int async_size; /* do asynchronous readahead when |
894 | there are only # of pages ahead */ | 894 | there are only # of pages ahead */ |
895 | 895 | ||
896 | unsigned int ra_pages; /* Maximum readahead window */ | 896 | unsigned int ra_pages; /* Maximum readahead window */ |
897 | unsigned int mmap_miss; /* Cache miss stat for mmap accesses */ | 897 | unsigned int mmap_miss; /* Cache miss stat for mmap accesses */ |
898 | loff_t prev_pos; /* Cache last read() position */ | 898 | loff_t prev_pos; /* Cache last read() position */ |
899 | }; | 899 | }; |
900 | 900 | ||
901 | /* | 901 | /* |
902 | * Check if @index falls in the readahead windows. | 902 | * Check if @index falls in the readahead windows. |
903 | */ | 903 | */ |
904 | static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) | 904 | static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) |
905 | { | 905 | { |
906 | return (index >= ra->start && | 906 | return (index >= ra->start && |
907 | index < ra->start + ra->size); | 907 | index < ra->start + ra->size); |
908 | } | 908 | } |
909 | 909 | ||
910 | #define FILE_MNT_WRITE_TAKEN 1 | 910 | #define FILE_MNT_WRITE_TAKEN 1 |
911 | #define FILE_MNT_WRITE_RELEASED 2 | 911 | #define FILE_MNT_WRITE_RELEASED 2 |
912 | 912 | ||
913 | struct file { | 913 | struct file { |
914 | /* | 914 | /* |
915 | * fu_list becomes invalid after file_free is called and queued via | 915 | * fu_list becomes invalid after file_free is called and queued via |
916 | * fu_rcuhead for RCU freeing | 916 | * fu_rcuhead for RCU freeing |
917 | */ | 917 | */ |
918 | union { | 918 | union { |
919 | struct list_head fu_list; | 919 | struct list_head fu_list; |
920 | struct rcu_head fu_rcuhead; | 920 | struct rcu_head fu_rcuhead; |
921 | } f_u; | 921 | } f_u; |
922 | struct path f_path; | 922 | struct path f_path; |
923 | #define f_dentry f_path.dentry | 923 | #define f_dentry f_path.dentry |
924 | #define f_vfsmnt f_path.mnt | 924 | #define f_vfsmnt f_path.mnt |
925 | const struct file_operations *f_op; | 925 | const struct file_operations *f_op; |
926 | spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */ | 926 | spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */ |
927 | atomic_long_t f_count; | 927 | atomic_long_t f_count; |
928 | unsigned int f_flags; | 928 | unsigned int f_flags; |
929 | fmode_t f_mode; | 929 | fmode_t f_mode; |
930 | loff_t f_pos; | 930 | loff_t f_pos; |
931 | struct fown_struct f_owner; | 931 | struct fown_struct f_owner; |
932 | const struct cred *f_cred; | 932 | const struct cred *f_cred; |
933 | struct file_ra_state f_ra; | 933 | struct file_ra_state f_ra; |
934 | 934 | ||
935 | u64 f_version; | 935 | u64 f_version; |
936 | #ifdef CONFIG_SECURITY | 936 | #ifdef CONFIG_SECURITY |
937 | void *f_security; | 937 | void *f_security; |
938 | #endif | 938 | #endif |
939 | /* needed for tty driver, and maybe others */ | 939 | /* needed for tty driver, and maybe others */ |
940 | void *private_data; | 940 | void *private_data; |
941 | 941 | ||
942 | #ifdef CONFIG_EPOLL | 942 | #ifdef CONFIG_EPOLL |
943 | /* Used by fs/eventpoll.c to link all the hooks to this file */ | 943 | /* Used by fs/eventpoll.c to link all the hooks to this file */ |
944 | struct list_head f_ep_links; | 944 | struct list_head f_ep_links; |
945 | #endif /* #ifdef CONFIG_EPOLL */ | 945 | #endif /* #ifdef CONFIG_EPOLL */ |
946 | struct address_space *f_mapping; | 946 | struct address_space *f_mapping; |
947 | #ifdef CONFIG_DEBUG_WRITECOUNT | 947 | #ifdef CONFIG_DEBUG_WRITECOUNT |
948 | unsigned long f_mnt_write_state; | 948 | unsigned long f_mnt_write_state; |
949 | #endif | 949 | #endif |
950 | }; | 950 | }; |
951 | extern spinlock_t files_lock; | 951 | extern spinlock_t files_lock; |
952 | #define file_list_lock() spin_lock(&files_lock); | 952 | #define file_list_lock() spin_lock(&files_lock); |
953 | #define file_list_unlock() spin_unlock(&files_lock); | 953 | #define file_list_unlock() spin_unlock(&files_lock); |
954 | 954 | ||
955 | #define get_file(x) atomic_long_inc(&(x)->f_count) | 955 | #define get_file(x) atomic_long_inc(&(x)->f_count) |
956 | #define file_count(x) atomic_long_read(&(x)->f_count) | 956 | #define file_count(x) atomic_long_read(&(x)->f_count) |
957 | 957 | ||
958 | #ifdef CONFIG_DEBUG_WRITECOUNT | 958 | #ifdef CONFIG_DEBUG_WRITECOUNT |
959 | static inline void file_take_write(struct file *f) | 959 | static inline void file_take_write(struct file *f) |
960 | { | 960 | { |
961 | WARN_ON(f->f_mnt_write_state != 0); | 961 | WARN_ON(f->f_mnt_write_state != 0); |
962 | f->f_mnt_write_state = FILE_MNT_WRITE_TAKEN; | 962 | f->f_mnt_write_state = FILE_MNT_WRITE_TAKEN; |
963 | } | 963 | } |
964 | static inline void file_release_write(struct file *f) | 964 | static inline void file_release_write(struct file *f) |
965 | { | 965 | { |
966 | f->f_mnt_write_state |= FILE_MNT_WRITE_RELEASED; | 966 | f->f_mnt_write_state |= FILE_MNT_WRITE_RELEASED; |
967 | } | 967 | } |
968 | static inline void file_reset_write(struct file *f) | 968 | static inline void file_reset_write(struct file *f) |
969 | { | 969 | { |
970 | f->f_mnt_write_state = 0; | 970 | f->f_mnt_write_state = 0; |
971 | } | 971 | } |
972 | static inline void file_check_state(struct file *f) | 972 | static inline void file_check_state(struct file *f) |
973 | { | 973 | { |
974 | /* | 974 | /* |
975 | * At this point, either both or neither of these bits | 975 | * At this point, either both or neither of these bits |
976 | * should be set. | 976 | * should be set. |
977 | */ | 977 | */ |
978 | WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN); | 978 | WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN); |
979 | WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_RELEASED); | 979 | WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_RELEASED); |
980 | } | 980 | } |
981 | static inline int file_check_writeable(struct file *f) | 981 | static inline int file_check_writeable(struct file *f) |
982 | { | 982 | { |
983 | if (f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN) | 983 | if (f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN) |
984 | return 0; | 984 | return 0; |
985 | printk(KERN_WARNING "writeable file with no " | 985 | printk(KERN_WARNING "writeable file with no " |
986 | "mnt_want_write()\n"); | 986 | "mnt_want_write()\n"); |
987 | WARN_ON(1); | 987 | WARN_ON(1); |
988 | return -EINVAL; | 988 | return -EINVAL; |
989 | } | 989 | } |
990 | #else /* !CONFIG_DEBUG_WRITECOUNT */ | 990 | #else /* !CONFIG_DEBUG_WRITECOUNT */ |
991 | static inline void file_take_write(struct file *filp) {} | 991 | static inline void file_take_write(struct file *filp) {} |
992 | static inline void file_release_write(struct file *filp) {} | 992 | static inline void file_release_write(struct file *filp) {} |
993 | static inline void file_reset_write(struct file *filp) {} | 993 | static inline void file_reset_write(struct file *filp) {} |
994 | static inline void file_check_state(struct file *filp) {} | 994 | static inline void file_check_state(struct file *filp) {} |
995 | static inline int file_check_writeable(struct file *filp) | 995 | static inline int file_check_writeable(struct file *filp) |
996 | { | 996 | { |
997 | return 0; | 997 | return 0; |
998 | } | 998 | } |
999 | #endif /* CONFIG_DEBUG_WRITECOUNT */ | 999 | #endif /* CONFIG_DEBUG_WRITECOUNT */ |
1000 | 1000 | ||
1001 | #define MAX_NON_LFS ((1UL<<31) - 1) | 1001 | #define MAX_NON_LFS ((1UL<<31) - 1) |
1002 | 1002 | ||
1003 | /* Page cache limit. The filesystems should put that into their s_maxbytes | 1003 | /* Page cache limit. The filesystems should put that into their s_maxbytes |
1004 | limits, otherwise bad things can happen in VM. */ | 1004 | limits, otherwise bad things can happen in VM. */ |
1005 | #if BITS_PER_LONG==32 | 1005 | #if BITS_PER_LONG==32 |
1006 | #define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) | 1006 | #define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) |
1007 | #elif BITS_PER_LONG==64 | 1007 | #elif BITS_PER_LONG==64 |
1008 | #define MAX_LFS_FILESIZE 0x7fffffffffffffffUL | 1008 | #define MAX_LFS_FILESIZE 0x7fffffffffffffffUL |
1009 | #endif | 1009 | #endif |
1010 | 1010 | ||
1011 | #define FL_POSIX 1 | 1011 | #define FL_POSIX 1 |
1012 | #define FL_FLOCK 2 | 1012 | #define FL_FLOCK 2 |
1013 | #define FL_ACCESS 8 /* not trying to lock, just looking */ | 1013 | #define FL_ACCESS 8 /* not trying to lock, just looking */ |
1014 | #define FL_EXISTS 16 /* when unlocking, test for existence */ | 1014 | #define FL_EXISTS 16 /* when unlocking, test for existence */ |
1015 | #define FL_LEASE 32 /* lease held on this file */ | 1015 | #define FL_LEASE 32 /* lease held on this file */ |
1016 | #define FL_CLOSE 64 /* unlock on close */ | 1016 | #define FL_CLOSE 64 /* unlock on close */ |
1017 | #define FL_SLEEP 128 /* A blocking lock */ | 1017 | #define FL_SLEEP 128 /* A blocking lock */ |
1018 | 1018 | ||
1019 | /* | 1019 | /* |
1020 | * Special return value from posix_lock_file() and vfs_lock_file() for | 1020 | * Special return value from posix_lock_file() and vfs_lock_file() for |
1021 | * asynchronous locking. | 1021 | * asynchronous locking. |
1022 | */ | 1022 | */ |
1023 | #define FILE_LOCK_DEFERRED 1 | 1023 | #define FILE_LOCK_DEFERRED 1 |
1024 | 1024 | ||
1025 | /* | 1025 | /* |
1026 | * The POSIX file lock owner is determined by | 1026 | * The POSIX file lock owner is determined by |
1027 | * the "struct files_struct" in the thread group | 1027 | * the "struct files_struct" in the thread group |
1028 | * (or NULL for no owner - BSD locks). | 1028 | * (or NULL for no owner - BSD locks). |
1029 | * | 1029 | * |
1030 | * Lockd stuffs a "host" pointer into this. | 1030 | * Lockd stuffs a "host" pointer into this. |
1031 | */ | 1031 | */ |
1032 | typedef struct files_struct *fl_owner_t; | 1032 | typedef struct files_struct *fl_owner_t; |
1033 | 1033 | ||
1034 | struct file_lock_operations { | 1034 | struct file_lock_operations { |
1035 | void (*fl_copy_lock)(struct file_lock *, struct file_lock *); | 1035 | void (*fl_copy_lock)(struct file_lock *, struct file_lock *); |
1036 | void (*fl_release_private)(struct file_lock *); | 1036 | void (*fl_release_private)(struct file_lock *); |
1037 | }; | 1037 | }; |
1038 | 1038 | ||
1039 | struct lock_manager_operations { | 1039 | struct lock_manager_operations { |
1040 | int (*fl_compare_owner)(struct file_lock *, struct file_lock *); | 1040 | int (*fl_compare_owner)(struct file_lock *, struct file_lock *); |
1041 | void (*fl_notify)(struct file_lock *); /* unblock callback */ | 1041 | void (*fl_notify)(struct file_lock *); /* unblock callback */ |
1042 | int (*fl_grant)(struct file_lock *, struct file_lock *, int); | 1042 | int (*fl_grant)(struct file_lock *, struct file_lock *, int); |
1043 | void (*fl_copy_lock)(struct file_lock *, struct file_lock *); | 1043 | void (*fl_copy_lock)(struct file_lock *, struct file_lock *); |
1044 | void (*fl_release_private)(struct file_lock *); | 1044 | void (*fl_release_private)(struct file_lock *); |
1045 | void (*fl_break)(struct file_lock *); | 1045 | void (*fl_break)(struct file_lock *); |
1046 | int (*fl_mylease)(struct file_lock *, struct file_lock *); | 1046 | int (*fl_mylease)(struct file_lock *, struct file_lock *); |
1047 | int (*fl_change)(struct file_lock **, int); | 1047 | int (*fl_change)(struct file_lock **, int); |
1048 | }; | 1048 | }; |
1049 | 1049 | ||
1050 | struct lock_manager { | 1050 | struct lock_manager { |
1051 | struct list_head list; | 1051 | struct list_head list; |
1052 | }; | 1052 | }; |
1053 | 1053 | ||
1054 | void locks_start_grace(struct lock_manager *); | 1054 | void locks_start_grace(struct lock_manager *); |
1055 | void locks_end_grace(struct lock_manager *); | 1055 | void locks_end_grace(struct lock_manager *); |
1056 | int locks_in_grace(void); | 1056 | int locks_in_grace(void); |
1057 | 1057 | ||
1058 | /* that will die - we need it for nfs_lock_info */ | 1058 | /* that will die - we need it for nfs_lock_info */ |
1059 | #include <linux/nfs_fs_i.h> | 1059 | #include <linux/nfs_fs_i.h> |
1060 | 1060 | ||
1061 | struct file_lock { | 1061 | struct file_lock { |
1062 | struct file_lock *fl_next; /* singly linked list for this inode */ | 1062 | struct file_lock *fl_next; /* singly linked list for this inode */ |
1063 | struct list_head fl_link; /* doubly linked list of all locks */ | 1063 | struct list_head fl_link; /* doubly linked list of all locks */ |
1064 | struct list_head fl_block; /* circular list of blocked processes */ | 1064 | struct list_head fl_block; /* circular list of blocked processes */ |
1065 | fl_owner_t fl_owner; | 1065 | fl_owner_t fl_owner; |
1066 | unsigned char fl_flags; | 1066 | unsigned char fl_flags; |
1067 | unsigned char fl_type; | 1067 | unsigned char fl_type; |
1068 | unsigned int fl_pid; | 1068 | unsigned int fl_pid; |
1069 | struct pid *fl_nspid; | 1069 | struct pid *fl_nspid; |
1070 | wait_queue_head_t fl_wait; | 1070 | wait_queue_head_t fl_wait; |
1071 | struct file *fl_file; | 1071 | struct file *fl_file; |
1072 | loff_t fl_start; | 1072 | loff_t fl_start; |
1073 | loff_t fl_end; | 1073 | loff_t fl_end; |
1074 | 1074 | ||
1075 | struct fasync_struct * fl_fasync; /* for lease break notifications */ | 1075 | struct fasync_struct * fl_fasync; /* for lease break notifications */ |
1076 | unsigned long fl_break_time; /* for nonblocking lease breaks */ | 1076 | unsigned long fl_break_time; /* for nonblocking lease breaks */ |
1077 | 1077 | ||
1078 | const struct file_lock_operations *fl_ops; /* Callbacks for filesystems */ | 1078 | const struct file_lock_operations *fl_ops; /* Callbacks for filesystems */ |
1079 | const struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */ | 1079 | const struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */ |
1080 | union { | 1080 | union { |
1081 | struct nfs_lock_info nfs_fl; | 1081 | struct nfs_lock_info nfs_fl; |
1082 | struct nfs4_lock_info nfs4_fl; | 1082 | struct nfs4_lock_info nfs4_fl; |
1083 | struct { | 1083 | struct { |
1084 | struct list_head link; /* link in AFS vnode's pending_locks list */ | 1084 | struct list_head link; /* link in AFS vnode's pending_locks list */ |
1085 | int state; /* state of grant or error if -ve */ | 1085 | int state; /* state of grant or error if -ve */ |
1086 | } afs; | 1086 | } afs; |
1087 | } fl_u; | 1087 | } fl_u; |
1088 | }; | 1088 | }; |
1089 | 1089 | ||
1090 | /* The following constant reflects the upper bound of the file/locking space */ | 1090 | /* The following constant reflects the upper bound of the file/locking space */ |
1091 | #ifndef OFFSET_MAX | 1091 | #ifndef OFFSET_MAX |
1092 | #define INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1))) | 1092 | #define INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1))) |
1093 | #define OFFSET_MAX INT_LIMIT(loff_t) | 1093 | #define OFFSET_MAX INT_LIMIT(loff_t) |
1094 | #define OFFT_OFFSET_MAX INT_LIMIT(off_t) | 1094 | #define OFFT_OFFSET_MAX INT_LIMIT(off_t) |
1095 | #endif | 1095 | #endif |
1096 | 1096 | ||
1097 | #include <linux/fcntl.h> | 1097 | #include <linux/fcntl.h> |
1098 | 1098 | ||
1099 | extern void send_sigio(struct fown_struct *fown, int fd, int band); | 1099 | extern void send_sigio(struct fown_struct *fown, int fd, int band); |
1100 | 1100 | ||
1101 | #ifdef CONFIG_FILE_LOCKING | 1101 | #ifdef CONFIG_FILE_LOCKING |
1102 | extern int fcntl_getlk(struct file *, struct flock __user *); | 1102 | extern int fcntl_getlk(struct file *, struct flock __user *); |
1103 | extern int fcntl_setlk(unsigned int, struct file *, unsigned int, | 1103 | extern int fcntl_setlk(unsigned int, struct file *, unsigned int, |
1104 | struct flock __user *); | 1104 | struct flock __user *); |
1105 | 1105 | ||
1106 | #if BITS_PER_LONG == 32 | 1106 | #if BITS_PER_LONG == 32 |
1107 | extern int fcntl_getlk64(struct file *, struct flock64 __user *); | 1107 | extern int fcntl_getlk64(struct file *, struct flock64 __user *); |
1108 | extern int fcntl_setlk64(unsigned int, struct file *, unsigned int, | 1108 | extern int fcntl_setlk64(unsigned int, struct file *, unsigned int, |
1109 | struct flock64 __user *); | 1109 | struct flock64 __user *); |
1110 | #endif | 1110 | #endif |
1111 | 1111 | ||
1112 | extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg); | 1112 | extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg); |
1113 | extern int fcntl_getlease(struct file *filp); | 1113 | extern int fcntl_getlease(struct file *filp); |
1114 | 1114 | ||
1115 | /* fs/locks.c */ | 1115 | /* fs/locks.c */ |
1116 | extern void locks_init_lock(struct file_lock *); | 1116 | extern void locks_init_lock(struct file_lock *); |
1117 | extern void locks_copy_lock(struct file_lock *, struct file_lock *); | 1117 | extern void locks_copy_lock(struct file_lock *, struct file_lock *); |
1118 | extern void __locks_copy_lock(struct file_lock *, const struct file_lock *); | 1118 | extern void __locks_copy_lock(struct file_lock *, const struct file_lock *); |
1119 | extern void locks_remove_posix(struct file *, fl_owner_t); | 1119 | extern void locks_remove_posix(struct file *, fl_owner_t); |
1120 | extern void locks_remove_flock(struct file *); | 1120 | extern void locks_remove_flock(struct file *); |
1121 | extern void locks_release_private(struct file_lock *); | 1121 | extern void locks_release_private(struct file_lock *); |
1122 | extern void posix_test_lock(struct file *, struct file_lock *); | 1122 | extern void posix_test_lock(struct file *, struct file_lock *); |
1123 | extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); | 1123 | extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); |
1124 | extern int posix_lock_file_wait(struct file *, struct file_lock *); | 1124 | extern int posix_lock_file_wait(struct file *, struct file_lock *); |
1125 | extern int posix_unblock_lock(struct file *, struct file_lock *); | 1125 | extern int posix_unblock_lock(struct file *, struct file_lock *); |
1126 | extern int vfs_test_lock(struct file *, struct file_lock *); | 1126 | extern int vfs_test_lock(struct file *, struct file_lock *); |
1127 | extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); | 1127 | extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); |
1128 | extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); | 1128 | extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); |
1129 | extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); | 1129 | extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); |
1130 | extern int __break_lease(struct inode *inode, unsigned int flags); | 1130 | extern int __break_lease(struct inode *inode, unsigned int flags); |
1131 | extern void lease_get_mtime(struct inode *, struct timespec *time); | 1131 | extern void lease_get_mtime(struct inode *, struct timespec *time); |
1132 | extern int generic_setlease(struct file *, long, struct file_lock **); | 1132 | extern int generic_setlease(struct file *, long, struct file_lock **); |
1133 | extern int vfs_setlease(struct file *, long, struct file_lock **); | 1133 | extern int vfs_setlease(struct file *, long, struct file_lock **); |
1134 | extern int lease_modify(struct file_lock **, int); | 1134 | extern int lease_modify(struct file_lock **, int); |
1135 | extern int lock_may_read(struct inode *, loff_t start, unsigned long count); | 1135 | extern int lock_may_read(struct inode *, loff_t start, unsigned long count); |
1136 | extern int lock_may_write(struct inode *, loff_t start, unsigned long count); | 1136 | extern int lock_may_write(struct inode *, loff_t start, unsigned long count); |
1137 | #else /* !CONFIG_FILE_LOCKING */ | 1137 | #else /* !CONFIG_FILE_LOCKING */ |
1138 | static inline int fcntl_getlk(struct file *file, struct flock __user *user) | 1138 | static inline int fcntl_getlk(struct file *file, struct flock __user *user) |
1139 | { | 1139 | { |
1140 | return -EINVAL; | 1140 | return -EINVAL; |
1141 | } | 1141 | } |
1142 | 1142 | ||
1143 | static inline int fcntl_setlk(unsigned int fd, struct file *file, | 1143 | static inline int fcntl_setlk(unsigned int fd, struct file *file, |
1144 | unsigned int cmd, struct flock __user *user) | 1144 | unsigned int cmd, struct flock __user *user) |
1145 | { | 1145 | { |
1146 | return -EACCES; | 1146 | return -EACCES; |
1147 | } | 1147 | } |
1148 | 1148 | ||
1149 | #if BITS_PER_LONG == 32 | 1149 | #if BITS_PER_LONG == 32 |
1150 | static inline int fcntl_getlk64(struct file *file, struct flock64 __user *user) | 1150 | static inline int fcntl_getlk64(struct file *file, struct flock64 __user *user) |
1151 | { | 1151 | { |
1152 | return -EINVAL; | 1152 | return -EINVAL; |
1153 | } | 1153 | } |
1154 | 1154 | ||
1155 | static inline int fcntl_setlk64(unsigned int fd, struct file *file, | 1155 | static inline int fcntl_setlk64(unsigned int fd, struct file *file, |
1156 | unsigned int cmd, struct flock64 __user *user) | 1156 | unsigned int cmd, struct flock64 __user *user) |
1157 | { | 1157 | { |
1158 | return -EACCES; | 1158 | return -EACCES; |
1159 | } | 1159 | } |
1160 | #endif | 1160 | #endif |
1161 | static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg) | 1161 | static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg) |
1162 | { | 1162 | { |
1163 | return 0; | 1163 | return 0; |
1164 | } | 1164 | } |
1165 | 1165 | ||
1166 | static inline int fcntl_getlease(struct file *filp) | 1166 | static inline int fcntl_getlease(struct file *filp) |
1167 | { | 1167 | { |
1168 | return 0; | 1168 | return 0; |
1169 | } | 1169 | } |
1170 | 1170 | ||
1171 | static inline void locks_init_lock(struct file_lock *fl) | 1171 | static inline void locks_init_lock(struct file_lock *fl) |
1172 | { | 1172 | { |
1173 | return; | 1173 | return; |
1174 | } | 1174 | } |
1175 | 1175 | ||
1176 | static inline void __locks_copy_lock(struct file_lock *new, struct file_lock *fl) | 1176 | static inline void __locks_copy_lock(struct file_lock *new, struct file_lock *fl) |
1177 | { | 1177 | { |
1178 | return; | 1178 | return; |
1179 | } | 1179 | } |
1180 | 1180 | ||
1181 | static inline void locks_copy_lock(struct file_lock *new, struct file_lock *fl) | 1181 | static inline void locks_copy_lock(struct file_lock *new, struct file_lock *fl) |
1182 | { | 1182 | { |
1183 | return; | 1183 | return; |
1184 | } | 1184 | } |
1185 | 1185 | ||
1186 | static inline void locks_remove_posix(struct file *filp, fl_owner_t owner) | 1186 | static inline void locks_remove_posix(struct file *filp, fl_owner_t owner) |
1187 | { | 1187 | { |
1188 | return; | 1188 | return; |
1189 | } | 1189 | } |
1190 | 1190 | ||
1191 | static inline void locks_remove_flock(struct file *filp) | 1191 | static inline void locks_remove_flock(struct file *filp) |
1192 | { | 1192 | { |
1193 | return; | 1193 | return; |
1194 | } | 1194 | } |
1195 | 1195 | ||
1196 | static inline void posix_test_lock(struct file *filp, struct file_lock *fl) | 1196 | static inline void posix_test_lock(struct file *filp, struct file_lock *fl) |
1197 | { | 1197 | { |
1198 | return; | 1198 | return; |
1199 | } | 1199 | } |
1200 | 1200 | ||
1201 | static inline int posix_lock_file(struct file *filp, struct file_lock *fl, | 1201 | static inline int posix_lock_file(struct file *filp, struct file_lock *fl, |
1202 | struct file_lock *conflock) | 1202 | struct file_lock *conflock) |
1203 | { | 1203 | { |
1204 | return -ENOLCK; | 1204 | return -ENOLCK; |
1205 | } | 1205 | } |
1206 | 1206 | ||
1207 | static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) | 1207 | static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) |
1208 | { | 1208 | { |
1209 | return -ENOLCK; | 1209 | return -ENOLCK; |
1210 | } | 1210 | } |
1211 | 1211 | ||
1212 | static inline int posix_unblock_lock(struct file *filp, | 1212 | static inline int posix_unblock_lock(struct file *filp, |
1213 | struct file_lock *waiter) | 1213 | struct file_lock *waiter) |
1214 | { | 1214 | { |
1215 | return -ENOENT; | 1215 | return -ENOENT; |
1216 | } | 1216 | } |
1217 | 1217 | ||
1218 | static inline int vfs_test_lock(struct file *filp, struct file_lock *fl) | 1218 | static inline int vfs_test_lock(struct file *filp, struct file_lock *fl) |
1219 | { | 1219 | { |
1220 | return 0; | 1220 | return 0; |
1221 | } | 1221 | } |
1222 | 1222 | ||
1223 | static inline int vfs_lock_file(struct file *filp, unsigned int cmd, | 1223 | static inline int vfs_lock_file(struct file *filp, unsigned int cmd, |
1224 | struct file_lock *fl, struct file_lock *conf) | 1224 | struct file_lock *fl, struct file_lock *conf) |
1225 | { | 1225 | { |
1226 | return -ENOLCK; | 1226 | return -ENOLCK; |
1227 | } | 1227 | } |
1228 | 1228 | ||
1229 | static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl) | 1229 | static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl) |
1230 | { | 1230 | { |
1231 | return 0; | 1231 | return 0; |
1232 | } | 1232 | } |
1233 | 1233 | ||
1234 | static inline int flock_lock_file_wait(struct file *filp, | 1234 | static inline int flock_lock_file_wait(struct file *filp, |
1235 | struct file_lock *request) | 1235 | struct file_lock *request) |
1236 | { | 1236 | { |
1237 | return -ENOLCK; | 1237 | return -ENOLCK; |
1238 | } | 1238 | } |
1239 | 1239 | ||
1240 | static inline int __break_lease(struct inode *inode, unsigned int mode) | 1240 | static inline int __break_lease(struct inode *inode, unsigned int mode) |
1241 | { | 1241 | { |
1242 | return 0; | 1242 | return 0; |
1243 | } | 1243 | } |
1244 | 1244 | ||
1245 | static inline void lease_get_mtime(struct inode *inode, struct timespec *time) | 1245 | static inline void lease_get_mtime(struct inode *inode, struct timespec *time) |
1246 | { | 1246 | { |
1247 | return; | 1247 | return; |
1248 | } | 1248 | } |
1249 | 1249 | ||
1250 | static inline int generic_setlease(struct file *filp, long arg, | 1250 | static inline int generic_setlease(struct file *filp, long arg, |
1251 | struct file_lock **flp) | 1251 | struct file_lock **flp) |
1252 | { | 1252 | { |
1253 | return -EINVAL; | 1253 | return -EINVAL; |
1254 | } | 1254 | } |
1255 | 1255 | ||
1256 | static inline int vfs_setlease(struct file *filp, long arg, | 1256 | static inline int vfs_setlease(struct file *filp, long arg, |
1257 | struct file_lock **lease) | 1257 | struct file_lock **lease) |
1258 | { | 1258 | { |
1259 | return -EINVAL; | 1259 | return -EINVAL; |
1260 | } | 1260 | } |
1261 | 1261 | ||
1262 | static inline int lease_modify(struct file_lock **before, int arg) | 1262 | static inline int lease_modify(struct file_lock **before, int arg) |
1263 | { | 1263 | { |
1264 | return -EINVAL; | 1264 | return -EINVAL; |
1265 | } | 1265 | } |
1266 | 1266 | ||
1267 | static inline int lock_may_read(struct inode *inode, loff_t start, | 1267 | static inline int lock_may_read(struct inode *inode, loff_t start, |
1268 | unsigned long len) | 1268 | unsigned long len) |
1269 | { | 1269 | { |
1270 | return 1; | 1270 | return 1; |
1271 | } | 1271 | } |
1272 | 1272 | ||
1273 | static inline int lock_may_write(struct inode *inode, loff_t start, | 1273 | static inline int lock_may_write(struct inode *inode, loff_t start, |
1274 | unsigned long len) | 1274 | unsigned long len) |
1275 | { | 1275 | { |
1276 | return 1; | 1276 | return 1; |
1277 | } | 1277 | } |
1278 | 1278 | ||
1279 | #endif /* !CONFIG_FILE_LOCKING */ | 1279 | #endif /* !CONFIG_FILE_LOCKING */ |
1280 | 1280 | ||
1281 | 1281 | ||
1282 | struct fasync_struct { | 1282 | struct fasync_struct { |
1283 | int magic; | 1283 | spinlock_t fa_lock; |
1284 | int fa_fd; | 1284 | int magic; |
1285 | struct fasync_struct *fa_next; /* singly linked list */ | 1285 | int fa_fd; |
1286 | struct file *fa_file; | 1286 | struct fasync_struct *fa_next; /* singly linked list */ |
1287 | struct file *fa_file; | ||
1288 | struct rcu_head fa_rcu; | ||
1287 | }; | 1289 | }; |
1288 | 1290 | ||
1289 | #define FASYNC_MAGIC 0x4601 | 1291 | #define FASYNC_MAGIC 0x4601 |
1290 | 1292 | ||
1291 | /* SMP safe fasync helpers: */ | 1293 | /* SMP safe fasync helpers: */ |
1292 | extern int fasync_helper(int, struct file *, int, struct fasync_struct **); | 1294 | extern int fasync_helper(int, struct file *, int, struct fasync_struct **); |
1293 | /* can be called from interrupts */ | 1295 | /* can be called from interrupts */ |
1294 | extern void kill_fasync(struct fasync_struct **, int, int); | 1296 | extern void kill_fasync(struct fasync_struct **, int, int); |
1295 | /* only for net: no internal synchronization */ | ||
1296 | extern void __kill_fasync(struct fasync_struct *, int, int); | ||
1297 | 1297 | ||
1298 | extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force); | 1298 | extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force); |
1299 | extern int f_setown(struct file *filp, unsigned long arg, int force); | 1299 | extern int f_setown(struct file *filp, unsigned long arg, int force); |
1300 | extern void f_delown(struct file *filp); | 1300 | extern void f_delown(struct file *filp); |
1301 | extern pid_t f_getown(struct file *filp); | 1301 | extern pid_t f_getown(struct file *filp); |
1302 | extern int send_sigurg(struct fown_struct *fown); | 1302 | extern int send_sigurg(struct fown_struct *fown); |
1303 | 1303 | ||
1304 | /* | 1304 | /* |
1305 | * Umount options | 1305 | * Umount options |
1306 | */ | 1306 | */ |
1307 | 1307 | ||
1308 | #define MNT_FORCE 0x00000001 /* Attempt to forcibily umount */ | 1308 | #define MNT_FORCE 0x00000001 /* Attempt to forcibily umount */ |
1309 | #define MNT_DETACH 0x00000002 /* Just detach from the tree */ | 1309 | #define MNT_DETACH 0x00000002 /* Just detach from the tree */ |
1310 | #define MNT_EXPIRE 0x00000004 /* Mark for expiry */ | 1310 | #define MNT_EXPIRE 0x00000004 /* Mark for expiry */ |
1311 | #define UMOUNT_NOFOLLOW 0x00000008 /* Don't follow symlink on umount */ | 1311 | #define UMOUNT_NOFOLLOW 0x00000008 /* Don't follow symlink on umount */ |
1312 | #define UMOUNT_UNUSED 0x80000000 /* Flag guaranteed to be unused */ | 1312 | #define UMOUNT_UNUSED 0x80000000 /* Flag guaranteed to be unused */ |
1313 | 1313 | ||
1314 | extern struct list_head super_blocks; | 1314 | extern struct list_head super_blocks; |
1315 | extern spinlock_t sb_lock; | 1315 | extern spinlock_t sb_lock; |
1316 | 1316 | ||
1317 | #define sb_entry(list) list_entry((list), struct super_block, s_list) | 1317 | #define sb_entry(list) list_entry((list), struct super_block, s_list) |
1318 | #define S_BIAS (1<<30) | 1318 | #define S_BIAS (1<<30) |
1319 | struct super_block { | 1319 | struct super_block { |
1320 | struct list_head s_list; /* Keep this first */ | 1320 | struct list_head s_list; /* Keep this first */ |
1321 | dev_t s_dev; /* search index; _not_ kdev_t */ | 1321 | dev_t s_dev; /* search index; _not_ kdev_t */ |
1322 | unsigned char s_dirt; | 1322 | unsigned char s_dirt; |
1323 | unsigned char s_blocksize_bits; | 1323 | unsigned char s_blocksize_bits; |
1324 | unsigned long s_blocksize; | 1324 | unsigned long s_blocksize; |
1325 | loff_t s_maxbytes; /* Max file size */ | 1325 | loff_t s_maxbytes; /* Max file size */ |
1326 | struct file_system_type *s_type; | 1326 | struct file_system_type *s_type; |
1327 | const struct super_operations *s_op; | 1327 | const struct super_operations *s_op; |
1328 | const struct dquot_operations *dq_op; | 1328 | const struct dquot_operations *dq_op; |
1329 | const struct quotactl_ops *s_qcop; | 1329 | const struct quotactl_ops *s_qcop; |
1330 | const struct export_operations *s_export_op; | 1330 | const struct export_operations *s_export_op; |
1331 | unsigned long s_flags; | 1331 | unsigned long s_flags; |
1332 | unsigned long s_magic; | 1332 | unsigned long s_magic; |
1333 | struct dentry *s_root; | 1333 | struct dentry *s_root; |
1334 | struct rw_semaphore s_umount; | 1334 | struct rw_semaphore s_umount; |
1335 | struct mutex s_lock; | 1335 | struct mutex s_lock; |
1336 | int s_count; | 1336 | int s_count; |
1337 | int s_need_sync; | 1337 | int s_need_sync; |
1338 | atomic_t s_active; | 1338 | atomic_t s_active; |
1339 | #ifdef CONFIG_SECURITY | 1339 | #ifdef CONFIG_SECURITY |
1340 | void *s_security; | 1340 | void *s_security; |
1341 | #endif | 1341 | #endif |
1342 | struct xattr_handler **s_xattr; | 1342 | struct xattr_handler **s_xattr; |
1343 | 1343 | ||
1344 | struct list_head s_inodes; /* all inodes */ | 1344 | struct list_head s_inodes; /* all inodes */ |
1345 | struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ | 1345 | struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ |
1346 | struct list_head s_files; | 1346 | struct list_head s_files; |
1347 | /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ | 1347 | /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ |
1348 | struct list_head s_dentry_lru; /* unused dentry lru */ | 1348 | struct list_head s_dentry_lru; /* unused dentry lru */ |
1349 | int s_nr_dentry_unused; /* # of dentry on lru */ | 1349 | int s_nr_dentry_unused; /* # of dentry on lru */ |
1350 | 1350 | ||
1351 | struct block_device *s_bdev; | 1351 | struct block_device *s_bdev; |
1352 | struct backing_dev_info *s_bdi; | 1352 | struct backing_dev_info *s_bdi; |
1353 | struct mtd_info *s_mtd; | 1353 | struct mtd_info *s_mtd; |
1354 | struct list_head s_instances; | 1354 | struct list_head s_instances; |
1355 | struct quota_info s_dquot; /* Diskquota specific options */ | 1355 | struct quota_info s_dquot; /* Diskquota specific options */ |
1356 | 1356 | ||
1357 | int s_frozen; | 1357 | int s_frozen; |
1358 | wait_queue_head_t s_wait_unfrozen; | 1358 | wait_queue_head_t s_wait_unfrozen; |
1359 | 1359 | ||
1360 | char s_id[32]; /* Informational name */ | 1360 | char s_id[32]; /* Informational name */ |
1361 | 1361 | ||
1362 | void *s_fs_info; /* Filesystem private info */ | 1362 | void *s_fs_info; /* Filesystem private info */ |
1363 | fmode_t s_mode; | 1363 | fmode_t s_mode; |
1364 | 1364 | ||
1365 | /* Granularity of c/m/atime in ns. | 1365 | /* Granularity of c/m/atime in ns. |
1366 | Cannot be worse than a second */ | 1366 | Cannot be worse than a second */ |
1367 | u32 s_time_gran; | 1367 | u32 s_time_gran; |
1368 | 1368 | ||
1369 | /* | 1369 | /* |
1370 | * The next field is for VFS *only*. No filesystems have any business | 1370 | * The next field is for VFS *only*. No filesystems have any business |
1371 | * even looking at it. You had been warned. | 1371 | * even looking at it. You had been warned. |
1372 | */ | 1372 | */ |
1373 | struct mutex s_vfs_rename_mutex; /* Kludge */ | 1373 | struct mutex s_vfs_rename_mutex; /* Kludge */ |
1374 | 1374 | ||
1375 | /* | 1375 | /* |
1376 | * Filesystem subtype. If non-empty the filesystem type field | 1376 | * Filesystem subtype. If non-empty the filesystem type field |
1377 | * in /proc/mounts will be "type.subtype" | 1377 | * in /proc/mounts will be "type.subtype" |
1378 | */ | 1378 | */ |
1379 | char *s_subtype; | 1379 | char *s_subtype; |
1380 | 1380 | ||
1381 | /* | 1381 | /* |
1382 | * Saved mount options for lazy filesystems using | 1382 | * Saved mount options for lazy filesystems using |
1383 | * generic_show_options() | 1383 | * generic_show_options() |
1384 | */ | 1384 | */ |
1385 | char *s_options; | 1385 | char *s_options; |
1386 | }; | 1386 | }; |
1387 | 1387 | ||
1388 | extern struct timespec current_fs_time(struct super_block *sb); | 1388 | extern struct timespec current_fs_time(struct super_block *sb); |
1389 | 1389 | ||
1390 | /* | 1390 | /* |
1391 | * Snapshotting support. | 1391 | * Snapshotting support. |
1392 | */ | 1392 | */ |
1393 | enum { | 1393 | enum { |
1394 | SB_UNFROZEN = 0, | 1394 | SB_UNFROZEN = 0, |
1395 | SB_FREEZE_WRITE = 1, | 1395 | SB_FREEZE_WRITE = 1, |
1396 | SB_FREEZE_TRANS = 2, | 1396 | SB_FREEZE_TRANS = 2, |
1397 | }; | 1397 | }; |
1398 | 1398 | ||
1399 | #define vfs_check_frozen(sb, level) \ | 1399 | #define vfs_check_frozen(sb, level) \ |
1400 | wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level))) | 1400 | wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level))) |
1401 | 1401 | ||
1402 | #define get_fs_excl() atomic_inc(&current->fs_excl) | 1403 | #define get_fs_excl() atomic_inc(&current->fs_excl) |
1403 | #define put_fs_excl() atomic_dec(&current->fs_excl) | 1404 | #define put_fs_excl() atomic_dec(&current->fs_excl) |
1404 | #define has_fs_excl() atomic_read(&current->fs_excl) | 1405 | #define has_fs_excl() atomic_read(&current->fs_excl) |
1405 | 1405 | ||
1406 | #define is_owner_or_cap(inode) \ | 1406 | #define is_owner_or_cap(inode) \ |
1407 | ((current_fsuid() == (inode)->i_uid) || capable(CAP_FOWNER)) | 1407 | ((current_fsuid() == (inode)->i_uid) || capable(CAP_FOWNER)) |
1408 | 1408 | ||
1409 | /* not quite ready to be deprecated, but... */ | 1409 | /* not quite ready to be deprecated, but... */ |
1410 | extern void lock_super(struct super_block *); | 1410 | extern void lock_super(struct super_block *); |
1411 | extern void unlock_super(struct super_block *); | 1411 | extern void unlock_super(struct super_block *); |
1412 | 1412 | ||
1413 | /* | 1413 | /* |
1414 | * VFS helper functions.. | 1414 | * VFS helper functions.. |
1415 | */ | 1415 | */ |
1416 | extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *); | 1416 | extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *); |
1417 | extern int vfs_mkdir(struct inode *, struct dentry *, int); | 1417 | extern int vfs_mkdir(struct inode *, struct dentry *, int); |
1418 | extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t); | 1418 | extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t); |
1419 | extern int vfs_symlink(struct inode *, struct dentry *, const char *); | 1419 | extern int vfs_symlink(struct inode *, struct dentry *, const char *); |
1420 | extern int vfs_link(struct dentry *, struct inode *, struct dentry *); | 1420 | extern int vfs_link(struct dentry *, struct inode *, struct dentry *); |
1421 | extern int vfs_rmdir(struct inode *, struct dentry *); | 1421 | extern int vfs_rmdir(struct inode *, struct dentry *); |
1422 | extern int vfs_unlink(struct inode *, struct dentry *); | 1422 | extern int vfs_unlink(struct inode *, struct dentry *); |
1423 | extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); | 1423 | extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); |
1424 | 1424 | ||
1425 | /* | 1425 | /* |
1426 | * VFS dentry helper functions. | 1426 | * VFS dentry helper functions. |
1427 | */ | 1427 | */ |
1428 | extern void dentry_unhash(struct dentry *dentry); | 1428 | extern void dentry_unhash(struct dentry *dentry); |
1429 | 1429 | ||
1430 | /* | 1430 | /* |
1431 | * VFS file helper functions. | 1431 | * VFS file helper functions. |
1432 | */ | 1432 | */ |
1433 | extern int file_permission(struct file *, int); | 1433 | extern int file_permission(struct file *, int); |
1434 | 1434 | ||
1435 | /* | 1435 | /* |
1436 | * VFS FS_IOC_FIEMAP helper definitions. | 1436 | * VFS FS_IOC_FIEMAP helper definitions. |
1437 | */ | 1437 | */ |
1438 | struct fiemap_extent_info { | 1438 | struct fiemap_extent_info { |
1439 | unsigned int fi_flags; /* Flags as passed from user */ | 1439 | unsigned int fi_flags; /* Flags as passed from user */ |
1440 | unsigned int fi_extents_mapped; /* Number of mapped extents */ | 1440 | unsigned int fi_extents_mapped; /* Number of mapped extents */ |
1441 | unsigned int fi_extents_max; /* Size of fiemap_extent array */ | 1441 | unsigned int fi_extents_max; /* Size of fiemap_extent array */ |
1442 | struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent | 1442 | struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent |
1443 | * array */ | 1443 | * array */ |
1444 | }; | 1444 | }; |
1445 | int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, | 1445 | int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, |
1446 | u64 phys, u64 len, u32 flags); | 1446 | u64 phys, u64 len, u32 flags); |
1447 | int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); | 1447 | int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); |
1448 | 1448 | ||
1449 | /* | 1449 | /* |
1450 | * File types | 1450 | * File types |
1451 | * | 1451 | * |
1452 | * NOTE! These match bits 12..15 of stat.st_mode | 1452 | * NOTE! These match bits 12..15 of stat.st_mode |
1453 | * (ie "(i_mode >> 12) & 15"). | 1453 | * (ie "(i_mode >> 12) & 15"). |
1454 | */ | 1454 | */ |
1455 | #define DT_UNKNOWN 0 | 1455 | #define DT_UNKNOWN 0 |
1456 | #define DT_FIFO 1 | 1456 | #define DT_FIFO 1 |
1457 | #define DT_CHR 2 | 1457 | #define DT_CHR 2 |
1458 | #define DT_DIR 4 | 1458 | #define DT_DIR 4 |
1459 | #define DT_BLK 6 | 1459 | #define DT_BLK 6 |
1460 | #define DT_REG 8 | 1460 | #define DT_REG 8 |
1461 | #define DT_LNK 10 | 1461 | #define DT_LNK 10 |
1462 | #define DT_SOCK 12 | 1462 | #define DT_SOCK 12 |
1463 | #define DT_WHT 14 | 1463 | #define DT_WHT 14 |
1464 | 1464 | ||
1465 | /* | 1465 | /* |
1466 | * This is the "filldir" function type, used by readdir() to let | 1466 | * This is the "filldir" function type, used by readdir() to let |
1467 | * the kernel specify what kind of dirent layout it wants to have. | 1467 | * the kernel specify what kind of dirent layout it wants to have. |
1468 | * This allows the kernel to read directories into kernel space or | 1468 | * This allows the kernel to read directories into kernel space or |
1469 | * to have different dirent layouts depending on the binary type. | 1469 | * to have different dirent layouts depending on the binary type. |
1470 | */ | 1470 | */ |
1471 | typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned); | 1471 | typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned); |
1472 | struct block_device_operations; | 1472 | struct block_device_operations; |
1473 | 1473 | ||
1474 | /* These macros are for out of kernel modules to test that | 1474 | /* These macros are for out of kernel modules to test that |
1475 | * the kernel supports the unlocked_ioctl and compat_ioctl | 1475 | * the kernel supports the unlocked_ioctl and compat_ioctl |
1476 | * fields in struct file_operations. */ | 1476 | * fields in struct file_operations. */ |
1477 | #define HAVE_COMPAT_IOCTL 1 | 1477 | #define HAVE_COMPAT_IOCTL 1 |
1478 | #define HAVE_UNLOCKED_IOCTL 1 | 1478 | #define HAVE_UNLOCKED_IOCTL 1 |
1479 | 1479 | ||
1480 | /* | 1480 | /* |
1481 | * NOTE: | 1481 | * NOTE: |
1482 | * read, write, poll, fsync, readv, writev, unlocked_ioctl and compat_ioctl | 1482 | * read, write, poll, fsync, readv, writev, unlocked_ioctl and compat_ioctl |
1483 | * can be called without the big kernel lock held in all filesystems. | 1483 | * can be called without the big kernel lock held in all filesystems. |
1484 | */ | 1484 | */ |
1485 | struct file_operations { | 1485 | struct file_operations { |
1486 | struct module *owner; | 1486 | struct module *owner; |
1487 | loff_t (*llseek) (struct file *, loff_t, int); | 1487 | loff_t (*llseek) (struct file *, loff_t, int); |
1488 | ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); | 1488 | ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); |
1489 | ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); | 1489 | ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); |
1490 | ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); | 1490 | ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); |
1491 | ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); | 1491 | ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); |
1492 | int (*readdir) (struct file *, void *, filldir_t); | 1492 | int (*readdir) (struct file *, void *, filldir_t); |
1493 | unsigned int (*poll) (struct file *, struct poll_table_struct *); | 1493 | unsigned int (*poll) (struct file *, struct poll_table_struct *); |
1494 | int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long); | 1494 | int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long); |
1495 | long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); | 1495 | long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); |
1496 | long (*compat_ioctl) (struct file *, unsigned int, unsigned long); | 1496 | long (*compat_ioctl) (struct file *, unsigned int, unsigned long); |
1497 | int (*mmap) (struct file *, struct vm_area_struct *); | 1497 | int (*mmap) (struct file *, struct vm_area_struct *); |
1498 | int (*open) (struct inode *, struct file *); | 1498 | int (*open) (struct inode *, struct file *); |
1499 | int (*flush) (struct file *, fl_owner_t id); | 1499 | int (*flush) (struct file *, fl_owner_t id); |
1500 | int (*release) (struct inode *, struct file *); | 1500 | int (*release) (struct inode *, struct file *); |
1501 | int (*fsync) (struct file *, struct dentry *, int datasync); | 1501 | int (*fsync) (struct file *, struct dentry *, int datasync); |
1502 | int (*aio_fsync) (struct kiocb *, int datasync); | 1502 | int (*aio_fsync) (struct kiocb *, int datasync); |
1503 | int (*fasync) (int, struct file *, int); | 1503 | int (*fasync) (int, struct file *, int); |
1504 | int (*lock) (struct file *, int, struct file_lock *); | 1504 | int (*lock) (struct file *, int, struct file_lock *); |
1505 | ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); | 1505 | ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); |
1506 | unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); | 1506 | unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); |
1507 | int (*check_flags)(int); | 1507 | int (*check_flags)(int); |
1508 | int (*flock) (struct file *, int, struct file_lock *); | 1508 | int (*flock) (struct file *, int, struct file_lock *); |
1509 | ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); | 1509 | ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); |
1510 | ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); | 1510 | ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); |
1511 | int (*setlease)(struct file *, long, struct file_lock **); | 1511 | int (*setlease)(struct file *, long, struct file_lock **); |
1512 | }; | 1512 | }; |
1513 | 1513 | ||
1514 | struct inode_operations { | 1514 | struct inode_operations { |
1515 | int (*create) (struct inode *,struct dentry *,int, struct nameidata *); | 1515 | int (*create) (struct inode *,struct dentry *,int, struct nameidata *); |
1516 | struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); | 1516 | struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); |
1517 | int (*link) (struct dentry *,struct inode *,struct dentry *); | 1517 | int (*link) (struct dentry *,struct inode *,struct dentry *); |
1518 | int (*unlink) (struct inode *,struct dentry *); | 1518 | int (*unlink) (struct inode *,struct dentry *); |
1519 | int (*symlink) (struct inode *,struct dentry *,const char *); | 1519 | int (*symlink) (struct inode *,struct dentry *,const char *); |
1520 | int (*mkdir) (struct inode *,struct dentry *,int); | 1520 | int (*mkdir) (struct inode *,struct dentry *,int); |
1521 | int (*rmdir) (struct inode *,struct dentry *); | 1521 | int (*rmdir) (struct inode *,struct dentry *); |
1522 | int (*mknod) (struct inode *,struct dentry *,int,dev_t); | 1522 | int (*mknod) (struct inode *,struct dentry *,int,dev_t); |
1523 | int (*rename) (struct inode *, struct dentry *, | 1523 | int (*rename) (struct inode *, struct dentry *, |
1524 | struct inode *, struct dentry *); | 1524 | struct inode *, struct dentry *); |
1525 | int (*readlink) (struct dentry *, char __user *,int); | 1525 | int (*readlink) (struct dentry *, char __user *,int); |
1526 | void * (*follow_link) (struct dentry *, struct nameidata *); | 1526 | void * (*follow_link) (struct dentry *, struct nameidata *); |
1527 | void (*put_link) (struct dentry *, struct nameidata *, void *); | 1527 | void (*put_link) (struct dentry *, struct nameidata *, void *); |
1528 | void (*truncate) (struct inode *); | 1528 | void (*truncate) (struct inode *); |
1529 | int (*permission) (struct inode *, int); | 1529 | int (*permission) (struct inode *, int); |
1530 | int (*check_acl)(struct inode *, int); | 1530 | int (*check_acl)(struct inode *, int); |
1531 | int (*setattr) (struct dentry *, struct iattr *); | 1531 | int (*setattr) (struct dentry *, struct iattr *); |
1532 | int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); | 1532 | int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); |
1533 | int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); | 1533 | int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); |
1534 | ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); | 1534 | ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); |
1535 | ssize_t (*listxattr) (struct dentry *, char *, size_t); | 1535 | ssize_t (*listxattr) (struct dentry *, char *, size_t); |
1536 | int (*removexattr) (struct dentry *, const char *); | 1536 | int (*removexattr) (struct dentry *, const char *); |
1537 | void (*truncate_range)(struct inode *, loff_t, loff_t); | 1537 | void (*truncate_range)(struct inode *, loff_t, loff_t); |
1538 | long (*fallocate)(struct inode *inode, int mode, loff_t offset, | 1538 | long (*fallocate)(struct inode *inode, int mode, loff_t offset, |
1539 | loff_t len); | 1539 | loff_t len); |
1540 | int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, | 1540 | int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, |
1541 | u64 len); | 1541 | u64 len); |
1542 | }; | 1542 | }; |
1543 | 1543 | ||
1544 | struct seq_file; | 1544 | struct seq_file; |
1545 | 1545 | ||
1546 | ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, | 1546 | ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, |
1547 | unsigned long nr_segs, unsigned long fast_segs, | 1547 | unsigned long nr_segs, unsigned long fast_segs, |
1548 | struct iovec *fast_pointer, | 1548 | struct iovec *fast_pointer, |
1549 | struct iovec **ret_pointer); | 1549 | struct iovec **ret_pointer); |
1550 | 1550 | ||
1551 | extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); | 1551 | extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); |
1552 | extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); | 1552 | extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); |
1553 | extern ssize_t vfs_readv(struct file *, const struct iovec __user *, | 1553 | extern ssize_t vfs_readv(struct file *, const struct iovec __user *, |
1554 | unsigned long, loff_t *); | 1554 | unsigned long, loff_t *); |
1555 | extern ssize_t vfs_writev(struct file *, const struct iovec __user *, | 1555 | extern ssize_t vfs_writev(struct file *, const struct iovec __user *, |
1556 | unsigned long, loff_t *); | 1556 | unsigned long, loff_t *); |
1557 | 1557 | ||
1558 | struct super_operations { | 1558 | struct super_operations { |
1559 | struct inode *(*alloc_inode)(struct super_block *sb); | 1559 | struct inode *(*alloc_inode)(struct super_block *sb); |
1560 | void (*destroy_inode)(struct inode *); | 1560 | void (*destroy_inode)(struct inode *); |
1561 | 1561 | ||
1562 | void (*dirty_inode) (struct inode *); | 1562 | void (*dirty_inode) (struct inode *); |
1563 | int (*write_inode) (struct inode *, struct writeback_control *wbc); | 1563 | int (*write_inode) (struct inode *, struct writeback_control *wbc); |
1564 | void (*drop_inode) (struct inode *); | 1564 | void (*drop_inode) (struct inode *); |
1565 | void (*delete_inode) (struct inode *); | 1565 | void (*delete_inode) (struct inode *); |
1566 | void (*put_super) (struct super_block *); | 1566 | void (*put_super) (struct super_block *); |
1567 | void (*write_super) (struct super_block *); | 1567 | void (*write_super) (struct super_block *); |
1568 | int (*sync_fs)(struct super_block *sb, int wait); | 1568 | int (*sync_fs)(struct super_block *sb, int wait); |
1569 | int (*freeze_fs) (struct super_block *); | 1569 | int (*freeze_fs) (struct super_block *); |
1570 | int (*unfreeze_fs) (struct super_block *); | 1570 | int (*unfreeze_fs) (struct super_block *); |
1571 | int (*statfs) (struct dentry *, struct kstatfs *); | 1571 | int (*statfs) (struct dentry *, struct kstatfs *); |
1572 | int (*remount_fs) (struct super_block *, int *, char *); | 1572 | int (*remount_fs) (struct super_block *, int *, char *); |
1573 | void (*clear_inode) (struct inode *); | 1573 | void (*clear_inode) (struct inode *); |
1574 | void (*umount_begin) (struct super_block *); | 1574 | void (*umount_begin) (struct super_block *); |
1575 | 1575 | ||
1576 | int (*show_options)(struct seq_file *, struct vfsmount *); | 1576 | int (*show_options)(struct seq_file *, struct vfsmount *); |
1577 | int (*show_stats)(struct seq_file *, struct vfsmount *); | 1577 | int (*show_stats)(struct seq_file *, struct vfsmount *); |
1578 | #ifdef CONFIG_QUOTA | 1578 | #ifdef CONFIG_QUOTA |
1579 | ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); | 1579 | ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); |
1580 | ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); | 1580 | ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); |
1581 | #endif | 1581 | #endif |
1582 | int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); | 1582 | int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); |
1583 | }; | 1583 | }; |
1584 | 1584 | ||
1585 | /* | 1585 | /* |
1586 | * Inode state bits. Protected by inode_lock. | 1586 | * Inode state bits. Protected by inode_lock. |
1587 | * | 1587 | * |
1588 | * Three bits determine the dirty state of the inode, I_DIRTY_SYNC, | 1588 | * Three bits determine the dirty state of the inode, I_DIRTY_SYNC, |
1589 | * I_DIRTY_DATASYNC and I_DIRTY_PAGES. | 1589 | * I_DIRTY_DATASYNC and I_DIRTY_PAGES. |
1590 | * | 1590 | * |
1591 | * Four bits define the lifetime of an inode. Initially, inodes are I_NEW, | 1591 | * Four bits define the lifetime of an inode. Initially, inodes are I_NEW, |
1592 | * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at | 1592 | * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at |
1593 | * various stages of removing an inode. | 1593 | * various stages of removing an inode. |
1594 | * | 1594 | * |
1595 | * Two bits are used for locking and completion notification, I_NEW and I_SYNC. | 1595 | * Two bits are used for locking and completion notification, I_NEW and I_SYNC. |
1596 | * | 1596 | * |
1597 | * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on | 1597 | * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on |
1598 | * fdatasync(). i_atime is the usual cause. | 1598 | * fdatasync(). i_atime is the usual cause. |
1599 | * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of | 1599 | * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of |
1600 | * these changes separately from I_DIRTY_SYNC so that we | 1600 | * these changes separately from I_DIRTY_SYNC so that we |
1601 | * don't have to write inode on fdatasync() when only | 1601 | * don't have to write inode on fdatasync() when only |
1602 | * mtime has changed in it. | 1602 | * mtime has changed in it. |
1603 | * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. | 1603 | * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. |
1604 | * I_NEW Serves as both a mutex and completion notification. | 1604 | * I_NEW Serves as both a mutex and completion notification. |
1605 | * New inodes set I_NEW. If two processes both create | 1605 | * New inodes set I_NEW. If two processes both create |
1606 | * the same inode, one of them will release its inode and | 1606 | * the same inode, one of them will release its inode and |
1607 | * wait for I_NEW to be released before returning. | 1607 | * wait for I_NEW to be released before returning. |
1608 | * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can | 1608 | * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can |
1609 | * also cause waiting on I_NEW, without I_NEW actually | 1609 | * also cause waiting on I_NEW, without I_NEW actually |
1610 | * being set. find_inode() uses this to prevent returning | 1610 | * being set. find_inode() uses this to prevent returning |
1611 | * nearly-dead inodes. | 1611 | * nearly-dead inodes. |
1612 | * I_WILL_FREE Must be set when calling write_inode_now() if i_count | 1612 | * I_WILL_FREE Must be set when calling write_inode_now() if i_count |
1613 | * is zero. I_FREEING must be set when I_WILL_FREE is | 1613 | * is zero. I_FREEING must be set when I_WILL_FREE is |
1614 | * cleared. | 1614 | * cleared. |
1615 | * I_FREEING Set when inode is about to be freed but still has dirty | 1615 | * I_FREEING Set when inode is about to be freed but still has dirty |
1616 | * pages or buffers attached or the inode itself is still | 1616 | * pages or buffers attached or the inode itself is still |
1617 | * dirty. | 1617 | * dirty. |
1618 | * I_CLEAR Set by clear_inode(). In this state the inode is clean | 1618 | * I_CLEAR Set by clear_inode(). In this state the inode is clean |
1619 | * and can be destroyed. | 1619 | * and can be destroyed. |
1620 | * | 1620 | * |
1621 | * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are | 1621 | * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are |
1622 | * prohibited for many purposes. iget() must wait for | 1622 | * prohibited for many purposes. iget() must wait for |
1623 | * the inode to be completely released, then create it | 1623 | * the inode to be completely released, then create it |
1624 | * anew. Other functions will just ignore such inodes, | 1624 | * anew. Other functions will just ignore such inodes, |
1625 | * if appropriate. I_NEW is used for waiting. | 1625 | * if appropriate. I_NEW is used for waiting. |
1626 | * | 1626 | * |
1627 | * I_SYNC Synchonized write of dirty inode data. The bits is | 1627 | * I_SYNC Synchonized write of dirty inode data. The bits is |
1628 | * set during data writeback, and cleared with a wakeup | 1628 | * set during data writeback, and cleared with a wakeup |
1629 | * on the bit address once it is done. | 1629 | * on the bit address once it is done. |
1630 | * | 1630 | * |
1631 | * Q: What is the difference between I_WILL_FREE and I_FREEING? | 1631 | * Q: What is the difference between I_WILL_FREE and I_FREEING? |
1632 | */ | 1632 | */ |
1633 | #define I_DIRTY_SYNC 1 | 1633 | #define I_DIRTY_SYNC 1 |
1634 | #define I_DIRTY_DATASYNC 2 | 1634 | #define I_DIRTY_DATASYNC 2 |
1635 | #define I_DIRTY_PAGES 4 | 1635 | #define I_DIRTY_PAGES 4 |
1636 | #define __I_NEW 3 | 1636 | #define __I_NEW 3 |
1637 | #define I_NEW (1 << __I_NEW) | 1637 | #define I_NEW (1 << __I_NEW) |
1638 | #define I_WILL_FREE 16 | 1638 | #define I_WILL_FREE 16 |
1639 | #define I_FREEING 32 | 1639 | #define I_FREEING 32 |
1640 | #define I_CLEAR 64 | 1640 | #define I_CLEAR 64 |
1641 | #define __I_SYNC 7 | 1641 | #define __I_SYNC 7 |
1642 | #define I_SYNC (1 << __I_SYNC) | 1642 | #define I_SYNC (1 << __I_SYNC) |
1643 | 1643 | ||
1644 | #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) | 1644 | #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) |
1645 | 1645 | ||
1646 | extern void __mark_inode_dirty(struct inode *, int); | 1646 | extern void __mark_inode_dirty(struct inode *, int); |
1647 | static inline void mark_inode_dirty(struct inode *inode) | 1647 | static inline void mark_inode_dirty(struct inode *inode) |
1648 | { | 1648 | { |
1649 | __mark_inode_dirty(inode, I_DIRTY); | 1649 | __mark_inode_dirty(inode, I_DIRTY); |
1650 | } | 1650 | } |
1651 | 1651 | ||
1652 | static inline void mark_inode_dirty_sync(struct inode *inode) | 1652 | static inline void mark_inode_dirty_sync(struct inode *inode) |
1653 | { | 1653 | { |
1654 | __mark_inode_dirty(inode, I_DIRTY_SYNC); | 1654 | __mark_inode_dirty(inode, I_DIRTY_SYNC); |
1655 | } | 1655 | } |
1656 | 1656 | ||
1657 | /** | 1657 | /** |
1658 | * inc_nlink - directly increment an inode's link count | 1658 | * inc_nlink - directly increment an inode's link count |
1659 | * @inode: inode | 1659 | * @inode: inode |
1660 | * | 1660 | * |
1661 | * This is a low-level filesystem helper to replace any | 1661 | * This is a low-level filesystem helper to replace any |
1662 | * direct filesystem manipulation of i_nlink. Currently, | 1662 | * direct filesystem manipulation of i_nlink. Currently, |
1663 | * it is only here for parity with dec_nlink(). | 1663 | * it is only here for parity with dec_nlink(). |
1664 | */ | 1664 | */ |
1665 | static inline void inc_nlink(struct inode *inode) | 1665 | static inline void inc_nlink(struct inode *inode) |
1666 | { | 1666 | { |
1667 | inode->i_nlink++; | 1667 | inode->i_nlink++; |
1668 | } | 1668 | } |
1669 | 1669 | ||
1670 | static inline void inode_inc_link_count(struct inode *inode) | 1670 | static inline void inode_inc_link_count(struct inode *inode) |
1671 | { | 1671 | { |
1672 | inc_nlink(inode); | 1672 | inc_nlink(inode); |
1673 | mark_inode_dirty(inode); | 1673 | mark_inode_dirty(inode); |
1674 | } | 1674 | } |
1675 | 1675 | ||
1676 | /** | 1676 | /** |
1677 | * drop_nlink - directly drop an inode's link count | 1677 | * drop_nlink - directly drop an inode's link count |
1678 | * @inode: inode | 1678 | * @inode: inode |
1679 | * | 1679 | * |
1680 | * This is a low-level filesystem helper to replace any | 1680 | * This is a low-level filesystem helper to replace any |
1681 | * direct filesystem manipulation of i_nlink. In cases | 1681 | * direct filesystem manipulation of i_nlink. In cases |
1682 | * where we are attempting to track writes to the | 1682 | * where we are attempting to track writes to the |
1683 | * filesystem, a decrement to zero means an imminent | 1683 | * filesystem, a decrement to zero means an imminent |
1684 | * write when the file is truncated and actually unlinked | 1684 | * write when the file is truncated and actually unlinked |
1685 | * on the filesystem. | 1685 | * on the filesystem. |
1686 | */ | 1686 | */ |
1687 | static inline void drop_nlink(struct inode *inode) | 1687 | static inline void drop_nlink(struct inode *inode) |
1688 | { | 1688 | { |
1689 | inode->i_nlink--; | 1689 | inode->i_nlink--; |
1690 | } | 1690 | } |
1691 | 1691 | ||
1692 | /** | 1692 | /** |
1693 | * clear_nlink - directly zero an inode's link count | 1693 | * clear_nlink - directly zero an inode's link count |
1694 | * @inode: inode | 1694 | * @inode: inode |
1695 | * | 1695 | * |
1696 | * This is a low-level filesystem helper to replace any | 1696 | * This is a low-level filesystem helper to replace any |
1697 | * direct filesystem manipulation of i_nlink. See | 1697 | * direct filesystem manipulation of i_nlink. See |
1698 | * drop_nlink() for why we care about i_nlink hitting zero. | 1698 | * drop_nlink() for why we care about i_nlink hitting zero. |
1699 | */ | 1699 | */ |
1700 | static inline void clear_nlink(struct inode *inode) | 1700 | static inline void clear_nlink(struct inode *inode) |
1701 | { | 1701 | { |
1702 | inode->i_nlink = 0; | 1702 | inode->i_nlink = 0; |
1703 | } | 1703 | } |
1704 | 1704 | ||
1705 | static inline void inode_dec_link_count(struct inode *inode) | 1705 | static inline void inode_dec_link_count(struct inode *inode) |
1706 | { | 1706 | { |
1707 | drop_nlink(inode); | 1707 | drop_nlink(inode); |
1708 | mark_inode_dirty(inode); | 1708 | mark_inode_dirty(inode); |
1709 | } | 1709 | } |
1710 | 1710 | ||
1711 | /** | 1711 | /** |
1712 | * inode_inc_iversion - increments i_version | 1712 | * inode_inc_iversion - increments i_version |
1713 | * @inode: inode that need to be updated | 1713 | * @inode: inode that need to be updated |
1714 | * | 1714 | * |
1715 | * Every time the inode is modified, the i_version field will be incremented. | 1715 | * Every time the inode is modified, the i_version field will be incremented. |
1716 | * The filesystem has to be mounted with i_version flag | 1716 | * The filesystem has to be mounted with i_version flag |
1717 | */ | 1717 | */ |
1718 | 1718 | ||
1719 | static inline void inode_inc_iversion(struct inode *inode) | 1719 | static inline void inode_inc_iversion(struct inode *inode) |
1720 | { | 1720 | { |
1721 | spin_lock(&inode->i_lock); | 1721 | spin_lock(&inode->i_lock); |
1722 | inode->i_version++; | 1722 | inode->i_version++; |
1723 | spin_unlock(&inode->i_lock); | 1723 | spin_unlock(&inode->i_lock); |
1724 | } | 1724 | } |
1725 | 1725 | ||
1726 | extern void touch_atime(struct vfsmount *mnt, struct dentry *dentry); | 1726 | extern void touch_atime(struct vfsmount *mnt, struct dentry *dentry); |
1727 | static inline void file_accessed(struct file *file) | 1727 | static inline void file_accessed(struct file *file) |
1728 | { | 1728 | { |
1729 | if (!(file->f_flags & O_NOATIME)) | 1729 | if (!(file->f_flags & O_NOATIME)) |
1730 | touch_atime(file->f_path.mnt, file->f_path.dentry); | 1730 | touch_atime(file->f_path.mnt, file->f_path.dentry); |
1731 | } | 1731 | } |
1732 | 1732 | ||
1733 | int sync_inode(struct inode *inode, struct writeback_control *wbc); | 1733 | int sync_inode(struct inode *inode, struct writeback_control *wbc); |
1734 | 1734 | ||
1735 | struct file_system_type { | 1735 | struct file_system_type { |
1736 | const char *name; | 1736 | const char *name; |
1737 | int fs_flags; | 1737 | int fs_flags; |
1738 | int (*get_sb) (struct file_system_type *, int, | 1738 | int (*get_sb) (struct file_system_type *, int, |
1739 | const char *, void *, struct vfsmount *); | 1739 | const char *, void *, struct vfsmount *); |
1740 | void (*kill_sb) (struct super_block *); | 1740 | void (*kill_sb) (struct super_block *); |
1741 | struct module *owner; | 1741 | struct module *owner; |
1742 | struct file_system_type * next; | 1742 | struct file_system_type * next; |
1743 | struct list_head fs_supers; | 1743 | struct list_head fs_supers; |
1744 | 1744 | ||
1745 | struct lock_class_key s_lock_key; | 1745 | struct lock_class_key s_lock_key; |
1746 | struct lock_class_key s_umount_key; | 1746 | struct lock_class_key s_umount_key; |
1747 | 1747 | ||
1748 | struct lock_class_key i_lock_key; | 1748 | struct lock_class_key i_lock_key; |
1749 | struct lock_class_key i_mutex_key; | 1749 | struct lock_class_key i_mutex_key; |
1750 | struct lock_class_key i_mutex_dir_key; | 1750 | struct lock_class_key i_mutex_dir_key; |
1751 | struct lock_class_key i_alloc_sem_key; | 1751 | struct lock_class_key i_alloc_sem_key; |
1752 | }; | 1752 | }; |
1753 | 1753 | ||
1754 | extern int get_sb_ns(struct file_system_type *fs_type, int flags, void *data, | 1754 | extern int get_sb_ns(struct file_system_type *fs_type, int flags, void *data, |
1755 | int (*fill_super)(struct super_block *, void *, int), | 1755 | int (*fill_super)(struct super_block *, void *, int), |
1756 | struct vfsmount *mnt); | 1756 | struct vfsmount *mnt); |
1757 | extern int get_sb_bdev(struct file_system_type *fs_type, | 1757 | extern int get_sb_bdev(struct file_system_type *fs_type, |
1758 | int flags, const char *dev_name, void *data, | 1758 | int flags, const char *dev_name, void *data, |
1759 | int (*fill_super)(struct super_block *, void *, int), | 1759 | int (*fill_super)(struct super_block *, void *, int), |
1760 | struct vfsmount *mnt); | 1760 | struct vfsmount *mnt); |
1761 | extern int get_sb_single(struct file_system_type *fs_type, | 1761 | extern int get_sb_single(struct file_system_type *fs_type, |
1762 | int flags, void *data, | 1762 | int flags, void *data, |
1763 | int (*fill_super)(struct super_block *, void *, int), | 1763 | int (*fill_super)(struct super_block *, void *, int), |
1764 | struct vfsmount *mnt); | 1764 | struct vfsmount *mnt); |
1765 | extern int get_sb_nodev(struct file_system_type *fs_type, | 1765 | extern int get_sb_nodev(struct file_system_type *fs_type, |
1766 | int flags, void *data, | 1766 | int flags, void *data, |
1767 | int (*fill_super)(struct super_block *, void *, int), | 1767 | int (*fill_super)(struct super_block *, void *, int), |
1768 | struct vfsmount *mnt); | 1768 | struct vfsmount *mnt); |
1769 | void generic_shutdown_super(struct super_block *sb); | 1769 | void generic_shutdown_super(struct super_block *sb); |
1770 | void kill_block_super(struct super_block *sb); | 1770 | void kill_block_super(struct super_block *sb); |
1771 | void kill_anon_super(struct super_block *sb); | 1771 | void kill_anon_super(struct super_block *sb); |
1772 | void kill_litter_super(struct super_block *sb); | 1772 | void kill_litter_super(struct super_block *sb); |
1773 | void deactivate_super(struct super_block *sb); | 1773 | void deactivate_super(struct super_block *sb); |
1774 | void deactivate_locked_super(struct super_block *sb); | 1774 | void deactivate_locked_super(struct super_block *sb); |
1775 | int set_anon_super(struct super_block *s, void *data); | 1775 | int set_anon_super(struct super_block *s, void *data); |
1776 | struct super_block *sget(struct file_system_type *type, | 1776 | struct super_block *sget(struct file_system_type *type, |
1777 | int (*test)(struct super_block *,void *), | 1777 | int (*test)(struct super_block *,void *), |
1778 | int (*set)(struct super_block *,void *), | 1778 | int (*set)(struct super_block *,void *), |
1779 | void *data); | 1779 | void *data); |
1780 | extern int get_sb_pseudo(struct file_system_type *, char *, | 1780 | extern int get_sb_pseudo(struct file_system_type *, char *, |
1781 | const struct super_operations *ops, unsigned long, | 1781 | const struct super_operations *ops, unsigned long, |
1782 | struct vfsmount *mnt); | 1782 | struct vfsmount *mnt); |
1783 | extern void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb); | 1783 | extern void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb); |
1784 | int __put_super_and_need_restart(struct super_block *sb); | 1784 | int __put_super_and_need_restart(struct super_block *sb); |
1785 | void put_super(struct super_block *sb); | 1785 | void put_super(struct super_block *sb); |
1786 | 1786 | ||
1787 | /* Alas, no aliases. Too much hassle with bringing module.h everywhere */ | 1787 | /* Alas, no aliases. Too much hassle with bringing module.h everywhere */ |
1788 | #define fops_get(fops) \ | 1788 | #define fops_get(fops) \ |
1789 | (((fops) && try_module_get((fops)->owner) ? (fops) : NULL)) | 1789 | (((fops) && try_module_get((fops)->owner) ? (fops) : NULL)) |
1790 | #define fops_put(fops) \ | 1790 | #define fops_put(fops) \ |
1791 | do { if (fops) module_put((fops)->owner); } while(0) | 1791 | do { if (fops) module_put((fops)->owner); } while(0) |
1792 | 1792 | ||
1793 | extern int register_filesystem(struct file_system_type *); | 1793 | extern int register_filesystem(struct file_system_type *); |
1794 | extern int unregister_filesystem(struct file_system_type *); | 1794 | extern int unregister_filesystem(struct file_system_type *); |
1795 | extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data); | 1795 | extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data); |
1796 | #define kern_mount(type) kern_mount_data(type, NULL) | 1796 | #define kern_mount(type) kern_mount_data(type, NULL) |
1797 | extern int may_umount_tree(struct vfsmount *); | 1797 | extern int may_umount_tree(struct vfsmount *); |
1798 | extern int may_umount(struct vfsmount *); | 1798 | extern int may_umount(struct vfsmount *); |
1799 | extern long do_mount(char *, char *, char *, unsigned long, void *); | 1799 | extern long do_mount(char *, char *, char *, unsigned long, void *); |
1800 | extern struct vfsmount *collect_mounts(struct path *); | 1800 | extern struct vfsmount *collect_mounts(struct path *); |
1801 | extern void drop_collected_mounts(struct vfsmount *); | 1801 | extern void drop_collected_mounts(struct vfsmount *); |
1802 | extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, | 1802 | extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, |
1803 | struct vfsmount *); | 1803 | struct vfsmount *); |
1804 | extern int vfs_statfs(struct dentry *, struct kstatfs *); | 1804 | extern int vfs_statfs(struct dentry *, struct kstatfs *); |
1805 | 1805 | ||
1806 | extern int current_umask(void); | 1806 | extern int current_umask(void); |
1807 | 1807 | ||
1808 | /* /sys/fs */ | 1808 | /* /sys/fs */ |
1809 | extern struct kobject *fs_kobj; | 1809 | extern struct kobject *fs_kobj; |
1810 | 1810 | ||
1811 | extern int rw_verify_area(int, struct file *, loff_t *, size_t); | 1811 | extern int rw_verify_area(int, struct file *, loff_t *, size_t); |
1812 | 1812 | ||
1813 | #define FLOCK_VERIFY_READ 1 | 1813 | #define FLOCK_VERIFY_READ 1 |
1814 | #define FLOCK_VERIFY_WRITE 2 | 1814 | #define FLOCK_VERIFY_WRITE 2 |
1815 | 1815 | ||
1816 | #ifdef CONFIG_FILE_LOCKING | 1816 | #ifdef CONFIG_FILE_LOCKING |
1817 | extern int locks_mandatory_locked(struct inode *); | 1817 | extern int locks_mandatory_locked(struct inode *); |
1818 | extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t); | 1818 | extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t); |
1819 | 1819 | ||
1820 | /* | 1820 | /* |
1821 | * Candidates for mandatory locking have the setgid bit set | 1821 | * Candidates for mandatory locking have the setgid bit set |
1822 | * but no group execute bit - an otherwise meaningless combination. | 1822 | * but no group execute bit - an otherwise meaningless combination. |
1823 | */ | 1823 | */ |
1824 | 1824 | ||
1825 | static inline int __mandatory_lock(struct inode *ino) | 1825 | static inline int __mandatory_lock(struct inode *ino) |
1826 | { | 1826 | { |
1827 | return (ino->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID; | 1827 | return (ino->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID; |
1828 | } | 1828 | } |
1829 | 1829 | ||
/*
 * A mandatory-locking candidate only counts when IS_MANDLOCK() also holds
 * for the inode (i.e. it lives on an MS_MANDLOCK mounted fs); otherwise
 * its locks stay advisory.
 *
 * Returns 1 when both conditions hold, 0 otherwise.
 */
static inline int mandatory_lock(struct inode *ino)
{
	if (!IS_MANDLOCK(ino))
		return 0;

	return __mandatory_lock(ino) ? 1 : 0;
}
1839 | 1839 | ||
/*
 * Forward to locks_mandatory_locked() when mandatory_lock() accepts
 * @inode; in every other case report 0.
 */
static inline int locks_verify_locked(struct inode *inode)
{
	return mandatory_lock(inode) ? locks_mandatory_locked(inode) : 0;
}
1846 | 1846 | ||
1847 | static inline int locks_verify_truncate(struct inode *inode, | 1847 | static inline int locks_verify_truncate(struct inode *inode, |
1848 | struct file *filp, | 1848 | struct file *filp, |
1849 | loff_t size) | 1849 | loff_t size) |
1850 | { | 1850 | { |
1851 | if (inode->i_flock && mandatory_lock(inode)) | 1851 | if (inode->i_flock && mandatory_lock(inode)) |
1852 | return locks_mandatory_area( | 1852 | return locks_mandatory_area( |
1853 | FLOCK_VERIFY_WRITE, inode, filp, | 1853 | FLOCK_VERIFY_WRITE, inode, filp, |
1854 | size < inode->i_size ? size : inode->i_size, | 1854 | size < inode->i_size ? size : inode->i_size, |
1855 | (size < inode->i_size ? inode->i_size - size | 1855 | (size < inode->i_size ? inode->i_size - size |
1856 | : size - inode->i_size) | 1856 | : size - inode->i_size) |
1857 | ); | 1857 | ); |
1858 | return 0; | 1858 | return 0; |
1859 | } | 1859 | } |
1860 | 1860 | ||
1861 | static inline int break_lease(struct inode *inode, unsigned int mode) | 1861 | static inline int break_lease(struct inode *inode, unsigned int mode) |
1862 | { | 1862 | { |
1863 | if (inode->i_flock) | 1863 | if (inode->i_flock) |
1864 | return __break_lease(inode, mode); | 1864 | return __break_lease(inode, mode); |
1865 | return 0; | 1865 | return 0; |
1866 | } | 1866 | } |
1867 | #else /* !CONFIG_FILE_LOCKING */ | 1867 | #else /* !CONFIG_FILE_LOCKING */ |
/* !CONFIG_FILE_LOCKING stub: ignores @inode and always returns 0. */
static inline int locks_mandatory_locked(struct inode *inode)
{
	return 0;
}
1872 | 1872 | ||
1873 | static inline int locks_mandatory_area(int rw, struct inode *inode, | 1873 | static inline int locks_mandatory_area(int rw, struct inode *inode, |
1874 | struct file *filp, loff_t offset, | 1874 | struct file *filp, loff_t offset, |
1875 | size_t count) | 1875 | size_t count) |
1876 | { | 1876 | { |
1877 | return 0; | 1877 | return 0; |
1878 | } | 1878 | } |
1879 | 1879 | ||
/* !CONFIG_FILE_LOCKING stub: ignores @inode and always returns 0. */
static inline int __mandatory_lock(struct inode *inode)
{
	return 0;
}
1884 | 1884 | ||
/* !CONFIG_FILE_LOCKING stub: ignores @inode and always returns 0. */
static inline int mandatory_lock(struct inode *inode)
{
	return 0;
}
1889 | 1889 | ||
/* !CONFIG_FILE_LOCKING stub: ignores @inode and always returns 0. */
static inline int locks_verify_locked(struct inode *inode)
{
	return 0;
}
1894 | 1894 | ||
1895 | static inline int locks_verify_truncate(struct inode *inode, struct file *filp, | 1895 | static inline int locks_verify_truncate(struct inode *inode, struct file *filp, |
1896 | size_t size) | 1896 | size_t size) |
1897 | { | 1897 | { |
1898 | return 0; | 1898 | return 0; |
1899 | } | 1899 | } |
1900 | 1900 | ||
/*
 * !CONFIG_FILE_LOCKING stub: ignores @inode and @mode and always
 * returns 0.
 */
static inline int break_lease(struct inode *inode, unsigned int mode)
{
	return 0;
}
1905 | 1905 | ||
1906 | #endif /* CONFIG_FILE_LOCKING */ | 1906 | #endif /* CONFIG_FILE_LOCKING */ |
1907 | 1907 | ||
1908 | /* fs/open.c */ | 1908 | /* fs/open.c */ |
1909 | 1909 | ||
1910 | extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, | 1910 | extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, |
1911 | struct file *filp); | 1911 | struct file *filp); |
1912 | extern int do_fallocate(struct file *file, int mode, loff_t offset, | 1912 | extern int do_fallocate(struct file *file, int mode, loff_t offset, |
1913 | loff_t len); | 1913 | loff_t len); |
1914 | extern long do_sys_open(int dfd, const char __user *filename, int flags, | 1914 | extern long do_sys_open(int dfd, const char __user *filename, int flags, |
1915 | int mode); | 1915 | int mode); |
1916 | extern struct file *filp_open(const char *, int, int); | 1916 | extern struct file *filp_open(const char *, int, int); |
1917 | extern struct file * dentry_open(struct dentry *, struct vfsmount *, int, | 1917 | extern struct file * dentry_open(struct dentry *, struct vfsmount *, int, |
1918 | const struct cred *); | 1918 | const struct cred *); |
1919 | extern int filp_close(struct file *, fl_owner_t id); | 1919 | extern int filp_close(struct file *, fl_owner_t id); |
1920 | extern char * getname(const char __user *); | 1920 | extern char * getname(const char __user *); |
1921 | 1921 | ||
1922 | /* fs/ioctl.c */ | 1922 | /* fs/ioctl.c */ |
1923 | 1923 | ||
1924 | extern int ioctl_preallocate(struct file *filp, void __user *argp); | 1924 | extern int ioctl_preallocate(struct file *filp, void __user *argp); |
1925 | 1925 | ||
1926 | /* fs/dcache.c */ | 1926 | /* fs/dcache.c */ |
1927 | extern void __init vfs_caches_init_early(void); | 1927 | extern void __init vfs_caches_init_early(void); |
1928 | extern void __init vfs_caches_init(unsigned long); | 1928 | extern void __init vfs_caches_init(unsigned long); |
1929 | 1929 | ||
1930 | extern struct kmem_cache *names_cachep; | 1930 | extern struct kmem_cache *names_cachep; |
1931 | 1931 | ||
1932 | #define __getname_gfp(gfp) kmem_cache_alloc(names_cachep, (gfp)) | 1932 | #define __getname_gfp(gfp) kmem_cache_alloc(names_cachep, (gfp)) |
1933 | #define __getname() __getname_gfp(GFP_KERNEL) | 1933 | #define __getname() __getname_gfp(GFP_KERNEL) |
1934 | #define __putname(name) kmem_cache_free(names_cachep, (void *)(name)) | 1934 | #define __putname(name) kmem_cache_free(names_cachep, (void *)(name)) |
1935 | #ifndef CONFIG_AUDITSYSCALL | 1935 | #ifndef CONFIG_AUDITSYSCALL |
1936 | #define putname(name) __putname(name) | 1936 | #define putname(name) __putname(name) |
1937 | #else | 1937 | #else |
1938 | extern void putname(const char *name); | 1938 | extern void putname(const char *name); |
1939 | #endif | 1939 | #endif |
1940 | 1940 | ||
1941 | #ifdef CONFIG_BLOCK | 1941 | #ifdef CONFIG_BLOCK |
1942 | extern int register_blkdev(unsigned int, const char *); | 1942 | extern int register_blkdev(unsigned int, const char *); |
1943 | extern void unregister_blkdev(unsigned int, const char *); | 1943 | extern void unregister_blkdev(unsigned int, const char *); |
1944 | extern struct block_device *bdget(dev_t); | 1944 | extern struct block_device *bdget(dev_t); |
1945 | extern struct block_device *bdgrab(struct block_device *bdev); | 1945 | extern struct block_device *bdgrab(struct block_device *bdev); |
1946 | extern void bd_set_size(struct block_device *, loff_t size); | 1946 | extern void bd_set_size(struct block_device *, loff_t size); |
1947 | extern void bd_forget(struct inode *inode); | 1947 | extern void bd_forget(struct inode *inode); |
1948 | extern void bdput(struct block_device *); | 1948 | extern void bdput(struct block_device *); |
1949 | extern struct block_device *open_by_devnum(dev_t, fmode_t); | 1949 | extern struct block_device *open_by_devnum(dev_t, fmode_t); |
1950 | extern void invalidate_bdev(struct block_device *); | 1950 | extern void invalidate_bdev(struct block_device *); |
1951 | extern int sync_blockdev(struct block_device *bdev); | 1951 | extern int sync_blockdev(struct block_device *bdev); |
1952 | extern struct super_block *freeze_bdev(struct block_device *); | 1952 | extern struct super_block *freeze_bdev(struct block_device *); |
1953 | extern void emergency_thaw_all(void); | 1953 | extern void emergency_thaw_all(void); |
1954 | extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); | 1954 | extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); |
1955 | extern int fsync_bdev(struct block_device *); | 1955 | extern int fsync_bdev(struct block_device *); |
1956 | #else | 1956 | #else |
/* !CONFIG_BLOCK stub: does nothing. */
static inline void bd_forget(struct inode *inode)
{
}
/* !CONFIG_BLOCK stub: ignores @bdev and always returns 0. */
static inline int sync_blockdev(struct block_device *bdev)
{
	return 0;
}
/* !CONFIG_BLOCK stub: does nothing. */
static inline void invalidate_bdev(struct block_device *bdev)
{
}
1960 | 1960 | ||
/*
 * !CONFIG_BLOCK stub: ignores its argument (a block device, despite the
 * parameter being named @sb) and always returns NULL.
 */
static inline struct super_block *freeze_bdev(struct block_device *sb)
{
	return NULL;
}
1965 | 1965 | ||
/* !CONFIG_BLOCK stub: ignores @bdev and @sb and always returns 0. */
static inline int thaw_bdev(struct block_device *bdev,
			    struct super_block *sb)
{
	return 0;
}
1970 | #endif | 1970 | #endif |
1971 | extern int sync_filesystem(struct super_block *); | 1971 | extern int sync_filesystem(struct super_block *); |
1972 | extern const struct file_operations def_blk_fops; | 1972 | extern const struct file_operations def_blk_fops; |
1973 | extern const struct file_operations def_chr_fops; | 1973 | extern const struct file_operations def_chr_fops; |
1974 | extern const struct file_operations bad_sock_fops; | 1974 | extern const struct file_operations bad_sock_fops; |
1975 | extern const struct file_operations def_fifo_fops; | 1975 | extern const struct file_operations def_fifo_fops; |
1976 | #ifdef CONFIG_BLOCK | 1976 | #ifdef CONFIG_BLOCK |
1977 | extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); | 1977 | extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); |
1978 | extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); | 1978 | extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); |
1979 | extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); | 1979 | extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); |
1980 | extern int blkdev_get(struct block_device *, fmode_t); | 1980 | extern int blkdev_get(struct block_device *, fmode_t); |
1981 | extern int blkdev_put(struct block_device *, fmode_t); | 1981 | extern int blkdev_put(struct block_device *, fmode_t); |
1982 | extern int bd_claim(struct block_device *, void *); | 1982 | extern int bd_claim(struct block_device *, void *); |
1983 | extern void bd_release(struct block_device *); | 1983 | extern void bd_release(struct block_device *); |
1984 | #ifdef CONFIG_SYSFS | 1984 | #ifdef CONFIG_SYSFS |
1985 | extern int bd_claim_by_disk(struct block_device *, void *, struct gendisk *); | 1985 | extern int bd_claim_by_disk(struct block_device *, void *, struct gendisk *); |
1986 | extern void bd_release_from_disk(struct block_device *, struct gendisk *); | 1986 | extern void bd_release_from_disk(struct block_device *, struct gendisk *); |
1987 | #else | 1987 | #else |
1988 | #define bd_claim_by_disk(bdev, holder, disk) bd_claim(bdev, holder) | 1988 | #define bd_claim_by_disk(bdev, holder, disk) bd_claim(bdev, holder) |
1989 | #define bd_release_from_disk(bdev, disk) bd_release(bdev) | 1989 | #define bd_release_from_disk(bdev, disk) bd_release(bdev) |
1990 | #endif | 1990 | #endif |
1991 | #endif | 1991 | #endif |
1992 | 1992 | ||
1993 | /* fs/char_dev.c */ | 1993 | /* fs/char_dev.c */ |
1994 | #define CHRDEV_MAJOR_HASH_SIZE 255 | 1994 | #define CHRDEV_MAJOR_HASH_SIZE 255 |
1995 | extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *); | 1995 | extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *); |
1996 | extern int register_chrdev_region(dev_t, unsigned, const char *); | 1996 | extern int register_chrdev_region(dev_t, unsigned, const char *); |
1997 | extern int __register_chrdev(unsigned int major, unsigned int baseminor, | 1997 | extern int __register_chrdev(unsigned int major, unsigned int baseminor, |
1998 | unsigned int count, const char *name, | 1998 | unsigned int count, const char *name, |
1999 | const struct file_operations *fops); | 1999 | const struct file_operations *fops); |
2000 | extern void __unregister_chrdev(unsigned int major, unsigned int baseminor, | 2000 | extern void __unregister_chrdev(unsigned int major, unsigned int baseminor, |
2001 | unsigned int count, const char *name); | 2001 | unsigned int count, const char *name); |
2002 | extern void unregister_chrdev_region(dev_t, unsigned); | 2002 | extern void unregister_chrdev_region(dev_t, unsigned); |
2003 | extern void chrdev_show(struct seq_file *,off_t); | 2003 | extern void chrdev_show(struct seq_file *,off_t); |
2004 | 2004 | ||
/*
 * Convenience wrapper around __register_chrdev(): registers @fops for
 * @major under @name with a base minor of 0 and a count of 256, and
 * hands back whatever __register_chrdev() returns.
 */
static inline int register_chrdev(unsigned int major, const char *name,
				  const struct file_operations *fops)
{
	const unsigned int first_minor = 0;
	const unsigned int nr_minors = 256;

	return __register_chrdev(major, first_minor, nr_minors, name, fops);
}
2010 | 2010 | ||
/*
 * Convenience wrapper around __unregister_chrdev(): passes @major and
 * @name along with a base minor of 0 and a count of 256, the same range
 * register_chrdev() uses.
 */
static inline void unregister_chrdev(unsigned int major, const char *name)
{
	const unsigned int first_minor = 0;
	const unsigned int nr_minors = 256;

	__unregister_chrdev(major, first_minor, nr_minors, name);
}
2015 | 2015 | ||
2016 | /* fs/block_dev.c */ | 2016 | /* fs/block_dev.c */ |
2017 | #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ | 2017 | #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ |
2018 | #define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */ | 2018 | #define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */ |
2019 | 2019 | ||
2020 | #ifdef CONFIG_BLOCK | 2020 | #ifdef CONFIG_BLOCK |
2021 | #define BLKDEV_MAJOR_HASH_SIZE 255 | 2021 | #define BLKDEV_MAJOR_HASH_SIZE 255 |
2022 | extern const char *__bdevname(dev_t, char *buffer); | 2022 | extern const char *__bdevname(dev_t, char *buffer); |
2023 | extern const char *bdevname(struct block_device *bdev, char *buffer); | 2023 | extern const char *bdevname(struct block_device *bdev, char *buffer); |
2024 | extern struct block_device *lookup_bdev(const char *); | 2024 | extern struct block_device *lookup_bdev(const char *); |
2025 | extern struct block_device *open_bdev_exclusive(const char *, fmode_t, void *); | 2025 | extern struct block_device *open_bdev_exclusive(const char *, fmode_t, void *); |
2026 | extern void close_bdev_exclusive(struct block_device *, fmode_t); | 2026 | extern void close_bdev_exclusive(struct block_device *, fmode_t); |
2027 | extern void blkdev_show(struct seq_file *,off_t); | 2027 | extern void blkdev_show(struct seq_file *,off_t); |
2028 | 2028 | ||
2029 | #else | 2029 | #else |
2030 | #define BLKDEV_MAJOR_HASH_SIZE 0 | 2030 | #define BLKDEV_MAJOR_HASH_SIZE 0 |
2031 | #endif | 2031 | #endif |
2032 | 2032 | ||
2033 | extern void init_special_inode(struct inode *, umode_t, dev_t); | 2033 | extern void init_special_inode(struct inode *, umode_t, dev_t); |
2034 | 2034 | ||
2035 | /* Invalid inode operations -- fs/bad_inode.c */ | 2035 | /* Invalid inode operations -- fs/bad_inode.c */ |
2036 | extern void make_bad_inode(struct inode *); | 2036 | extern void make_bad_inode(struct inode *); |
2037 | extern int is_bad_inode(struct inode *); | 2037 | extern int is_bad_inode(struct inode *); |
2038 | 2038 | ||
2039 | extern const struct file_operations read_pipefifo_fops; | 2039 | extern const struct file_operations read_pipefifo_fops; |
2040 | extern const struct file_operations write_pipefifo_fops; | 2040 | extern const struct file_operations write_pipefifo_fops; |
2041 | extern const struct file_operations rdwr_pipefifo_fops; | 2041 | extern const struct file_operations rdwr_pipefifo_fops; |
2042 | 2042 | ||
2043 | extern int fs_may_remount_ro(struct super_block *); | 2043 | extern int fs_may_remount_ro(struct super_block *); |
2044 | 2044 | ||
2045 | #ifdef CONFIG_BLOCK | 2045 | #ifdef CONFIG_BLOCK |
2046 | /* | 2046 | /* |
2047 | * return READ, READA, or WRITE | 2047 | * return READ, READA, or WRITE |
2048 | */ | 2048 | */ |
2049 | #define bio_rw(bio) ((bio)->bi_rw & (RW_MASK | RWA_MASK)) | 2049 | #define bio_rw(bio) ((bio)->bi_rw & (RW_MASK | RWA_MASK)) |
2050 | 2050 | ||
2051 | /* | 2051 | /* |
2052 | * return data direction, READ or WRITE | 2052 | * return data direction, READ or WRITE |
2053 | */ | 2053 | */ |
2054 | #define bio_data_dir(bio) ((bio)->bi_rw & 1) | 2054 | #define bio_data_dir(bio) ((bio)->bi_rw & 1) |
2055 | 2055 | ||
2056 | extern void check_disk_size_change(struct gendisk *disk, | 2056 | extern void check_disk_size_change(struct gendisk *disk, |
2057 | struct block_device *bdev); | 2057 | struct block_device *bdev); |
2058 | extern int revalidate_disk(struct gendisk *); | 2058 | extern int revalidate_disk(struct gendisk *); |
2059 | extern int check_disk_change(struct block_device *); | 2059 | extern int check_disk_change(struct block_device *); |
2060 | extern int __invalidate_device(struct block_device *); | 2060 | extern int __invalidate_device(struct block_device *); |
2061 | extern int invalidate_partition(struct gendisk *, int); | 2061 | extern int invalidate_partition(struct gendisk *, int); |
2062 | #endif | 2062 | #endif |
2063 | extern int invalidate_inodes(struct super_block *); | 2063 | extern int invalidate_inodes(struct super_block *); |
2064 | unsigned long invalidate_mapping_pages(struct address_space *mapping, | 2064 | unsigned long invalidate_mapping_pages(struct address_space *mapping, |
2065 | pgoff_t start, pgoff_t end); | 2065 | pgoff_t start, pgoff_t end); |
2066 | 2066 | ||
2067 | static inline void invalidate_remote_inode(struct inode *inode) | 2067 | static inline void invalidate_remote_inode(struct inode *inode) |
2068 | { | 2068 | { |
2069 | if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 2069 | if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
2070 | S_ISLNK(inode->i_mode)) | 2070 | S_ISLNK(inode->i_mode)) |
2071 | invalidate_mapping_pages(inode->i_mapping, 0, -1); | 2071 | invalidate_mapping_pages(inode->i_mapping, 0, -1); |
2072 | } | 2072 | } |
2073 | extern int invalidate_inode_pages2(struct address_space *mapping); | 2073 | extern int invalidate_inode_pages2(struct address_space *mapping); |
2074 | extern int invalidate_inode_pages2_range(struct address_space *mapping, | 2074 | extern int invalidate_inode_pages2_range(struct address_space *mapping, |
2075 | pgoff_t start, pgoff_t end); | 2075 | pgoff_t start, pgoff_t end); |
2076 | extern int write_inode_now(struct inode *, int); | 2076 | extern int write_inode_now(struct inode *, int); |
2077 | extern int filemap_fdatawrite(struct address_space *); | 2077 | extern int filemap_fdatawrite(struct address_space *); |
2078 | extern int filemap_flush(struct address_space *); | 2078 | extern int filemap_flush(struct address_space *); |
2079 | extern int filemap_fdatawait(struct address_space *); | 2079 | extern int filemap_fdatawait(struct address_space *); |
2080 | extern int filemap_fdatawait_range(struct address_space *, loff_t lstart, | 2080 | extern int filemap_fdatawait_range(struct address_space *, loff_t lstart, |
2081 | loff_t lend); | 2081 | loff_t lend); |
2082 | extern int filemap_write_and_wait(struct address_space *mapping); | 2082 | extern int filemap_write_and_wait(struct address_space *mapping); |
2083 | extern int filemap_write_and_wait_range(struct address_space *mapping, | 2083 | extern int filemap_write_and_wait_range(struct address_space *mapping, |
2084 | loff_t lstart, loff_t lend); | 2084 | loff_t lstart, loff_t lend); |
2085 | extern int __filemap_fdatawrite_range(struct address_space *mapping, | 2085 | extern int __filemap_fdatawrite_range(struct address_space *mapping, |
2086 | loff_t start, loff_t end, int sync_mode); | 2086 | loff_t start, loff_t end, int sync_mode); |
2087 | extern int filemap_fdatawrite_range(struct address_space *mapping, | 2087 | extern int filemap_fdatawrite_range(struct address_space *mapping, |
2088 | loff_t start, loff_t end); | 2088 | loff_t start, loff_t end); |
2089 | 2089 | ||
2090 | extern int vfs_fsync_range(struct file *file, struct dentry *dentry, | 2090 | extern int vfs_fsync_range(struct file *file, struct dentry *dentry, |
2091 | loff_t start, loff_t end, int datasync); | 2091 | loff_t start, loff_t end, int datasync); |
2092 | extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync); | 2092 | extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync); |
2093 | extern int generic_write_sync(struct file *file, loff_t pos, loff_t count); | 2093 | extern int generic_write_sync(struct file *file, loff_t pos, loff_t count); |
2094 | extern void sync_supers(void); | 2094 | extern void sync_supers(void); |
2095 | extern void emergency_sync(void); | 2095 | extern void emergency_sync(void); |
2096 | extern void emergency_remount(void); | 2096 | extern void emergency_remount(void); |
2097 | #ifdef CONFIG_BLOCK | 2097 | #ifdef CONFIG_BLOCK |
2098 | extern sector_t bmap(struct inode *, sector_t); | 2098 | extern sector_t bmap(struct inode *, sector_t); |
2099 | #endif | 2099 | #endif |
2100 | extern int notify_change(struct dentry *, struct iattr *); | 2100 | extern int notify_change(struct dentry *, struct iattr *); |
2101 | extern int inode_permission(struct inode *, int); | 2101 | extern int inode_permission(struct inode *, int); |
2102 | extern int generic_permission(struct inode *, int, | 2102 | extern int generic_permission(struct inode *, int, |
2103 | int (*check_acl)(struct inode *, int)); | 2103 | int (*check_acl)(struct inode *, int)); |
2104 | 2104 | ||
2105 | static inline bool execute_ok(struct inode *inode) | 2105 | static inline bool execute_ok(struct inode *inode) |
2106 | { | 2106 | { |
2107 | return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode); | 2107 | return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode); |
2108 | } | 2108 | } |
2109 | 2109 | ||
2110 | extern int get_write_access(struct inode *); | 2110 | extern int get_write_access(struct inode *); |
2111 | extern int deny_write_access(struct file *); | 2111 | extern int deny_write_access(struct file *); |
2112 | static inline void put_write_access(struct inode * inode) | 2112 | static inline void put_write_access(struct inode * inode) |
2113 | { | 2113 | { |
2114 | atomic_dec(&inode->i_writecount); | 2114 | atomic_dec(&inode->i_writecount); |
2115 | } | 2115 | } |
2116 | static inline void allow_write_access(struct file *file) | 2116 | static inline void allow_write_access(struct file *file) |
2117 | { | 2117 | { |
2118 | if (file) | 2118 | if (file) |
2119 | atomic_inc(&file->f_path.dentry->d_inode->i_writecount); | 2119 | atomic_inc(&file->f_path.dentry->d_inode->i_writecount); |
2120 | } | 2120 | } |
2121 | extern int do_pipe_flags(int *, int); | 2121 | extern int do_pipe_flags(int *, int); |
2122 | extern struct file *create_read_pipe(struct file *f, int flags); | 2122 | extern struct file *create_read_pipe(struct file *f, int flags); |
2123 | extern struct file *create_write_pipe(int flags); | 2123 | extern struct file *create_write_pipe(int flags); |
2124 | extern void free_write_pipe(struct file *); | 2124 | extern void free_write_pipe(struct file *); |
2125 | 2125 | ||
2126 | extern struct file *do_filp_open(int dfd, const char *pathname, | 2126 | extern struct file *do_filp_open(int dfd, const char *pathname, |
2127 | int open_flag, int mode, int acc_mode); | 2127 | int open_flag, int mode, int acc_mode); |
2128 | extern int may_open(struct path *, int, int); | 2128 | extern int may_open(struct path *, int, int); |
2129 | 2129 | ||
2130 | extern int kernel_read(struct file *, loff_t, char *, unsigned long); | 2130 | extern int kernel_read(struct file *, loff_t, char *, unsigned long); |
2131 | extern struct file * open_exec(const char *); | 2131 | extern struct file * open_exec(const char *); |
2132 | 2132 | ||
2133 | /* fs/dcache.c -- generic fs support functions */ | 2133 | /* fs/dcache.c -- generic fs support functions */ |
2134 | extern int is_subdir(struct dentry *, struct dentry *); | 2134 | extern int is_subdir(struct dentry *, struct dentry *); |
2135 | extern int path_is_under(struct path *, struct path *); | 2135 | extern int path_is_under(struct path *, struct path *); |
2136 | extern ino_t find_inode_number(struct dentry *, struct qstr *); | 2136 | extern ino_t find_inode_number(struct dentry *, struct qstr *); |
2137 | 2137 | ||
2138 | #include <linux/err.h> | 2138 | #include <linux/err.h> |
2139 | 2139 | ||
2140 | /* needed for stackable file system support */ | 2140 | /* needed for stackable file system support */ |
2141 | extern loff_t default_llseek(struct file *file, loff_t offset, int origin); | 2141 | extern loff_t default_llseek(struct file *file, loff_t offset, int origin); |
2142 | 2142 | ||
2143 | extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin); | 2143 | extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin); |
2144 | 2144 | ||
2145 | extern int inode_init_always(struct super_block *, struct inode *); | 2145 | extern int inode_init_always(struct super_block *, struct inode *); |
2146 | extern void inode_init_once(struct inode *); | 2146 | extern void inode_init_once(struct inode *); |
2147 | extern void inode_add_to_lists(struct super_block *, struct inode *); | 2147 | extern void inode_add_to_lists(struct super_block *, struct inode *); |
2148 | extern void iput(struct inode *); | 2148 | extern void iput(struct inode *); |
2149 | extern struct inode * igrab(struct inode *); | 2149 | extern struct inode * igrab(struct inode *); |
2150 | extern ino_t iunique(struct super_block *, ino_t); | 2150 | extern ino_t iunique(struct super_block *, ino_t); |
2151 | extern int inode_needs_sync(struct inode *inode); | 2151 | extern int inode_needs_sync(struct inode *inode); |
2152 | extern void generic_delete_inode(struct inode *inode); | 2152 | extern void generic_delete_inode(struct inode *inode); |
2153 | extern void generic_drop_inode(struct inode *inode); | 2153 | extern void generic_drop_inode(struct inode *inode); |
2154 | extern int generic_detach_inode(struct inode *inode); | 2154 | extern int generic_detach_inode(struct inode *inode); |
2155 | 2155 | ||
2156 | extern struct inode *ilookup5_nowait(struct super_block *sb, | 2156 | extern struct inode *ilookup5_nowait(struct super_block *sb, |
2157 | unsigned long hashval, int (*test)(struct inode *, void *), | 2157 | unsigned long hashval, int (*test)(struct inode *, void *), |
2158 | void *data); | 2158 | void *data); |
2159 | extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval, | 2159 | extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval, |
2160 | int (*test)(struct inode *, void *), void *data); | 2160 | int (*test)(struct inode *, void *), void *data); |
2161 | extern struct inode *ilookup(struct super_block *sb, unsigned long ino); | 2161 | extern struct inode *ilookup(struct super_block *sb, unsigned long ino); |
2162 | 2162 | ||
2163 | extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *); | 2163 | extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *); |
2164 | extern struct inode * iget_locked(struct super_block *, unsigned long); | 2164 | extern struct inode * iget_locked(struct super_block *, unsigned long); |
2165 | extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); | 2165 | extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); |
2166 | extern int insert_inode_locked(struct inode *); | 2166 | extern int insert_inode_locked(struct inode *); |
2167 | extern void unlock_new_inode(struct inode *); | 2167 | extern void unlock_new_inode(struct inode *); |
2168 | 2168 | ||
2169 | extern void __iget(struct inode * inode); | 2169 | extern void __iget(struct inode * inode); |
2170 | extern void iget_failed(struct inode *); | 2170 | extern void iget_failed(struct inode *); |
2171 | extern void clear_inode(struct inode *); | 2171 | extern void clear_inode(struct inode *); |
2172 | extern void destroy_inode(struct inode *); | 2172 | extern void destroy_inode(struct inode *); |
2173 | extern void __destroy_inode(struct inode *); | 2173 | extern void __destroy_inode(struct inode *); |
2174 | extern struct inode *new_inode(struct super_block *); | 2174 | extern struct inode *new_inode(struct super_block *); |
2175 | extern int should_remove_suid(struct dentry *); | 2175 | extern int should_remove_suid(struct dentry *); |
2176 | extern int file_remove_suid(struct file *); | 2176 | extern int file_remove_suid(struct file *); |
2177 | 2177 | ||
2178 | extern void __insert_inode_hash(struct inode *, unsigned long hashval); | 2178 | extern void __insert_inode_hash(struct inode *, unsigned long hashval); |
2179 | extern void remove_inode_hash(struct inode *); | 2179 | extern void remove_inode_hash(struct inode *); |
2180 | static inline void insert_inode_hash(struct inode *inode) { | 2180 | static inline void insert_inode_hash(struct inode *inode) { |
2181 | __insert_inode_hash(inode, inode->i_ino); | 2181 | __insert_inode_hash(inode, inode->i_ino); |
2182 | } | 2182 | } |
2183 | 2183 | ||
2184 | extern void file_move(struct file *f, struct list_head *list); | 2184 | extern void file_move(struct file *f, struct list_head *list); |
2185 | extern void file_kill(struct file *f); | 2185 | extern void file_kill(struct file *f); |
2186 | #ifdef CONFIG_BLOCK | 2186 | #ifdef CONFIG_BLOCK |
2187 | struct bio; | 2187 | struct bio; |
2188 | extern void submit_bio(int, struct bio *); | 2188 | extern void submit_bio(int, struct bio *); |
2189 | extern int bdev_read_only(struct block_device *); | 2189 | extern int bdev_read_only(struct block_device *); |
2190 | #endif | 2190 | #endif |
2191 | extern int set_blocksize(struct block_device *, int); | 2191 | extern int set_blocksize(struct block_device *, int); |
2192 | extern int sb_set_blocksize(struct super_block *, int); | 2192 | extern int sb_set_blocksize(struct super_block *, int); |
2193 | extern int sb_min_blocksize(struct super_block *, int); | 2193 | extern int sb_min_blocksize(struct super_block *, int); |
2194 | 2194 | ||
2195 | extern int generic_file_mmap(struct file *, struct vm_area_struct *); | 2195 | extern int generic_file_mmap(struct file *, struct vm_area_struct *); |
2196 | extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); | 2196 | extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); |
2197 | extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); | 2197 | extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); |
2198 | int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); | 2198 | int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); |
2199 | extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); | 2199 | extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); |
2200 | extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, | 2200 | extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, |
2201 | loff_t *); | 2201 | loff_t *); |
2202 | extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); | 2202 | extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); |
2203 | extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, | 2203 | extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, |
2204 | unsigned long *, loff_t, loff_t *, size_t, size_t); | 2204 | unsigned long *, loff_t, loff_t *, size_t, size_t); |
2205 | extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, | 2205 | extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, |
2206 | unsigned long, loff_t, loff_t *, size_t, ssize_t); | 2206 | unsigned long, loff_t, loff_t *, size_t, ssize_t); |
2207 | extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); | 2207 | extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); |
2208 | extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); | 2208 | extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); |
2209 | extern int generic_segment_checks(const struct iovec *iov, | 2209 | extern int generic_segment_checks(const struct iovec *iov, |
2210 | unsigned long *nr_segs, size_t *count, int access_flags); | 2210 | unsigned long *nr_segs, size_t *count, int access_flags); |
2211 | 2211 | ||
2212 | /* fs/block_dev.c */ | 2212 | /* fs/block_dev.c */ |
2213 | extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, | 2213 | extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, |
2214 | unsigned long nr_segs, loff_t pos); | 2214 | unsigned long nr_segs, loff_t pos); |
2215 | extern int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync); | 2215 | extern int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync); |
2216 | 2216 | ||
2217 | /* fs/splice.c */ | 2217 | /* fs/splice.c */ |
2218 | extern ssize_t generic_file_splice_read(struct file *, loff_t *, | 2218 | extern ssize_t generic_file_splice_read(struct file *, loff_t *, |
2219 | struct pipe_inode_info *, size_t, unsigned int); | 2219 | struct pipe_inode_info *, size_t, unsigned int); |
2220 | extern ssize_t default_file_splice_read(struct file *, loff_t *, | 2220 | extern ssize_t default_file_splice_read(struct file *, loff_t *, |
2221 | struct pipe_inode_info *, size_t, unsigned int); | 2221 | struct pipe_inode_info *, size_t, unsigned int); |
2222 | extern ssize_t generic_file_splice_write(struct pipe_inode_info *, | 2222 | extern ssize_t generic_file_splice_write(struct pipe_inode_info *, |
2223 | struct file *, loff_t *, size_t, unsigned int); | 2223 | struct file *, loff_t *, size_t, unsigned int); |
2224 | extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, | 2224 | extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, |
2225 | struct file *out, loff_t *, size_t len, unsigned int flags); | 2225 | struct file *out, loff_t *, size_t len, unsigned int flags); |
2226 | extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, | 2226 | extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, |
2227 | size_t len, unsigned int flags); | 2227 | size_t len, unsigned int flags); |
2228 | 2228 | ||
2229 | extern void | 2229 | extern void |
2230 | file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); | 2230 | file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); |
2231 | extern loff_t no_llseek(struct file *file, loff_t offset, int origin); | 2231 | extern loff_t no_llseek(struct file *file, loff_t offset, int origin); |
2232 | extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin); | 2232 | extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin); |
2233 | extern loff_t generic_file_llseek_unlocked(struct file *file, loff_t offset, | 2233 | extern loff_t generic_file_llseek_unlocked(struct file *file, loff_t offset, |
2234 | int origin); | 2234 | int origin); |
2235 | extern int generic_file_open(struct inode * inode, struct file * filp); | 2235 | extern int generic_file_open(struct inode * inode, struct file * filp); |
2236 | extern int nonseekable_open(struct inode * inode, struct file * filp); | 2236 | extern int nonseekable_open(struct inode * inode, struct file * filp); |
2237 | 2237 | ||
2238 | #ifdef CONFIG_FS_XIP | 2238 | #ifdef CONFIG_FS_XIP |
2239 | extern ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len, | 2239 | extern ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len, |
2240 | loff_t *ppos); | 2240 | loff_t *ppos); |
2241 | extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma); | 2241 | extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma); |
2242 | extern ssize_t xip_file_write(struct file *filp, const char __user *buf, | 2242 | extern ssize_t xip_file_write(struct file *filp, const char __user *buf, |
2243 | size_t len, loff_t *ppos); | 2243 | size_t len, loff_t *ppos); |
2244 | extern int xip_truncate_page(struct address_space *mapping, loff_t from); | 2244 | extern int xip_truncate_page(struct address_space *mapping, loff_t from); |
2245 | #else | 2245 | #else |
2246 | static inline int xip_truncate_page(struct address_space *mapping, loff_t from) | 2246 | static inline int xip_truncate_page(struct address_space *mapping, loff_t from) |
2247 | { | 2247 | { |
2248 | return 0; | 2248 | return 0; |
2249 | } | 2249 | } |
2250 | #endif | 2250 | #endif |
2251 | 2251 | ||
2252 | #ifdef CONFIG_BLOCK | 2252 | #ifdef CONFIG_BLOCK |
2253 | ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | 2253 | ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, |
2254 | struct block_device *bdev, const struct iovec *iov, loff_t offset, | 2254 | struct block_device *bdev, const struct iovec *iov, loff_t offset, |
2255 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, | 2255 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, |
2256 | int lock_type); | 2256 | int lock_type); |
2257 | 2257 | ||
/* Flag bits that may be OR-ed together and passed as lock_type. */
enum {
	/* need locking between buffered and direct access */
	DIO_LOCKING	= 1 << 0,

	/* filesystem does not support filling holes */
	DIO_SKIP_HOLES	= 1 << 1,
};
2265 | 2265 | ||
2266 | static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, | 2266 | static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, |
2267 | struct inode *inode, struct block_device *bdev, const struct iovec *iov, | 2267 | struct inode *inode, struct block_device *bdev, const struct iovec *iov, |
2268 | loff_t offset, unsigned long nr_segs, get_block_t get_block, | 2268 | loff_t offset, unsigned long nr_segs, get_block_t get_block, |
2269 | dio_iodone_t end_io) | 2269 | dio_iodone_t end_io) |
2270 | { | 2270 | { |
2271 | return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, | 2271 | return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, |
2272 | nr_segs, get_block, end_io, | 2272 | nr_segs, get_block, end_io, |
2273 | DIO_LOCKING | DIO_SKIP_HOLES); | 2273 | DIO_LOCKING | DIO_SKIP_HOLES); |
2274 | } | 2274 | } |
2275 | 2275 | ||
2276 | static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb, | 2276 | static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb, |
2277 | struct inode *inode, struct block_device *bdev, const struct iovec *iov, | 2277 | struct inode *inode, struct block_device *bdev, const struct iovec *iov, |
2278 | loff_t offset, unsigned long nr_segs, get_block_t get_block, | 2278 | loff_t offset, unsigned long nr_segs, get_block_t get_block, |
2279 | dio_iodone_t end_io) | 2279 | dio_iodone_t end_io) |
2280 | { | 2280 | { |
2281 | return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, | 2281 | return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, |
2282 | nr_segs, get_block, end_io, 0); | 2282 | nr_segs, get_block, end_io, 0); |
2283 | } | 2283 | } |
2284 | #endif | 2284 | #endif |
2285 | 2285 | ||
2286 | extern const struct file_operations generic_ro_fops; | 2286 | extern const struct file_operations generic_ro_fops; |
2287 | 2287 | ||
2288 | #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) | 2288 | #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) |
2289 | 2289 | ||
2290 | extern int vfs_readlink(struct dentry *, char __user *, int, const char *); | 2290 | extern int vfs_readlink(struct dentry *, char __user *, int, const char *); |
2291 | extern int vfs_follow_link(struct nameidata *, const char *); | 2291 | extern int vfs_follow_link(struct nameidata *, const char *); |
2292 | extern int page_readlink(struct dentry *, char __user *, int); | 2292 | extern int page_readlink(struct dentry *, char __user *, int); |
2293 | extern void *page_follow_link_light(struct dentry *, struct nameidata *); | 2293 | extern void *page_follow_link_light(struct dentry *, struct nameidata *); |
2294 | extern void page_put_link(struct dentry *, struct nameidata *, void *); | 2294 | extern void page_put_link(struct dentry *, struct nameidata *, void *); |
2295 | extern int __page_symlink(struct inode *inode, const char *symname, int len, | 2295 | extern int __page_symlink(struct inode *inode, const char *symname, int len, |
2296 | int nofs); | 2296 | int nofs); |
2297 | extern int page_symlink(struct inode *inode, const char *symname, int len); | 2297 | extern int page_symlink(struct inode *inode, const char *symname, int len); |
2298 | extern const struct inode_operations page_symlink_inode_operations; | 2298 | extern const struct inode_operations page_symlink_inode_operations; |
2299 | extern int generic_readlink(struct dentry *, char __user *, int); | 2299 | extern int generic_readlink(struct dentry *, char __user *, int); |
2300 | extern void generic_fillattr(struct inode *, struct kstat *); | 2300 | extern void generic_fillattr(struct inode *, struct kstat *); |
2301 | extern int vfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); | 2301 | extern int vfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); |
2302 | void __inode_add_bytes(struct inode *inode, loff_t bytes); | 2302 | void __inode_add_bytes(struct inode *inode, loff_t bytes); |
2303 | void inode_add_bytes(struct inode *inode, loff_t bytes); | 2303 | void inode_add_bytes(struct inode *inode, loff_t bytes); |
2304 | void inode_sub_bytes(struct inode *inode, loff_t bytes); | 2304 | void inode_sub_bytes(struct inode *inode, loff_t bytes); |
2305 | loff_t inode_get_bytes(struct inode *inode); | 2305 | loff_t inode_get_bytes(struct inode *inode); |
2306 | void inode_set_bytes(struct inode *inode, loff_t bytes); | 2306 | void inode_set_bytes(struct inode *inode, loff_t bytes); |
2307 | 2307 | ||
2308 | extern int vfs_readdir(struct file *, filldir_t, void *); | 2308 | extern int vfs_readdir(struct file *, filldir_t, void *); |
2309 | 2309 | ||
2310 | extern int vfs_stat(char __user *, struct kstat *); | 2310 | extern int vfs_stat(char __user *, struct kstat *); |
2311 | extern int vfs_lstat(char __user *, struct kstat *); | 2311 | extern int vfs_lstat(char __user *, struct kstat *); |
2312 | extern int vfs_fstat(unsigned int, struct kstat *); | 2312 | extern int vfs_fstat(unsigned int, struct kstat *); |
2313 | extern int vfs_fstatat(int , char __user *, struct kstat *, int); | 2313 | extern int vfs_fstatat(int , char __user *, struct kstat *, int); |
2314 | 2314 | ||
2315 | extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, | 2315 | extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, |
2316 | unsigned long arg); | 2316 | unsigned long arg); |
2317 | extern int __generic_block_fiemap(struct inode *inode, | 2317 | extern int __generic_block_fiemap(struct inode *inode, |
2318 | struct fiemap_extent_info *fieinfo, u64 start, | 2318 | struct fiemap_extent_info *fieinfo, u64 start, |
2319 | u64 len, get_block_t *get_block); | 2319 | u64 len, get_block_t *get_block); |
2320 | extern int generic_block_fiemap(struct inode *inode, | 2320 | extern int generic_block_fiemap(struct inode *inode, |
2321 | struct fiemap_extent_info *fieinfo, u64 start, | 2321 | struct fiemap_extent_info *fieinfo, u64 start, |
2322 | u64 len, get_block_t *get_block); | 2322 | u64 len, get_block_t *get_block); |
2323 | 2323 | ||
2324 | extern void get_filesystem(struct file_system_type *fs); | 2324 | extern void get_filesystem(struct file_system_type *fs); |
2325 | extern void put_filesystem(struct file_system_type *fs); | 2325 | extern void put_filesystem(struct file_system_type *fs); |
2326 | extern struct file_system_type *get_fs_type(const char *name); | 2326 | extern struct file_system_type *get_fs_type(const char *name); |
2327 | extern struct super_block *get_super(struct block_device *); | 2327 | extern struct super_block *get_super(struct block_device *); |
2328 | extern struct super_block *get_active_super(struct block_device *bdev); | 2328 | extern struct super_block *get_active_super(struct block_device *bdev); |
2329 | extern struct super_block *user_get_super(dev_t); | 2329 | extern struct super_block *user_get_super(dev_t); |
2330 | extern void drop_super(struct super_block *sb); | 2330 | extern void drop_super(struct super_block *sb); |
2331 | 2331 | ||
2332 | extern int dcache_dir_open(struct inode *, struct file *); | 2332 | extern int dcache_dir_open(struct inode *, struct file *); |
2333 | extern int dcache_dir_close(struct inode *, struct file *); | 2333 | extern int dcache_dir_close(struct inode *, struct file *); |
2334 | extern loff_t dcache_dir_lseek(struct file *, loff_t, int); | 2334 | extern loff_t dcache_dir_lseek(struct file *, loff_t, int); |
2335 | extern int dcache_readdir(struct file *, void *, filldir_t); | 2335 | extern int dcache_readdir(struct file *, void *, filldir_t); |
2336 | extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *); | 2336 | extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *); |
2337 | extern int simple_statfs(struct dentry *, struct kstatfs *); | 2337 | extern int simple_statfs(struct dentry *, struct kstatfs *); |
2338 | extern int simple_link(struct dentry *, struct inode *, struct dentry *); | 2338 | extern int simple_link(struct dentry *, struct inode *, struct dentry *); |
2339 | extern int simple_unlink(struct inode *, struct dentry *); | 2339 | extern int simple_unlink(struct inode *, struct dentry *); |
2340 | extern int simple_rmdir(struct inode *, struct dentry *); | 2340 | extern int simple_rmdir(struct inode *, struct dentry *); |
2341 | extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); | 2341 | extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); |
2342 | extern int simple_sync_file(struct file *, struct dentry *, int); | 2342 | extern int simple_sync_file(struct file *, struct dentry *, int); |
2343 | extern int simple_empty(struct dentry *); | 2343 | extern int simple_empty(struct dentry *); |
2344 | extern int simple_readpage(struct file *file, struct page *page); | 2344 | extern int simple_readpage(struct file *file, struct page *page); |
2345 | extern int simple_write_begin(struct file *file, struct address_space *mapping, | 2345 | extern int simple_write_begin(struct file *file, struct address_space *mapping, |
2346 | loff_t pos, unsigned len, unsigned flags, | 2346 | loff_t pos, unsigned len, unsigned flags, |
2347 | struct page **pagep, void **fsdata); | 2347 | struct page **pagep, void **fsdata); |
2348 | extern int simple_write_end(struct file *file, struct address_space *mapping, | 2348 | extern int simple_write_end(struct file *file, struct address_space *mapping, |
2349 | loff_t pos, unsigned len, unsigned copied, | 2349 | loff_t pos, unsigned len, unsigned copied, |
2350 | struct page *page, void *fsdata); | 2350 | struct page *page, void *fsdata); |
2351 | 2351 | ||
2352 | extern struct dentry *simple_lookup(struct inode *, struct dentry *, struct nameidata *); | 2352 | extern struct dentry *simple_lookup(struct inode *, struct dentry *, struct nameidata *); |
2353 | extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); | 2353 | extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); |
2354 | extern const struct file_operations simple_dir_operations; | 2354 | extern const struct file_operations simple_dir_operations; |
2355 | extern const struct inode_operations simple_dir_inode_operations; | 2355 | extern const struct inode_operations simple_dir_inode_operations; |
/*
 * A name, a file_operations pointer and an integer mode.
 * simple_fill_super() takes a pointer to this type.
 */
struct tree_descr {
	char *name;
	const struct file_operations *ops;
	int mode;
};
2357 | struct dentry *d_alloc_name(struct dentry *, const char *); | 2357 | struct dentry *d_alloc_name(struct dentry *, const char *); |
2358 | extern int simple_fill_super(struct super_block *, int, struct tree_descr *); | 2358 | extern int simple_fill_super(struct super_block *, int, struct tree_descr *); |
2359 | extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count); | 2359 | extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count); |
2360 | extern void simple_release_fs(struct vfsmount **mount, int *count); | 2360 | extern void simple_release_fs(struct vfsmount **mount, int *count); |
2361 | 2361 | ||
2362 | extern ssize_t simple_read_from_buffer(void __user *to, size_t count, | 2362 | extern ssize_t simple_read_from_buffer(void __user *to, size_t count, |
2363 | loff_t *ppos, const void *from, size_t available); | 2363 | loff_t *ppos, const void *from, size_t available); |
2364 | 2364 | ||
2365 | extern int simple_fsync(struct file *, struct dentry *, int); | 2365 | extern int simple_fsync(struct file *, struct dentry *, int); |
2366 | 2366 | ||
2367 | #ifdef CONFIG_MIGRATION | 2367 | #ifdef CONFIG_MIGRATION |
2368 | extern int buffer_migrate_page(struct address_space *, | 2368 | extern int buffer_migrate_page(struct address_space *, |
2369 | struct page *, struct page *); | 2369 | struct page *, struct page *); |
2370 | #else | 2370 | #else |
2371 | #define buffer_migrate_page NULL | 2371 | #define buffer_migrate_page NULL |
2372 | #endif | 2372 | #endif |
2373 | 2373 | ||
2374 | extern int inode_change_ok(const struct inode *, struct iattr *); | 2374 | extern int inode_change_ok(const struct inode *, struct iattr *); |
2375 | extern int inode_newsize_ok(const struct inode *, loff_t offset); | 2375 | extern int inode_newsize_ok(const struct inode *, loff_t offset); |
2376 | extern int __must_check inode_setattr(struct inode *, struct iattr *); | 2376 | extern int __must_check inode_setattr(struct inode *, struct iattr *); |
2377 | 2377 | ||
2378 | extern void file_update_time(struct file *file); | 2378 | extern void file_update_time(struct file *file); |
2379 | 2379 | ||
2380 | extern int generic_show_options(struct seq_file *m, struct vfsmount *mnt); | 2380 | extern int generic_show_options(struct seq_file *m, struct vfsmount *mnt); |
2381 | extern void save_mount_options(struct super_block *sb, char *options); | 2381 | extern void save_mount_options(struct super_block *sb, char *options); |
2382 | extern void replace_mount_options(struct super_block *sb, char *options); | 2382 | extern void replace_mount_options(struct super_block *sb, char *options); |
2383 | 2383 | ||
2384 | static inline ino_t parent_ino(struct dentry *dentry) | 2384 | static inline ino_t parent_ino(struct dentry *dentry) |
2385 | { | 2385 | { |
2386 | ino_t res; | 2386 | ino_t res; |
2387 | 2387 | ||
2388 | spin_lock(&dentry->d_lock); | 2388 | spin_lock(&dentry->d_lock); |
2389 | res = dentry->d_parent->d_inode->i_ino; | 2389 | res = dentry->d_parent->d_inode->i_ino; |
2390 | spin_unlock(&dentry->d_lock); | 2390 | spin_unlock(&dentry->d_lock); |
2391 | return res; | 2391 | return res; |
2392 | } | 2392 | } |
2393 | 2393 | ||
2394 | /* Transaction based IO helpers */ | 2394 | /* Transaction based IO helpers */ |
2395 | 2395 | ||
/*
 * An argresp is stored in an allocated page and holds the
 * size of the argument or response, along with its content.
 *
 * @size: size of the argument or response
 * @data: the content itself; declared as a C99 flexible array member
 *        (formerly the GNU zero-length "data[0]"), which keeps the same
 *        layout and sizeof while being standard C.
 */
struct simple_transaction_argresp {
	ssize_t size;
	char data[];
};

/* PAGE_SIZE minus the fixed part of struct simple_transaction_argresp. */
#define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp))
2406 | 2406 | ||
2407 | char *simple_transaction_get(struct file *file, const char __user *buf, | 2407 | char *simple_transaction_get(struct file *file, const char __user *buf, |
2408 | size_t size); | 2408 | size_t size); |
2409 | ssize_t simple_transaction_read(struct file *file, char __user *buf, | 2409 | ssize_t simple_transaction_read(struct file *file, char __user *buf, |
2410 | size_t size, loff_t *pos); | 2410 | size_t size, loff_t *pos); |
2411 | int simple_transaction_release(struct inode *inode, struct file *file); | 2411 | int simple_transaction_release(struct inode *inode, struct file *file); |
2412 | 2412 | ||
2413 | void simple_transaction_set(struct file *file, size_t n); | 2413 | void simple_transaction_set(struct file *file, size_t n); |
2414 | 2414 | ||
/*
 * Simple attribute files
 *
 * These attributes behave similarly to those in sysfs:
 *
 * Writing to an attribute sets the value immediately; an open file can
 * be written to multiple times.
 *
 * Reading from an attribute creates a buffer from the value, which may
 * then be consumed by several read calls.  Once the attribute has been
 * read completely, no further reads are possible until the file is
 * opened again.
 *
 * All attributes contain a text representation of a numeric value
 * that is accessed with the get() and set() functions.
 *
 * DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) expands to a
 * static <__fops>_open() function and a static const struct
 * file_operations named __fops.  The open function first calls
 * __simple_attr_check_format(__fmt, 0ull), so that the compiler checks
 * __fmt against an unsigned long long argument, and then hands __get,
 * __set and __fmt to simple_attr_open().
 */
#define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt)		\
static int __fops ## _open(struct inode *inode, struct file *file)	\
{									\
	__simple_attr_check_format(__fmt, 0ull);			\
	return simple_attr_open(inode, file, __get, __set, __fmt);	\
}									\
static const struct file_operations __fops = {				\
	.owner	 = THIS_MODULE,						\
	.open	 = __fops ## _open,					\
	.read	 = simple_attr_read,					\
	.write	 = simple_attr_write,					\
	.release = simple_attr_release,					\
};
2444 | 2444 | ||
/*
 * Compile-time format check helper: the printf format attribute lets the
 * compiler verify the variadic arguments against @fmt.  The body is
 * empty, so a call does nothing at run time.
 */
static inline void __simple_attr_check_format(const char *fmt, ...)
	__attribute__((format(printf, 1, 2)));

static inline void __simple_attr_check_format(const char *fmt, ...)
{
	/* intentionally empty: only the compiler's argument check matters */
}
2450 | 2450 | ||
2451 | int simple_attr_open(struct inode *inode, struct file *file, | 2451 | int simple_attr_open(struct inode *inode, struct file *file, |
2452 | int (*get)(void *, u64 *), int (*set)(void *, u64), | 2452 | int (*get)(void *, u64 *), int (*set)(void *, u64), |
2453 | const char *fmt); | 2453 | const char *fmt); |
2454 | int simple_attr_release(struct inode *inode, struct file *file); | 2454 | int simple_attr_release(struct inode *inode, struct file *file); |
2455 | ssize_t simple_attr_read(struct file *file, char __user *buf, | 2455 | ssize_t simple_attr_read(struct file *file, char __user *buf, |
2456 | size_t len, loff_t *ppos); | 2456 | size_t len, loff_t *ppos); |
2457 | ssize_t simple_attr_write(struct file *file, const char __user *buf, | 2457 | ssize_t simple_attr_write(struct file *file, const char __user *buf, |
2458 | size_t len, loff_t *ppos); | 2458 | size_t len, loff_t *ppos); |
2459 | 2459 | ||
2460 | struct ctl_table; | 2460 | struct ctl_table; |
2461 | int proc_nr_files(struct ctl_table *table, int write, | 2461 | int proc_nr_files(struct ctl_table *table, int write, |
2462 | void __user *buffer, size_t *lenp, loff_t *ppos); | 2462 | void __user *buffer, size_t *lenp, loff_t *ppos); |
2463 | 2463 | ||
2464 | int __init get_filesystem_list(char *buf); | 2464 | int __init get_filesystem_list(char *buf); |
2465 | 2465 | ||
/*
 * ACC_MODE(x): use the O_ACCMODE bits of @x as an index into a four-entry
 * lookup string; indices 0, 1, 2 and 3 yield 4, 2, 6 and 6 respectively.
 */
#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])

/*
 * OPEN_FMODE(flag): (flag + 1) masked with O_ACCMODE, cast to fmode_t.
 * @flag is parenthesized so that an expression argument such as
 * OPEN_FMODE(a | b) is incremented as a whole; without the parentheses
 * the "+ 1" would bind to the last operand only.
 */
#define OPEN_FMODE(flag) ((__force fmode_t)(((flag) + 1) & O_ACCMODE))
2468 | 2468 | ||
2469 | #endif /* __KERNEL__ */ | 2469 | #endif /* __KERNEL__ */ |
net/socket.c
1 | /* | 1 | /* |
2 | * NET An implementation of the SOCKET network access protocol. | 2 | * NET An implementation of the SOCKET network access protocol. |
3 | * | 3 | * |
4 | * Version: @(#)socket.c 1.1.93 18/02/95 | 4 | * Version: @(#)socket.c 1.1.93 18/02/95 |
5 | * | 5 | * |
6 | * Authors: Orest Zborowski, <obz@Kodak.COM> | 6 | * Authors: Orest Zborowski, <obz@Kodak.COM> |
7 | * Ross Biro | 7 | * Ross Biro |
8 | * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> | 8 | * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> |
9 | * | 9 | * |
10 | * Fixes: | 10 | * Fixes: |
11 | * Anonymous : NOTSOCK/BADF cleanup. Error fix in | 11 | * Anonymous : NOTSOCK/BADF cleanup. Error fix in |
12 | * shutdown() | 12 | * shutdown() |
13 | * Alan Cox : verify_area() fixes | 13 | * Alan Cox : verify_area() fixes |
14 | * Alan Cox : Removed DDI | 14 | * Alan Cox : Removed DDI |
15 | * Jonathan Kamens : SOCK_DGRAM reconnect bug | 15 | * Jonathan Kamens : SOCK_DGRAM reconnect bug |
16 | * Alan Cox : Moved a load of checks to the very | 16 | * Alan Cox : Moved a load of checks to the very |
17 | * top level. | 17 | * top level. |
18 | * Alan Cox : Move address structures to/from user | 18 | * Alan Cox : Move address structures to/from user |
19 | * mode above the protocol layers. | 19 | * mode above the protocol layers. |
20 | * Rob Janssen : Allow 0 length sends. | 20 | * Rob Janssen : Allow 0 length sends. |
21 | * Alan Cox : Asynchronous I/O support (cribbed from the | 21 | * Alan Cox : Asynchronous I/O support (cribbed from the |
22 | * tty drivers). | 22 | * tty drivers). |
23 | * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style) | 23 | * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style) |
24 | * Jeff Uphoff : Made max number of sockets command-line | 24 | * Jeff Uphoff : Made max number of sockets command-line |
25 | * configurable. | 25 | * configurable. |
26 | * Matti Aarnio : Made the number of sockets dynamic, | 26 | * Matti Aarnio : Made the number of sockets dynamic, |
27 | * to be allocated when needed, and mr. | 27 | * to be allocated when needed, and mr. |
28 | * Uphoff's max is used as max to be | 28 | * Uphoff's max is used as max to be |
29 | * allowed to allocate. | 29 | * allowed to allocate. |
30 | * Linus : Argh. removed all the socket allocation | 30 | * Linus : Argh. removed all the socket allocation |
31 | * altogether: it's in the inode now. | 31 | * altogether: it's in the inode now. |
32 | * Alan Cox : Made sock_alloc()/sock_release() public | 32 | * Alan Cox : Made sock_alloc()/sock_release() public |
33 | * for NetROM and future kernel nfsd type | 33 | * for NetROM and future kernel nfsd type |
34 | * stuff. | 34 | * stuff. |
35 | * Alan Cox : sendmsg/recvmsg basics. | 35 | * Alan Cox : sendmsg/recvmsg basics. |
36 | * Tom Dyas : Export net symbols. | 36 | * Tom Dyas : Export net symbols. |
37 | * Marcin Dalecki : Fixed problems with CONFIG_NET="n". | 37 | * Marcin Dalecki : Fixed problems with CONFIG_NET="n". |
38 | * Alan Cox : Added thread locking to sys_* calls | 38 | * Alan Cox : Added thread locking to sys_* calls |
39 | * for sockets. May have errors at the | 39 | * for sockets. May have errors at the |
40 | * moment. | 40 | * moment. |
41 | * Kevin Buhr : Fixed the dumb errors in the above. | 41 | * Kevin Buhr : Fixed the dumb errors in the above. |
42 | * Andi Kleen : Some small cleanups, optimizations, | 42 | * Andi Kleen : Some small cleanups, optimizations, |
43 | * and fixed a copy_from_user() bug. | 43 | * and fixed a copy_from_user() bug. |
44 | * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0) | 44 | * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0) |
45 | * Tigran Aivazian : Made listen(2) backlog sanity checks | 45 | * Tigran Aivazian : Made listen(2) backlog sanity checks |
46 | * protocol-independent | 46 | * protocol-independent |
47 | * | 47 | * |
48 | * | 48 | * |
49 | * This program is free software; you can redistribute it and/or | 49 | * This program is free software; you can redistribute it and/or |
50 | * modify it under the terms of the GNU General Public License | 50 | * modify it under the terms of the GNU General Public License |
51 | * as published by the Free Software Foundation; either version | 51 | * as published by the Free Software Foundation; either version |
52 | * 2 of the License, or (at your option) any later version. | 52 | * 2 of the License, or (at your option) any later version. |
53 | * | 53 | * |
54 | * | 54 | * |
55 | * This module is effectively the top level interface to the BSD socket | 55 | * This module is effectively the top level interface to the BSD socket |
56 | * paradigm. | 56 | * paradigm. |
57 | * | 57 | * |
58 | * Based upon Swansea University Computer Society NET3.039 | 58 | * Based upon Swansea University Computer Society NET3.039 |
59 | */ | 59 | */ |
60 | 60 | ||
61 | #include <linux/mm.h> | 61 | #include <linux/mm.h> |
62 | #include <linux/socket.h> | 62 | #include <linux/socket.h> |
63 | #include <linux/file.h> | 63 | #include <linux/file.h> |
64 | #include <linux/net.h> | 64 | #include <linux/net.h> |
65 | #include <linux/interrupt.h> | 65 | #include <linux/interrupt.h> |
66 | #include <linux/thread_info.h> | 66 | #include <linux/thread_info.h> |
67 | #include <linux/rcupdate.h> | 67 | #include <linux/rcupdate.h> |
68 | #include <linux/netdevice.h> | 68 | #include <linux/netdevice.h> |
69 | #include <linux/proc_fs.h> | 69 | #include <linux/proc_fs.h> |
70 | #include <linux/seq_file.h> | 70 | #include <linux/seq_file.h> |
71 | #include <linux/mutex.h> | 71 | #include <linux/mutex.h> |
72 | #include <linux/wanrouter.h> | 72 | #include <linux/wanrouter.h> |
73 | #include <linux/if_bridge.h> | 73 | #include <linux/if_bridge.h> |
74 | #include <linux/if_frad.h> | 74 | #include <linux/if_frad.h> |
75 | #include <linux/if_vlan.h> | 75 | #include <linux/if_vlan.h> |
76 | #include <linux/init.h> | 76 | #include <linux/init.h> |
77 | #include <linux/poll.h> | 77 | #include <linux/poll.h> |
78 | #include <linux/cache.h> | 78 | #include <linux/cache.h> |
79 | #include <linux/module.h> | 79 | #include <linux/module.h> |
80 | #include <linux/highmem.h> | 80 | #include <linux/highmem.h> |
81 | #include <linux/mount.h> | 81 | #include <linux/mount.h> |
82 | #include <linux/security.h> | 82 | #include <linux/security.h> |
83 | #include <linux/syscalls.h> | 83 | #include <linux/syscalls.h> |
84 | #include <linux/compat.h> | 84 | #include <linux/compat.h> |
85 | #include <linux/kmod.h> | 85 | #include <linux/kmod.h> |
86 | #include <linux/audit.h> | 86 | #include <linux/audit.h> |
87 | #include <linux/wireless.h> | 87 | #include <linux/wireless.h> |
88 | #include <linux/nsproxy.h> | 88 | #include <linux/nsproxy.h> |
89 | #include <linux/magic.h> | 89 | #include <linux/magic.h> |
90 | #include <linux/slab.h> | 90 | #include <linux/slab.h> |
91 | 91 | ||
92 | #include <asm/uaccess.h> | 92 | #include <asm/uaccess.h> |
93 | #include <asm/unistd.h> | 93 | #include <asm/unistd.h> |
94 | 94 | ||
95 | #include <net/compat.h> | 95 | #include <net/compat.h> |
96 | #include <net/wext.h> | 96 | #include <net/wext.h> |
97 | 97 | ||
98 | #include <net/sock.h> | 98 | #include <net/sock.h> |
99 | #include <linux/netfilter.h> | 99 | #include <linux/netfilter.h> |
100 | 100 | ||
101 | #include <linux/if_tun.h> | 101 | #include <linux/if_tun.h> |
102 | #include <linux/ipv6_route.h> | 102 | #include <linux/ipv6_route.h> |
103 | #include <linux/route.h> | 103 | #include <linux/route.h> |
104 | #include <linux/sockios.h> | 104 | #include <linux/sockios.h> |
105 | #include <linux/atalk.h> | 105 | #include <linux/atalk.h> |
106 | 106 | ||
107 | static int sock_no_open(struct inode *irrelevant, struct file *dontcare); | 107 | static int sock_no_open(struct inode *irrelevant, struct file *dontcare); |
108 | static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, | 108 | static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, |
109 | unsigned long nr_segs, loff_t pos); | 109 | unsigned long nr_segs, loff_t pos); |
110 | static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, | 110 | static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, |
111 | unsigned long nr_segs, loff_t pos); | 111 | unsigned long nr_segs, loff_t pos); |
112 | static int sock_mmap(struct file *file, struct vm_area_struct *vma); | 112 | static int sock_mmap(struct file *file, struct vm_area_struct *vma); |
113 | 113 | ||
114 | static int sock_close(struct inode *inode, struct file *file); | 114 | static int sock_close(struct inode *inode, struct file *file); |
115 | static unsigned int sock_poll(struct file *file, | 115 | static unsigned int sock_poll(struct file *file, |
116 | struct poll_table_struct *wait); | 116 | struct poll_table_struct *wait); |
117 | static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | 117 | static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); |
118 | #ifdef CONFIG_COMPAT | 118 | #ifdef CONFIG_COMPAT |
119 | static long compat_sock_ioctl(struct file *file, | 119 | static long compat_sock_ioctl(struct file *file, |
120 | unsigned int cmd, unsigned long arg); | 120 | unsigned int cmd, unsigned long arg); |
121 | #endif | 121 | #endif |
122 | static int sock_fasync(int fd, struct file *filp, int on); | 122 | static int sock_fasync(int fd, struct file *filp, int on); |
123 | static ssize_t sock_sendpage(struct file *file, struct page *page, | 123 | static ssize_t sock_sendpage(struct file *file, struct page *page, |
124 | int offset, size_t size, loff_t *ppos, int more); | 124 | int offset, size_t size, loff_t *ppos, int more); |
125 | static ssize_t sock_splice_read(struct file *file, loff_t *ppos, | 125 | static ssize_t sock_splice_read(struct file *file, loff_t *ppos, |
126 | struct pipe_inode_info *pipe, size_t len, | 126 | struct pipe_inode_info *pipe, size_t len, |
127 | unsigned int flags); | 127 | unsigned int flags); |
128 | 128 | ||
129 | /* | 129 | /* |
130 | * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear | 130 | * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear |
131 | * in the operation structures but are done directly via the socketcall() multiplexor. | 131 | * in the operation structures but are done directly via the socketcall() multiplexor. |
132 | */ | 132 | */ |
133 | 133 | ||
134 | static const struct file_operations socket_file_ops = { | 134 | static const struct file_operations socket_file_ops = { |
135 | .owner = THIS_MODULE, | 135 | .owner = THIS_MODULE, |
136 | .llseek = no_llseek, | 136 | .llseek = no_llseek, |
137 | .aio_read = sock_aio_read, | 137 | .aio_read = sock_aio_read, |
138 | .aio_write = sock_aio_write, | 138 | .aio_write = sock_aio_write, |
139 | .poll = sock_poll, | 139 | .poll = sock_poll, |
140 | .unlocked_ioctl = sock_ioctl, | 140 | .unlocked_ioctl = sock_ioctl, |
141 | #ifdef CONFIG_COMPAT | 141 | #ifdef CONFIG_COMPAT |
142 | .compat_ioctl = compat_sock_ioctl, | 142 | .compat_ioctl = compat_sock_ioctl, |
143 | #endif | 143 | #endif |
144 | .mmap = sock_mmap, | 144 | .mmap = sock_mmap, |
145 | .open = sock_no_open, /* special open code to disallow open via /proc */ | 145 | .open = sock_no_open, /* special open code to disallow open via /proc */ |
146 | .release = sock_close, | 146 | .release = sock_close, |
147 | .fasync = sock_fasync, | 147 | .fasync = sock_fasync, |
148 | .sendpage = sock_sendpage, | 148 | .sendpage = sock_sendpage, |
149 | .splice_write = generic_splice_sendpage, | 149 | .splice_write = generic_splice_sendpage, |
150 | .splice_read = sock_splice_read, | 150 | .splice_read = sock_splice_read, |
151 | }; | 151 | }; |
152 | 152 | ||
153 | /* | 153 | /* |
154 | * The protocol list. Each protocol is registered in here. | 154 | * The protocol list. Each protocol is registered in here. |
155 | */ | 155 | */ |
156 | 156 | ||
157 | static DEFINE_SPINLOCK(net_family_lock); | 157 | static DEFINE_SPINLOCK(net_family_lock); |
158 | static const struct net_proto_family *net_families[NPROTO] __read_mostly; | 158 | static const struct net_proto_family *net_families[NPROTO] __read_mostly; |
159 | 159 | ||
160 | /* | 160 | /* |
161 | * Statistics counters of the socket lists | 161 | * Statistics counters of the socket lists |
162 | */ | 162 | */ |
163 | 163 | ||
164 | static DEFINE_PER_CPU(int, sockets_in_use) = 0; | 164 | static DEFINE_PER_CPU(int, sockets_in_use) = 0; |
165 | 165 | ||
166 | /* | 166 | /* |
167 | * Support routines. | 167 | * Support routines. |
168 | * Move socket addresses back and forth across the kernel/user | 168 | * Move socket addresses back and forth across the kernel/user |
169 | * divide and look after the messy bits. | 169 | * divide and look after the messy bits. |
170 | */ | 170 | */ |
171 | 171 | ||
172 | #define MAX_SOCK_ADDR 128 /* 108 for Unix domain - | 172 | #define MAX_SOCK_ADDR 128 /* 108 for Unix domain - |
173 | 16 for IP, 16 for IPX, | 173 | 16 for IP, 16 for IPX, |
174 | 24 for IPv6, | 174 | 24 for IPv6, |
175 | about 80 for AX.25 | 175 | about 80 for AX.25 |
176 | must be at least one bigger than | 176 | must be at least one bigger than |
177 | the AF_UNIX size (see net/unix/af_unix.c | 177 | the AF_UNIX size (see net/unix/af_unix.c |
178 | :unix_mkname()). | 178 | :unix_mkname()). |
179 | */ | 179 | */ |
180 | 180 | ||
181 | /** | 181 | /** |
182 | * move_addr_to_kernel - copy a socket address into kernel space | 182 | * move_addr_to_kernel - copy a socket address into kernel space |
183 | * @uaddr: Address in user space | 183 | * @uaddr: Address in user space |
184 | * @kaddr: Address in kernel space | 184 | * @kaddr: Address in kernel space |
185 | * @ulen: Length in user space | 185 | * @ulen: Length in user space |
186 | * | 186 | * |
187 | * The address is copied into kernel space. If the provided address is | 187 | * The address is copied into kernel space. If the provided address is |
188 | * too long an error code of -EINVAL is returned. If the copy gives | 188 | * too long an error code of -EINVAL is returned. If the copy gives |
189 | * invalid addresses -EFAULT is returned. On a success 0 is returned. | 189 | * invalid addresses -EFAULT is returned. On a success 0 is returned. |
190 | */ | 190 | */ |
191 | 191 | ||
192 | int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr) | 192 | int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr) |
193 | { | 193 | { |
194 | if (ulen < 0 || ulen > sizeof(struct sockaddr_storage)) | 194 | if (ulen < 0 || ulen > sizeof(struct sockaddr_storage)) |
195 | return -EINVAL; | 195 | return -EINVAL; |
196 | if (ulen == 0) | 196 | if (ulen == 0) |
197 | return 0; | 197 | return 0; |
198 | if (copy_from_user(kaddr, uaddr, ulen)) | 198 | if (copy_from_user(kaddr, uaddr, ulen)) |
199 | return -EFAULT; | 199 | return -EFAULT; |
200 | return audit_sockaddr(ulen, kaddr); | 200 | return audit_sockaddr(ulen, kaddr); |
201 | } | 201 | } |
202 | 202 | ||
203 | /** | 203 | /** |
204 | * move_addr_to_user - copy an address to user space | 204 | * move_addr_to_user - copy an address to user space |
205 | * @kaddr: kernel space address | 205 | * @kaddr: kernel space address |
206 | * @klen: length of address in kernel | 206 | * @klen: length of address in kernel |
207 | * @uaddr: user space address | 207 | * @uaddr: user space address |
208 | * @ulen: pointer to user length field | 208 | * @ulen: pointer to user length field |
209 | * | 209 | * |
210 | * The value pointed to by ulen on entry is the buffer length available. | 210 | * The value pointed to by ulen on entry is the buffer length available. |
211 | * This is overwritten with the buffer space used. -EINVAL is returned | 211 | * This is overwritten with the buffer space used. -EINVAL is returned |
212 | * if an overlong buffer is specified or a negative buffer size. -EFAULT | 212 | * if an overlong buffer is specified or a negative buffer size. -EFAULT |
213 | * is returned if either the buffer or the length field are not | 213 | * is returned if either the buffer or the length field are not |
214 | * accessible. | 214 | * accessible. |
215 | * After copying the data up to the limit the user specifies, the true | 215 | * After copying the data up to the limit the user specifies, the true |
216 | * length of the data is written over the length limit the user | 216 | * length of the data is written over the length limit the user |
217 | * specified. Zero is returned for a success. | 217 | * specified. Zero is returned for a success. |
218 | */ | 218 | */ |
219 | 219 | ||
220 | int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr, | 220 | int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr, |
221 | int __user *ulen) | 221 | int __user *ulen) |
222 | { | 222 | { |
223 | int err; | 223 | int err; |
224 | int len; | 224 | int len; |
225 | 225 | ||
226 | err = get_user(len, ulen); | 226 | err = get_user(len, ulen); |
227 | if (err) | 227 | if (err) |
228 | return err; | 228 | return err; |
229 | if (len > klen) | 229 | if (len > klen) |
230 | len = klen; | 230 | len = klen; |
231 | if (len < 0 || len > sizeof(struct sockaddr_storage)) | 231 | if (len < 0 || len > sizeof(struct sockaddr_storage)) |
232 | return -EINVAL; | 232 | return -EINVAL; |
233 | if (len) { | 233 | if (len) { |
234 | if (audit_sockaddr(klen, kaddr)) | 234 | if (audit_sockaddr(klen, kaddr)) |
235 | return -ENOMEM; | 235 | return -ENOMEM; |
236 | if (copy_to_user(uaddr, kaddr, len)) | 236 | if (copy_to_user(uaddr, kaddr, len)) |
237 | return -EFAULT; | 237 | return -EFAULT; |
238 | } | 238 | } |
239 | /* | 239 | /* |
240 | * "fromlen shall refer to the value before truncation.." | 240 | * "fromlen shall refer to the value before truncation.." |
241 | * 1003.1g | 241 | * 1003.1g |
242 | */ | 242 | */ |
243 | return __put_user(klen, ulen); | 243 | return __put_user(klen, ulen); |
244 | } | 244 | } |
245 | 245 | ||
246 | static struct kmem_cache *sock_inode_cachep __read_mostly; | 246 | static struct kmem_cache *sock_inode_cachep __read_mostly; |
247 | 247 | ||
248 | static struct inode *sock_alloc_inode(struct super_block *sb) | 248 | static struct inode *sock_alloc_inode(struct super_block *sb) |
249 | { | 249 | { |
250 | struct socket_alloc *ei; | 250 | struct socket_alloc *ei; |
251 | 251 | ||
252 | ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); | 252 | ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); |
253 | if (!ei) | 253 | if (!ei) |
254 | return NULL; | 254 | return NULL; |
255 | init_waitqueue_head(&ei->socket.wait); | 255 | init_waitqueue_head(&ei->socket.wait); |
256 | 256 | ||
257 | ei->socket.fasync_list = NULL; | 257 | ei->socket.fasync_list = NULL; |
258 | ei->socket.state = SS_UNCONNECTED; | 258 | ei->socket.state = SS_UNCONNECTED; |
259 | ei->socket.flags = 0; | 259 | ei->socket.flags = 0; |
260 | ei->socket.ops = NULL; | 260 | ei->socket.ops = NULL; |
261 | ei->socket.sk = NULL; | 261 | ei->socket.sk = NULL; |
262 | ei->socket.file = NULL; | 262 | ei->socket.file = NULL; |
263 | 263 | ||
264 | return &ei->vfs_inode; | 264 | return &ei->vfs_inode; |
265 | } | 265 | } |
266 | 266 | ||
267 | static void sock_destroy_inode(struct inode *inode) | 267 | static void sock_destroy_inode(struct inode *inode) |
268 | { | 268 | { |
269 | kmem_cache_free(sock_inode_cachep, | 269 | kmem_cache_free(sock_inode_cachep, |
270 | container_of(inode, struct socket_alloc, vfs_inode)); | 270 | container_of(inode, struct socket_alloc, vfs_inode)); |
271 | } | 271 | } |
272 | 272 | ||
273 | static void init_once(void *foo) | 273 | static void init_once(void *foo) |
274 | { | 274 | { |
275 | struct socket_alloc *ei = (struct socket_alloc *)foo; | 275 | struct socket_alloc *ei = (struct socket_alloc *)foo; |
276 | 276 | ||
277 | inode_init_once(&ei->vfs_inode); | 277 | inode_init_once(&ei->vfs_inode); |
278 | } | 278 | } |
279 | 279 | ||
280 | static int init_inodecache(void) | 280 | static int init_inodecache(void) |
281 | { | 281 | { |
282 | sock_inode_cachep = kmem_cache_create("sock_inode_cache", | 282 | sock_inode_cachep = kmem_cache_create("sock_inode_cache", |
283 | sizeof(struct socket_alloc), | 283 | sizeof(struct socket_alloc), |
284 | 0, | 284 | 0, |
285 | (SLAB_HWCACHE_ALIGN | | 285 | (SLAB_HWCACHE_ALIGN | |
286 | SLAB_RECLAIM_ACCOUNT | | 286 | SLAB_RECLAIM_ACCOUNT | |
287 | SLAB_MEM_SPREAD), | 287 | SLAB_MEM_SPREAD), |
288 | init_once); | 288 | init_once); |
289 | if (sock_inode_cachep == NULL) | 289 | if (sock_inode_cachep == NULL) |
290 | return -ENOMEM; | 290 | return -ENOMEM; |
291 | return 0; | 291 | return 0; |
292 | } | 292 | } |
293 | 293 | ||
294 | static const struct super_operations sockfs_ops = { | 294 | static const struct super_operations sockfs_ops = { |
295 | .alloc_inode = sock_alloc_inode, | 295 | .alloc_inode = sock_alloc_inode, |
296 | .destroy_inode =sock_destroy_inode, | 296 | .destroy_inode =sock_destroy_inode, |
297 | .statfs = simple_statfs, | 297 | .statfs = simple_statfs, |
298 | }; | 298 | }; |
299 | 299 | ||
300 | static int sockfs_get_sb(struct file_system_type *fs_type, | 300 | static int sockfs_get_sb(struct file_system_type *fs_type, |
301 | int flags, const char *dev_name, void *data, | 301 | int flags, const char *dev_name, void *data, |
302 | struct vfsmount *mnt) | 302 | struct vfsmount *mnt) |
303 | { | 303 | { |
304 | return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC, | 304 | return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC, |
305 | mnt); | 305 | mnt); |
306 | } | 306 | } |
307 | 307 | ||
308 | static struct vfsmount *sock_mnt __read_mostly; | 308 | static struct vfsmount *sock_mnt __read_mostly; |
309 | 309 | ||
310 | static struct file_system_type sock_fs_type = { | 310 | static struct file_system_type sock_fs_type = { |
311 | .name = "sockfs", | 311 | .name = "sockfs", |
312 | .get_sb = sockfs_get_sb, | 312 | .get_sb = sockfs_get_sb, |
313 | .kill_sb = kill_anon_super, | 313 | .kill_sb = kill_anon_super, |
314 | }; | 314 | }; |
315 | 315 | ||
316 | /* | 316 | /* |
317 | * sockfs_dname() is called from d_path(). | 317 | * sockfs_dname() is called from d_path(). |
318 | */ | 318 | */ |
319 | static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen) | 319 | static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen) |
320 | { | 320 | { |
321 | return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]", | 321 | return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]", |
322 | dentry->d_inode->i_ino); | 322 | dentry->d_inode->i_ino); |
323 | } | 323 | } |
324 | 324 | ||
325 | static const struct dentry_operations sockfs_dentry_operations = { | 325 | static const struct dentry_operations sockfs_dentry_operations = { |
326 | .d_dname = sockfs_dname, | 326 | .d_dname = sockfs_dname, |
327 | }; | 327 | }; |
328 | 328 | ||
329 | /* | 329 | /* |
330 | * Obtains the first available file descriptor and sets it up for use. | 330 | * Obtains the first available file descriptor and sets it up for use. |
331 | * | 331 | * |
332 | * These functions create file structures and maps them to fd space | 332 | * These functions create file structures and maps them to fd space |
333 | * of the current process. On success it returns file descriptor | 333 | * of the current process. On success it returns file descriptor |
334 | * and file struct implicitly stored in sock->file. | 334 | * and file struct implicitly stored in sock->file. |
335 | * Note that another thread may close file descriptor before we return | 335 | * Note that another thread may close file descriptor before we return |
336 | * from this function. We use the fact that now we do not refer | 336 | * from this function. We use the fact that now we do not refer |
337 | * to socket after mapping. If one day we will need it, this | 337 | * to socket after mapping. If one day we will need it, this |
338 | * function will increment ref. count on file by 1. | 338 | * function will increment ref. count on file by 1. |
339 | * | 339 | * |
340 | * In any case returned fd MAY BE not valid! | 340 | * In any case returned fd MAY BE not valid! |
341 | * This race condition is unavoidable | 341 | * This race condition is unavoidable |
342 | * with shared fd spaces, we cannot solve it inside kernel, | 342 | * with shared fd spaces, we cannot solve it inside kernel, |
343 | * but we take care of internal coherence yet. | 343 | * but we take care of internal coherence yet. |
344 | */ | 344 | */ |
345 | 345 | ||
346 | static int sock_alloc_file(struct socket *sock, struct file **f, int flags) | 346 | static int sock_alloc_file(struct socket *sock, struct file **f, int flags) |
347 | { | 347 | { |
348 | struct qstr name = { .name = "" }; | 348 | struct qstr name = { .name = "" }; |
349 | struct path path; | 349 | struct path path; |
350 | struct file *file; | 350 | struct file *file; |
351 | int fd; | 351 | int fd; |
352 | 352 | ||
353 | fd = get_unused_fd_flags(flags); | 353 | fd = get_unused_fd_flags(flags); |
354 | if (unlikely(fd < 0)) | 354 | if (unlikely(fd < 0)) |
355 | return fd; | 355 | return fd; |
356 | 356 | ||
357 | path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); | 357 | path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); |
358 | if (unlikely(!path.dentry)) { | 358 | if (unlikely(!path.dentry)) { |
359 | put_unused_fd(fd); | 359 | put_unused_fd(fd); |
360 | return -ENOMEM; | 360 | return -ENOMEM; |
361 | } | 361 | } |
362 | path.mnt = mntget(sock_mnt); | 362 | path.mnt = mntget(sock_mnt); |
363 | 363 | ||
364 | path.dentry->d_op = &sockfs_dentry_operations; | 364 | path.dentry->d_op = &sockfs_dentry_operations; |
365 | d_instantiate(path.dentry, SOCK_INODE(sock)); | 365 | d_instantiate(path.dentry, SOCK_INODE(sock)); |
366 | SOCK_INODE(sock)->i_fop = &socket_file_ops; | 366 | SOCK_INODE(sock)->i_fop = &socket_file_ops; |
367 | 367 | ||
368 | file = alloc_file(&path, FMODE_READ | FMODE_WRITE, | 368 | file = alloc_file(&path, FMODE_READ | FMODE_WRITE, |
369 | &socket_file_ops); | 369 | &socket_file_ops); |
370 | if (unlikely(!file)) { | 370 | if (unlikely(!file)) { |
371 | /* drop dentry, keep inode */ | 371 | /* drop dentry, keep inode */ |
372 | atomic_inc(&path.dentry->d_inode->i_count); | 372 | atomic_inc(&path.dentry->d_inode->i_count); |
373 | path_put(&path); | 373 | path_put(&path); |
374 | put_unused_fd(fd); | 374 | put_unused_fd(fd); |
375 | return -ENFILE; | 375 | return -ENFILE; |
376 | } | 376 | } |
377 | 377 | ||
378 | sock->file = file; | 378 | sock->file = file; |
379 | file->f_flags = O_RDWR | (flags & O_NONBLOCK); | 379 | file->f_flags = O_RDWR | (flags & O_NONBLOCK); |
380 | file->f_pos = 0; | 380 | file->f_pos = 0; |
381 | file->private_data = sock; | 381 | file->private_data = sock; |
382 | 382 | ||
383 | *f = file; | 383 | *f = file; |
384 | return fd; | 384 | return fd; |
385 | } | 385 | } |
386 | 386 | ||
387 | int sock_map_fd(struct socket *sock, int flags) | 387 | int sock_map_fd(struct socket *sock, int flags) |
388 | { | 388 | { |
389 | struct file *newfile; | 389 | struct file *newfile; |
390 | int fd = sock_alloc_file(sock, &newfile, flags); | 390 | int fd = sock_alloc_file(sock, &newfile, flags); |
391 | 391 | ||
392 | if (likely(fd >= 0)) | 392 | if (likely(fd >= 0)) |
393 | fd_install(fd, newfile); | 393 | fd_install(fd, newfile); |
394 | 394 | ||
395 | return fd; | 395 | return fd; |
396 | } | 396 | } |
397 | 397 | ||
398 | static struct socket *sock_from_file(struct file *file, int *err) | 398 | static struct socket *sock_from_file(struct file *file, int *err) |
399 | { | 399 | { |
400 | if (file->f_op == &socket_file_ops) | 400 | if (file->f_op == &socket_file_ops) |
401 | return file->private_data; /* set in sock_map_fd */ | 401 | return file->private_data; /* set in sock_map_fd */ |
402 | 402 | ||
403 | *err = -ENOTSOCK; | 403 | *err = -ENOTSOCK; |
404 | return NULL; | 404 | return NULL; |
405 | } | 405 | } |
406 | 406 | ||
407 | /** | 407 | /** |
408 | * sockfd_lookup - Go from a file number to its socket slot | 408 | * sockfd_lookup - Go from a file number to its socket slot |
409 | * @fd: file handle | 409 | * @fd: file handle |
410 | * @err: pointer to an error code return | 410 | * @err: pointer to an error code return |
411 | * | 411 | * |
412 | * The file handle passed in is locked and the socket it is bound | 412 | * The file handle passed in is locked and the socket it is bound |
413 | * too is returned. If an error occurs the err pointer is overwritten | 413 | * too is returned. If an error occurs the err pointer is overwritten |
414 | * with a negative errno code and NULL is returned. The function checks | 414 | * with a negative errno code and NULL is returned. The function checks |
415 | * for both invalid handles and passing a handle which is not a socket. | 415 | * for both invalid handles and passing a handle which is not a socket. |
416 | * | 416 | * |
417 | * On a success the socket object pointer is returned. | 417 | * On a success the socket object pointer is returned. |
418 | */ | 418 | */ |
419 | 419 | ||
420 | struct socket *sockfd_lookup(int fd, int *err) | 420 | struct socket *sockfd_lookup(int fd, int *err) |
421 | { | 421 | { |
422 | struct file *file; | 422 | struct file *file; |
423 | struct socket *sock; | 423 | struct socket *sock; |
424 | 424 | ||
425 | file = fget(fd); | 425 | file = fget(fd); |
426 | if (!file) { | 426 | if (!file) { |
427 | *err = -EBADF; | 427 | *err = -EBADF; |
428 | return NULL; | 428 | return NULL; |
429 | } | 429 | } |
430 | 430 | ||
431 | sock = sock_from_file(file, err); | 431 | sock = sock_from_file(file, err); |
432 | if (!sock) | 432 | if (!sock) |
433 | fput(file); | 433 | fput(file); |
434 | return sock; | 434 | return sock; |
435 | } | 435 | } |
436 | 436 | ||
437 | static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) | 437 | static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) |
438 | { | 438 | { |
439 | struct file *file; | 439 | struct file *file; |
440 | struct socket *sock; | 440 | struct socket *sock; |
441 | 441 | ||
442 | *err = -EBADF; | 442 | *err = -EBADF; |
443 | file = fget_light(fd, fput_needed); | 443 | file = fget_light(fd, fput_needed); |
444 | if (file) { | 444 | if (file) { |
445 | sock = sock_from_file(file, err); | 445 | sock = sock_from_file(file, err); |
446 | if (sock) | 446 | if (sock) |
447 | return sock; | 447 | return sock; |
448 | fput_light(file, *fput_needed); | 448 | fput_light(file, *fput_needed); |
449 | } | 449 | } |
450 | return NULL; | 450 | return NULL; |
451 | } | 451 | } |
452 | 452 | ||
453 | /** | 453 | /** |
454 | * sock_alloc - allocate a socket | 454 | * sock_alloc - allocate a socket |
455 | * | 455 | * |
456 | * Allocate a new inode and socket object. The two are bound together | 456 | * Allocate a new inode and socket object. The two are bound together |
457 | * and initialised. The socket is then returned. If we are out of inodes | 457 | * and initialised. The socket is then returned. If we are out of inodes |
458 | * NULL is returned. | 458 | * NULL is returned. |
459 | */ | 459 | */ |
460 | 460 | ||
461 | static struct socket *sock_alloc(void) | 461 | static struct socket *sock_alloc(void) |
462 | { | 462 | { |
463 | struct inode *inode; | 463 | struct inode *inode; |
464 | struct socket *sock; | 464 | struct socket *sock; |
465 | 465 | ||
466 | inode = new_inode(sock_mnt->mnt_sb); | 466 | inode = new_inode(sock_mnt->mnt_sb); |
467 | if (!inode) | 467 | if (!inode) |
468 | return NULL; | 468 | return NULL; |
469 | 469 | ||
470 | sock = SOCKET_I(inode); | 470 | sock = SOCKET_I(inode); |
471 | 471 | ||
472 | kmemcheck_annotate_bitfield(sock, type); | 472 | kmemcheck_annotate_bitfield(sock, type); |
473 | inode->i_mode = S_IFSOCK | S_IRWXUGO; | 473 | inode->i_mode = S_IFSOCK | S_IRWXUGO; |
474 | inode->i_uid = current_fsuid(); | 474 | inode->i_uid = current_fsuid(); |
475 | inode->i_gid = current_fsgid(); | 475 | inode->i_gid = current_fsgid(); |
476 | 476 | ||
477 | percpu_add(sockets_in_use, 1); | 477 | percpu_add(sockets_in_use, 1); |
478 | return sock; | 478 | return sock; |
479 | } | 479 | } |
480 | 480 | ||
481 | /* | 481 | /* |
482 | * In theory you can't get an open on this inode, but /proc provides | 482 | * In theory you can't get an open on this inode, but /proc provides |
483 | * a back door. Remember to keep it shut otherwise you'll let the | 483 | * a back door. Remember to keep it shut otherwise you'll let the |
484 | * creepy crawlies in. | 484 | * creepy crawlies in. |
485 | */ | 485 | */ |
486 | 486 | ||
487 | static int sock_no_open(struct inode *irrelevant, struct file *dontcare) | 487 | static int sock_no_open(struct inode *irrelevant, struct file *dontcare) |
488 | { | 488 | { |
489 | return -ENXIO; | 489 | return -ENXIO; |
490 | } | 490 | } |
491 | 491 | ||
492 | const struct file_operations bad_sock_fops = { | 492 | const struct file_operations bad_sock_fops = { |
493 | .owner = THIS_MODULE, | 493 | .owner = THIS_MODULE, |
494 | .open = sock_no_open, | 494 | .open = sock_no_open, |
495 | }; | 495 | }; |
496 | 496 | ||
497 | /** | 497 | /** |
498 | * sock_release - close a socket | 498 | * sock_release - close a socket |
499 | * @sock: socket to close | 499 | * @sock: socket to close |
500 | * | 500 | * |
501 | * The socket is released from the protocol stack if it has a release | 501 | * The socket is released from the protocol stack if it has a release |
502 | * callback, and the inode is then released if the socket is bound to | 502 | * callback, and the inode is then released if the socket is bound to |
503 | * an inode not a file. | 503 | * an inode not a file. |
504 | */ | 504 | */ |
505 | 505 | ||
506 | void sock_release(struct socket *sock) | 506 | void sock_release(struct socket *sock) |
507 | { | 507 | { |
508 | if (sock->ops) { | 508 | if (sock->ops) { |
509 | struct module *owner = sock->ops->owner; | 509 | struct module *owner = sock->ops->owner; |
510 | 510 | ||
511 | sock->ops->release(sock); | 511 | sock->ops->release(sock); |
512 | sock->ops = NULL; | 512 | sock->ops = NULL; |
513 | module_put(owner); | 513 | module_put(owner); |
514 | } | 514 | } |
515 | 515 | ||
516 | if (sock->fasync_list) | 516 | if (sock->fasync_list) |
517 | printk(KERN_ERR "sock_release: fasync list not empty!\n"); | 517 | printk(KERN_ERR "sock_release: fasync list not empty!\n"); |
518 | 518 | ||
519 | percpu_sub(sockets_in_use, 1); | 519 | percpu_sub(sockets_in_use, 1); |
520 | if (!sock->file) { | 520 | if (!sock->file) { |
521 | iput(SOCK_INODE(sock)); | 521 | iput(SOCK_INODE(sock)); |
522 | return; | 522 | return; |
523 | } | 523 | } |
524 | sock->file = NULL; | 524 | sock->file = NULL; |
525 | } | 525 | } |
526 | 526 | ||
527 | int sock_tx_timestamp(struct msghdr *msg, struct sock *sk, | 527 | int sock_tx_timestamp(struct msghdr *msg, struct sock *sk, |
528 | union skb_shared_tx *shtx) | 528 | union skb_shared_tx *shtx) |
529 | { | 529 | { |
530 | shtx->flags = 0; | 530 | shtx->flags = 0; |
531 | if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) | 531 | if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) |
532 | shtx->hardware = 1; | 532 | shtx->hardware = 1; |
533 | if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE)) | 533 | if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE)) |
534 | shtx->software = 1; | 534 | shtx->software = 1; |
535 | return 0; | 535 | return 0; |
536 | } | 536 | } |
537 | EXPORT_SYMBOL(sock_tx_timestamp); | 537 | EXPORT_SYMBOL(sock_tx_timestamp); |
538 | 538 | ||
539 | static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, | 539 | static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, |
540 | struct msghdr *msg, size_t size) | 540 | struct msghdr *msg, size_t size) |
541 | { | 541 | { |
542 | struct sock_iocb *si = kiocb_to_siocb(iocb); | 542 | struct sock_iocb *si = kiocb_to_siocb(iocb); |
543 | int err; | 543 | int err; |
544 | 544 | ||
545 | si->sock = sock; | 545 | si->sock = sock; |
546 | si->scm = NULL; | 546 | si->scm = NULL; |
547 | si->msg = msg; | 547 | si->msg = msg; |
548 | si->size = size; | 548 | si->size = size; |
549 | 549 | ||
550 | err = security_socket_sendmsg(sock, msg, size); | 550 | err = security_socket_sendmsg(sock, msg, size); |
551 | if (err) | 551 | if (err) |
552 | return err; | 552 | return err; |
553 | 553 | ||
554 | return sock->ops->sendmsg(iocb, sock, msg, size); | 554 | return sock->ops->sendmsg(iocb, sock, msg, size); |
555 | } | 555 | } |
556 | 556 | ||
557 | int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) | 557 | int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) |
558 | { | 558 | { |
559 | struct kiocb iocb; | 559 | struct kiocb iocb; |
560 | struct sock_iocb siocb; | 560 | struct sock_iocb siocb; |
561 | int ret; | 561 | int ret; |
562 | 562 | ||
563 | init_sync_kiocb(&iocb, NULL); | 563 | init_sync_kiocb(&iocb, NULL); |
564 | iocb.private = &siocb; | 564 | iocb.private = &siocb; |
565 | ret = __sock_sendmsg(&iocb, sock, msg, size); | 565 | ret = __sock_sendmsg(&iocb, sock, msg, size); |
566 | if (-EIOCBQUEUED == ret) | 566 | if (-EIOCBQUEUED == ret) |
567 | ret = wait_on_sync_kiocb(&iocb); | 567 | ret = wait_on_sync_kiocb(&iocb); |
568 | return ret; | 568 | return ret; |
569 | } | 569 | } |
570 | 570 | ||
571 | int kernel_sendmsg(struct socket *sock, struct msghdr *msg, | 571 | int kernel_sendmsg(struct socket *sock, struct msghdr *msg, |
572 | struct kvec *vec, size_t num, size_t size) | 572 | struct kvec *vec, size_t num, size_t size) |
573 | { | 573 | { |
574 | mm_segment_t oldfs = get_fs(); | 574 | mm_segment_t oldfs = get_fs(); |
575 | int result; | 575 | int result; |
576 | 576 | ||
577 | set_fs(KERNEL_DS); | 577 | set_fs(KERNEL_DS); |
578 | /* | 578 | /* |
579 | * the following is safe, since for compiler definitions of kvec and | 579 | * the following is safe, since for compiler definitions of kvec and |
580 | * iovec are identical, yielding the same in-core layout and alignment | 580 | * iovec are identical, yielding the same in-core layout and alignment |
581 | */ | 581 | */ |
582 | msg->msg_iov = (struct iovec *)vec; | 582 | msg->msg_iov = (struct iovec *)vec; |
583 | msg->msg_iovlen = num; | 583 | msg->msg_iovlen = num; |
584 | result = sock_sendmsg(sock, msg, size); | 584 | result = sock_sendmsg(sock, msg, size); |
585 | set_fs(oldfs); | 585 | set_fs(oldfs); |
586 | return result; | 586 | return result; |
587 | } | 587 | } |
588 | 588 | ||
589 | static int ktime2ts(ktime_t kt, struct timespec *ts) | 589 | static int ktime2ts(ktime_t kt, struct timespec *ts) |
590 | { | 590 | { |
591 | if (kt.tv64) { | 591 | if (kt.tv64) { |
592 | *ts = ktime_to_timespec(kt); | 592 | *ts = ktime_to_timespec(kt); |
593 | return 1; | 593 | return 1; |
594 | } else { | 594 | } else { |
595 | return 0; | 595 | return 0; |
596 | } | 596 | } |
597 | } | 597 | } |
598 | 598 | ||
599 | /* | 599 | /* |
600 | * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) | 600 | * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) |
601 | */ | 601 | */ |
602 | void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, | 602 | void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, |
603 | struct sk_buff *skb) | 603 | struct sk_buff *skb) |
604 | { | 604 | { |
605 | int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP); | 605 | int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP); |
606 | struct timespec ts[3]; | 606 | struct timespec ts[3]; |
607 | int empty = 1; | 607 | int empty = 1; |
608 | struct skb_shared_hwtstamps *shhwtstamps = | 608 | struct skb_shared_hwtstamps *shhwtstamps = |
609 | skb_hwtstamps(skb); | 609 | skb_hwtstamps(skb); |
610 | 610 | ||
611 | /* Race occurred between timestamp enabling and packet | 611 | /* Race occurred between timestamp enabling and packet |
612 | receiving. Fill in the current time for now. */ | 612 | receiving. Fill in the current time for now. */ |
613 | if (need_software_tstamp && skb->tstamp.tv64 == 0) | 613 | if (need_software_tstamp && skb->tstamp.tv64 == 0) |
614 | __net_timestamp(skb); | 614 | __net_timestamp(skb); |
615 | 615 | ||
616 | if (need_software_tstamp) { | 616 | if (need_software_tstamp) { |
617 | if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) { | 617 | if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) { |
618 | struct timeval tv; | 618 | struct timeval tv; |
619 | skb_get_timestamp(skb, &tv); | 619 | skb_get_timestamp(skb, &tv); |
620 | put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, | 620 | put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, |
621 | sizeof(tv), &tv); | 621 | sizeof(tv), &tv); |
622 | } else { | 622 | } else { |
623 | skb_get_timestampns(skb, &ts[0]); | 623 | skb_get_timestampns(skb, &ts[0]); |
624 | put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, | 624 | put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, |
625 | sizeof(ts[0]), &ts[0]); | 625 | sizeof(ts[0]), &ts[0]); |
626 | } | 626 | } |
627 | } | 627 | } |
628 | 628 | ||
629 | 629 | ||
630 | memset(ts, 0, sizeof(ts)); | 630 | memset(ts, 0, sizeof(ts)); |
631 | if (skb->tstamp.tv64 && | 631 | if (skb->tstamp.tv64 && |
632 | sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) { | 632 | sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) { |
633 | skb_get_timestampns(skb, ts + 0); | 633 | skb_get_timestampns(skb, ts + 0); |
634 | empty = 0; | 634 | empty = 0; |
635 | } | 635 | } |
636 | if (shhwtstamps) { | 636 | if (shhwtstamps) { |
637 | if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) && | 637 | if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) && |
638 | ktime2ts(shhwtstamps->syststamp, ts + 1)) | 638 | ktime2ts(shhwtstamps->syststamp, ts + 1)) |
639 | empty = 0; | 639 | empty = 0; |
640 | if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) && | 640 | if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) && |
641 | ktime2ts(shhwtstamps->hwtstamp, ts + 2)) | 641 | ktime2ts(shhwtstamps->hwtstamp, ts + 2)) |
642 | empty = 0; | 642 | empty = 0; |
643 | } | 643 | } |
644 | if (!empty) | 644 | if (!empty) |
645 | put_cmsg(msg, SOL_SOCKET, | 645 | put_cmsg(msg, SOL_SOCKET, |
646 | SCM_TIMESTAMPING, sizeof(ts), &ts); | 646 | SCM_TIMESTAMPING, sizeof(ts), &ts); |
647 | } | 647 | } |
648 | 648 | ||
649 | EXPORT_SYMBOL_GPL(__sock_recv_timestamp); | 649 | EXPORT_SYMBOL_GPL(__sock_recv_timestamp); |
650 | 650 | ||
651 | inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) | 651 | inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) |
652 | { | 652 | { |
653 | if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount) | 653 | if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount) |
654 | put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL, | 654 | put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL, |
655 | sizeof(__u32), &skb->dropcount); | 655 | sizeof(__u32), &skb->dropcount); |
656 | } | 656 | } |
657 | 657 | ||
658 | void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, | 658 | void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, |
659 | struct sk_buff *skb) | 659 | struct sk_buff *skb) |
660 | { | 660 | { |
661 | sock_recv_timestamp(msg, sk, skb); | 661 | sock_recv_timestamp(msg, sk, skb); |
662 | sock_recv_drops(msg, sk, skb); | 662 | sock_recv_drops(msg, sk, skb); |
663 | } | 663 | } |
664 | EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops); | 664 | EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops); |
665 | 665 | ||
666 | static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock, | 666 | static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock, |
667 | struct msghdr *msg, size_t size, int flags) | 667 | struct msghdr *msg, size_t size, int flags) |
668 | { | 668 | { |
669 | struct sock_iocb *si = kiocb_to_siocb(iocb); | 669 | struct sock_iocb *si = kiocb_to_siocb(iocb); |
670 | 670 | ||
671 | si->sock = sock; | 671 | si->sock = sock; |
672 | si->scm = NULL; | 672 | si->scm = NULL; |
673 | si->msg = msg; | 673 | si->msg = msg; |
674 | si->size = size; | 674 | si->size = size; |
675 | si->flags = flags; | 675 | si->flags = flags; |
676 | 676 | ||
677 | return sock->ops->recvmsg(iocb, sock, msg, size, flags); | 677 | return sock->ops->recvmsg(iocb, sock, msg, size, flags); |
678 | } | 678 | } |
679 | 679 | ||
680 | static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, | 680 | static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, |
681 | struct msghdr *msg, size_t size, int flags) | 681 | struct msghdr *msg, size_t size, int flags) |
682 | { | 682 | { |
683 | int err = security_socket_recvmsg(sock, msg, size, flags); | 683 | int err = security_socket_recvmsg(sock, msg, size, flags); |
684 | 684 | ||
685 | return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags); | 685 | return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags); |
686 | } | 686 | } |
687 | 687 | ||
688 | int sock_recvmsg(struct socket *sock, struct msghdr *msg, | 688 | int sock_recvmsg(struct socket *sock, struct msghdr *msg, |
689 | size_t size, int flags) | 689 | size_t size, int flags) |
690 | { | 690 | { |
691 | struct kiocb iocb; | 691 | struct kiocb iocb; |
692 | struct sock_iocb siocb; | 692 | struct sock_iocb siocb; |
693 | int ret; | 693 | int ret; |
694 | 694 | ||
695 | init_sync_kiocb(&iocb, NULL); | 695 | init_sync_kiocb(&iocb, NULL); |
696 | iocb.private = &siocb; | 696 | iocb.private = &siocb; |
697 | ret = __sock_recvmsg(&iocb, sock, msg, size, flags); | 697 | ret = __sock_recvmsg(&iocb, sock, msg, size, flags); |
698 | if (-EIOCBQUEUED == ret) | 698 | if (-EIOCBQUEUED == ret) |
699 | ret = wait_on_sync_kiocb(&iocb); | 699 | ret = wait_on_sync_kiocb(&iocb); |
700 | return ret; | 700 | return ret; |
701 | } | 701 | } |
702 | 702 | ||
703 | static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, | 703 | static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, |
704 | size_t size, int flags) | 704 | size_t size, int flags) |
705 | { | 705 | { |
706 | struct kiocb iocb; | 706 | struct kiocb iocb; |
707 | struct sock_iocb siocb; | 707 | struct sock_iocb siocb; |
708 | int ret; | 708 | int ret; |
709 | 709 | ||
710 | init_sync_kiocb(&iocb, NULL); | 710 | init_sync_kiocb(&iocb, NULL); |
711 | iocb.private = &siocb; | 711 | iocb.private = &siocb; |
712 | ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags); | 712 | ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags); |
713 | if (-EIOCBQUEUED == ret) | 713 | if (-EIOCBQUEUED == ret) |
714 | ret = wait_on_sync_kiocb(&iocb); | 714 | ret = wait_on_sync_kiocb(&iocb); |
715 | return ret; | 715 | return ret; |
716 | } | 716 | } |
717 | 717 | ||
718 | int kernel_recvmsg(struct socket *sock, struct msghdr *msg, | 718 | int kernel_recvmsg(struct socket *sock, struct msghdr *msg, |
719 | struct kvec *vec, size_t num, size_t size, int flags) | 719 | struct kvec *vec, size_t num, size_t size, int flags) |
720 | { | 720 | { |
721 | mm_segment_t oldfs = get_fs(); | 721 | mm_segment_t oldfs = get_fs(); |
722 | int result; | 722 | int result; |
723 | 723 | ||
724 | set_fs(KERNEL_DS); | 724 | set_fs(KERNEL_DS); |
725 | /* | 725 | /* |
726 | * the following is safe, since for compiler definitions of kvec and | 726 | * the following is safe, since for compiler definitions of kvec and |
727 | * iovec are identical, yielding the same in-core layout and alignment | 727 | * iovec are identical, yielding the same in-core layout and alignment |
728 | */ | 728 | */ |
729 | msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num; | 729 | msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num; |
730 | result = sock_recvmsg(sock, msg, size, flags); | 730 | result = sock_recvmsg(sock, msg, size, flags); |
731 | set_fs(oldfs); | 731 | set_fs(oldfs); |
732 | return result; | 732 | return result; |
733 | } | 733 | } |
734 | 734 | ||
735 | static void sock_aio_dtor(struct kiocb *iocb) | 735 | static void sock_aio_dtor(struct kiocb *iocb) |
736 | { | 736 | { |
737 | kfree(iocb->private); | 737 | kfree(iocb->private); |
738 | } | 738 | } |
739 | 739 | ||
740 | static ssize_t sock_sendpage(struct file *file, struct page *page, | 740 | static ssize_t sock_sendpage(struct file *file, struct page *page, |
741 | int offset, size_t size, loff_t *ppos, int more) | 741 | int offset, size_t size, loff_t *ppos, int more) |
742 | { | 742 | { |
743 | struct socket *sock; | 743 | struct socket *sock; |
744 | int flags; | 744 | int flags; |
745 | 745 | ||
746 | sock = file->private_data; | 746 | sock = file->private_data; |
747 | 747 | ||
748 | flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT; | 748 | flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT; |
749 | if (more) | 749 | if (more) |
750 | flags |= MSG_MORE; | 750 | flags |= MSG_MORE; |
751 | 751 | ||
752 | return kernel_sendpage(sock, page, offset, size, flags); | 752 | return kernel_sendpage(sock, page, offset, size, flags); |
753 | } | 753 | } |
754 | 754 | ||
755 | static ssize_t sock_splice_read(struct file *file, loff_t *ppos, | 755 | static ssize_t sock_splice_read(struct file *file, loff_t *ppos, |
756 | struct pipe_inode_info *pipe, size_t len, | 756 | struct pipe_inode_info *pipe, size_t len, |
757 | unsigned int flags) | 757 | unsigned int flags) |
758 | { | 758 | { |
759 | struct socket *sock = file->private_data; | 759 | struct socket *sock = file->private_data; |
760 | 760 | ||
761 | if (unlikely(!sock->ops->splice_read)) | 761 | if (unlikely(!sock->ops->splice_read)) |
762 | return -EINVAL; | 762 | return -EINVAL; |
763 | 763 | ||
764 | return sock->ops->splice_read(sock, ppos, pipe, len, flags); | 764 | return sock->ops->splice_read(sock, ppos, pipe, len, flags); |
765 | } | 765 | } |
766 | 766 | ||
767 | static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, | 767 | static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, |
768 | struct sock_iocb *siocb) | 768 | struct sock_iocb *siocb) |
769 | { | 769 | { |
770 | if (!is_sync_kiocb(iocb)) { | 770 | if (!is_sync_kiocb(iocb)) { |
771 | siocb = kmalloc(sizeof(*siocb), GFP_KERNEL); | 771 | siocb = kmalloc(sizeof(*siocb), GFP_KERNEL); |
772 | if (!siocb) | 772 | if (!siocb) |
773 | return NULL; | 773 | return NULL; |
774 | iocb->ki_dtor = sock_aio_dtor; | 774 | iocb->ki_dtor = sock_aio_dtor; |
775 | } | 775 | } |
776 | 776 | ||
777 | siocb->kiocb = iocb; | 777 | siocb->kiocb = iocb; |
778 | iocb->private = siocb; | 778 | iocb->private = siocb; |
779 | return siocb; | 779 | return siocb; |
780 | } | 780 | } |
781 | 781 | ||
782 | static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, | 782 | static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, |
783 | struct file *file, const struct iovec *iov, | 783 | struct file *file, const struct iovec *iov, |
784 | unsigned long nr_segs) | 784 | unsigned long nr_segs) |
785 | { | 785 | { |
786 | struct socket *sock = file->private_data; | 786 | struct socket *sock = file->private_data; |
787 | size_t size = 0; | 787 | size_t size = 0; |
788 | int i; | 788 | int i; |
789 | 789 | ||
790 | for (i = 0; i < nr_segs; i++) | 790 | for (i = 0; i < nr_segs; i++) |
791 | size += iov[i].iov_len; | 791 | size += iov[i].iov_len; |
792 | 792 | ||
793 | msg->msg_name = NULL; | 793 | msg->msg_name = NULL; |
794 | msg->msg_namelen = 0; | 794 | msg->msg_namelen = 0; |
795 | msg->msg_control = NULL; | 795 | msg->msg_control = NULL; |
796 | msg->msg_controllen = 0; | 796 | msg->msg_controllen = 0; |
797 | msg->msg_iov = (struct iovec *)iov; | 797 | msg->msg_iov = (struct iovec *)iov; |
798 | msg->msg_iovlen = nr_segs; | 798 | msg->msg_iovlen = nr_segs; |
799 | msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; | 799 | msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; |
800 | 800 | ||
801 | return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags); | 801 | return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags); |
802 | } | 802 | } |
803 | 803 | ||
804 | static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, | 804 | static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, |
805 | unsigned long nr_segs, loff_t pos) | 805 | unsigned long nr_segs, loff_t pos) |
806 | { | 806 | { |
807 | struct sock_iocb siocb, *x; | 807 | struct sock_iocb siocb, *x; |
808 | 808 | ||
809 | if (pos != 0) | 809 | if (pos != 0) |
810 | return -ESPIPE; | 810 | return -ESPIPE; |
811 | 811 | ||
812 | if (iocb->ki_left == 0) /* Match SYS5 behaviour */ | 812 | if (iocb->ki_left == 0) /* Match SYS5 behaviour */ |
813 | return 0; | 813 | return 0; |
814 | 814 | ||
815 | 815 | ||
816 | x = alloc_sock_iocb(iocb, &siocb); | 816 | x = alloc_sock_iocb(iocb, &siocb); |
817 | if (!x) | 817 | if (!x) |
818 | return -ENOMEM; | 818 | return -ENOMEM; |
819 | return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); | 819 | return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); |
820 | } | 820 | } |
821 | 821 | ||
822 | static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, | 822 | static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, |
823 | struct file *file, const struct iovec *iov, | 823 | struct file *file, const struct iovec *iov, |
824 | unsigned long nr_segs) | 824 | unsigned long nr_segs) |
825 | { | 825 | { |
826 | struct socket *sock = file->private_data; | 826 | struct socket *sock = file->private_data; |
827 | size_t size = 0; | 827 | size_t size = 0; |
828 | int i; | 828 | int i; |
829 | 829 | ||
830 | for (i = 0; i < nr_segs; i++) | 830 | for (i = 0; i < nr_segs; i++) |
831 | size += iov[i].iov_len; | 831 | size += iov[i].iov_len; |
832 | 832 | ||
833 | msg->msg_name = NULL; | 833 | msg->msg_name = NULL; |
834 | msg->msg_namelen = 0; | 834 | msg->msg_namelen = 0; |
835 | msg->msg_control = NULL; | 835 | msg->msg_control = NULL; |
836 | msg->msg_controllen = 0; | 836 | msg->msg_controllen = 0; |
837 | msg->msg_iov = (struct iovec *)iov; | 837 | msg->msg_iov = (struct iovec *)iov; |
838 | msg->msg_iovlen = nr_segs; | 838 | msg->msg_iovlen = nr_segs; |
839 | msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; | 839 | msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; |
840 | if (sock->type == SOCK_SEQPACKET) | 840 | if (sock->type == SOCK_SEQPACKET) |
841 | msg->msg_flags |= MSG_EOR; | 841 | msg->msg_flags |= MSG_EOR; |
842 | 842 | ||
843 | return __sock_sendmsg(iocb, sock, msg, size); | 843 | return __sock_sendmsg(iocb, sock, msg, size); |
844 | } | 844 | } |
845 | 845 | ||
846 | static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, | 846 | static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, |
847 | unsigned long nr_segs, loff_t pos) | 847 | unsigned long nr_segs, loff_t pos) |
848 | { | 848 | { |
849 | struct sock_iocb siocb, *x; | 849 | struct sock_iocb siocb, *x; |
850 | 850 | ||
851 | if (pos != 0) | 851 | if (pos != 0) |
852 | return -ESPIPE; | 852 | return -ESPIPE; |
853 | 853 | ||
854 | x = alloc_sock_iocb(iocb, &siocb); | 854 | x = alloc_sock_iocb(iocb, &siocb); |
855 | if (!x) | 855 | if (!x) |
856 | return -ENOMEM; | 856 | return -ENOMEM; |
857 | 857 | ||
858 | return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); | 858 | return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); |
859 | } | 859 | } |
860 | 860 | ||
861 | /* | 861 | /* |
862 | * Atomic setting of ioctl hooks to avoid race | 862 | * Atomic setting of ioctl hooks to avoid race |
863 | * with module unload. | 863 | * with module unload. |
864 | */ | 864 | */ |
865 | 865 | ||
866 | static DEFINE_MUTEX(br_ioctl_mutex); | 866 | static DEFINE_MUTEX(br_ioctl_mutex); |
867 | static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL; | 867 | static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL; |
868 | 868 | ||
869 | void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) | 869 | void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) |
870 | { | 870 | { |
871 | mutex_lock(&br_ioctl_mutex); | 871 | mutex_lock(&br_ioctl_mutex); |
872 | br_ioctl_hook = hook; | 872 | br_ioctl_hook = hook; |
873 | mutex_unlock(&br_ioctl_mutex); | 873 | mutex_unlock(&br_ioctl_mutex); |
874 | } | 874 | } |
875 | 875 | ||
876 | EXPORT_SYMBOL(brioctl_set); | 876 | EXPORT_SYMBOL(brioctl_set); |
877 | 877 | ||
878 | static DEFINE_MUTEX(vlan_ioctl_mutex); | 878 | static DEFINE_MUTEX(vlan_ioctl_mutex); |
879 | static int (*vlan_ioctl_hook) (struct net *, void __user *arg); | 879 | static int (*vlan_ioctl_hook) (struct net *, void __user *arg); |
880 | 880 | ||
881 | void vlan_ioctl_set(int (*hook) (struct net *, void __user *)) | 881 | void vlan_ioctl_set(int (*hook) (struct net *, void __user *)) |
882 | { | 882 | { |
883 | mutex_lock(&vlan_ioctl_mutex); | 883 | mutex_lock(&vlan_ioctl_mutex); |
884 | vlan_ioctl_hook = hook; | 884 | vlan_ioctl_hook = hook; |
885 | mutex_unlock(&vlan_ioctl_mutex); | 885 | mutex_unlock(&vlan_ioctl_mutex); |
886 | } | 886 | } |
887 | 887 | ||
888 | EXPORT_SYMBOL(vlan_ioctl_set); | 888 | EXPORT_SYMBOL(vlan_ioctl_set); |
889 | 889 | ||
890 | static DEFINE_MUTEX(dlci_ioctl_mutex); | 890 | static DEFINE_MUTEX(dlci_ioctl_mutex); |
891 | static int (*dlci_ioctl_hook) (unsigned int, void __user *); | 891 | static int (*dlci_ioctl_hook) (unsigned int, void __user *); |
892 | 892 | ||
893 | void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) | 893 | void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) |
894 | { | 894 | { |
895 | mutex_lock(&dlci_ioctl_mutex); | 895 | mutex_lock(&dlci_ioctl_mutex); |
896 | dlci_ioctl_hook = hook; | 896 | dlci_ioctl_hook = hook; |
897 | mutex_unlock(&dlci_ioctl_mutex); | 897 | mutex_unlock(&dlci_ioctl_mutex); |
898 | } | 898 | } |
899 | 899 | ||
900 | EXPORT_SYMBOL(dlci_ioctl_set); | 900 | EXPORT_SYMBOL(dlci_ioctl_set); |
901 | 901 | ||
902 | static long sock_do_ioctl(struct net *net, struct socket *sock, | 902 | static long sock_do_ioctl(struct net *net, struct socket *sock, |
903 | unsigned int cmd, unsigned long arg) | 903 | unsigned int cmd, unsigned long arg) |
904 | { | 904 | { |
905 | int err; | 905 | int err; |
906 | void __user *argp = (void __user *)arg; | 906 | void __user *argp = (void __user *)arg; |
907 | 907 | ||
908 | err = sock->ops->ioctl(sock, cmd, arg); | 908 | err = sock->ops->ioctl(sock, cmd, arg); |
909 | 909 | ||
910 | /* | 910 | /* |
911 | * If this ioctl is unknown try to hand it down | 911 | * If this ioctl is unknown try to hand it down |
912 | * to the NIC driver. | 912 | * to the NIC driver. |
913 | */ | 913 | */ |
914 | if (err == -ENOIOCTLCMD) | 914 | if (err == -ENOIOCTLCMD) |
915 | err = dev_ioctl(net, cmd, argp); | 915 | err = dev_ioctl(net, cmd, argp); |
916 | 916 | ||
917 | return err; | 917 | return err; |
918 | } | 918 | } |
919 | 919 | ||
920 | /* | 920 | /* |
921 | * With an ioctl, arg may well be a user mode pointer, but we don't know | 921 | * With an ioctl, arg may well be a user mode pointer, but we don't know |
922 | * what to do with it - that's up to the protocol still. | 922 | * what to do with it - that's up to the protocol still. |
923 | */ | 923 | */ |
924 | 924 | ||
925 | static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) | 925 | static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) |
926 | { | 926 | { |
927 | struct socket *sock; | 927 | struct socket *sock; |
928 | struct sock *sk; | 928 | struct sock *sk; |
929 | void __user *argp = (void __user *)arg; | 929 | void __user *argp = (void __user *)arg; |
930 | int pid, err; | 930 | int pid, err; |
931 | struct net *net; | 931 | struct net *net; |
932 | 932 | ||
933 | sock = file->private_data; | 933 | sock = file->private_data; |
934 | sk = sock->sk; | 934 | sk = sock->sk; |
935 | net = sock_net(sk); | 935 | net = sock_net(sk); |
936 | if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { | 936 | if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { |
937 | err = dev_ioctl(net, cmd, argp); | 937 | err = dev_ioctl(net, cmd, argp); |
938 | } else | 938 | } else |
939 | #ifdef CONFIG_WEXT_CORE | 939 | #ifdef CONFIG_WEXT_CORE |
940 | if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { | 940 | if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { |
941 | err = dev_ioctl(net, cmd, argp); | 941 | err = dev_ioctl(net, cmd, argp); |
942 | } else | 942 | } else |
943 | #endif | 943 | #endif |
944 | switch (cmd) { | 944 | switch (cmd) { |
945 | case FIOSETOWN: | 945 | case FIOSETOWN: |
946 | case SIOCSPGRP: | 946 | case SIOCSPGRP: |
947 | err = -EFAULT; | 947 | err = -EFAULT; |
948 | if (get_user(pid, (int __user *)argp)) | 948 | if (get_user(pid, (int __user *)argp)) |
949 | break; | 949 | break; |
950 | err = f_setown(sock->file, pid, 1); | 950 | err = f_setown(sock->file, pid, 1); |
951 | break; | 951 | break; |
952 | case FIOGETOWN: | 952 | case FIOGETOWN: |
953 | case SIOCGPGRP: | 953 | case SIOCGPGRP: |
954 | err = put_user(f_getown(sock->file), | 954 | err = put_user(f_getown(sock->file), |
955 | (int __user *)argp); | 955 | (int __user *)argp); |
956 | break; | 956 | break; |
957 | case SIOCGIFBR: | 957 | case SIOCGIFBR: |
958 | case SIOCSIFBR: | 958 | case SIOCSIFBR: |
959 | case SIOCBRADDBR: | 959 | case SIOCBRADDBR: |
960 | case SIOCBRDELBR: | 960 | case SIOCBRDELBR: |
961 | err = -ENOPKG; | 961 | err = -ENOPKG; |
962 | if (!br_ioctl_hook) | 962 | if (!br_ioctl_hook) |
963 | request_module("bridge"); | 963 | request_module("bridge"); |
964 | 964 | ||
965 | mutex_lock(&br_ioctl_mutex); | 965 | mutex_lock(&br_ioctl_mutex); |
966 | if (br_ioctl_hook) | 966 | if (br_ioctl_hook) |
967 | err = br_ioctl_hook(net, cmd, argp); | 967 | err = br_ioctl_hook(net, cmd, argp); |
968 | mutex_unlock(&br_ioctl_mutex); | 968 | mutex_unlock(&br_ioctl_mutex); |
969 | break; | 969 | break; |
970 | case SIOCGIFVLAN: | 970 | case SIOCGIFVLAN: |
971 | case SIOCSIFVLAN: | 971 | case SIOCSIFVLAN: |
972 | err = -ENOPKG; | 972 | err = -ENOPKG; |
973 | if (!vlan_ioctl_hook) | 973 | if (!vlan_ioctl_hook) |
974 | request_module("8021q"); | 974 | request_module("8021q"); |
975 | 975 | ||
976 | mutex_lock(&vlan_ioctl_mutex); | 976 | mutex_lock(&vlan_ioctl_mutex); |
977 | if (vlan_ioctl_hook) | 977 | if (vlan_ioctl_hook) |
978 | err = vlan_ioctl_hook(net, argp); | 978 | err = vlan_ioctl_hook(net, argp); |
979 | mutex_unlock(&vlan_ioctl_mutex); | 979 | mutex_unlock(&vlan_ioctl_mutex); |
980 | break; | 980 | break; |
981 | case SIOCADDDLCI: | 981 | case SIOCADDDLCI: |
982 | case SIOCDELDLCI: | 982 | case SIOCDELDLCI: |
983 | err = -ENOPKG; | 983 | err = -ENOPKG; |
984 | if (!dlci_ioctl_hook) | 984 | if (!dlci_ioctl_hook) |
985 | request_module("dlci"); | 985 | request_module("dlci"); |
986 | 986 | ||
987 | mutex_lock(&dlci_ioctl_mutex); | 987 | mutex_lock(&dlci_ioctl_mutex); |
988 | if (dlci_ioctl_hook) | 988 | if (dlci_ioctl_hook) |
989 | err = dlci_ioctl_hook(cmd, argp); | 989 | err = dlci_ioctl_hook(cmd, argp); |
990 | mutex_unlock(&dlci_ioctl_mutex); | 990 | mutex_unlock(&dlci_ioctl_mutex); |
991 | break; | 991 | break; |
992 | default: | 992 | default: |
993 | err = sock_do_ioctl(net, sock, cmd, arg); | 993 | err = sock_do_ioctl(net, sock, cmd, arg); |
994 | break; | 994 | break; |
995 | } | 995 | } |
996 | return err; | 996 | return err; |
997 | } | 997 | } |
998 | 998 | ||
999 | int sock_create_lite(int family, int type, int protocol, struct socket **res) | 999 | int sock_create_lite(int family, int type, int protocol, struct socket **res) |
1000 | { | 1000 | { |
1001 | int err; | 1001 | int err; |
1002 | struct socket *sock = NULL; | 1002 | struct socket *sock = NULL; |
1003 | 1003 | ||
1004 | err = security_socket_create(family, type, protocol, 1); | 1004 | err = security_socket_create(family, type, protocol, 1); |
1005 | if (err) | 1005 | if (err) |
1006 | goto out; | 1006 | goto out; |
1007 | 1007 | ||
1008 | sock = sock_alloc(); | 1008 | sock = sock_alloc(); |
1009 | if (!sock) { | 1009 | if (!sock) { |
1010 | err = -ENOMEM; | 1010 | err = -ENOMEM; |
1011 | goto out; | 1011 | goto out; |
1012 | } | 1012 | } |
1013 | 1013 | ||
1014 | sock->type = type; | 1014 | sock->type = type; |
1015 | err = security_socket_post_create(sock, family, type, protocol, 1); | 1015 | err = security_socket_post_create(sock, family, type, protocol, 1); |
1016 | if (err) | 1016 | if (err) |
1017 | goto out_release; | 1017 | goto out_release; |
1018 | 1018 | ||
1019 | out: | 1019 | out: |
1020 | *res = sock; | 1020 | *res = sock; |
1021 | return err; | 1021 | return err; |
1022 | out_release: | 1022 | out_release: |
1023 | sock_release(sock); | 1023 | sock_release(sock); |
1024 | sock = NULL; | 1024 | sock = NULL; |
1025 | goto out; | 1025 | goto out; |
1026 | } | 1026 | } |
1027 | 1027 | ||
1028 | /* No kernel lock held - perfect */ | 1028 | /* No kernel lock held - perfect */ |
1029 | static unsigned int sock_poll(struct file *file, poll_table *wait) | 1029 | static unsigned int sock_poll(struct file *file, poll_table *wait) |
1030 | { | 1030 | { |
1031 | struct socket *sock; | 1031 | struct socket *sock; |
1032 | 1032 | ||
1033 | /* | 1033 | /* |
1034 | * We can't return errors to poll, so it's either yes or no. | 1034 | * We can't return errors to poll, so it's either yes or no. |
1035 | */ | 1035 | */ |
1036 | sock = file->private_data; | 1036 | sock = file->private_data; |
1037 | return sock->ops->poll(file, sock, wait); | 1037 | return sock->ops->poll(file, sock, wait); |
1038 | } | 1038 | } |
1039 | 1039 | ||
1040 | static int sock_mmap(struct file *file, struct vm_area_struct *vma) | 1040 | static int sock_mmap(struct file *file, struct vm_area_struct *vma) |
1041 | { | 1041 | { |
1042 | struct socket *sock = file->private_data; | 1042 | struct socket *sock = file->private_data; |
1043 | 1043 | ||
1044 | return sock->ops->mmap(file, sock, vma); | 1044 | return sock->ops->mmap(file, sock, vma); |
1045 | } | 1045 | } |
1046 | 1046 | ||
1047 | static int sock_close(struct inode *inode, struct file *filp) | 1047 | static int sock_close(struct inode *inode, struct file *filp) |
1048 | { | 1048 | { |
1049 | /* | 1049 | /* |
1050 | * It was possible the inode is NULL we were | 1050 | * It was possible the inode is NULL we were |
1051 | * closing an unfinished socket. | 1051 | * closing an unfinished socket. |
1052 | */ | 1052 | */ |
1053 | 1053 | ||
1054 | if (!inode) { | 1054 | if (!inode) { |
1055 | printk(KERN_DEBUG "sock_close: NULL inode\n"); | 1055 | printk(KERN_DEBUG "sock_close: NULL inode\n"); |
1056 | return 0; | 1056 | return 0; |
1057 | } | 1057 | } |
1058 | sock_release(SOCKET_I(inode)); | 1058 | sock_release(SOCKET_I(inode)); |
1059 | return 0; | 1059 | return 0; |
1060 | } | 1060 | } |
1061 | 1061 | ||
1062 | /* | 1062 | /* |
1063 | * Update the socket async list | 1063 | * Update the socket async list |
1064 | * | 1064 | * |
1065 | * Fasync_list locking strategy. | 1065 | * Fasync_list locking strategy. |
1066 | * | 1066 | * |
1067 | * 1. fasync_list is modified only under process context socket lock | 1067 | * 1. fasync_list is modified only under process context socket lock |
1068 | * i.e. under semaphore. | 1068 | * i.e. under semaphore. |
1069 | * 2. fasync_list is used under read_lock(&sk->sk_callback_lock) | 1069 | * 2. fasync_list is used under read_lock(&sk->sk_callback_lock) |
1070 | * or under socket lock. | 1070 | * or under socket lock |
1071 | * 3. fasync_list can be used from softirq context, so that | ||
1072 | * modification under socket lock have to be enhanced with | ||
1073 | * write_lock_bh(&sk->sk_callback_lock). | ||
1074 | * --ANK (990710) | ||
1075 | */ | 1071 | */ |
1076 | 1072 | ||
1077 | static int sock_fasync(int fd, struct file *filp, int on) | 1073 | static int sock_fasync(int fd, struct file *filp, int on) |
1078 | { | 1074 | { |
1079 | struct fasync_struct *fa, *fna = NULL, **prev; | 1075 | struct socket *sock = filp->private_data; |
1080 | struct socket *sock; | 1076 | struct sock *sk = sock->sk; |
1081 | struct sock *sk; | ||
1082 | 1077 | ||
1083 | if (on) { | 1078 | if (sk == NULL) |
1084 | fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL); | ||
1085 | if (fna == NULL) | ||
1086 | return -ENOMEM; | ||
1087 | } | ||
1088 | |||
1089 | sock = filp->private_data; | ||
1090 | |||
1091 | sk = sock->sk; | ||
1092 | if (sk == NULL) { | ||
1093 | kfree(fna); | ||
1094 | return -EINVAL; | 1079 | return -EINVAL; |
1095 | } | ||
1096 | 1080 | ||
1097 | lock_sock(sk); | 1081 | lock_sock(sk); |
1098 | 1082 | ||
1099 | spin_lock(&filp->f_lock); | 1083 | fasync_helper(fd, filp, on, &sock->fasync_list); |
1100 | if (on) | ||
1101 | filp->f_flags |= FASYNC; | ||
1102 | else | ||
1103 | filp->f_flags &= ~FASYNC; | ||
1104 | spin_unlock(&filp->f_lock); | ||
1105 | 1084 | ||
1106 | prev = &(sock->fasync_list); | 1085 | if (!sock->fasync_list) |
1107 | 1086 | sock_reset_flag(sk, SOCK_FASYNC); | |
1108 | for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev) | 1087 | else |
1109 | if (fa->fa_file == filp) | ||
1110 | break; | ||
1111 | |||
1112 | if (on) { | ||
1113 | if (fa != NULL) { | ||
1114 | write_lock_bh(&sk->sk_callback_lock); | ||
1115 | fa->fa_fd = fd; | ||
1116 | write_unlock_bh(&sk->sk_callback_lock); | ||
1117 | |||
1118 | kfree(fna); | ||
1119 | goto out; | ||
1120 | } | ||
1121 | fna->fa_file = filp; | ||
1122 | fna->fa_fd = fd; | ||
1123 | fna->magic = FASYNC_MAGIC; | ||
1124 | fna->fa_next = sock->fasync_list; | ||
1125 | write_lock_bh(&sk->sk_callback_lock); | ||
1126 | sock->fasync_list = fna; | ||
1127 | sock_set_flag(sk, SOCK_FASYNC); | 1088 | sock_set_flag(sk, SOCK_FASYNC); |
1128 | write_unlock_bh(&sk->sk_callback_lock); | ||
1129 | } else { | ||
1130 | if (fa != NULL) { | ||
1131 | write_lock_bh(&sk->sk_callback_lock); | ||
1132 | *prev = fa->fa_next; | ||
1133 | if (!sock->fasync_list) | ||
1134 | sock_reset_flag(sk, SOCK_FASYNC); | ||
1135 | write_unlock_bh(&sk->sk_callback_lock); | ||
1136 | kfree(fa); | ||
1137 | } | ||
1138 | } | ||
1139 | 1089 | ||
1140 | out: | 1090 | release_sock(sk); |
1141 | release_sock(sock->sk); | ||
1142 | return 0; | 1091 | return 0; |
1143 | } | 1092 | } |
1144 | 1093 | ||
1145 | /* This function may be called only under socket lock or callback_lock */ | 1094 | /* This function may be called only under socket lock or callback_lock */ |
1146 | 1095 | ||
1147 | int sock_wake_async(struct socket *sock, int how, int band) | 1096 | int sock_wake_async(struct socket *sock, int how, int band) |
1148 | { | 1097 | { |
1149 | if (!sock || !sock->fasync_list) | 1098 | if (!sock || !sock->fasync_list) |
1150 | return -1; | 1099 | return -1; |
1151 | switch (how) { | 1100 | switch (how) { |
1152 | case SOCK_WAKE_WAITD: | 1101 | case SOCK_WAKE_WAITD: |
1153 | if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) | 1102 | if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) |
1154 | break; | 1103 | break; |
1155 | goto call_kill; | 1104 | goto call_kill; |
1156 | case SOCK_WAKE_SPACE: | 1105 | case SOCK_WAKE_SPACE: |
1157 | if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags)) | 1106 | if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags)) |
1158 | break; | 1107 | break; |
1159 | /* fall through */ | 1108 | /* fall through */ |
1160 | case SOCK_WAKE_IO: | 1109 | case SOCK_WAKE_IO: |
1161 | call_kill: | 1110 | call_kill: |
1162 | __kill_fasync(sock->fasync_list, SIGIO, band); | 1111 | kill_fasync(&sock->fasync_list, SIGIO, band); |
1163 | break; | 1112 | break; |
1164 | case SOCK_WAKE_URG: | 1113 | case SOCK_WAKE_URG: |
1165 | __kill_fasync(sock->fasync_list, SIGURG, band); | 1114 | kill_fasync(&sock->fasync_list, SIGURG, band); |
1166 | } | 1115 | } |
1167 | return 0; | 1116 | return 0; |
1168 | } | 1117 | } |
1169 | 1118 | ||
1170 | static int __sock_create(struct net *net, int family, int type, int protocol, | 1119 | static int __sock_create(struct net *net, int family, int type, int protocol, |
1171 | struct socket **res, int kern) | 1120 | struct socket **res, int kern) |
1172 | { | 1121 | { |
1173 | int err; | 1122 | int err; |
1174 | struct socket *sock; | 1123 | struct socket *sock; |
1175 | const struct net_proto_family *pf; | 1124 | const struct net_proto_family *pf; |
1176 | 1125 | ||
1177 | /* | 1126 | /* |
1178 | * Check protocol is in range | 1127 | * Check protocol is in range |
1179 | */ | 1128 | */ |
1180 | if (family < 0 || family >= NPROTO) | 1129 | if (family < 0 || family >= NPROTO) |
1181 | return -EAFNOSUPPORT; | 1130 | return -EAFNOSUPPORT; |
1182 | if (type < 0 || type >= SOCK_MAX) | 1131 | if (type < 0 || type >= SOCK_MAX) |
1183 | return -EINVAL; | 1132 | return -EINVAL; |
1184 | 1133 | ||
1185 | /* Compatibility. | 1134 | /* Compatibility. |
1186 | 1135 | ||
1187 | This uglymoron is moved from INET layer to here to avoid | 1136 | This uglymoron is moved from INET layer to here to avoid |
1188 | deadlock in module load. | 1137 | deadlock in module load. |
1189 | */ | 1138 | */ |
1190 | if (family == PF_INET && type == SOCK_PACKET) { | 1139 | if (family == PF_INET && type == SOCK_PACKET) { |
1191 | static int warned; | 1140 | static int warned; |
1192 | if (!warned) { | 1141 | if (!warned) { |
1193 | warned = 1; | 1142 | warned = 1; |
1194 | printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", | 1143 | printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", |
1195 | current->comm); | 1144 | current->comm); |
1196 | } | 1145 | } |
1197 | family = PF_PACKET; | 1146 | family = PF_PACKET; |
1198 | } | 1147 | } |
1199 | 1148 | ||
1200 | err = security_socket_create(family, type, protocol, kern); | 1149 | err = security_socket_create(family, type, protocol, kern); |
1201 | if (err) | 1150 | if (err) |
1202 | return err; | 1151 | return err; |
1203 | 1152 | ||
1204 | /* | 1153 | /* |
1205 | * Allocate the socket and allow the family to set things up. if | 1154 | * Allocate the socket and allow the family to set things up. if |
1206 | * the protocol is 0, the family is instructed to select an appropriate | 1155 | * the protocol is 0, the family is instructed to select an appropriate |
1207 | * default. | 1156 | * default. |
1208 | */ | 1157 | */ |
1209 | sock = sock_alloc(); | 1158 | sock = sock_alloc(); |
1210 | if (!sock) { | 1159 | if (!sock) { |
1211 | if (net_ratelimit()) | 1160 | if (net_ratelimit()) |
1212 | printk(KERN_WARNING "socket: no more sockets\n"); | 1161 | printk(KERN_WARNING "socket: no more sockets\n"); |
1213 | return -ENFILE; /* Not exactly a match, but its the | 1162 | return -ENFILE; /* Not exactly a match, but its the |
1214 | closest posix thing */ | 1163 | closest posix thing */ |
1215 | } | 1164 | } |
1216 | 1165 | ||
1217 | sock->type = type; | 1166 | sock->type = type; |
1218 | 1167 | ||
1219 | #ifdef CONFIG_MODULES | 1168 | #ifdef CONFIG_MODULES |
1220 | /* Attempt to load a protocol module if the find failed. | 1169 | /* Attempt to load a protocol module if the find failed. |
1221 | * | 1170 | * |
1222 | * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user | 1171 | * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user |
1223 | * requested real, full-featured networking support upon configuration. | 1172 | * requested real, full-featured networking support upon configuration. |
1224 | * Otherwise module support will break! | 1173 | * Otherwise module support will break! |
1225 | */ | 1174 | */ |
1226 | if (net_families[family] == NULL) | 1175 | if (net_families[family] == NULL) |
1227 | request_module("net-pf-%d", family); | 1176 | request_module("net-pf-%d", family); |
1228 | #endif | 1177 | #endif |
1229 | 1178 | ||
1230 | rcu_read_lock(); | 1179 | rcu_read_lock(); |
1231 | pf = rcu_dereference(net_families[family]); | 1180 | pf = rcu_dereference(net_families[family]); |
1232 | err = -EAFNOSUPPORT; | 1181 | err = -EAFNOSUPPORT; |
1233 | if (!pf) | 1182 | if (!pf) |
1234 | goto out_release; | 1183 | goto out_release; |
1235 | 1184 | ||
1236 | /* | 1185 | /* |
1237 | * We will call the ->create function, that possibly is in a loadable | 1186 | * We will call the ->create function, that possibly is in a loadable |
1238 | * module, so we have to bump that loadable module refcnt first. | 1187 | * module, so we have to bump that loadable module refcnt first. |
1239 | */ | 1188 | */ |
1240 | if (!try_module_get(pf->owner)) | 1189 | if (!try_module_get(pf->owner)) |
1241 | goto out_release; | 1190 | goto out_release; |
1242 | 1191 | ||
1243 | /* Now protected by module ref count */ | 1192 | /* Now protected by module ref count */ |
1244 | rcu_read_unlock(); | 1193 | rcu_read_unlock(); |
1245 | 1194 | ||
1246 | err = pf->create(net, sock, protocol, kern); | 1195 | err = pf->create(net, sock, protocol, kern); |
1247 | if (err < 0) | 1196 | if (err < 0) |
1248 | goto out_module_put; | 1197 | goto out_module_put; |
1249 | 1198 | ||
1250 | /* | 1199 | /* |
1251 | * Now to bump the refcnt of the [loadable] module that owns this | 1200 | * Now to bump the refcnt of the [loadable] module that owns this |
1252 | * socket at sock_release time we decrement its refcnt. | 1201 | * socket at sock_release time we decrement its refcnt. |
1253 | */ | 1202 | */ |
1254 | if (!try_module_get(sock->ops->owner)) | 1203 | if (!try_module_get(sock->ops->owner)) |
1255 | goto out_module_busy; | 1204 | goto out_module_busy; |
1256 | 1205 | ||
1257 | /* | 1206 | /* |
1258 | * Now that we're done with the ->create function, the [loadable] | 1207 | * Now that we're done with the ->create function, the [loadable] |
1259 | * module can have its refcnt decremented | 1208 | * module can have its refcnt decremented |
1260 | */ | 1209 | */ |
1261 | module_put(pf->owner); | 1210 | module_put(pf->owner); |
1262 | err = security_socket_post_create(sock, family, type, protocol, kern); | 1211 | err = security_socket_post_create(sock, family, type, protocol, kern); |
1263 | if (err) | 1212 | if (err) |
1264 | goto out_sock_release; | 1213 | goto out_sock_release; |
1265 | *res = sock; | 1214 | *res = sock; |
1266 | 1215 | ||
1267 | return 0; | 1216 | return 0; |
1268 | 1217 | ||
1269 | out_module_busy: | 1218 | out_module_busy: |
1270 | err = -EAFNOSUPPORT; | 1219 | err = -EAFNOSUPPORT; |
1271 | out_module_put: | 1220 | out_module_put: |
1272 | sock->ops = NULL; | 1221 | sock->ops = NULL; |
1273 | module_put(pf->owner); | 1222 | module_put(pf->owner); |
1274 | out_sock_release: | 1223 | out_sock_release: |
1275 | sock_release(sock); | 1224 | sock_release(sock); |
1276 | return err; | 1225 | return err; |
1277 | 1226 | ||
1278 | out_release: | 1227 | out_release: |
1279 | rcu_read_unlock(); | 1228 | rcu_read_unlock(); |
1280 | goto out_sock_release; | 1229 | goto out_sock_release; |
1281 | } | 1230 | } |
1282 | 1231 | ||
1283 | int sock_create(int family, int type, int protocol, struct socket **res) | 1232 | int sock_create(int family, int type, int protocol, struct socket **res) |
1284 | { | 1233 | { |
1285 | return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); | 1234 | return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); |
1286 | } | 1235 | } |
1287 | 1236 | ||
1288 | int sock_create_kern(int family, int type, int protocol, struct socket **res) | 1237 | int sock_create_kern(int family, int type, int protocol, struct socket **res) |
1289 | { | 1238 | { |
1290 | return __sock_create(&init_net, family, type, protocol, res, 1); | 1239 | return __sock_create(&init_net, family, type, protocol, res, 1); |
1291 | } | 1240 | } |
1292 | 1241 | ||
1293 | SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) | 1242 | SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) |
1294 | { | 1243 | { |
1295 | int retval; | 1244 | int retval; |
1296 | struct socket *sock; | 1245 | struct socket *sock; |
1297 | int flags; | 1246 | int flags; |
1298 | 1247 | ||
1299 | /* Check the SOCK_* constants for consistency. */ | 1248 | /* Check the SOCK_* constants for consistency. */ |
1300 | BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC); | 1249 | BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC); |
1301 | BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK); | 1250 | BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK); |
1302 | BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK); | 1251 | BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK); |
1303 | BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK); | 1252 | BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK); |
1304 | 1253 | ||
1305 | flags = type & ~SOCK_TYPE_MASK; | 1254 | flags = type & ~SOCK_TYPE_MASK; |
1306 | if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) | 1255 | if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) |
1307 | return -EINVAL; | 1256 | return -EINVAL; |
1308 | type &= SOCK_TYPE_MASK; | 1257 | type &= SOCK_TYPE_MASK; |
1309 | 1258 | ||
1310 | if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) | 1259 | if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) |
1311 | flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; | 1260 | flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; |
1312 | 1261 | ||
1313 | retval = sock_create(family, type, protocol, &sock); | 1262 | retval = sock_create(family, type, protocol, &sock); |
1314 | if (retval < 0) | 1263 | if (retval < 0) |
1315 | goto out; | 1264 | goto out; |
1316 | 1265 | ||
1317 | retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); | 1266 | retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); |
1318 | if (retval < 0) | 1267 | if (retval < 0) |
1319 | goto out_release; | 1268 | goto out_release; |
1320 | 1269 | ||
1321 | out: | 1270 | out: |
1322 | /* It may be already another descriptor 8) Not kernel problem. */ | 1271 | /* It may be already another descriptor 8) Not kernel problem. */ |
1323 | return retval; | 1272 | return retval; |
1324 | 1273 | ||
1325 | out_release: | 1274 | out_release: |
1326 | sock_release(sock); | 1275 | sock_release(sock); |
1327 | return retval; | 1276 | return retval; |
1328 | } | 1277 | } |
1329 | 1278 | ||
1330 | /* | 1279 | /* |
1331 | * Create a pair of connected sockets. | 1280 | * Create a pair of connected sockets. |
1332 | */ | 1281 | */ |
1333 | 1282 | ||
1334 | SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, | 1283 | SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, |
1335 | int __user *, usockvec) | 1284 | int __user *, usockvec) |
1336 | { | 1285 | { |
1337 | struct socket *sock1, *sock2; | 1286 | struct socket *sock1, *sock2; |
1338 | int fd1, fd2, err; | 1287 | int fd1, fd2, err; |
1339 | struct file *newfile1, *newfile2; | 1288 | struct file *newfile1, *newfile2; |
1340 | int flags; | 1289 | int flags; |
1341 | 1290 | ||
1342 | flags = type & ~SOCK_TYPE_MASK; | 1291 | flags = type & ~SOCK_TYPE_MASK; |
1343 | if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) | 1292 | if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) |
1344 | return -EINVAL; | 1293 | return -EINVAL; |
1345 | type &= SOCK_TYPE_MASK; | 1294 | type &= SOCK_TYPE_MASK; |
1346 | 1295 | ||
1347 | if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) | 1296 | if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) |
1348 | flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; | 1297 | flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; |
1349 | 1298 | ||
1350 | /* | 1299 | /* |
1351 | * Obtain the first socket and check if the underlying protocol | 1300 | * Obtain the first socket and check if the underlying protocol |
1352 | * supports the socketpair call. | 1301 | * supports the socketpair call. |
1353 | */ | 1302 | */ |
1354 | 1303 | ||
1355 | err = sock_create(family, type, protocol, &sock1); | 1304 | err = sock_create(family, type, protocol, &sock1); |
1356 | if (err < 0) | 1305 | if (err < 0) |
1357 | goto out; | 1306 | goto out; |
1358 | 1307 | ||
1359 | err = sock_create(family, type, protocol, &sock2); | 1308 | err = sock_create(family, type, protocol, &sock2); |
1360 | if (err < 0) | 1309 | if (err < 0) |
1361 | goto out_release_1; | 1310 | goto out_release_1; |
1362 | 1311 | ||
1363 | err = sock1->ops->socketpair(sock1, sock2); | 1312 | err = sock1->ops->socketpair(sock1, sock2); |
1364 | if (err < 0) | 1313 | if (err < 0) |
1365 | goto out_release_both; | 1314 | goto out_release_both; |
1366 | 1315 | ||
1367 | fd1 = sock_alloc_file(sock1, &newfile1, flags); | 1316 | fd1 = sock_alloc_file(sock1, &newfile1, flags); |
1368 | if (unlikely(fd1 < 0)) { | 1317 | if (unlikely(fd1 < 0)) { |
1369 | err = fd1; | 1318 | err = fd1; |
1370 | goto out_release_both; | 1319 | goto out_release_both; |
1371 | } | 1320 | } |
1372 | 1321 | ||
1373 | fd2 = sock_alloc_file(sock2, &newfile2, flags); | 1322 | fd2 = sock_alloc_file(sock2, &newfile2, flags); |
1374 | if (unlikely(fd2 < 0)) { | 1323 | if (unlikely(fd2 < 0)) { |
1375 | err = fd2; | 1324 | err = fd2; |
1376 | fput(newfile1); | 1325 | fput(newfile1); |
1377 | put_unused_fd(fd1); | 1326 | put_unused_fd(fd1); |
1378 | sock_release(sock2); | 1327 | sock_release(sock2); |
1379 | goto out; | 1328 | goto out; |
1380 | } | 1329 | } |
1381 | 1330 | ||
1382 | audit_fd_pair(fd1, fd2); | 1331 | audit_fd_pair(fd1, fd2); |
1383 | fd_install(fd1, newfile1); | 1332 | fd_install(fd1, newfile1); |
1384 | fd_install(fd2, newfile2); | 1333 | fd_install(fd2, newfile2); |
1385 | /* fd1 and fd2 may be already another descriptors. | 1334 | /* fd1 and fd2 may be already another descriptors. |
1386 | * Not kernel problem. | 1335 | * Not kernel problem. |
1387 | */ | 1336 | */ |
1388 | 1337 | ||
1389 | err = put_user(fd1, &usockvec[0]); | 1338 | err = put_user(fd1, &usockvec[0]); |
1390 | if (!err) | 1339 | if (!err) |
1391 | err = put_user(fd2, &usockvec[1]); | 1340 | err = put_user(fd2, &usockvec[1]); |
1392 | if (!err) | 1341 | if (!err) |
1393 | return 0; | 1342 | return 0; |
1394 | 1343 | ||
1395 | sys_close(fd2); | 1344 | sys_close(fd2); |
1396 | sys_close(fd1); | 1345 | sys_close(fd1); |
1397 | return err; | 1346 | return err; |
1398 | 1347 | ||
1399 | out_release_both: | 1348 | out_release_both: |
1400 | sock_release(sock2); | 1349 | sock_release(sock2); |
1401 | out_release_1: | 1350 | out_release_1: |
1402 | sock_release(sock1); | 1351 | sock_release(sock1); |
1403 | out: | 1352 | out: |
1404 | return err; | 1353 | return err; |
1405 | } | 1354 | } |
1406 | 1355 | ||
1407 | /* | 1356 | /* |
1408 | * Bind a name to a socket. Nothing much to do here since it's | 1357 | * Bind a name to a socket. Nothing much to do here since it's |
1409 | * the protocol's responsibility to handle the local address. | 1358 | * the protocol's responsibility to handle the local address. |
1410 | * | 1359 | * |
1411 | * We move the socket address to kernel space before we call | 1360 | * We move the socket address to kernel space before we call |
1412 | * the protocol layer (having also checked the address is ok). | 1361 | * the protocol layer (having also checked the address is ok). |
1413 | */ | 1362 | */ |
1414 | 1363 | ||
1415 | SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen) | 1364 | SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen) |
1416 | { | 1365 | { |
1417 | struct socket *sock; | 1366 | struct socket *sock; |
1418 | struct sockaddr_storage address; | 1367 | struct sockaddr_storage address; |
1419 | int err, fput_needed; | 1368 | int err, fput_needed; |
1420 | 1369 | ||
1421 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | 1370 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1422 | if (sock) { | 1371 | if (sock) { |
1423 | err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address); | 1372 | err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address); |
1424 | if (err >= 0) { | 1373 | if (err >= 0) { |
1425 | err = security_socket_bind(sock, | 1374 | err = security_socket_bind(sock, |
1426 | (struct sockaddr *)&address, | 1375 | (struct sockaddr *)&address, |
1427 | addrlen); | 1376 | addrlen); |
1428 | if (!err) | 1377 | if (!err) |
1429 | err = sock->ops->bind(sock, | 1378 | err = sock->ops->bind(sock, |
1430 | (struct sockaddr *) | 1379 | (struct sockaddr *) |
1431 | &address, addrlen); | 1380 | &address, addrlen); |
1432 | } | 1381 | } |
1433 | fput_light(sock->file, fput_needed); | 1382 | fput_light(sock->file, fput_needed); |
1434 | } | 1383 | } |
1435 | return err; | 1384 | return err; |
1436 | } | 1385 | } |
1437 | 1386 | ||
1438 | /* | 1387 | /* |
1439 | * Perform a listen. Basically, we allow the protocol to do anything | 1388 | * Perform a listen. Basically, we allow the protocol to do anything |
1440 | * necessary for a listen, and if that works, we mark the socket as | 1389 | * necessary for a listen, and if that works, we mark the socket as |
1441 | * ready for listening. | 1390 | * ready for listening. |
1442 | */ | 1391 | */ |
1443 | 1392 | ||
1444 | SYSCALL_DEFINE2(listen, int, fd, int, backlog) | 1393 | SYSCALL_DEFINE2(listen, int, fd, int, backlog) |
1445 | { | 1394 | { |
1446 | struct socket *sock; | 1395 | struct socket *sock; |
1447 | int err, fput_needed; | 1396 | int err, fput_needed; |
1448 | int somaxconn; | 1397 | int somaxconn; |
1449 | 1398 | ||
1450 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | 1399 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1451 | if (sock) { | 1400 | if (sock) { |
1452 | somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn; | 1401 | somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn; |
1453 | if ((unsigned)backlog > somaxconn) | 1402 | if ((unsigned)backlog > somaxconn) |
1454 | backlog = somaxconn; | 1403 | backlog = somaxconn; |
1455 | 1404 | ||
1456 | err = security_socket_listen(sock, backlog); | 1405 | err = security_socket_listen(sock, backlog); |
1457 | if (!err) | 1406 | if (!err) |
1458 | err = sock->ops->listen(sock, backlog); | 1407 | err = sock->ops->listen(sock, backlog); |
1459 | 1408 | ||
1460 | fput_light(sock->file, fput_needed); | 1409 | fput_light(sock->file, fput_needed); |
1461 | } | 1410 | } |
1462 | return err; | 1411 | return err; |
1463 | } | 1412 | } |
1464 | 1413 | ||
1465 | /* | 1414 | /* |
1466 | * For accept, we attempt to create a new socket, set up the link | 1415 | * For accept, we attempt to create a new socket, set up the link |
1467 | * with the client, wake up the client, then return the new | 1416 | * with the client, wake up the client, then return the new |
1468 | * connected fd. We collect the address of the connector in kernel | 1417 | * connected fd. We collect the address of the connector in kernel |
1469 | * space and move it to user at the very end. This is unclean because | 1418 | * space and move it to user at the very end. This is unclean because |
1470 | * we open the socket then return an error. | 1419 | * we open the socket then return an error. |
1471 | * | 1420 | * |
1472 | * 1003.1g adds the ability to recvmsg() to query connection pending | 1421 | * 1003.1g adds the ability to recvmsg() to query connection pending |
1473 | * status to recvmsg. We need to add that support in a way that's | 1422 | * status to recvmsg. We need to add that support in a way that's |
1474 | * clean when we restructure accept also. | 1423 | * clean when we restructure accept also. |
1475 | */ | 1424 | */ |
1476 | 1425 | ||
1477 | SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, | 1426 | SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, |
1478 | int __user *, upeer_addrlen, int, flags) | 1427 | int __user *, upeer_addrlen, int, flags) |
1479 | { | 1428 | { |
1480 | struct socket *sock, *newsock; | 1429 | struct socket *sock, *newsock; |
1481 | struct file *newfile; | 1430 | struct file *newfile; |
1482 | int err, len, newfd, fput_needed; | 1431 | int err, len, newfd, fput_needed; |
1483 | struct sockaddr_storage address; | 1432 | struct sockaddr_storage address; |
1484 | 1433 | ||
1485 | if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) | 1434 | if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) |
1486 | return -EINVAL; | 1435 | return -EINVAL; |
1487 | 1436 | ||
1488 | if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) | 1437 | if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) |
1489 | flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; | 1438 | flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; |
1490 | 1439 | ||
1491 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | 1440 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1492 | if (!sock) | 1441 | if (!sock) |
1493 | goto out; | 1442 | goto out; |
1494 | 1443 | ||
1495 | err = -ENFILE; | 1444 | err = -ENFILE; |
1496 | if (!(newsock = sock_alloc())) | 1445 | if (!(newsock = sock_alloc())) |
1497 | goto out_put; | 1446 | goto out_put; |
1498 | 1447 | ||
1499 | newsock->type = sock->type; | 1448 | newsock->type = sock->type; |
1500 | newsock->ops = sock->ops; | 1449 | newsock->ops = sock->ops; |
1501 | 1450 | ||
1502 | /* | 1451 | /* |
1503 | * We don't need try_module_get here, as the listening socket (sock) | 1452 | * We don't need try_module_get here, as the listening socket (sock) |
1504 | * has the protocol module (sock->ops->owner) held. | 1453 | * has the protocol module (sock->ops->owner) held. |
1505 | */ | 1454 | */ |
1506 | __module_get(newsock->ops->owner); | 1455 | __module_get(newsock->ops->owner); |
1507 | 1456 | ||
1508 | newfd = sock_alloc_file(newsock, &newfile, flags); | 1457 | newfd = sock_alloc_file(newsock, &newfile, flags); |
1509 | if (unlikely(newfd < 0)) { | 1458 | if (unlikely(newfd < 0)) { |
1510 | err = newfd; | 1459 | err = newfd; |
1511 | sock_release(newsock); | 1460 | sock_release(newsock); |
1512 | goto out_put; | 1461 | goto out_put; |
1513 | } | 1462 | } |
1514 | 1463 | ||
1515 | err = security_socket_accept(sock, newsock); | 1464 | err = security_socket_accept(sock, newsock); |
1516 | if (err) | 1465 | if (err) |
1517 | goto out_fd; | 1466 | goto out_fd; |
1518 | 1467 | ||
1519 | err = sock->ops->accept(sock, newsock, sock->file->f_flags); | 1468 | err = sock->ops->accept(sock, newsock, sock->file->f_flags); |
1520 | if (err < 0) | 1469 | if (err < 0) |
1521 | goto out_fd; | 1470 | goto out_fd; |
1522 | 1471 | ||
1523 | if (upeer_sockaddr) { | 1472 | if (upeer_sockaddr) { |
1524 | if (newsock->ops->getname(newsock, (struct sockaddr *)&address, | 1473 | if (newsock->ops->getname(newsock, (struct sockaddr *)&address, |
1525 | &len, 2) < 0) { | 1474 | &len, 2) < 0) { |
1526 | err = -ECONNABORTED; | 1475 | err = -ECONNABORTED; |
1527 | goto out_fd; | 1476 | goto out_fd; |
1528 | } | 1477 | } |
1529 | err = move_addr_to_user((struct sockaddr *)&address, | 1478 | err = move_addr_to_user((struct sockaddr *)&address, |
1530 | len, upeer_sockaddr, upeer_addrlen); | 1479 | len, upeer_sockaddr, upeer_addrlen); |
1531 | if (err < 0) | 1480 | if (err < 0) |
1532 | goto out_fd; | 1481 | goto out_fd; |
1533 | } | 1482 | } |
1534 | 1483 | ||
1535 | /* File flags are not inherited via accept() unlike other OSes. */ | 1484 | /* File flags are not inherited via accept() unlike other OSes. */ |
1536 | 1485 | ||
1537 | fd_install(newfd, newfile); | 1486 | fd_install(newfd, newfile); |
1538 | err = newfd; | 1487 | err = newfd; |
1539 | 1488 | ||
1540 | out_put: | 1489 | out_put: |
1541 | fput_light(sock->file, fput_needed); | 1490 | fput_light(sock->file, fput_needed); |
1542 | out: | 1491 | out: |
1543 | return err; | 1492 | return err; |
1544 | out_fd: | 1493 | out_fd: |
1545 | fput(newfile); | 1494 | fput(newfile); |
1546 | put_unused_fd(newfd); | 1495 | put_unused_fd(newfd); |
1547 | goto out_put; | 1496 | goto out_put; |
1548 | } | 1497 | } |
1549 | 1498 | ||
1550 | SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr, | 1499 | SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr, |
1551 | int __user *, upeer_addrlen) | 1500 | int __user *, upeer_addrlen) |
1552 | { | 1501 | { |
1553 | return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0); | 1502 | return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0); |
1554 | } | 1503 | } |
1555 | 1504 | ||
1556 | /* | 1505 | /* |
1557 | * Attempt to connect to a socket with the server address. The address | 1506 | * Attempt to connect to a socket with the server address. The address |
1558 | * is in user space so we verify it is OK and move it to kernel space. | 1507 | * is in user space so we verify it is OK and move it to kernel space. |
1559 | * | 1508 | * |
1560 | * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to | 1509 | * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to |
1561 | * break bindings | 1510 | * break bindings |
1562 | * | 1511 | * |
1563 | * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and | 1512 | * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and |
1564 | * other SEQPACKET protocols that take time to connect() as it doesn't | 1513 | * other SEQPACKET protocols that take time to connect() as it doesn't |
1565 | * include the -EINPROGRESS status for such sockets. | 1514 | * include the -EINPROGRESS status for such sockets. |
1566 | */ | 1515 | */ |
1567 | 1516 | ||
1568 | SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr, | 1517 | SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr, |
1569 | int, addrlen) | 1518 | int, addrlen) |
1570 | { | 1519 | { |
1571 | struct socket *sock; | 1520 | struct socket *sock; |
1572 | struct sockaddr_storage address; | 1521 | struct sockaddr_storage address; |
1573 | int err, fput_needed; | 1522 | int err, fput_needed; |
1574 | 1523 | ||
1575 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | 1524 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1576 | if (!sock) | 1525 | if (!sock) |
1577 | goto out; | 1526 | goto out; |
1578 | err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address); | 1527 | err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address); |
1579 | if (err < 0) | 1528 | if (err < 0) |
1580 | goto out_put; | 1529 | goto out_put; |
1581 | 1530 | ||
1582 | err = | 1531 | err = |
1583 | security_socket_connect(sock, (struct sockaddr *)&address, addrlen); | 1532 | security_socket_connect(sock, (struct sockaddr *)&address, addrlen); |
1584 | if (err) | 1533 | if (err) |
1585 | goto out_put; | 1534 | goto out_put; |
1586 | 1535 | ||
1587 | err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen, | 1536 | err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen, |
1588 | sock->file->f_flags); | 1537 | sock->file->f_flags); |
1589 | out_put: | 1538 | out_put: |
1590 | fput_light(sock->file, fput_needed); | 1539 | fput_light(sock->file, fput_needed); |
1591 | out: | 1540 | out: |
1592 | return err; | 1541 | return err; |
1593 | } | 1542 | } |
1594 | 1543 | ||
1595 | /* | 1544 | /* |
1596 | * Get the local address ('name') of a socket object. Move the obtained | 1545 | * Get the local address ('name') of a socket object. Move the obtained |
1597 | * name to user space. | 1546 | * name to user space. |
1598 | */ | 1547 | */ |
1599 | 1548 | ||
1600 | SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr, | 1549 | SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr, |
1601 | int __user *, usockaddr_len) | 1550 | int __user *, usockaddr_len) |
1602 | { | 1551 | { |
1603 | struct socket *sock; | 1552 | struct socket *sock; |
1604 | struct sockaddr_storage address; | 1553 | struct sockaddr_storage address; |
1605 | int len, err, fput_needed; | 1554 | int len, err, fput_needed; |
1606 | 1555 | ||
1607 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | 1556 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1608 | if (!sock) | 1557 | if (!sock) |
1609 | goto out; | 1558 | goto out; |
1610 | 1559 | ||
1611 | err = security_socket_getsockname(sock); | 1560 | err = security_socket_getsockname(sock); |
1612 | if (err) | 1561 | if (err) |
1613 | goto out_put; | 1562 | goto out_put; |
1614 | 1563 | ||
1615 | err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0); | 1564 | err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0); |
1616 | if (err) | 1565 | if (err) |
1617 | goto out_put; | 1566 | goto out_put; |
1618 | err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, usockaddr_len); | 1567 | err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, usockaddr_len); |
1619 | 1568 | ||
1620 | out_put: | 1569 | out_put: |
1621 | fput_light(sock->file, fput_needed); | 1570 | fput_light(sock->file, fput_needed); |
1622 | out: | 1571 | out: |
1623 | return err; | 1572 | return err; |
1624 | } | 1573 | } |
1625 | 1574 | ||
1626 | /* | 1575 | /* |
1627 | * Get the remote address ('name') of a socket object. Move the obtained | 1576 | * Get the remote address ('name') of a socket object. Move the obtained |
1628 | * name to user space. | 1577 | * name to user space. |
1629 | */ | 1578 | */ |
1630 | 1579 | ||
1631 | SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr, | 1580 | SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr, |
1632 | int __user *, usockaddr_len) | 1581 | int __user *, usockaddr_len) |
1633 | { | 1582 | { |
1634 | struct socket *sock; | 1583 | struct socket *sock; |
1635 | struct sockaddr_storage address; | 1584 | struct sockaddr_storage address; |
1636 | int len, err, fput_needed; | 1585 | int len, err, fput_needed; |
1637 | 1586 | ||
1638 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | 1587 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1639 | if (sock != NULL) { | 1588 | if (sock != NULL) { |
1640 | err = security_socket_getpeername(sock); | 1589 | err = security_socket_getpeername(sock); |
1641 | if (err) { | 1590 | if (err) { |
1642 | fput_light(sock->file, fput_needed); | 1591 | fput_light(sock->file, fput_needed); |
1643 | return err; | 1592 | return err; |
1644 | } | 1593 | } |
1645 | 1594 | ||
1646 | err = | 1595 | err = |
1647 | sock->ops->getname(sock, (struct sockaddr *)&address, &len, | 1596 | sock->ops->getname(sock, (struct sockaddr *)&address, &len, |
1648 | 1); | 1597 | 1); |
1649 | if (!err) | 1598 | if (!err) |
1650 | err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, | 1599 | err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, |
1651 | usockaddr_len); | 1600 | usockaddr_len); |
1652 | fput_light(sock->file, fput_needed); | 1601 | fput_light(sock->file, fput_needed); |
1653 | } | 1602 | } |
1654 | return err; | 1603 | return err; |
1655 | } | 1604 | } |
1656 | 1605 | ||
1657 | /* | 1606 | /* |
1658 | * Send a datagram to a given address. We move the address into kernel | 1607 | * Send a datagram to a given address. We move the address into kernel |
1659 | * space and check the user space data area is readable before invoking | 1608 | * space and check the user space data area is readable before invoking |
1660 | * the protocol. | 1609 | * the protocol. |
1661 | */ | 1610 | */ |
1662 | 1611 | ||
1663 | SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len, | 1612 | SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len, |
1664 | unsigned, flags, struct sockaddr __user *, addr, | 1613 | unsigned, flags, struct sockaddr __user *, addr, |
1665 | int, addr_len) | 1614 | int, addr_len) |
1666 | { | 1615 | { |
1667 | struct socket *sock; | 1616 | struct socket *sock; |
1668 | struct sockaddr_storage address; | 1617 | struct sockaddr_storage address; |
1669 | int err; | 1618 | int err; |
1670 | struct msghdr msg; | 1619 | struct msghdr msg; |
1671 | struct iovec iov; | 1620 | struct iovec iov; |
1672 | int fput_needed; | 1621 | int fput_needed; |
1673 | 1622 | ||
1674 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | 1623 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1675 | if (!sock) | 1624 | if (!sock) |
1676 | goto out; | 1625 | goto out; |
1677 | 1626 | ||
1678 | iov.iov_base = buff; | 1627 | iov.iov_base = buff; |
1679 | iov.iov_len = len; | 1628 | iov.iov_len = len; |
1680 | msg.msg_name = NULL; | 1629 | msg.msg_name = NULL; |
1681 | msg.msg_iov = &iov; | 1630 | msg.msg_iov = &iov; |
1682 | msg.msg_iovlen = 1; | 1631 | msg.msg_iovlen = 1; |
1683 | msg.msg_control = NULL; | 1632 | msg.msg_control = NULL; |
1684 | msg.msg_controllen = 0; | 1633 | msg.msg_controllen = 0; |
1685 | msg.msg_namelen = 0; | 1634 | msg.msg_namelen = 0; |
1686 | if (addr) { | 1635 | if (addr) { |
1687 | err = move_addr_to_kernel(addr, addr_len, (struct sockaddr *)&address); | 1636 | err = move_addr_to_kernel(addr, addr_len, (struct sockaddr *)&address); |
1688 | if (err < 0) | 1637 | if (err < 0) |
1689 | goto out_put; | 1638 | goto out_put; |
1690 | msg.msg_name = (struct sockaddr *)&address; | 1639 | msg.msg_name = (struct sockaddr *)&address; |
1691 | msg.msg_namelen = addr_len; | 1640 | msg.msg_namelen = addr_len; |
1692 | } | 1641 | } |
1693 | if (sock->file->f_flags & O_NONBLOCK) | 1642 | if (sock->file->f_flags & O_NONBLOCK) |
1694 | flags |= MSG_DONTWAIT; | 1643 | flags |= MSG_DONTWAIT; |
1695 | msg.msg_flags = flags; | 1644 | msg.msg_flags = flags; |
1696 | err = sock_sendmsg(sock, &msg, len); | 1645 | err = sock_sendmsg(sock, &msg, len); |
1697 | 1646 | ||
1698 | out_put: | 1647 | out_put: |
1699 | fput_light(sock->file, fput_needed); | 1648 | fput_light(sock->file, fput_needed); |
1700 | out: | 1649 | out: |
1701 | return err; | 1650 | return err; |
1702 | } | 1651 | } |
1703 | 1652 | ||
1704 | /* | 1653 | /* |
1705 | * Send a datagram down a socket. | 1654 | * Send a datagram down a socket. |
1706 | */ | 1655 | */ |
1707 | 1656 | ||
1708 | SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len, | 1657 | SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len, |
1709 | unsigned, flags) | 1658 | unsigned, flags) |
1710 | { | 1659 | { |
1711 | return sys_sendto(fd, buff, len, flags, NULL, 0); | 1660 | return sys_sendto(fd, buff, len, flags, NULL, 0); |
1712 | } | 1661 | } |
1713 | 1662 | ||
1714 | /* | 1663 | /* |
1715 | * Receive a frame from the socket and optionally record the address of the | 1664 | * Receive a frame from the socket and optionally record the address of the |
1716 | * sender. We verify the buffers are writable and if needed move the | 1665 | * sender. We verify the buffers are writable and if needed move the |
1717 | * sender address from kernel to user space. | 1666 | * sender address from kernel to user space. |
1718 | */ | 1667 | */ |
1719 | 1668 | ||
1720 | SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, | 1669 | SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, |
1721 | unsigned, flags, struct sockaddr __user *, addr, | 1670 | unsigned, flags, struct sockaddr __user *, addr, |
1722 | int __user *, addr_len) | 1671 | int __user *, addr_len) |
1723 | { | 1672 | { |
1724 | struct socket *sock; | 1673 | struct socket *sock; |
1725 | struct iovec iov; | 1674 | struct iovec iov; |
1726 | struct msghdr msg; | 1675 | struct msghdr msg; |
1727 | struct sockaddr_storage address; | 1676 | struct sockaddr_storage address; |
1728 | int err, err2; | 1677 | int err, err2; |
1729 | int fput_needed; | 1678 | int fput_needed; |
1730 | 1679 | ||
1731 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | 1680 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1732 | if (!sock) | 1681 | if (!sock) |
1733 | goto out; | 1682 | goto out; |
1734 | 1683 | ||
1735 | msg.msg_control = NULL; | 1684 | msg.msg_control = NULL; |
1736 | msg.msg_controllen = 0; | 1685 | msg.msg_controllen = 0; |
1737 | msg.msg_iovlen = 1; | 1686 | msg.msg_iovlen = 1; |
1738 | msg.msg_iov = &iov; | 1687 | msg.msg_iov = &iov; |
1739 | iov.iov_len = size; | 1688 | iov.iov_len = size; |
1740 | iov.iov_base = ubuf; | 1689 | iov.iov_base = ubuf; |
1741 | msg.msg_name = (struct sockaddr *)&address; | 1690 | msg.msg_name = (struct sockaddr *)&address; |
1742 | msg.msg_namelen = sizeof(address); | 1691 | msg.msg_namelen = sizeof(address); |
1743 | if (sock->file->f_flags & O_NONBLOCK) | 1692 | if (sock->file->f_flags & O_NONBLOCK) |
1744 | flags |= MSG_DONTWAIT; | 1693 | flags |= MSG_DONTWAIT; |
1745 | err = sock_recvmsg(sock, &msg, size, flags); | 1694 | err = sock_recvmsg(sock, &msg, size, flags); |
1746 | 1695 | ||
1747 | if (err >= 0 && addr != NULL) { | 1696 | if (err >= 0 && addr != NULL) { |
1748 | err2 = move_addr_to_user((struct sockaddr *)&address, | 1697 | err2 = move_addr_to_user((struct sockaddr *)&address, |
1749 | msg.msg_namelen, addr, addr_len); | 1698 | msg.msg_namelen, addr, addr_len); |
1750 | if (err2 < 0) | 1699 | if (err2 < 0) |
1751 | err = err2; | 1700 | err = err2; |
1752 | } | 1701 | } |
1753 | 1702 | ||
1754 | fput_light(sock->file, fput_needed); | 1703 | fput_light(sock->file, fput_needed); |
1755 | out: | 1704 | out: |
1756 | return err; | 1705 | return err; |
1757 | } | 1706 | } |
1758 | 1707 | ||
1759 | /* | 1708 | /* |
1760 | * Receive a datagram from a socket. | 1709 | * Receive a datagram from a socket. |
1761 | */ | 1710 | */ |
1762 | 1711 | ||
1763 | asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size, | 1712 | asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size, |
1764 | unsigned flags) | 1713 | unsigned flags) |
1765 | { | 1714 | { |
1766 | return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); | 1715 | return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); |
1767 | } | 1716 | } |
1768 | 1717 | ||
1769 | /* | 1718 | /* |
1770 | * Set a socket option. Because we don't know the option lengths we have | 1719 | * Set a socket option. Because we don't know the option lengths we have |
1771 | * to pass the user mode parameter for the protocols to sort out. | 1720 | * to pass the user mode parameter for the protocols to sort out. |
1772 | */ | 1721 | */ |
1773 | 1722 | ||
1774 | SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, | 1723 | SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, |
1775 | char __user *, optval, int, optlen) | 1724 | char __user *, optval, int, optlen) |
1776 | { | 1725 | { |
1777 | int err, fput_needed; | 1726 | int err, fput_needed; |
1778 | struct socket *sock; | 1727 | struct socket *sock; |
1779 | 1728 | ||
1780 | if (optlen < 0) | 1729 | if (optlen < 0) |
1781 | return -EINVAL; | 1730 | return -EINVAL; |
1782 | 1731 | ||
1783 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | 1732 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1784 | if (sock != NULL) { | 1733 | if (sock != NULL) { |
1785 | err = security_socket_setsockopt(sock, level, optname); | 1734 | err = security_socket_setsockopt(sock, level, optname); |
1786 | if (err) | 1735 | if (err) |
1787 | goto out_put; | 1736 | goto out_put; |
1788 | 1737 | ||
1789 | if (level == SOL_SOCKET) | 1738 | if (level == SOL_SOCKET) |
1790 | err = | 1739 | err = |
1791 | sock_setsockopt(sock, level, optname, optval, | 1740 | sock_setsockopt(sock, level, optname, optval, |
1792 | optlen); | 1741 | optlen); |
1793 | else | 1742 | else |
1794 | err = | 1743 | err = |
1795 | sock->ops->setsockopt(sock, level, optname, optval, | 1744 | sock->ops->setsockopt(sock, level, optname, optval, |
1796 | optlen); | 1745 | optlen); |
1797 | out_put: | 1746 | out_put: |
1798 | fput_light(sock->file, fput_needed); | 1747 | fput_light(sock->file, fput_needed); |
1799 | } | 1748 | } |
1800 | return err; | 1749 | return err; |
1801 | } | 1750 | } |
1802 | 1751 | ||
1803 | /* | 1752 | /* |
1804 | * Get a socket option. Because we don't know the option lengths we have | 1753 | * Get a socket option. Because we don't know the option lengths we have |
1805 | * to pass a user mode parameter for the protocols to sort out. | 1754 | * to pass a user mode parameter for the protocols to sort out. |
1806 | */ | 1755 | */ |
1807 | 1756 | ||
1808 | SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname, | 1757 | SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname, |
1809 | char __user *, optval, int __user *, optlen) | 1758 | char __user *, optval, int __user *, optlen) |
1810 | { | 1759 | { |
1811 | int err, fput_needed; | 1760 | int err, fput_needed; |
1812 | struct socket *sock; | 1761 | struct socket *sock; |
1813 | 1762 | ||
1814 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | 1763 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1815 | if (sock != NULL) { | 1764 | if (sock != NULL) { |
1816 | err = security_socket_getsockopt(sock, level, optname); | 1765 | err = security_socket_getsockopt(sock, level, optname); |
1817 | if (err) | 1766 | if (err) |
1818 | goto out_put; | 1767 | goto out_put; |
1819 | 1768 | ||
1820 | if (level == SOL_SOCKET) | 1769 | if (level == SOL_SOCKET) |
1821 | err = | 1770 | err = |
1822 | sock_getsockopt(sock, level, optname, optval, | 1771 | sock_getsockopt(sock, level, optname, optval, |
1823 | optlen); | 1772 | optlen); |
1824 | else | 1773 | else |
1825 | err = | 1774 | err = |
1826 | sock->ops->getsockopt(sock, level, optname, optval, | 1775 | sock->ops->getsockopt(sock, level, optname, optval, |
1827 | optlen); | 1776 | optlen); |
1828 | out_put: | 1777 | out_put: |
1829 | fput_light(sock->file, fput_needed); | 1778 | fput_light(sock->file, fput_needed); |
1830 | } | 1779 | } |
1831 | return err; | 1780 | return err; |
1832 | } | 1781 | } |
1833 | 1782 | ||
1834 | /* | 1783 | /* |
1835 | * Shutdown a socket. | 1784 | * Shutdown a socket. |
1836 | */ | 1785 | */ |
1837 | 1786 | ||
1838 | SYSCALL_DEFINE2(shutdown, int, fd, int, how) | 1787 | SYSCALL_DEFINE2(shutdown, int, fd, int, how) |
1839 | { | 1788 | { |
1840 | int err, fput_needed; | 1789 | int err, fput_needed; |
1841 | struct socket *sock; | 1790 | struct socket *sock; |
1842 | 1791 | ||
1843 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | 1792 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1844 | if (sock != NULL) { | 1793 | if (sock != NULL) { |
1845 | err = security_socket_shutdown(sock, how); | 1794 | err = security_socket_shutdown(sock, how); |
1846 | if (!err) | 1795 | if (!err) |
1847 | err = sock->ops->shutdown(sock, how); | 1796 | err = sock->ops->shutdown(sock, how); |
1848 | fput_light(sock->file, fput_needed); | 1797 | fput_light(sock->file, fput_needed); |
1849 | } | 1798 | } |
1850 | return err; | 1799 | return err; |
1851 | } | 1800 | } |
1852 | 1801 | ||
/*
 * A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * These expand in the caller's scope: they read a local named 'flags' and,
 * for an argument 'msg', expect a pointer named 'msg_compat' to exist
 * alongside it. The compat layout is chosen when MSG_CMSG_COMPAT is set
 * in 'flags'.
 */
#define COMPAT_MSG(msg, member) \
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1859 | 1808 | ||
1860 | /* | 1809 | /* |
1861 | * BSD sendmsg interface | 1810 | * BSD sendmsg interface |
1862 | */ | 1811 | */ |
1863 | 1812 | ||
1864 | SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) | 1813 | SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) |
1865 | { | 1814 | { |
1866 | struct compat_msghdr __user *msg_compat = | 1815 | struct compat_msghdr __user *msg_compat = |
1867 | (struct compat_msghdr __user *)msg; | 1816 | (struct compat_msghdr __user *)msg; |
1868 | struct socket *sock; | 1817 | struct socket *sock; |
1869 | struct sockaddr_storage address; | 1818 | struct sockaddr_storage address; |
1870 | struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; | 1819 | struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; |
1871 | unsigned char ctl[sizeof(struct cmsghdr) + 20] | 1820 | unsigned char ctl[sizeof(struct cmsghdr) + 20] |
1872 | __attribute__ ((aligned(sizeof(__kernel_size_t)))); | 1821 | __attribute__ ((aligned(sizeof(__kernel_size_t)))); |
1873 | /* 20 is size of ipv6_pktinfo */ | 1822 | /* 20 is size of ipv6_pktinfo */ |
1874 | unsigned char *ctl_buf = ctl; | 1823 | unsigned char *ctl_buf = ctl; |
1875 | struct msghdr msg_sys; | 1824 | struct msghdr msg_sys; |
1876 | int err, ctl_len, iov_size, total_len; | 1825 | int err, ctl_len, iov_size, total_len; |
1877 | int fput_needed; | 1826 | int fput_needed; |
1878 | 1827 | ||
1879 | err = -EFAULT; | 1828 | err = -EFAULT; |
1880 | if (MSG_CMSG_COMPAT & flags) { | 1829 | if (MSG_CMSG_COMPAT & flags) { |
1881 | if (get_compat_msghdr(&msg_sys, msg_compat)) | 1830 | if (get_compat_msghdr(&msg_sys, msg_compat)) |
1882 | return -EFAULT; | 1831 | return -EFAULT; |
1883 | } | 1832 | } |
1884 | else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) | 1833 | else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) |
1885 | return -EFAULT; | 1834 | return -EFAULT; |
1886 | 1835 | ||
1887 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | 1836 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1888 | if (!sock) | 1837 | if (!sock) |
1889 | goto out; | 1838 | goto out; |
1890 | 1839 | ||
1891 | /* do not move before msg_sys is valid */ | 1840 | /* do not move before msg_sys is valid */ |
1892 | err = -EMSGSIZE; | 1841 | err = -EMSGSIZE; |
1893 | if (msg_sys.msg_iovlen > UIO_MAXIOV) | 1842 | if (msg_sys.msg_iovlen > UIO_MAXIOV) |
1894 | goto out_put; | 1843 | goto out_put; |
1895 | 1844 | ||
1896 | /* Check whether to allocate the iovec area */ | 1845 | /* Check whether to allocate the iovec area */ |
1897 | err = -ENOMEM; | 1846 | err = -ENOMEM; |
1898 | iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); | 1847 | iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); |
1899 | if (msg_sys.msg_iovlen > UIO_FASTIOV) { | 1848 | if (msg_sys.msg_iovlen > UIO_FASTIOV) { |
1900 | iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); | 1849 | iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); |
1901 | if (!iov) | 1850 | if (!iov) |
1902 | goto out_put; | 1851 | goto out_put; |
1903 | } | 1852 | } |
1904 | 1853 | ||
1905 | /* This will also move the address data into kernel space */ | 1854 | /* This will also move the address data into kernel space */ |
1906 | if (MSG_CMSG_COMPAT & flags) { | 1855 | if (MSG_CMSG_COMPAT & flags) { |
1907 | err = verify_compat_iovec(&msg_sys, iov, | 1856 | err = verify_compat_iovec(&msg_sys, iov, |
1908 | (struct sockaddr *)&address, | 1857 | (struct sockaddr *)&address, |
1909 | VERIFY_READ); | 1858 | VERIFY_READ); |
1910 | } else | 1859 | } else |
1911 | err = verify_iovec(&msg_sys, iov, | 1860 | err = verify_iovec(&msg_sys, iov, |
1912 | (struct sockaddr *)&address, | 1861 | (struct sockaddr *)&address, |
1913 | VERIFY_READ); | 1862 | VERIFY_READ); |
1914 | if (err < 0) | 1863 | if (err < 0) |
1915 | goto out_freeiov; | 1864 | goto out_freeiov; |
1916 | total_len = err; | 1865 | total_len = err; |
1917 | 1866 | ||
1918 | err = -ENOBUFS; | 1867 | err = -ENOBUFS; |
1919 | 1868 | ||
1920 | if (msg_sys.msg_controllen > INT_MAX) | 1869 | if (msg_sys.msg_controllen > INT_MAX) |
1921 | goto out_freeiov; | 1870 | goto out_freeiov; |
1922 | ctl_len = msg_sys.msg_controllen; | 1871 | ctl_len = msg_sys.msg_controllen; |
1923 | if ((MSG_CMSG_COMPAT & flags) && ctl_len) { | 1872 | if ((MSG_CMSG_COMPAT & flags) && ctl_len) { |
1924 | err = | 1873 | err = |
1925 | cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, | 1874 | cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, |
1926 | sizeof(ctl)); | 1875 | sizeof(ctl)); |
1927 | if (err) | 1876 | if (err) |
1928 | goto out_freeiov; | 1877 | goto out_freeiov; |
1929 | ctl_buf = msg_sys.msg_control; | 1878 | ctl_buf = msg_sys.msg_control; |
1930 | ctl_len = msg_sys.msg_controllen; | 1879 | ctl_len = msg_sys.msg_controllen; |
1931 | } else if (ctl_len) { | 1880 | } else if (ctl_len) { |
1932 | if (ctl_len > sizeof(ctl)) { | 1881 | if (ctl_len > sizeof(ctl)) { |
1933 | ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); | 1882 | ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); |
1934 | if (ctl_buf == NULL) | 1883 | if (ctl_buf == NULL) |
1935 | goto out_freeiov; | 1884 | goto out_freeiov; |
1936 | } | 1885 | } |
1937 | err = -EFAULT; | 1886 | err = -EFAULT; |
1938 | /* | 1887 | /* |
1939 | * Careful! Before this, msg_sys.msg_control contains a user pointer. | 1888 | * Careful! Before this, msg_sys.msg_control contains a user pointer. |
1940 | * Afterwards, it will be a kernel pointer. Thus the compiler-assisted | 1889 | * Afterwards, it will be a kernel pointer. Thus the compiler-assisted |
1941 | * checking falls down on this. | 1890 | * checking falls down on this. |
1942 | */ | 1891 | */ |
1943 | if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control, | 1892 | if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control, |
1944 | ctl_len)) | 1893 | ctl_len)) |
1945 | goto out_freectl; | 1894 | goto out_freectl; |
1946 | msg_sys.msg_control = ctl_buf; | 1895 | msg_sys.msg_control = ctl_buf; |
1947 | } | 1896 | } |
1948 | msg_sys.msg_flags = flags; | 1897 | msg_sys.msg_flags = flags; |
1949 | 1898 | ||
1950 | if (sock->file->f_flags & O_NONBLOCK) | 1899 | if (sock->file->f_flags & O_NONBLOCK) |
1951 | msg_sys.msg_flags |= MSG_DONTWAIT; | 1900 | msg_sys.msg_flags |= MSG_DONTWAIT; |
1952 | err = sock_sendmsg(sock, &msg_sys, total_len); | 1901 | err = sock_sendmsg(sock, &msg_sys, total_len); |
1953 | 1902 | ||
1954 | out_freectl: | 1903 | out_freectl: |
1955 | if (ctl_buf != ctl) | 1904 | if (ctl_buf != ctl) |
1956 | sock_kfree_s(sock->sk, ctl_buf, ctl_len); | 1905 | sock_kfree_s(sock->sk, ctl_buf, ctl_len); |
1957 | out_freeiov: | 1906 | out_freeiov: |
1958 | if (iov != iovstack) | 1907 | if (iov != iovstack) |
1959 | sock_kfree_s(sock->sk, iov, iov_size); | 1908 | sock_kfree_s(sock->sk, iov, iov_size); |
1960 | out_put: | 1909 | out_put: |
1961 | fput_light(sock->file, fput_needed); | 1910 | fput_light(sock->file, fput_needed); |
1962 | out: | 1911 | out: |
1963 | return err; | 1912 | return err; |
1964 | } | 1913 | } |
1965 | 1914 | ||
1966 | static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, | 1915 | static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, |
1967 | struct msghdr *msg_sys, unsigned flags, int nosec) | 1916 | struct msghdr *msg_sys, unsigned flags, int nosec) |
1968 | { | 1917 | { |
1969 | struct compat_msghdr __user *msg_compat = | 1918 | struct compat_msghdr __user *msg_compat = |
1970 | (struct compat_msghdr __user *)msg; | 1919 | (struct compat_msghdr __user *)msg; |
1971 | struct iovec iovstack[UIO_FASTIOV]; | 1920 | struct iovec iovstack[UIO_FASTIOV]; |
1972 | struct iovec *iov = iovstack; | 1921 | struct iovec *iov = iovstack; |
1973 | unsigned long cmsg_ptr; | 1922 | unsigned long cmsg_ptr; |
1974 | int err, iov_size, total_len, len; | 1923 | int err, iov_size, total_len, len; |
1975 | 1924 | ||
1976 | /* kernel mode address */ | 1925 | /* kernel mode address */ |
1977 | struct sockaddr_storage addr; | 1926 | struct sockaddr_storage addr; |
1978 | 1927 | ||
1979 | /* user mode address pointers */ | 1928 | /* user mode address pointers */ |
1980 | struct sockaddr __user *uaddr; | 1929 | struct sockaddr __user *uaddr; |
1981 | int __user *uaddr_len; | 1930 | int __user *uaddr_len; |
1982 | 1931 | ||
1983 | if (MSG_CMSG_COMPAT & flags) { | 1932 | if (MSG_CMSG_COMPAT & flags) { |
1984 | if (get_compat_msghdr(msg_sys, msg_compat)) | 1933 | if (get_compat_msghdr(msg_sys, msg_compat)) |
1985 | return -EFAULT; | 1934 | return -EFAULT; |
1986 | } | 1935 | } |
1987 | else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) | 1936 | else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) |
1988 | return -EFAULT; | 1937 | return -EFAULT; |
1989 | 1938 | ||
1990 | err = -EMSGSIZE; | 1939 | err = -EMSGSIZE; |
1991 | if (msg_sys->msg_iovlen > UIO_MAXIOV) | 1940 | if (msg_sys->msg_iovlen > UIO_MAXIOV) |
1992 | goto out; | 1941 | goto out; |
1993 | 1942 | ||
1994 | /* Check whether to allocate the iovec area */ | 1943 | /* Check whether to allocate the iovec area */ |
1995 | err = -ENOMEM; | 1944 | err = -ENOMEM; |
1996 | iov_size = msg_sys->msg_iovlen * sizeof(struct iovec); | 1945 | iov_size = msg_sys->msg_iovlen * sizeof(struct iovec); |
1997 | if (msg_sys->msg_iovlen > UIO_FASTIOV) { | 1946 | if (msg_sys->msg_iovlen > UIO_FASTIOV) { |
1998 | iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); | 1947 | iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); |
1999 | if (!iov) | 1948 | if (!iov) |
2000 | goto out; | 1949 | goto out; |
2001 | } | 1950 | } |
2002 | 1951 | ||
2003 | /* | 1952 | /* |
2004 | * Save the user-mode address (verify_iovec will change the | 1953 | * Save the user-mode address (verify_iovec will change the |
2005 | * kernel msghdr to use the kernel address space) | 1954 | * kernel msghdr to use the kernel address space) |
2006 | */ | 1955 | */ |
2007 | 1956 | ||
2008 | uaddr = (__force void __user *)msg_sys->msg_name; | 1957 | uaddr = (__force void __user *)msg_sys->msg_name; |
2009 | uaddr_len = COMPAT_NAMELEN(msg); | 1958 | uaddr_len = COMPAT_NAMELEN(msg); |
2010 | if (MSG_CMSG_COMPAT & flags) { | 1959 | if (MSG_CMSG_COMPAT & flags) { |
2011 | err = verify_compat_iovec(msg_sys, iov, | 1960 | err = verify_compat_iovec(msg_sys, iov, |
2012 | (struct sockaddr *)&addr, | 1961 | (struct sockaddr *)&addr, |
2013 | VERIFY_WRITE); | 1962 | VERIFY_WRITE); |
2014 | } else | 1963 | } else |
2015 | err = verify_iovec(msg_sys, iov, | 1964 | err = verify_iovec(msg_sys, iov, |
2016 | (struct sockaddr *)&addr, | 1965 | (struct sockaddr *)&addr, |
2017 | VERIFY_WRITE); | 1966 | VERIFY_WRITE); |
2018 | if (err < 0) | 1967 | if (err < 0) |
2019 | goto out_freeiov; | 1968 | goto out_freeiov; |
2020 | total_len = err; | 1969 | total_len = err; |
2021 | 1970 | ||
2022 | cmsg_ptr = (unsigned long)msg_sys->msg_control; | 1971 | cmsg_ptr = (unsigned long)msg_sys->msg_control; |
2023 | msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); | 1972 | msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); |
2024 | 1973 | ||
2025 | if (sock->file->f_flags & O_NONBLOCK) | 1974 | if (sock->file->f_flags & O_NONBLOCK) |
2026 | flags |= MSG_DONTWAIT; | 1975 | flags |= MSG_DONTWAIT; |
2027 | err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, | 1976 | err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, |
2028 | total_len, flags); | 1977 | total_len, flags); |
2029 | if (err < 0) | 1978 | if (err < 0) |
2030 | goto out_freeiov; | 1979 | goto out_freeiov; |
2031 | len = err; | 1980 | len = err; |
2032 | 1981 | ||
2033 | if (uaddr != NULL) { | 1982 | if (uaddr != NULL) { |
2034 | err = move_addr_to_user((struct sockaddr *)&addr, | 1983 | err = move_addr_to_user((struct sockaddr *)&addr, |
2035 | msg_sys->msg_namelen, uaddr, | 1984 | msg_sys->msg_namelen, uaddr, |
2036 | uaddr_len); | 1985 | uaddr_len); |
2037 | if (err < 0) | 1986 | if (err < 0) |
2038 | goto out_freeiov; | 1987 | goto out_freeiov; |
2039 | } | 1988 | } |
2040 | err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT), | 1989 | err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT), |
2041 | COMPAT_FLAGS(msg)); | 1990 | COMPAT_FLAGS(msg)); |
2042 | if (err) | 1991 | if (err) |
2043 | goto out_freeiov; | 1992 | goto out_freeiov; |
2044 | if (MSG_CMSG_COMPAT & flags) | 1993 | if (MSG_CMSG_COMPAT & flags) |
2045 | err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr, | 1994 | err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr, |
2046 | &msg_compat->msg_controllen); | 1995 | &msg_compat->msg_controllen); |
2047 | else | 1996 | else |
2048 | err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr, | 1997 | err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr, |
2049 | &msg->msg_controllen); | 1998 | &msg->msg_controllen); |
2050 | if (err) | 1999 | if (err) |
2051 | goto out_freeiov; | 2000 | goto out_freeiov; |
2052 | err = len; | 2001 | err = len; |
2053 | 2002 | ||
2054 | out_freeiov: | 2003 | out_freeiov: |
2055 | if (iov != iovstack) | 2004 | if (iov != iovstack) |
2056 | sock_kfree_s(sock->sk, iov, iov_size); | 2005 | sock_kfree_s(sock->sk, iov, iov_size); |
2057 | out: | 2006 | out: |
2058 | return err; | 2007 | return err; |
2059 | } | 2008 | } |
2060 | 2009 | ||
2061 | /* | 2010 | /* |
2062 | * BSD recvmsg interface | 2011 | * BSD recvmsg interface |
2063 | */ | 2012 | */ |
2064 | 2013 | ||
2065 | SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, | 2014 | SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, |
2066 | unsigned int, flags) | 2015 | unsigned int, flags) |
2067 | { | 2016 | { |
2068 | int fput_needed, err; | 2017 | int fput_needed, err; |
2069 | struct msghdr msg_sys; | 2018 | struct msghdr msg_sys; |
2070 | struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed); | 2019 | struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed); |
2071 | 2020 | ||
2072 | if (!sock) | 2021 | if (!sock) |
2073 | goto out; | 2022 | goto out; |
2074 | 2023 | ||
2075 | err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0); | 2024 | err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0); |
2076 | 2025 | ||
2077 | fput_light(sock->file, fput_needed); | 2026 | fput_light(sock->file, fput_needed); |
2078 | out: | 2027 | out: |
2079 | return err; | 2028 | return err; |
2080 | } | 2029 | } |
2081 | 2030 | ||
2082 | /* | 2031 | /* |
2083 | * Linux recvmmsg interface | 2032 | * Linux recvmmsg interface |
2084 | */ | 2033 | */ |
2085 | 2034 | ||
2086 | int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, | 2035 | int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, |
2087 | unsigned int flags, struct timespec *timeout) | 2036 | unsigned int flags, struct timespec *timeout) |
2088 | { | 2037 | { |
2089 | int fput_needed, err, datagrams; | 2038 | int fput_needed, err, datagrams; |
2090 | struct socket *sock; | 2039 | struct socket *sock; |
2091 | struct mmsghdr __user *entry; | 2040 | struct mmsghdr __user *entry; |
2092 | struct compat_mmsghdr __user *compat_entry; | 2041 | struct compat_mmsghdr __user *compat_entry; |
2093 | struct msghdr msg_sys; | 2042 | struct msghdr msg_sys; |
2094 | struct timespec end_time; | 2043 | struct timespec end_time; |
2095 | 2044 | ||
2096 | if (timeout && | 2045 | if (timeout && |
2097 | poll_select_set_timeout(&end_time, timeout->tv_sec, | 2046 | poll_select_set_timeout(&end_time, timeout->tv_sec, |
2098 | timeout->tv_nsec)) | 2047 | timeout->tv_nsec)) |
2099 | return -EINVAL; | 2048 | return -EINVAL; |
2100 | 2049 | ||
2101 | datagrams = 0; | 2050 | datagrams = 0; |
2102 | 2051 | ||
2103 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | 2052 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
2104 | if (!sock) | 2053 | if (!sock) |
2105 | return err; | 2054 | return err; |
2106 | 2055 | ||
2107 | err = sock_error(sock->sk); | 2056 | err = sock_error(sock->sk); |
2108 | if (err) | 2057 | if (err) |
2109 | goto out_put; | 2058 | goto out_put; |
2110 | 2059 | ||
2111 | entry = mmsg; | 2060 | entry = mmsg; |
2112 | compat_entry = (struct compat_mmsghdr __user *)mmsg; | 2061 | compat_entry = (struct compat_mmsghdr __user *)mmsg; |
2113 | 2062 | ||
2114 | while (datagrams < vlen) { | 2063 | while (datagrams < vlen) { |
2115 | /* | 2064 | /* |
2116 | * No need to ask LSM for more than the first datagram. | 2065 | * No need to ask LSM for more than the first datagram. |
2117 | */ | 2066 | */ |
2118 | if (MSG_CMSG_COMPAT & flags) { | 2067 | if (MSG_CMSG_COMPAT & flags) { |
2119 | err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry, | 2068 | err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry, |
2120 | &msg_sys, flags, datagrams); | 2069 | &msg_sys, flags, datagrams); |
2121 | if (err < 0) | 2070 | if (err < 0) |
2122 | break; | 2071 | break; |
2123 | err = __put_user(err, &compat_entry->msg_len); | 2072 | err = __put_user(err, &compat_entry->msg_len); |
2124 | ++compat_entry; | 2073 | ++compat_entry; |
2125 | } else { | 2074 | } else { |
2126 | err = __sys_recvmsg(sock, (struct msghdr __user *)entry, | 2075 | err = __sys_recvmsg(sock, (struct msghdr __user *)entry, |
2127 | &msg_sys, flags, datagrams); | 2076 | &msg_sys, flags, datagrams); |
2128 | if (err < 0) | 2077 | if (err < 0) |
2129 | break; | 2078 | break; |
2130 | err = put_user(err, &entry->msg_len); | 2079 | err = put_user(err, &entry->msg_len); |
2131 | ++entry; | 2080 | ++entry; |
2132 | } | 2081 | } |
2133 | 2082 | ||
2134 | if (err) | 2083 | if (err) |
2135 | break; | 2084 | break; |
2136 | ++datagrams; | 2085 | ++datagrams; |
2137 | 2086 | ||
2138 | /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */ | 2087 | /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */ |
2139 | if (flags & MSG_WAITFORONE) | 2088 | if (flags & MSG_WAITFORONE) |
2140 | flags |= MSG_DONTWAIT; | 2089 | flags |= MSG_DONTWAIT; |
2141 | 2090 | ||
2142 | if (timeout) { | 2091 | if (timeout) { |
2143 | ktime_get_ts(timeout); | 2092 | ktime_get_ts(timeout); |
2144 | *timeout = timespec_sub(end_time, *timeout); | 2093 | *timeout = timespec_sub(end_time, *timeout); |
2145 | if (timeout->tv_sec < 0) { | 2094 | if (timeout->tv_sec < 0) { |
2146 | timeout->tv_sec = timeout->tv_nsec = 0; | 2095 | timeout->tv_sec = timeout->tv_nsec = 0; |
2147 | break; | 2096 | break; |
2148 | } | 2097 | } |
2149 | 2098 | ||
2150 | /* Timeout, return less than vlen datagrams */ | 2099 | /* Timeout, return less than vlen datagrams */ |
2151 | if (timeout->tv_nsec == 0 && timeout->tv_sec == 0) | 2100 | if (timeout->tv_nsec == 0 && timeout->tv_sec == 0) |
2152 | break; | 2101 | break; |
2153 | } | 2102 | } |
2154 | 2103 | ||
2155 | /* Out of band data, return right away */ | 2104 | /* Out of band data, return right away */ |
2156 | if (msg_sys.msg_flags & MSG_OOB) | 2105 | if (msg_sys.msg_flags & MSG_OOB) |
2157 | break; | 2106 | break; |
2158 | } | 2107 | } |
2159 | 2108 | ||
2160 | out_put: | 2109 | out_put: |
2161 | fput_light(sock->file, fput_needed); | 2110 | fput_light(sock->file, fput_needed); |
2162 | 2111 | ||
2163 | if (err == 0) | 2112 | if (err == 0) |
2164 | return datagrams; | 2113 | return datagrams; |
2165 | 2114 | ||
2166 | if (datagrams != 0) { | 2115 | if (datagrams != 0) { |
2167 | /* | 2116 | /* |
2168 | * We may return less entries than requested (vlen) if the | 2117 | * We may return less entries than requested (vlen) if the |
2169 | * sock is non block and there aren't enough datagrams... | 2118 | * sock is non block and there aren't enough datagrams... |
2170 | */ | 2119 | */ |
2171 | if (err != -EAGAIN) { | 2120 | if (err != -EAGAIN) { |
2172 | /* | 2121 | /* |
2173 | * ... or if recvmsg returns an error after we | 2122 | * ... or if recvmsg returns an error after we |
2174 | * received some datagrams, where we record the | 2123 | * received some datagrams, where we record the |
2175 | * error to return on the next call or if the | 2124 | * error to return on the next call or if the |
2176 | * app asks about it using getsockopt(SO_ERROR). | 2125 | * app asks about it using getsockopt(SO_ERROR). |
2177 | */ | 2126 | */ |
2178 | sock->sk->sk_err = -err; | 2127 | sock->sk->sk_err = -err; |
2179 | } | 2128 | } |
2180 | 2129 | ||
2181 | return datagrams; | 2130 | return datagrams; |
2182 | } | 2131 | } |
2183 | 2132 | ||
2184 | return err; | 2133 | return err; |
2185 | } | 2134 | } |
2186 | 2135 | ||
2187 | SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, | 2136 | SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, |
2188 | unsigned int, vlen, unsigned int, flags, | 2137 | unsigned int, vlen, unsigned int, flags, |
2189 | struct timespec __user *, timeout) | 2138 | struct timespec __user *, timeout) |
2190 | { | 2139 | { |
2191 | int datagrams; | 2140 | int datagrams; |
2192 | struct timespec timeout_sys; | 2141 | struct timespec timeout_sys; |
2193 | 2142 | ||
2194 | if (!timeout) | 2143 | if (!timeout) |
2195 | return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL); | 2144 | return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL); |
2196 | 2145 | ||
2197 | if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys))) | 2146 | if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys))) |
2198 | return -EFAULT; | 2147 | return -EFAULT; |
2199 | 2148 | ||
2200 | datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys); | 2149 | datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys); |
2201 | 2150 | ||
2202 | if (datagrams > 0 && | 2151 | if (datagrams > 0 && |
2203 | copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys))) | 2152 | copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys))) |
2204 | datagrams = -EFAULT; | 2153 | datagrams = -EFAULT; |
2205 | 2154 | ||
2206 | return datagrams; | 2155 | return datagrams; |
2207 | } | 2156 | } |
2208 | 2157 | ||
2209 | #ifdef __ARCH_WANT_SYS_SOCKETCALL | 2158 | #ifdef __ARCH_WANT_SYS_SOCKETCALL |
/*
 * Argument block sizes for sys_socketcall, in bytes, indexed by call
 * number.  AL(n) is the size of n unsigned long arguments.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[20] = {
	AL(0), AL(3), AL(3), AL(3), AL(2),	/* calls  0 ..  4 */
	AL(3), AL(3), AL(3), AL(4), AL(4),	/* calls  5 ..  9 */
	AL(4), AL(6), AL(6), AL(2), AL(5),	/* calls 10 .. 14 */
	AL(5), AL(3), AL(3), AL(4), AL(5)	/* calls 15 .. 19 */
};

#undef AL
2220 | 2169 | ||
2221 | /* | 2170 | /* |
2222 | * System call vectors. | 2171 | * System call vectors. |
2223 | * | 2172 | * |
2224 | * Argument checking cleaned up. Saved 20% in size. | 2173 | * Argument checking cleaned up. Saved 20% in size. |
2225 | * This function doesn't need to set the kernel lock because | 2174 | * This function doesn't need to set the kernel lock because |
2226 | * it is set by the callees. | 2175 | * it is set by the callees. |
2227 | */ | 2176 | */ |
2228 | 2177 | ||
2229 | SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) | 2178 | SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) |
2230 | { | 2179 | { |
2231 | unsigned long a[6]; | 2180 | unsigned long a[6]; |
2232 | unsigned long a0, a1; | 2181 | unsigned long a0, a1; |
2233 | int err; | 2182 | int err; |
2234 | unsigned int len; | 2183 | unsigned int len; |
2235 | 2184 | ||
2236 | if (call < 1 || call > SYS_RECVMMSG) | 2185 | if (call < 1 || call > SYS_RECVMMSG) |
2237 | return -EINVAL; | 2186 | return -EINVAL; |
2238 | 2187 | ||
2239 | len = nargs[call]; | 2188 | len = nargs[call]; |
2240 | if (len > sizeof(a)) | 2189 | if (len > sizeof(a)) |
2241 | return -EINVAL; | 2190 | return -EINVAL; |
2242 | 2191 | ||
2243 | /* copy_from_user should be SMP safe. */ | 2192 | /* copy_from_user should be SMP safe. */ |
2244 | if (copy_from_user(a, args, len)) | 2193 | if (copy_from_user(a, args, len)) |
2245 | return -EFAULT; | 2194 | return -EFAULT; |
2246 | 2195 | ||
2247 | audit_socketcall(nargs[call] / sizeof(unsigned long), a); | 2196 | audit_socketcall(nargs[call] / sizeof(unsigned long), a); |
2248 | 2197 | ||
2249 | a0 = a[0]; | 2198 | a0 = a[0]; |
2250 | a1 = a[1]; | 2199 | a1 = a[1]; |
2251 | 2200 | ||
2252 | switch (call) { | 2201 | switch (call) { |
2253 | case SYS_SOCKET: | 2202 | case SYS_SOCKET: |
2254 | err = sys_socket(a0, a1, a[2]); | 2203 | err = sys_socket(a0, a1, a[2]); |
2255 | break; | 2204 | break; |
2256 | case SYS_BIND: | 2205 | case SYS_BIND: |
2257 | err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]); | 2206 | err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]); |
2258 | break; | 2207 | break; |
2259 | case SYS_CONNECT: | 2208 | case SYS_CONNECT: |
2260 | err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]); | 2209 | err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]); |
2261 | break; | 2210 | break; |
2262 | case SYS_LISTEN: | 2211 | case SYS_LISTEN: |
2263 | err = sys_listen(a0, a1); | 2212 | err = sys_listen(a0, a1); |
2264 | break; | 2213 | break; |
2265 | case SYS_ACCEPT: | 2214 | case SYS_ACCEPT: |
2266 | err = sys_accept4(a0, (struct sockaddr __user *)a1, | 2215 | err = sys_accept4(a0, (struct sockaddr __user *)a1, |
2267 | (int __user *)a[2], 0); | 2216 | (int __user *)a[2], 0); |
2268 | break; | 2217 | break; |
2269 | case SYS_GETSOCKNAME: | 2218 | case SYS_GETSOCKNAME: |
2270 | err = | 2219 | err = |
2271 | sys_getsockname(a0, (struct sockaddr __user *)a1, | 2220 | sys_getsockname(a0, (struct sockaddr __user *)a1, |
2272 | (int __user *)a[2]); | 2221 | (int __user *)a[2]); |
2273 | break; | 2222 | break; |
2274 | case SYS_GETPEERNAME: | 2223 | case SYS_GETPEERNAME: |
2275 | err = | 2224 | err = |
2276 | sys_getpeername(a0, (struct sockaddr __user *)a1, | 2225 | sys_getpeername(a0, (struct sockaddr __user *)a1, |
2277 | (int __user *)a[2]); | 2226 | (int __user *)a[2]); |
2278 | break; | 2227 | break; |
2279 | case SYS_SOCKETPAIR: | 2228 | case SYS_SOCKETPAIR: |
2280 | err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]); | 2229 | err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]); |
2281 | break; | 2230 | break; |
2282 | case SYS_SEND: | 2231 | case SYS_SEND: |
2283 | err = sys_send(a0, (void __user *)a1, a[2], a[3]); | 2232 | err = sys_send(a0, (void __user *)a1, a[2], a[3]); |
2284 | break; | 2233 | break; |
2285 | case SYS_SENDTO: | 2234 | case SYS_SENDTO: |
2286 | err = sys_sendto(a0, (void __user *)a1, a[2], a[3], | 2235 | err = sys_sendto(a0, (void __user *)a1, a[2], a[3], |
2287 | (struct sockaddr __user *)a[4], a[5]); | 2236 | (struct sockaddr __user *)a[4], a[5]); |
2288 | break; | 2237 | break; |
2289 | case SYS_RECV: | 2238 | case SYS_RECV: |
2290 | err = sys_recv(a0, (void __user *)a1, a[2], a[3]); | 2239 | err = sys_recv(a0, (void __user *)a1, a[2], a[3]); |
2291 | break; | 2240 | break; |
2292 | case SYS_RECVFROM: | 2241 | case SYS_RECVFROM: |
2293 | err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3], | 2242 | err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3], |
2294 | (struct sockaddr __user *)a[4], | 2243 | (struct sockaddr __user *)a[4], |
2295 | (int __user *)a[5]); | 2244 | (int __user *)a[5]); |
2296 | break; | 2245 | break; |
2297 | case SYS_SHUTDOWN: | 2246 | case SYS_SHUTDOWN: |
2298 | err = sys_shutdown(a0, a1); | 2247 | err = sys_shutdown(a0, a1); |
2299 | break; | 2248 | break; |
2300 | case SYS_SETSOCKOPT: | 2249 | case SYS_SETSOCKOPT: |
2301 | err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]); | 2250 | err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]); |
2302 | break; | 2251 | break; |
2303 | case SYS_GETSOCKOPT: | 2252 | case SYS_GETSOCKOPT: |
2304 | err = | 2253 | err = |
2305 | sys_getsockopt(a0, a1, a[2], (char __user *)a[3], | 2254 | sys_getsockopt(a0, a1, a[2], (char __user *)a[3], |
2306 | (int __user *)a[4]); | 2255 | (int __user *)a[4]); |
2307 | break; | 2256 | break; |
2308 | case SYS_SENDMSG: | 2257 | case SYS_SENDMSG: |
2309 | err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); | 2258 | err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); |
2310 | break; | 2259 | break; |
2311 | case SYS_RECVMSG: | 2260 | case SYS_RECVMSG: |
2312 | err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); | 2261 | err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); |
2313 | break; | 2262 | break; |
2314 | case SYS_RECVMMSG: | 2263 | case SYS_RECVMMSG: |
2315 | err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3], | 2264 | err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3], |
2316 | (struct timespec __user *)a[4]); | 2265 | (struct timespec __user *)a[4]); |
2317 | break; | 2266 | break; |
2318 | case SYS_ACCEPT4: | 2267 | case SYS_ACCEPT4: |
2319 | err = sys_accept4(a0, (struct sockaddr __user *)a1, | 2268 | err = sys_accept4(a0, (struct sockaddr __user *)a1, |
2320 | (int __user *)a[2], a[3]); | 2269 | (int __user *)a[2], a[3]); |
2321 | break; | 2270 | break; |
2322 | default: | 2271 | default: |
2323 | err = -EINVAL; | 2272 | err = -EINVAL; |
2324 | break; | 2273 | break; |
2325 | } | 2274 | } |
2326 | return err; | 2275 | return err; |
2327 | } | 2276 | } |
2328 | 2277 | ||
2329 | #endif /* __ARCH_WANT_SYS_SOCKETCALL */ | 2278 | #endif /* __ARCH_WANT_SYS_SOCKETCALL */ |
2330 | 2279 | ||
2331 | /** | 2280 | /** |
2332 | * sock_register - add a socket protocol handler | 2281 | * sock_register - add a socket protocol handler |
2333 | * @ops: description of protocol | 2282 | * @ops: description of protocol |
2334 | * | 2283 | * |
2335 | * This function is called by a protocol handler that wants to | 2284 | * This function is called by a protocol handler that wants to |
2336 | * advertise its address family, and have it linked into the | 2285 | * advertise its address family, and have it linked into the |
2337 | * socket interface. The value ops->family coresponds to the | 2286 | * socket interface. The value ops->family coresponds to the |
2338 | * socket system call protocol family. | 2287 | * socket system call protocol family. |
2339 | */ | 2288 | */ |
2340 | int sock_register(const struct net_proto_family *ops) | 2289 | int sock_register(const struct net_proto_family *ops) |
2341 | { | 2290 | { |
2342 | int err; | 2291 | int err; |
2343 | 2292 | ||
2344 | if (ops->family >= NPROTO) { | 2293 | if (ops->family >= NPROTO) { |
2345 | printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, | 2294 | printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, |
2346 | NPROTO); | 2295 | NPROTO); |
2347 | return -ENOBUFS; | 2296 | return -ENOBUFS; |
2348 | } | 2297 | } |
2349 | 2298 | ||
2350 | spin_lock(&net_family_lock); | 2299 | spin_lock(&net_family_lock); |
2351 | if (net_families[ops->family]) | 2300 | if (net_families[ops->family]) |
2352 | err = -EEXIST; | 2301 | err = -EEXIST; |
2353 | else { | 2302 | else { |
2354 | net_families[ops->family] = ops; | 2303 | net_families[ops->family] = ops; |
2355 | err = 0; | 2304 | err = 0; |
2356 | } | 2305 | } |
2357 | spin_unlock(&net_family_lock); | 2306 | spin_unlock(&net_family_lock); |
2358 | 2307 | ||
2359 | printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family); | 2308 | printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family); |
2360 | return err; | 2309 | return err; |
2361 | } | 2310 | } |
2362 | 2311 | ||
2363 | /** | 2312 | /** |
2364 | * sock_unregister - remove a protocol handler | 2313 | * sock_unregister - remove a protocol handler |
2365 | * @family: protocol family to remove | 2314 | * @family: protocol family to remove |
2366 | * | 2315 | * |
2367 | * This function is called by a protocol handler that wants to | 2316 | * This function is called by a protocol handler that wants to |
2368 | * remove its address family, and have it unlinked from the | 2317 | * remove its address family, and have it unlinked from the |
2369 | * new socket creation. | 2318 | * new socket creation. |
2370 | * | 2319 | * |
2371 | * If protocol handler is a module, then it can use module reference | 2320 | * If protocol handler is a module, then it can use module reference |
2372 | * counts to protect against new references. If protocol handler is not | 2321 | * counts to protect against new references. If protocol handler is not |
2373 | * a module then it needs to provide its own protection in | 2322 | * a module then it needs to provide its own protection in |
2374 | * the ops->create routine. | 2323 | * the ops->create routine. |
2375 | */ | 2324 | */ |
2376 | void sock_unregister(int family) | 2325 | void sock_unregister(int family) |
2377 | { | 2326 | { |
2378 | BUG_ON(family < 0 || family >= NPROTO); | 2327 | BUG_ON(family < 0 || family >= NPROTO); |
2379 | 2328 | ||
2380 | spin_lock(&net_family_lock); | 2329 | spin_lock(&net_family_lock); |
2381 | net_families[family] = NULL; | 2330 | net_families[family] = NULL; |
2382 | spin_unlock(&net_family_lock); | 2331 | spin_unlock(&net_family_lock); |
2383 | 2332 | ||
2384 | synchronize_rcu(); | 2333 | synchronize_rcu(); |
2385 | 2334 | ||
2386 | printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); | 2335 | printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); |
2387 | } | 2336 | } |
2388 | 2337 | ||
2389 | static int __init sock_init(void) | 2338 | static int __init sock_init(void) |
2390 | { | 2339 | { |
2391 | /* | 2340 | /* |
2392 | * Initialize sock SLAB cache. | 2341 | * Initialize sock SLAB cache. |
2393 | */ | 2342 | */ |
2394 | 2343 | ||
2395 | sk_init(); | 2344 | sk_init(); |
2396 | 2345 | ||
2397 | /* | 2346 | /* |
2398 | * Initialize skbuff SLAB cache | 2347 | * Initialize skbuff SLAB cache |
2399 | */ | 2348 | */ |
2400 | skb_init(); | 2349 | skb_init(); |
2401 | 2350 | ||
2402 | /* | 2351 | /* |
2403 | * Initialize the protocols module. | 2352 | * Initialize the protocols module. |
2404 | */ | 2353 | */ |
2405 | 2354 | ||
2406 | init_inodecache(); | 2355 | init_inodecache(); |
2407 | register_filesystem(&sock_fs_type); | 2356 | register_filesystem(&sock_fs_type); |
2408 | sock_mnt = kern_mount(&sock_fs_type); | 2357 | sock_mnt = kern_mount(&sock_fs_type); |
2409 | 2358 | ||
2410 | /* The real protocol initialization is performed in later initcalls. | 2359 | /* The real protocol initialization is performed in later initcalls. |
2411 | */ | 2360 | */ |
2412 | 2361 | ||
2413 | #ifdef CONFIG_NETFILTER | 2362 | #ifdef CONFIG_NETFILTER |
2414 | netfilter_init(); | 2363 | netfilter_init(); |
2415 | #endif | 2364 | #endif |
2416 | 2365 | ||
2417 | return 0; | 2366 | return 0; |
2418 | } | 2367 | } |
2419 | 2368 | ||
2420 | core_initcall(sock_init); /* early initcall */ | 2369 | core_initcall(sock_init); /* early initcall */ |
2421 | 2370 | ||
2422 | #ifdef CONFIG_PROC_FS | 2371 | #ifdef CONFIG_PROC_FS |
2423 | void socket_seq_show(struct seq_file *seq) | 2372 | void socket_seq_show(struct seq_file *seq) |
2424 | { | 2373 | { |
2425 | int cpu; | 2374 | int cpu; |
2426 | int counter = 0; | 2375 | int counter = 0; |
2427 | 2376 | ||
2428 | for_each_possible_cpu(cpu) | 2377 | for_each_possible_cpu(cpu) |
2429 | counter += per_cpu(sockets_in_use, cpu); | 2378 | counter += per_cpu(sockets_in_use, cpu); |
2430 | 2379 | ||
2431 | /* It can be negative, by the way. 8) */ | 2380 | /* It can be negative, by the way. 8) */ |
2432 | if (counter < 0) | 2381 | if (counter < 0) |
2433 | counter = 0; | 2382 | counter = 0; |
2434 | 2383 | ||
2435 | seq_printf(seq, "sockets: used %d\n", counter); | 2384 | seq_printf(seq, "sockets: used %d\n", counter); |
2436 | } | 2385 | } |
2437 | #endif /* CONFIG_PROC_FS */ | 2386 | #endif /* CONFIG_PROC_FS */ |
2438 | 2387 | ||
2439 | #ifdef CONFIG_COMPAT | 2388 | #ifdef CONFIG_COMPAT |
2440 | static int do_siocgstamp(struct net *net, struct socket *sock, | 2389 | static int do_siocgstamp(struct net *net, struct socket *sock, |
2441 | unsigned int cmd, struct compat_timeval __user *up) | 2390 | unsigned int cmd, struct compat_timeval __user *up) |
2442 | { | 2391 | { |
2443 | mm_segment_t old_fs = get_fs(); | 2392 | mm_segment_t old_fs = get_fs(); |
2444 | struct timeval ktv; | 2393 | struct timeval ktv; |
2445 | int err; | 2394 | int err; |
2446 | 2395 | ||
2447 | set_fs(KERNEL_DS); | 2396 | set_fs(KERNEL_DS); |
2448 | err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv); | 2397 | err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv); |
2449 | set_fs(old_fs); | 2398 | set_fs(old_fs); |
2450 | if (!err) { | 2399 | if (!err) { |
2451 | err = put_user(ktv.tv_sec, &up->tv_sec); | 2400 | err = put_user(ktv.tv_sec, &up->tv_sec); |
2452 | err |= __put_user(ktv.tv_usec, &up->tv_usec); | 2401 | err |= __put_user(ktv.tv_usec, &up->tv_usec); |
2453 | } | 2402 | } |
2454 | return err; | 2403 | return err; |
2455 | } | 2404 | } |
2456 | 2405 | ||
2457 | static int do_siocgstampns(struct net *net, struct socket *sock, | 2406 | static int do_siocgstampns(struct net *net, struct socket *sock, |
2458 | unsigned int cmd, struct compat_timespec __user *up) | 2407 | unsigned int cmd, struct compat_timespec __user *up) |
2459 | { | 2408 | { |
2460 | mm_segment_t old_fs = get_fs(); | 2409 | mm_segment_t old_fs = get_fs(); |
2461 | struct timespec kts; | 2410 | struct timespec kts; |
2462 | int err; | 2411 | int err; |
2463 | 2412 | ||
2464 | set_fs(KERNEL_DS); | 2413 | set_fs(KERNEL_DS); |
2465 | err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts); | 2414 | err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts); |
2466 | set_fs(old_fs); | 2415 | set_fs(old_fs); |
2467 | if (!err) { | 2416 | if (!err) { |
2468 | err = put_user(kts.tv_sec, &up->tv_sec); | 2417 | err = put_user(kts.tv_sec, &up->tv_sec); |
2469 | err |= __put_user(kts.tv_nsec, &up->tv_nsec); | 2418 | err |= __put_user(kts.tv_nsec, &up->tv_nsec); |
2470 | } | 2419 | } |
2471 | return err; | 2420 | return err; |
2472 | } | 2421 | } |
2473 | 2422 | ||
2474 | static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32) | 2423 | static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32) |
2475 | { | 2424 | { |
2476 | struct ifreq __user *uifr; | 2425 | struct ifreq __user *uifr; |
2477 | int err; | 2426 | int err; |
2478 | 2427 | ||
2479 | uifr = compat_alloc_user_space(sizeof(struct ifreq)); | 2428 | uifr = compat_alloc_user_space(sizeof(struct ifreq)); |
2480 | if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) | 2429 | if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) |
2481 | return -EFAULT; | 2430 | return -EFAULT; |
2482 | 2431 | ||
2483 | err = dev_ioctl(net, SIOCGIFNAME, uifr); | 2432 | err = dev_ioctl(net, SIOCGIFNAME, uifr); |
2484 | if (err) | 2433 | if (err) |
2485 | return err; | 2434 | return err; |
2486 | 2435 | ||
2487 | if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq))) | 2436 | if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq))) |
2488 | return -EFAULT; | 2437 | return -EFAULT; |
2489 | 2438 | ||
2490 | return 0; | 2439 | return 0; |
2491 | } | 2440 | } |
2492 | 2441 | ||
2493 | static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) | 2442 | static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) |
2494 | { | 2443 | { |
2495 | struct compat_ifconf ifc32; | 2444 | struct compat_ifconf ifc32; |
2496 | struct ifconf ifc; | 2445 | struct ifconf ifc; |
2497 | struct ifconf __user *uifc; | 2446 | struct ifconf __user *uifc; |
2498 | struct compat_ifreq __user *ifr32; | 2447 | struct compat_ifreq __user *ifr32; |
2499 | struct ifreq __user *ifr; | 2448 | struct ifreq __user *ifr; |
2500 | unsigned int i, j; | 2449 | unsigned int i, j; |
2501 | int err; | 2450 | int err; |
2502 | 2451 | ||
2503 | if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf))) | 2452 | if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf))) |
2504 | return -EFAULT; | 2453 | return -EFAULT; |
2505 | 2454 | ||
2506 | if (ifc32.ifcbuf == 0) { | 2455 | if (ifc32.ifcbuf == 0) { |
2507 | ifc32.ifc_len = 0; | 2456 | ifc32.ifc_len = 0; |
2508 | ifc.ifc_len = 0; | 2457 | ifc.ifc_len = 0; |
2509 | ifc.ifc_req = NULL; | 2458 | ifc.ifc_req = NULL; |
2510 | uifc = compat_alloc_user_space(sizeof(struct ifconf)); | 2459 | uifc = compat_alloc_user_space(sizeof(struct ifconf)); |
2511 | } else { | 2460 | } else { |
2512 | size_t len =((ifc32.ifc_len / sizeof (struct compat_ifreq)) + 1) * | 2461 | size_t len =((ifc32.ifc_len / sizeof (struct compat_ifreq)) + 1) * |
2513 | sizeof (struct ifreq); | 2462 | sizeof (struct ifreq); |
2514 | uifc = compat_alloc_user_space(sizeof(struct ifconf) + len); | 2463 | uifc = compat_alloc_user_space(sizeof(struct ifconf) + len); |
2515 | ifc.ifc_len = len; | 2464 | ifc.ifc_len = len; |
2516 | ifr = ifc.ifc_req = (void __user *)(uifc + 1); | 2465 | ifr = ifc.ifc_req = (void __user *)(uifc + 1); |
2517 | ifr32 = compat_ptr(ifc32.ifcbuf); | 2466 | ifr32 = compat_ptr(ifc32.ifcbuf); |
2518 | for (i = 0; i < ifc32.ifc_len; i += sizeof (struct compat_ifreq)) { | 2467 | for (i = 0; i < ifc32.ifc_len; i += sizeof (struct compat_ifreq)) { |
2519 | if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq))) | 2468 | if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq))) |
2520 | return -EFAULT; | 2469 | return -EFAULT; |
2521 | ifr++; | 2470 | ifr++; |
2522 | ifr32++; | 2471 | ifr32++; |
2523 | } | 2472 | } |
2524 | } | 2473 | } |
2525 | if (copy_to_user(uifc, &ifc, sizeof(struct ifconf))) | 2474 | if (copy_to_user(uifc, &ifc, sizeof(struct ifconf))) |
2526 | return -EFAULT; | 2475 | return -EFAULT; |
2527 | 2476 | ||
2528 | err = dev_ioctl(net, SIOCGIFCONF, uifc); | 2477 | err = dev_ioctl(net, SIOCGIFCONF, uifc); |
2529 | if (err) | 2478 | if (err) |
2530 | return err; | 2479 | return err; |
2531 | 2480 | ||
2532 | if (copy_from_user(&ifc, uifc, sizeof(struct ifconf))) | 2481 | if (copy_from_user(&ifc, uifc, sizeof(struct ifconf))) |
2533 | return -EFAULT; | 2482 | return -EFAULT; |
2534 | 2483 | ||
2535 | ifr = ifc.ifc_req; | 2484 | ifr = ifc.ifc_req; |
2536 | ifr32 = compat_ptr(ifc32.ifcbuf); | 2485 | ifr32 = compat_ptr(ifc32.ifcbuf); |
2537 | for (i = 0, j = 0; | 2486 | for (i = 0, j = 0; |
2538 | i + sizeof (struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len; | 2487 | i + sizeof (struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len; |
2539 | i += sizeof (struct compat_ifreq), j += sizeof (struct ifreq)) { | 2488 | i += sizeof (struct compat_ifreq), j += sizeof (struct ifreq)) { |
2540 | if (copy_in_user(ifr32, ifr, sizeof (struct compat_ifreq))) | 2489 | if (copy_in_user(ifr32, ifr, sizeof (struct compat_ifreq))) |
2541 | return -EFAULT; | 2490 | return -EFAULT; |
2542 | ifr32++; | 2491 | ifr32++; |
2543 | ifr++; | 2492 | ifr++; |
2544 | } | 2493 | } |
2545 | 2494 | ||
2546 | if (ifc32.ifcbuf == 0) { | 2495 | if (ifc32.ifcbuf == 0) { |
2547 | /* Translate from 64-bit structure multiple to | 2496 | /* Translate from 64-bit structure multiple to |
2548 | * a 32-bit one. | 2497 | * a 32-bit one. |
2549 | */ | 2498 | */ |
2550 | i = ifc.ifc_len; | 2499 | i = ifc.ifc_len; |
2551 | i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq)); | 2500 | i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq)); |
2552 | ifc32.ifc_len = i; | 2501 | ifc32.ifc_len = i; |
2553 | } else { | 2502 | } else { |
2554 | ifc32.ifc_len = i; | 2503 | ifc32.ifc_len = i; |
2555 | } | 2504 | } |
2556 | if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf))) | 2505 | if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf))) |
2557 | return -EFAULT; | 2506 | return -EFAULT; |
2558 | 2507 | ||
2559 | return 0; | 2508 | return 0; |
2560 | } | 2509 | } |
2561 | 2510 | ||
2562 | static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) | 2511 | static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) |
2563 | { | 2512 | { |
2564 | struct ifreq __user *ifr; | 2513 | struct ifreq __user *ifr; |
2565 | u32 data; | 2514 | u32 data; |
2566 | void __user *datap; | 2515 | void __user *datap; |
2567 | 2516 | ||
2568 | ifr = compat_alloc_user_space(sizeof(*ifr)); | 2517 | ifr = compat_alloc_user_space(sizeof(*ifr)); |
2569 | 2518 | ||
2570 | if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) | 2519 | if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) |
2571 | return -EFAULT; | 2520 | return -EFAULT; |
2572 | 2521 | ||
2573 | if (get_user(data, &ifr32->ifr_ifru.ifru_data)) | 2522 | if (get_user(data, &ifr32->ifr_ifru.ifru_data)) |
2574 | return -EFAULT; | 2523 | return -EFAULT; |
2575 | 2524 | ||
2576 | datap = compat_ptr(data); | 2525 | datap = compat_ptr(data); |
2577 | if (put_user(datap, &ifr->ifr_ifru.ifru_data)) | 2526 | if (put_user(datap, &ifr->ifr_ifru.ifru_data)) |
2578 | return -EFAULT; | 2527 | return -EFAULT; |
2579 | 2528 | ||
2580 | return dev_ioctl(net, SIOCETHTOOL, ifr); | 2529 | return dev_ioctl(net, SIOCETHTOOL, ifr); |
2581 | } | 2530 | } |
2582 | 2531 | ||
2583 | static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32) | 2532 | static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32) |
2584 | { | 2533 | { |
2585 | void __user *uptr; | 2534 | void __user *uptr; |
2586 | compat_uptr_t uptr32; | 2535 | compat_uptr_t uptr32; |
2587 | struct ifreq __user *uifr; | 2536 | struct ifreq __user *uifr; |
2588 | 2537 | ||
2589 | uifr = compat_alloc_user_space(sizeof (*uifr)); | 2538 | uifr = compat_alloc_user_space(sizeof (*uifr)); |
2590 | if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) | 2539 | if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) |
2591 | return -EFAULT; | 2540 | return -EFAULT; |
2592 | 2541 | ||
2593 | if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu)) | 2542 | if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu)) |
2594 | return -EFAULT; | 2543 | return -EFAULT; |
2595 | 2544 | ||
2596 | uptr = compat_ptr(uptr32); | 2545 | uptr = compat_ptr(uptr32); |
2597 | 2546 | ||
2598 | if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc)) | 2547 | if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc)) |
2599 | return -EFAULT; | 2548 | return -EFAULT; |
2600 | 2549 | ||
2601 | return dev_ioctl(net, SIOCWANDEV, uifr); | 2550 | return dev_ioctl(net, SIOCWANDEV, uifr); |
2602 | } | 2551 | } |
2603 | 2552 | ||
2604 | static int bond_ioctl(struct net *net, unsigned int cmd, | 2553 | static int bond_ioctl(struct net *net, unsigned int cmd, |
2605 | struct compat_ifreq __user *ifr32) | 2554 | struct compat_ifreq __user *ifr32) |
2606 | { | 2555 | { |
2607 | struct ifreq kifr; | 2556 | struct ifreq kifr; |
2608 | struct ifreq __user *uifr; | 2557 | struct ifreq __user *uifr; |
2609 | mm_segment_t old_fs; | 2558 | mm_segment_t old_fs; |
2610 | int err; | 2559 | int err; |
2611 | u32 data; | 2560 | u32 data; |
2612 | void __user *datap; | 2561 | void __user *datap; |
2613 | 2562 | ||
2614 | switch (cmd) { | 2563 | switch (cmd) { |
2615 | case SIOCBONDENSLAVE: | 2564 | case SIOCBONDENSLAVE: |
2616 | case SIOCBONDRELEASE: | 2565 | case SIOCBONDRELEASE: |
2617 | case SIOCBONDSETHWADDR: | 2566 | case SIOCBONDSETHWADDR: |
2618 | case SIOCBONDCHANGEACTIVE: | 2567 | case SIOCBONDCHANGEACTIVE: |
2619 | if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq))) | 2568 | if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq))) |
2620 | return -EFAULT; | 2569 | return -EFAULT; |
2621 | 2570 | ||
2622 | old_fs = get_fs(); | 2571 | old_fs = get_fs(); |
2623 | set_fs (KERNEL_DS); | 2572 | set_fs (KERNEL_DS); |
2624 | err = dev_ioctl(net, cmd, &kifr); | 2573 | err = dev_ioctl(net, cmd, &kifr); |
2625 | set_fs (old_fs); | 2574 | set_fs (old_fs); |
2626 | 2575 | ||
2627 | return err; | 2576 | return err; |
2628 | case SIOCBONDSLAVEINFOQUERY: | 2577 | case SIOCBONDSLAVEINFOQUERY: |
2629 | case SIOCBONDINFOQUERY: | 2578 | case SIOCBONDINFOQUERY: |
2630 | uifr = compat_alloc_user_space(sizeof(*uifr)); | 2579 | uifr = compat_alloc_user_space(sizeof(*uifr)); |
2631 | if (copy_in_user(&uifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) | 2580 | if (copy_in_user(&uifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) |
2632 | return -EFAULT; | 2581 | return -EFAULT; |
2633 | 2582 | ||
2634 | if (get_user(data, &ifr32->ifr_ifru.ifru_data)) | 2583 | if (get_user(data, &ifr32->ifr_ifru.ifru_data)) |
2635 | return -EFAULT; | 2584 | return -EFAULT; |
2636 | 2585 | ||
2637 | datap = compat_ptr(data); | 2586 | datap = compat_ptr(data); |
2638 | if (put_user(datap, &uifr->ifr_ifru.ifru_data)) | 2587 | if (put_user(datap, &uifr->ifr_ifru.ifru_data)) |
2639 | return -EFAULT; | 2588 | return -EFAULT; |
2640 | 2589 | ||
2641 | return dev_ioctl(net, cmd, uifr); | 2590 | return dev_ioctl(net, cmd, uifr); |
2642 | default: | 2591 | default: |
2643 | return -EINVAL; | 2592 | return -EINVAL; |
2644 | }; | 2593 | }; |
2645 | } | 2594 | } |
2646 | 2595 | ||
2647 | static int siocdevprivate_ioctl(struct net *net, unsigned int cmd, | 2596 | static int siocdevprivate_ioctl(struct net *net, unsigned int cmd, |
2648 | struct compat_ifreq __user *u_ifreq32) | 2597 | struct compat_ifreq __user *u_ifreq32) |
2649 | { | 2598 | { |
2650 | struct ifreq __user *u_ifreq64; | 2599 | struct ifreq __user *u_ifreq64; |
2651 | char tmp_buf[IFNAMSIZ]; | 2600 | char tmp_buf[IFNAMSIZ]; |
2652 | void __user *data64; | 2601 | void __user *data64; |
2653 | u32 data32; | 2602 | u32 data32; |
2654 | 2603 | ||
2655 | if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]), | 2604 | if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]), |
2656 | IFNAMSIZ)) | 2605 | IFNAMSIZ)) |
2657 | return -EFAULT; | 2606 | return -EFAULT; |
2658 | if (__get_user(data32, &u_ifreq32->ifr_ifru.ifru_data)) | 2607 | if (__get_user(data32, &u_ifreq32->ifr_ifru.ifru_data)) |
2659 | return -EFAULT; | 2608 | return -EFAULT; |
2660 | data64 = compat_ptr(data32); | 2609 | data64 = compat_ptr(data32); |
2661 | 2610 | ||
2662 | u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64)); | 2611 | u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64)); |
2663 | 2612 | ||
2664 | /* Don't check these user accesses, just let that get trapped | 2613 | /* Don't check these user accesses, just let that get trapped |
2665 | * in the ioctl handler instead. | 2614 | * in the ioctl handler instead. |
2666 | */ | 2615 | */ |
2667 | if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0], | 2616 | if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0], |
2668 | IFNAMSIZ)) | 2617 | IFNAMSIZ)) |
2669 | return -EFAULT; | 2618 | return -EFAULT; |
2670 | if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data)) | 2619 | if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data)) |
2671 | return -EFAULT; | 2620 | return -EFAULT; |
2672 | 2621 | ||
2673 | return dev_ioctl(net, cmd, u_ifreq64); | 2622 | return dev_ioctl(net, cmd, u_ifreq64); |
2674 | } | 2623 | } |
2675 | 2624 | ||
2676 | static int dev_ifsioc(struct net *net, struct socket *sock, | 2625 | static int dev_ifsioc(struct net *net, struct socket *sock, |
2677 | unsigned int cmd, struct compat_ifreq __user *uifr32) | 2626 | unsigned int cmd, struct compat_ifreq __user *uifr32) |
2678 | { | 2627 | { |
2679 | struct ifreq __user *uifr; | 2628 | struct ifreq __user *uifr; |
2680 | int err; | 2629 | int err; |
2681 | 2630 | ||
2682 | uifr = compat_alloc_user_space(sizeof(*uifr)); | 2631 | uifr = compat_alloc_user_space(sizeof(*uifr)); |
2683 | if (copy_in_user(uifr, uifr32, sizeof(*uifr32))) | 2632 | if (copy_in_user(uifr, uifr32, sizeof(*uifr32))) |
2684 | return -EFAULT; | 2633 | return -EFAULT; |
2685 | 2634 | ||
2686 | err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr); | 2635 | err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr); |
2687 | 2636 | ||
2688 | if (!err) { | 2637 | if (!err) { |
2689 | switch (cmd) { | 2638 | switch (cmd) { |
2690 | case SIOCGIFFLAGS: | 2639 | case SIOCGIFFLAGS: |
2691 | case SIOCGIFMETRIC: | 2640 | case SIOCGIFMETRIC: |
2692 | case SIOCGIFMTU: | 2641 | case SIOCGIFMTU: |
2693 | case SIOCGIFMEM: | 2642 | case SIOCGIFMEM: |
2694 | case SIOCGIFHWADDR: | 2643 | case SIOCGIFHWADDR: |
2695 | case SIOCGIFINDEX: | 2644 | case SIOCGIFINDEX: |
2696 | case SIOCGIFADDR: | 2645 | case SIOCGIFADDR: |
2697 | case SIOCGIFBRDADDR: | 2646 | case SIOCGIFBRDADDR: |
2698 | case SIOCGIFDSTADDR: | 2647 | case SIOCGIFDSTADDR: |
2699 | case SIOCGIFNETMASK: | 2648 | case SIOCGIFNETMASK: |
2700 | case SIOCGIFPFLAGS: | 2649 | case SIOCGIFPFLAGS: |
2701 | case SIOCGIFTXQLEN: | 2650 | case SIOCGIFTXQLEN: |
2702 | case SIOCGMIIPHY: | 2651 | case SIOCGMIIPHY: |
2703 | case SIOCGMIIREG: | 2652 | case SIOCGMIIREG: |
2704 | if (copy_in_user(uifr32, uifr, sizeof(*uifr32))) | 2653 | if (copy_in_user(uifr32, uifr, sizeof(*uifr32))) |
2705 | err = -EFAULT; | 2654 | err = -EFAULT; |
2706 | break; | 2655 | break; |
2707 | } | 2656 | } |
2708 | } | 2657 | } |
2709 | return err; | 2658 | return err; |
2710 | } | 2659 | } |
2711 | 2660 | ||
2712 | static int compat_sioc_ifmap(struct net *net, unsigned int cmd, | 2661 | static int compat_sioc_ifmap(struct net *net, unsigned int cmd, |
2713 | struct compat_ifreq __user *uifr32) | 2662 | struct compat_ifreq __user *uifr32) |
2714 | { | 2663 | { |
2715 | struct ifreq ifr; | 2664 | struct ifreq ifr; |
2716 | struct compat_ifmap __user *uifmap32; | 2665 | struct compat_ifmap __user *uifmap32; |
2717 | mm_segment_t old_fs; | 2666 | mm_segment_t old_fs; |
2718 | int err; | 2667 | int err; |
2719 | 2668 | ||
2720 | uifmap32 = &uifr32->ifr_ifru.ifru_map; | 2669 | uifmap32 = &uifr32->ifr_ifru.ifru_map; |
2721 | err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name)); | 2670 | err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name)); |
2722 | err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); | 2671 | err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); |
2723 | err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); | 2672 | err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); |
2724 | err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); | 2673 | err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); |
2725 | err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq); | 2674 | err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq); |
2726 | err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma); | 2675 | err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma); |
2727 | err |= __get_user(ifr.ifr_map.port, &uifmap32->port); | 2676 | err |= __get_user(ifr.ifr_map.port, &uifmap32->port); |
2728 | if (err) | 2677 | if (err) |
2729 | return -EFAULT; | 2678 | return -EFAULT; |
2730 | 2679 | ||
2731 | old_fs = get_fs(); | 2680 | old_fs = get_fs(); |
2732 | set_fs (KERNEL_DS); | 2681 | set_fs (KERNEL_DS); |
2733 | err = dev_ioctl(net, cmd, (void __user *)&ifr); | 2682 | err = dev_ioctl(net, cmd, (void __user *)&ifr); |
2734 | set_fs (old_fs); | 2683 | set_fs (old_fs); |
2735 | 2684 | ||
2736 | if (cmd == SIOCGIFMAP && !err) { | 2685 | if (cmd == SIOCGIFMAP && !err) { |
2737 | err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name)); | 2686 | err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name)); |
2738 | err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); | 2687 | err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); |
2739 | err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); | 2688 | err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); |
2740 | err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); | 2689 | err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); |
2741 | err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq); | 2690 | err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq); |
2742 | err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma); | 2691 | err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma); |
2743 | err |= __put_user(ifr.ifr_map.port, &uifmap32->port); | 2692 | err |= __put_user(ifr.ifr_map.port, &uifmap32->port); |
2744 | if (err) | 2693 | if (err) |
2745 | err = -EFAULT; | 2694 | err = -EFAULT; |
2746 | } | 2695 | } |
2747 | return err; | 2696 | return err; |
2748 | } | 2697 | } |
2749 | 2698 | ||
2750 | static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uifr32) | 2699 | static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uifr32) |
2751 | { | 2700 | { |
2752 | void __user *uptr; | 2701 | void __user *uptr; |
2753 | compat_uptr_t uptr32; | 2702 | compat_uptr_t uptr32; |
2754 | struct ifreq __user *uifr; | 2703 | struct ifreq __user *uifr; |
2755 | 2704 | ||
2756 | uifr = compat_alloc_user_space(sizeof (*uifr)); | 2705 | uifr = compat_alloc_user_space(sizeof (*uifr)); |
2757 | if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) | 2706 | if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) |
2758 | return -EFAULT; | 2707 | return -EFAULT; |
2759 | 2708 | ||
2760 | if (get_user(uptr32, &uifr32->ifr_data)) | 2709 | if (get_user(uptr32, &uifr32->ifr_data)) |
2761 | return -EFAULT; | 2710 | return -EFAULT; |
2762 | 2711 | ||
2763 | uptr = compat_ptr(uptr32); | 2712 | uptr = compat_ptr(uptr32); |
2764 | 2713 | ||
2765 | if (put_user(uptr, &uifr->ifr_data)) | 2714 | if (put_user(uptr, &uifr->ifr_data)) |
2766 | return -EFAULT; | 2715 | return -EFAULT; |
2767 | 2716 | ||
2768 | return dev_ioctl(net, SIOCSHWTSTAMP, uifr); | 2717 | return dev_ioctl(net, SIOCSHWTSTAMP, uifr); |
2769 | } | 2718 | } |
2770 | 2719 | ||
2771 | struct rtentry32 { | 2720 | struct rtentry32 { |
2772 | u32 rt_pad1; | 2721 | u32 rt_pad1; |
2773 | struct sockaddr rt_dst; /* target address */ | 2722 | struct sockaddr rt_dst; /* target address */ |
2774 | struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */ | 2723 | struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */ |
2775 | struct sockaddr rt_genmask; /* target network mask (IP) */ | 2724 | struct sockaddr rt_genmask; /* target network mask (IP) */ |
2776 | unsigned short rt_flags; | 2725 | unsigned short rt_flags; |
2777 | short rt_pad2; | 2726 | short rt_pad2; |
2778 | u32 rt_pad3; | 2727 | u32 rt_pad3; |
2779 | unsigned char rt_tos; | 2728 | unsigned char rt_tos; |
2780 | unsigned char rt_class; | 2729 | unsigned char rt_class; |
2781 | short rt_pad4; | 2730 | short rt_pad4; |
2782 | short rt_metric; /* +1 for binary compatibility! */ | 2731 | short rt_metric; /* +1 for binary compatibility! */ |
2783 | /* char * */ u32 rt_dev; /* forcing the device at add */ | 2732 | /* char * */ u32 rt_dev; /* forcing the device at add */ |
2784 | u32 rt_mtu; /* per route MTU/Window */ | 2733 | u32 rt_mtu; /* per route MTU/Window */ |
2785 | u32 rt_window; /* Window clamping */ | 2734 | u32 rt_window; /* Window clamping */ |
2786 | unsigned short rt_irtt; /* Initial RTT */ | 2735 | unsigned short rt_irtt; /* Initial RTT */ |
2787 | }; | 2736 | }; |
2788 | 2737 | ||
2789 | struct in6_rtmsg32 { | 2738 | struct in6_rtmsg32 { |
2790 | struct in6_addr rtmsg_dst; | 2739 | struct in6_addr rtmsg_dst; |
2791 | struct in6_addr rtmsg_src; | 2740 | struct in6_addr rtmsg_src; |
2792 | struct in6_addr rtmsg_gateway; | 2741 | struct in6_addr rtmsg_gateway; |
2793 | u32 rtmsg_type; | 2742 | u32 rtmsg_type; |
2794 | u16 rtmsg_dst_len; | 2743 | u16 rtmsg_dst_len; |
2795 | u16 rtmsg_src_len; | 2744 | u16 rtmsg_src_len; |
2796 | u32 rtmsg_metric; | 2745 | u32 rtmsg_metric; |
2797 | u32 rtmsg_info; | 2746 | u32 rtmsg_info; |
2798 | u32 rtmsg_flags; | 2747 | u32 rtmsg_flags; |
2799 | s32 rtmsg_ifindex; | 2748 | s32 rtmsg_ifindex; |
2800 | }; | 2749 | }; |
2801 | 2750 | ||
2802 | static int routing_ioctl(struct net *net, struct socket *sock, | 2751 | static int routing_ioctl(struct net *net, struct socket *sock, |
2803 | unsigned int cmd, void __user *argp) | 2752 | unsigned int cmd, void __user *argp) |
2804 | { | 2753 | { |
2805 | int ret; | 2754 | int ret; |
2806 | void *r = NULL; | 2755 | void *r = NULL; |
2807 | struct in6_rtmsg r6; | 2756 | struct in6_rtmsg r6; |
2808 | struct rtentry r4; | 2757 | struct rtentry r4; |
2809 | char devname[16]; | 2758 | char devname[16]; |
2810 | u32 rtdev; | 2759 | u32 rtdev; |
2811 | mm_segment_t old_fs = get_fs(); | 2760 | mm_segment_t old_fs = get_fs(); |
2812 | 2761 | ||
2813 | if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */ | 2762 | if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */ |
2814 | struct in6_rtmsg32 __user *ur6 = argp; | 2763 | struct in6_rtmsg32 __user *ur6 = argp; |
2815 | ret = copy_from_user (&r6.rtmsg_dst, &(ur6->rtmsg_dst), | 2764 | ret = copy_from_user (&r6.rtmsg_dst, &(ur6->rtmsg_dst), |
2816 | 3 * sizeof(struct in6_addr)); | 2765 | 3 * sizeof(struct in6_addr)); |
2817 | ret |= __get_user (r6.rtmsg_type, &(ur6->rtmsg_type)); | 2766 | ret |= __get_user (r6.rtmsg_type, &(ur6->rtmsg_type)); |
2818 | ret |= __get_user (r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len)); | 2767 | ret |= __get_user (r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len)); |
2819 | ret |= __get_user (r6.rtmsg_src_len, &(ur6->rtmsg_src_len)); | 2768 | ret |= __get_user (r6.rtmsg_src_len, &(ur6->rtmsg_src_len)); |
2820 | ret |= __get_user (r6.rtmsg_metric, &(ur6->rtmsg_metric)); | 2769 | ret |= __get_user (r6.rtmsg_metric, &(ur6->rtmsg_metric)); |
2821 | ret |= __get_user (r6.rtmsg_info, &(ur6->rtmsg_info)); | 2770 | ret |= __get_user (r6.rtmsg_info, &(ur6->rtmsg_info)); |
2822 | ret |= __get_user (r6.rtmsg_flags, &(ur6->rtmsg_flags)); | 2771 | ret |= __get_user (r6.rtmsg_flags, &(ur6->rtmsg_flags)); |
2823 | ret |= __get_user (r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex)); | 2772 | ret |= __get_user (r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex)); |
2824 | 2773 | ||
2825 | r = (void *) &r6; | 2774 | r = (void *) &r6; |
2826 | } else { /* ipv4 */ | 2775 | } else { /* ipv4 */ |
2827 | struct rtentry32 __user *ur4 = argp; | 2776 | struct rtentry32 __user *ur4 = argp; |
2828 | ret = copy_from_user (&r4.rt_dst, &(ur4->rt_dst), | 2777 | ret = copy_from_user (&r4.rt_dst, &(ur4->rt_dst), |
2829 | 3 * sizeof(struct sockaddr)); | 2778 | 3 * sizeof(struct sockaddr)); |
2830 | ret |= __get_user (r4.rt_flags, &(ur4->rt_flags)); | 2779 | ret |= __get_user (r4.rt_flags, &(ur4->rt_flags)); |
2831 | ret |= __get_user (r4.rt_metric, &(ur4->rt_metric)); | 2780 | ret |= __get_user (r4.rt_metric, &(ur4->rt_metric)); |
2832 | ret |= __get_user (r4.rt_mtu, &(ur4->rt_mtu)); | 2781 | ret |= __get_user (r4.rt_mtu, &(ur4->rt_mtu)); |
2833 | ret |= __get_user (r4.rt_window, &(ur4->rt_window)); | 2782 | ret |= __get_user (r4.rt_window, &(ur4->rt_window)); |
2834 | ret |= __get_user (r4.rt_irtt, &(ur4->rt_irtt)); | 2783 | ret |= __get_user (r4.rt_irtt, &(ur4->rt_irtt)); |
2835 | ret |= __get_user (rtdev, &(ur4->rt_dev)); | 2784 | ret |= __get_user (rtdev, &(ur4->rt_dev)); |
2836 | if (rtdev) { | 2785 | if (rtdev) { |
2837 | ret |= copy_from_user (devname, compat_ptr(rtdev), 15); | 2786 | ret |= copy_from_user (devname, compat_ptr(rtdev), 15); |
2838 | r4.rt_dev = devname; devname[15] = 0; | 2787 | r4.rt_dev = devname; devname[15] = 0; |
2839 | } else | 2788 | } else |
2840 | r4.rt_dev = NULL; | 2789 | r4.rt_dev = NULL; |
2841 | 2790 | ||
2842 | r = (void *) &r4; | 2791 | r = (void *) &r4; |
2843 | } | 2792 | } |
2844 | 2793 | ||
2845 | if (ret) { | 2794 | if (ret) { |
2846 | ret = -EFAULT; | 2795 | ret = -EFAULT; |
2847 | goto out; | 2796 | goto out; |
2848 | } | 2797 | } |
2849 | 2798 | ||
2850 | set_fs (KERNEL_DS); | 2799 | set_fs (KERNEL_DS); |
2851 | ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r); | 2800 | ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r); |
2852 | set_fs (old_fs); | 2801 | set_fs (old_fs); |
2853 | 2802 | ||
2854 | out: | 2803 | out: |
2855 | return ret; | 2804 | return ret; |
2856 | } | 2805 | } |
2857 | 2806 | ||
2858 | /* Since old style bridge ioctl's endup using SIOCDEVPRIVATE | 2807 | /* Since old style bridge ioctl's endup using SIOCDEVPRIVATE |
2859 | * for some operations; this forces use of the newer bridge-utils that | 2808 | * for some operations; this forces use of the newer bridge-utils that |
2860 | * use compatiable ioctls | 2809 | * use compatiable ioctls |
2861 | */ | 2810 | */ |
2862 | static int old_bridge_ioctl(compat_ulong_t __user *argp) | 2811 | static int old_bridge_ioctl(compat_ulong_t __user *argp) |
2863 | { | 2812 | { |
2864 | compat_ulong_t tmp; | 2813 | compat_ulong_t tmp; |
2865 | 2814 | ||
2866 | if (get_user(tmp, argp)) | 2815 | if (get_user(tmp, argp)) |
2867 | return -EFAULT; | 2816 | return -EFAULT; |
2868 | if (tmp == BRCTL_GET_VERSION) | 2817 | if (tmp == BRCTL_GET_VERSION) |
2869 | return BRCTL_VERSION + 1; | 2818 | return BRCTL_VERSION + 1; |
2870 | return -EINVAL; | 2819 | return -EINVAL; |
2871 | } | 2820 | } |
2872 | 2821 | ||
2873 | static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, | 2822 | static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, |
2874 | unsigned int cmd, unsigned long arg) | 2823 | unsigned int cmd, unsigned long arg) |
2875 | { | 2824 | { |
2876 | void __user *argp = compat_ptr(arg); | 2825 | void __user *argp = compat_ptr(arg); |
2877 | struct sock *sk = sock->sk; | 2826 | struct sock *sk = sock->sk; |
2878 | struct net *net = sock_net(sk); | 2827 | struct net *net = sock_net(sk); |
2879 | 2828 | ||
2880 | if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) | 2829 | if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) |
2881 | return siocdevprivate_ioctl(net, cmd, argp); | 2830 | return siocdevprivate_ioctl(net, cmd, argp); |
2882 | 2831 | ||
2883 | switch (cmd) { | 2832 | switch (cmd) { |
2884 | case SIOCSIFBR: | 2833 | case SIOCSIFBR: |
2885 | case SIOCGIFBR: | 2834 | case SIOCGIFBR: |
2886 | return old_bridge_ioctl(argp); | 2835 | return old_bridge_ioctl(argp); |
2887 | case SIOCGIFNAME: | 2836 | case SIOCGIFNAME: |
2888 | return dev_ifname32(net, argp); | 2837 | return dev_ifname32(net, argp); |
2889 | case SIOCGIFCONF: | 2838 | case SIOCGIFCONF: |
2890 | return dev_ifconf(net, argp); | 2839 | return dev_ifconf(net, argp); |
2891 | case SIOCETHTOOL: | 2840 | case SIOCETHTOOL: |
2892 | return ethtool_ioctl(net, argp); | 2841 | return ethtool_ioctl(net, argp); |
2893 | case SIOCWANDEV: | 2842 | case SIOCWANDEV: |
2894 | return compat_siocwandev(net, argp); | 2843 | return compat_siocwandev(net, argp); |
2895 | case SIOCGIFMAP: | 2844 | case SIOCGIFMAP: |
2896 | case SIOCSIFMAP: | 2845 | case SIOCSIFMAP: |
2897 | return compat_sioc_ifmap(net, cmd, argp); | 2846 | return compat_sioc_ifmap(net, cmd, argp); |
2898 | case SIOCBONDENSLAVE: | 2847 | case SIOCBONDENSLAVE: |
2899 | case SIOCBONDRELEASE: | 2848 | case SIOCBONDRELEASE: |
2900 | case SIOCBONDSETHWADDR: | 2849 | case SIOCBONDSETHWADDR: |
2901 | case SIOCBONDSLAVEINFOQUERY: | 2850 | case SIOCBONDSLAVEINFOQUERY: |
2902 | case SIOCBONDINFOQUERY: | 2851 | case SIOCBONDINFOQUERY: |
2903 | case SIOCBONDCHANGEACTIVE: | 2852 | case SIOCBONDCHANGEACTIVE: |
2904 | return bond_ioctl(net, cmd, argp); | 2853 | return bond_ioctl(net, cmd, argp); |
2905 | case SIOCADDRT: | 2854 | case SIOCADDRT: |
2906 | case SIOCDELRT: | 2855 | case SIOCDELRT: |
2907 | return routing_ioctl(net, sock, cmd, argp); | 2856 | return routing_ioctl(net, sock, cmd, argp); |
2908 | case SIOCGSTAMP: | 2857 | case SIOCGSTAMP: |
2909 | return do_siocgstamp(net, sock, cmd, argp); | 2858 | return do_siocgstamp(net, sock, cmd, argp); |
2910 | case SIOCGSTAMPNS: | 2859 | case SIOCGSTAMPNS: |
2911 | return do_siocgstampns(net, sock, cmd, argp); | 2860 | return do_siocgstampns(net, sock, cmd, argp); |
2912 | case SIOCSHWTSTAMP: | 2861 | case SIOCSHWTSTAMP: |
2913 | return compat_siocshwtstamp(net, argp); | 2862 | return compat_siocshwtstamp(net, argp); |
2914 | 2863 | ||
2915 | case FIOSETOWN: | 2864 | case FIOSETOWN: |
2916 | case SIOCSPGRP: | 2865 | case SIOCSPGRP: |
2917 | case FIOGETOWN: | 2866 | case FIOGETOWN: |
2918 | case SIOCGPGRP: | 2867 | case SIOCGPGRP: |
2919 | case SIOCBRADDBR: | 2868 | case SIOCBRADDBR: |
2920 | case SIOCBRDELBR: | 2869 | case SIOCBRDELBR: |
2921 | case SIOCGIFVLAN: | 2870 | case SIOCGIFVLAN: |
2922 | case SIOCSIFVLAN: | 2871 | case SIOCSIFVLAN: |
2923 | case SIOCADDDLCI: | 2872 | case SIOCADDDLCI: |
2924 | case SIOCDELDLCI: | 2873 | case SIOCDELDLCI: |
2925 | return sock_ioctl(file, cmd, arg); | 2874 | return sock_ioctl(file, cmd, arg); |
2926 | 2875 | ||
2927 | case SIOCGIFFLAGS: | 2876 | case SIOCGIFFLAGS: |
2928 | case SIOCSIFFLAGS: | 2877 | case SIOCSIFFLAGS: |
2929 | case SIOCGIFMETRIC: | 2878 | case SIOCGIFMETRIC: |
2930 | case SIOCSIFMETRIC: | 2879 | case SIOCSIFMETRIC: |
2931 | case SIOCGIFMTU: | 2880 | case SIOCGIFMTU: |
2932 | case SIOCSIFMTU: | 2881 | case SIOCSIFMTU: |
2933 | case SIOCGIFMEM: | 2882 | case SIOCGIFMEM: |
2934 | case SIOCSIFMEM: | 2883 | case SIOCSIFMEM: |
2935 | case SIOCGIFHWADDR: | 2884 | case SIOCGIFHWADDR: |
2936 | case SIOCSIFHWADDR: | 2885 | case SIOCSIFHWADDR: |
2937 | case SIOCADDMULTI: | 2886 | case SIOCADDMULTI: |
2938 | case SIOCDELMULTI: | 2887 | case SIOCDELMULTI: |
2939 | case SIOCGIFINDEX: | 2888 | case SIOCGIFINDEX: |
2940 | case SIOCGIFADDR: | 2889 | case SIOCGIFADDR: |
2941 | case SIOCSIFADDR: | 2890 | case SIOCSIFADDR: |
2942 | case SIOCSIFHWBROADCAST: | 2891 | case SIOCSIFHWBROADCAST: |
2943 | case SIOCDIFADDR: | 2892 | case SIOCDIFADDR: |
2944 | case SIOCGIFBRDADDR: | 2893 | case SIOCGIFBRDADDR: |
2945 | case SIOCSIFBRDADDR: | 2894 | case SIOCSIFBRDADDR: |
2946 | case SIOCGIFDSTADDR: | 2895 | case SIOCGIFDSTADDR: |
2947 | case SIOCSIFDSTADDR: | 2896 | case SIOCSIFDSTADDR: |
2948 | case SIOCGIFNETMASK: | 2897 | case SIOCGIFNETMASK: |
2949 | case SIOCSIFNETMASK: | 2898 | case SIOCSIFNETMASK: |
2950 | case SIOCSIFPFLAGS: | 2899 | case SIOCSIFPFLAGS: |
2951 | case SIOCGIFPFLAGS: | 2900 | case SIOCGIFPFLAGS: |
2952 | case SIOCGIFTXQLEN: | 2901 | case SIOCGIFTXQLEN: |
2953 | case SIOCSIFTXQLEN: | 2902 | case SIOCSIFTXQLEN: |
2954 | case SIOCBRADDIF: | 2903 | case SIOCBRADDIF: |
2955 | case SIOCBRDELIF: | 2904 | case SIOCBRDELIF: |
2956 | case SIOCSIFNAME: | 2905 | case SIOCSIFNAME: |
2957 | case SIOCGMIIPHY: | 2906 | case SIOCGMIIPHY: |
2958 | case SIOCGMIIREG: | 2907 | case SIOCGMIIREG: |
2959 | case SIOCSMIIREG: | 2908 | case SIOCSMIIREG: |
2960 | return dev_ifsioc(net, sock, cmd, argp); | 2909 | return dev_ifsioc(net, sock, cmd, argp); |
2961 | 2910 | ||
2962 | case SIOCSARP: | 2911 | case SIOCSARP: |
2963 | case SIOCGARP: | 2912 | case SIOCGARP: |
2964 | case SIOCDARP: | 2913 | case SIOCDARP: |
2965 | case SIOCATMARK: | 2914 | case SIOCATMARK: |
2966 | return sock_do_ioctl(net, sock, cmd, arg); | 2915 | return sock_do_ioctl(net, sock, cmd, arg); |
2967 | } | 2916 | } |
2968 | 2917 | ||
2969 | /* Prevent warning from compat_sys_ioctl, these always | 2918 | /* Prevent warning from compat_sys_ioctl, these always |
2970 | * result in -EINVAL in the native case anyway. */ | 2919 | * result in -EINVAL in the native case anyway. */ |
2971 | switch (cmd) { | 2920 | switch (cmd) { |
2972 | case SIOCRTMSG: | 2921 | case SIOCRTMSG: |
2973 | case SIOCGIFCOUNT: | 2922 | case SIOCGIFCOUNT: |
2974 | case SIOCSRARP: | 2923 | case SIOCSRARP: |
2975 | case SIOCGRARP: | 2924 | case SIOCGRARP: |
2976 | case SIOCDRARP: | 2925 | case SIOCDRARP: |
2977 | case SIOCSIFLINK: | 2926 | case SIOCSIFLINK: |
2978 | case SIOCGIFSLAVE: | 2927 | case SIOCGIFSLAVE: |
2979 | case SIOCSIFSLAVE: | 2928 | case SIOCSIFSLAVE: |
2980 | return -EINVAL; | 2929 | return -EINVAL; |
2981 | } | 2930 | } |
2982 | 2931 | ||
2983 | return -ENOIOCTLCMD; | 2932 | return -ENOIOCTLCMD; |
2984 | } | 2933 | } |
2985 | 2934 | ||
2986 | static long compat_sock_ioctl(struct file *file, unsigned cmd, | 2935 | static long compat_sock_ioctl(struct file *file, unsigned cmd, |
2987 | unsigned long arg) | 2936 | unsigned long arg) |
2988 | { | 2937 | { |
2989 | struct socket *sock = file->private_data; | 2938 | struct socket *sock = file->private_data; |
2990 | int ret = -ENOIOCTLCMD; | 2939 | int ret = -ENOIOCTLCMD; |
2991 | struct sock *sk; | 2940 | struct sock *sk; |
2992 | struct net *net; | 2941 | struct net *net; |
2993 | 2942 | ||
2994 | sk = sock->sk; | 2943 | sk = sock->sk; |
2995 | net = sock_net(sk); | 2944 | net = sock_net(sk); |
2996 | 2945 | ||
2997 | if (sock->ops->compat_ioctl) | 2946 | if (sock->ops->compat_ioctl) |
2998 | ret = sock->ops->compat_ioctl(sock, cmd, arg); | 2947 | ret = sock->ops->compat_ioctl(sock, cmd, arg); |
2999 | 2948 | ||
3000 | if (ret == -ENOIOCTLCMD && | 2949 | if (ret == -ENOIOCTLCMD && |
3001 | (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)) | 2950 | (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)) |
3002 | ret = compat_wext_handle_ioctl(net, cmd, arg); | 2951 | ret = compat_wext_handle_ioctl(net, cmd, arg); |
3003 | 2952 | ||
3004 | if (ret == -ENOIOCTLCMD) | 2953 | if (ret == -ENOIOCTLCMD) |
3005 | ret = compat_sock_ioctl_trans(file, sock, cmd, arg); | 2954 | ret = compat_sock_ioctl_trans(file, sock, cmd, arg); |
3006 | 2955 | ||
3007 | return ret; | 2956 | return ret; |
3008 | } | 2957 | } |
3009 | #endif | 2958 | #endif |
3010 | 2959 | ||
3011 | int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) | 2960 | int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) |
3012 | { | 2961 | { |
3013 | return sock->ops->bind(sock, addr, addrlen); | 2962 | return sock->ops->bind(sock, addr, addrlen); |
3014 | } | 2963 | } |
3015 | 2964 | ||
3016 | int kernel_listen(struct socket *sock, int backlog) | 2965 | int kernel_listen(struct socket *sock, int backlog) |
3017 | { | 2966 | { |
3018 | return sock->ops->listen(sock, backlog); | 2967 | return sock->ops->listen(sock, backlog); |
3019 | } | 2968 | } |
3020 | 2969 | ||
3021 | int kernel_accept(struct socket *sock, struct socket **newsock, int flags) | 2970 | int kernel_accept(struct socket *sock, struct socket **newsock, int flags) |
3022 | { | 2971 | { |
3023 | struct sock *sk = sock->sk; | 2972 | struct sock *sk = sock->sk; |
3024 | int err; | 2973 | int err; |
3025 | 2974 | ||
3026 | err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol, | 2975 | err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol, |
3027 | newsock); | 2976 | newsock); |
3028 | if (err < 0) | 2977 | if (err < 0) |
3029 | goto done; | 2978 | goto done; |
3030 | 2979 | ||
3031 | err = sock->ops->accept(sock, *newsock, flags); | 2980 | err = sock->ops->accept(sock, *newsock, flags); |
3032 | if (err < 0) { | 2981 | if (err < 0) { |
3033 | sock_release(*newsock); | 2982 | sock_release(*newsock); |
3034 | *newsock = NULL; | 2983 | *newsock = NULL; |
3035 | goto done; | 2984 | goto done; |
3036 | } | 2985 | } |
3037 | 2986 | ||
3038 | (*newsock)->ops = sock->ops; | 2987 | (*newsock)->ops = sock->ops; |
3039 | __module_get((*newsock)->ops->owner); | 2988 | __module_get((*newsock)->ops->owner); |
3040 | 2989 | ||
3041 | done: | 2990 | done: |
3042 | return err; | 2991 | return err; |
3043 | } | 2992 | } |
3044 | 2993 | ||
3045 | int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, | 2994 | int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, |
3046 | int flags) | 2995 | int flags) |
3047 | { | 2996 | { |
3048 | return sock->ops->connect(sock, addr, addrlen, flags); | 2997 | return sock->ops->connect(sock, addr, addrlen, flags); |
3049 | } | 2998 | } |
3050 | 2999 | ||
3051 | int kernel_getsockname(struct socket *sock, struct sockaddr *addr, | 3000 | int kernel_getsockname(struct socket *sock, struct sockaddr *addr, |
3052 | int *addrlen) | 3001 | int *addrlen) |
3053 | { | 3002 | { |
3054 | return sock->ops->getname(sock, addr, addrlen, 0); | 3003 | return sock->ops->getname(sock, addr, addrlen, 0); |
3055 | } | 3004 | } |
3056 | 3005 | ||
3057 | int kernel_getpeername(struct socket *sock, struct sockaddr *addr, | 3006 | int kernel_getpeername(struct socket *sock, struct sockaddr *addr, |
3058 | int *addrlen) | 3007 | int *addrlen) |
3059 | { | 3008 | { |
3060 | return sock->ops->getname(sock, addr, addrlen, 1); | 3009 | return sock->ops->getname(sock, addr, addrlen, 1); |
3061 | } | 3010 | } |
3062 | 3011 | ||
3063 | int kernel_getsockopt(struct socket *sock, int level, int optname, | 3012 | int kernel_getsockopt(struct socket *sock, int level, int optname, |
3064 | char *optval, int *optlen) | 3013 | char *optval, int *optlen) |
3065 | { | 3014 | { |
3066 | mm_segment_t oldfs = get_fs(); | 3015 | mm_segment_t oldfs = get_fs(); |
3067 | int err; | 3016 | int err; |
3068 | 3017 | ||
3069 | set_fs(KERNEL_DS); | 3018 | set_fs(KERNEL_DS); |
3070 | if (level == SOL_SOCKET) | 3019 | if (level == SOL_SOCKET) |
3071 | err = sock_getsockopt(sock, level, optname, optval, optlen); | 3020 | err = sock_getsockopt(sock, level, optname, optval, optlen); |
3072 | else | 3021 | else |
3073 | err = sock->ops->getsockopt(sock, level, optname, optval, | 3022 | err = sock->ops->getsockopt(sock, level, optname, optval, |
3074 | optlen); | 3023 | optlen); |
3075 | set_fs(oldfs); | 3024 | set_fs(oldfs); |
3076 | return err; | 3025 | return err; |
3077 | } | 3026 | } |
3078 | 3027 | ||
3079 | int kernel_setsockopt(struct socket *sock, int level, int optname, | 3028 | int kernel_setsockopt(struct socket *sock, int level, int optname, |
3080 | char *optval, unsigned int optlen) | 3029 | char *optval, unsigned int optlen) |
3081 | { | 3030 | { |
3082 | mm_segment_t oldfs = get_fs(); | 3031 | mm_segment_t oldfs = get_fs(); |
3083 | int err; | 3032 | int err; |
3084 | 3033 | ||
3085 | set_fs(KERNEL_DS); | 3034 | set_fs(KERNEL_DS); |
3086 | if (level == SOL_SOCKET) | 3035 | if (level == SOL_SOCKET) |
3087 | err = sock_setsockopt(sock, level, optname, optval, optlen); | 3036 | err = sock_setsockopt(sock, level, optname, optval, optlen); |
3088 | else | 3037 | else |
3089 | err = sock->ops->setsockopt(sock, level, optname, optval, | 3038 | err = sock->ops->setsockopt(sock, level, optname, optval, |
3090 | optlen); | 3039 | optlen); |
3091 | set_fs(oldfs); | 3040 | set_fs(oldfs); |
3092 | return err; | 3041 | return err; |
3093 | } | 3042 | } |
3094 | 3043 | ||
3095 | int kernel_sendpage(struct socket *sock, struct page *page, int offset, | 3044 | int kernel_sendpage(struct socket *sock, struct page *page, int offset, |
3096 | size_t size, int flags) | 3045 | size_t size, int flags) |
3097 | { | 3046 | { |
3098 | if (sock->ops->sendpage) | 3047 | if (sock->ops->sendpage) |
3099 | return sock->ops->sendpage(sock, page, offset, size, flags); | 3048 | return sock->ops->sendpage(sock, page, offset, size, flags); |
3100 | 3049 | ||
3101 | return sock_no_sendpage(sock, page, offset, size, flags); | 3050 | return sock_no_sendpage(sock, page, offset, size, flags); |
3102 | } | 3051 | } |
3103 | 3052 | ||
3104 | int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) | 3053 | int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) |
3105 | { | 3054 | { |
3106 | mm_segment_t oldfs = get_fs(); | 3055 | mm_segment_t oldfs = get_fs(); |
3107 | int err; | 3056 | int err; |
3108 | 3057 | ||
3109 | set_fs(KERNEL_DS); | 3058 | set_fs(KERNEL_DS); |
3110 | err = sock->ops->ioctl(sock, cmd, arg); | 3059 | err = sock->ops->ioctl(sock, cmd, arg); |
3111 | set_fs(oldfs); | 3060 | set_fs(oldfs); |
3112 | 3061 | ||
3113 | return err; | 3062 | return err; |
3114 | } | 3063 | } |
3115 | 3064 | ||
3116 | int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) | 3065 | int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) |
3117 | { | 3066 | { |
3118 | return sock->ops->shutdown(sock, how); | 3067 | return sock->ops->shutdown(sock, how); |
3119 | } | 3068 | } |
3120 | 3069 | ||
/* Symbols exported from this file: socket creation, registration and I/O. */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* Symbols exported from this file: the kernel_* socket helpers. */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);
EXPORT_SYMBOL(kernel_sock_shutdown);
3145 | 3094 |