Commit 625bba662c17e917e164dc37f61aebbc49987ed6
Exists in
master
and in
13 other branches
Merge tag 'locks-v3.15-2' of git://git.samba.org/jlayton/linux
Pull file locking fixes from Jeff Layton:

"File locking related bugfixes for v3.15 (pile #2)

 - fix for a long-standing bug in __break_lease that can cause soft lockups
 - renaming of file-private locks to "open file description" locks, and the command macros to more visually distinct names

The fix for __break_lease is also in the pile of patches for which Bruce sent a pull request, but I assume that your merge procedure will handle that correctly.

For the other patches, I don't like the fact that we need to rename this stuff at this late stage, but it should be settled now (hopefully)"

* tag 'locks-v3.15-2' of git://git.samba.org/jlayton/linux:
  locks: rename FL_FILE_PVT and IS_FILE_PVT to use "*_OFDLCK" instead
  locks: rename file-private locks to "open file description locks"
  locks: allow __break_lease to sleep even when break_time is 0
Showing 7 changed files Inline Diff
arch/arm/kernel/sys_oabi-compat.c
1 | /* | 1 | /* |
2 | * arch/arm/kernel/sys_oabi-compat.c | 2 | * arch/arm/kernel/sys_oabi-compat.c |
3 | * | 3 | * |
4 | * Compatibility wrappers for syscalls that are used from | 4 | * Compatibility wrappers for syscalls that are used from |
5 | * old ABI user space binaries with an EABI kernel. | 5 | * old ABI user space binaries with an EABI kernel. |
6 | * | 6 | * |
7 | * Author: Nicolas Pitre | 7 | * Author: Nicolas Pitre |
8 | * Created: Oct 7, 2005 | 8 | * Created: Oct 7, 2005 |
9 | * Copyright: MontaVista Software, Inc. | 9 | * Copyright: MontaVista Software, Inc. |
10 | * | 10 | * |
11 | * This program is free software; you can redistribute it and/or modify | 11 | * This program is free software; you can redistribute it and/or modify |
12 | * it under the terms of the GNU General Public License version 2 as | 12 | * it under the terms of the GNU General Public License version 2 as |
13 | * published by the Free Software Foundation. | 13 | * published by the Free Software Foundation. |
14 | */ | 14 | */ |
15 | 15 | ||
16 | /* | 16 | /* |
17 | * The legacy ABI and the new ARM EABI have different rules making some | 17 | * The legacy ABI and the new ARM EABI have different rules making some |
18 | * syscalls incompatible especially with structure arguments. | 18 | * syscalls incompatible especially with structure arguments. |
19 | * Most notably, Eabi says 64-bit members should be 64-bit aligned instead of | 19 | * Most notably, Eabi says 64-bit members should be 64-bit aligned instead of |
20 | * simply word aligned. EABI also pads structures to the size of the largest | 20 | * simply word aligned. EABI also pads structures to the size of the largest |
21 | * member it contains instead of the invariant 32-bit. | 21 | * member it contains instead of the invariant 32-bit. |
22 | * | 22 | * |
23 | * The following syscalls are affected: | 23 | * The following syscalls are affected: |
24 | * | 24 | * |
25 | * sys_stat64: | 25 | * sys_stat64: |
26 | * sys_lstat64: | 26 | * sys_lstat64: |
27 | * sys_fstat64: | 27 | * sys_fstat64: |
28 | * sys_fstatat64: | 28 | * sys_fstatat64: |
29 | * | 29 | * |
30 | * struct stat64 has different sizes and some members are shifted | 30 | * struct stat64 has different sizes and some members are shifted |
31 | * Compatibility wrappers are needed for them and provided below. | 31 | * Compatibility wrappers are needed for them and provided below. |
32 | * | 32 | * |
33 | * sys_fcntl64: | 33 | * sys_fcntl64: |
34 | * | 34 | * |
35 | * struct flock64 has different sizes and some members are shifted | 35 | * struct flock64 has different sizes and some members are shifted |
36 | * A compatibility wrapper is needed and provided below. | 36 | * A compatibility wrapper is needed and provided below. |
37 | * | 37 | * |
38 | * sys_statfs64: | 38 | * sys_statfs64: |
39 | * sys_fstatfs64: | 39 | * sys_fstatfs64: |
40 | * | 40 | * |
41 | * struct statfs64 has extra padding with EABI growing its size from | 41 | * struct statfs64 has extra padding with EABI growing its size from |
42 | * 84 to 88. This struct is now __attribute__((packed,aligned(4))) | 42 | * 84 to 88. This struct is now __attribute__((packed,aligned(4))) |
43 | * with a small assembly wrapper to force the sz argument to 84 if it is 88 | 43 | * with a small assembly wrapper to force the sz argument to 84 if it is 88 |
44 | * to avoid copying the extra padding over user space unexpecting it. | 44 | * to avoid copying the extra padding over user space unexpecting it. |
45 | * | 45 | * |
46 | * sys_newuname: | 46 | * sys_newuname: |
47 | * | 47 | * |
48 | * struct new_utsname has no padding with EABI. No problem there. | 48 | * struct new_utsname has no padding with EABI. No problem there. |
49 | * | 49 | * |
50 | * sys_epoll_ctl: | 50 | * sys_epoll_ctl: |
51 | * sys_epoll_wait: | 51 | * sys_epoll_wait: |
52 | * | 52 | * |
53 | * struct epoll_event has its second member shifted also affecting the | 53 | * struct epoll_event has its second member shifted also affecting the |
54 | * structure size. Compatibility wrappers are needed and provided below. | 54 | * structure size. Compatibility wrappers are needed and provided below. |
55 | * | 55 | * |
56 | * sys_ipc: | 56 | * sys_ipc: |
57 | * sys_semop: | 57 | * sys_semop: |
58 | * sys_semtimedop: | 58 | * sys_semtimedop: |
59 | * | 59 | * |
60 | * struct sembuf loses its padding with EABI. Since arrays of them are | 60 | * struct sembuf loses its padding with EABI. Since arrays of them are |
61 | * used they have to be copied to remove the padding. Compatibility wrappers | 61 | * used they have to be copied to remove the padding. Compatibility wrappers |
62 | * provided below. | 62 | * provided below. |
63 | * | 63 | * |
64 | * sys_bind: | 64 | * sys_bind: |
65 | * sys_connect: | 65 | * sys_connect: |
66 | * sys_sendmsg: | 66 | * sys_sendmsg: |
67 | * sys_sendto: | 67 | * sys_sendto: |
68 | * sys_socketcall: | 68 | * sys_socketcall: |
69 | * | 69 | * |
70 | * struct sockaddr_un loses its padding with EABI. Since the size of the | 70 | * struct sockaddr_un loses its padding with EABI. Since the size of the |
71 | * structure is used as a validation test in unix_mkname(), we need to | 71 | * structure is used as a validation test in unix_mkname(), we need to |
72 | * change the length argument to 110 whenever it is 112. Compatibility | 72 | * change the length argument to 110 whenever it is 112. Compatibility |
73 | * wrappers provided below. | 73 | * wrappers provided below. |
74 | */ | 74 | */ |
75 | 75 | ||
76 | #include <linux/syscalls.h> | 76 | #include <linux/syscalls.h> |
77 | #include <linux/errno.h> | 77 | #include <linux/errno.h> |
78 | #include <linux/fs.h> | 78 | #include <linux/fs.h> |
79 | #include <linux/fcntl.h> | 79 | #include <linux/fcntl.h> |
80 | #include <linux/eventpoll.h> | 80 | #include <linux/eventpoll.h> |
81 | #include <linux/sem.h> | 81 | #include <linux/sem.h> |
82 | #include <linux/socket.h> | 82 | #include <linux/socket.h> |
83 | #include <linux/net.h> | 83 | #include <linux/net.h> |
84 | #include <linux/ipc.h> | 84 | #include <linux/ipc.h> |
85 | #include <linux/uaccess.h> | 85 | #include <linux/uaccess.h> |
86 | #include <linux/slab.h> | 86 | #include <linux/slab.h> |
87 | 87 | ||
/*
 * stat64 layout as seen by legacy-ABI (OABI) user space.
 *
 * The structure is packed with 4-byte alignment, so the 64-bit members
 * are only word aligned and no implicit padding is inserted; the only
 * padding is the two explicit __pad members.
 */
struct oldabi_stat64 {
	/* device, inode and permission information */
	unsigned long long	st_dev;
	unsigned int		__pad1;		/* explicit padding */
	unsigned long		__st_ino;	/* inode number, native word size */
	unsigned int		st_mode;
	unsigned int		st_nlink;

	/* ownership */
	unsigned long		st_uid;
	unsigned long		st_gid;

	/* device number for special files */
	unsigned long long	st_rdev;
	unsigned int		__pad2;		/* explicit padding */

	/* size information */
	long long		st_size;
	unsigned long		st_blksize;
	unsigned long long	st_blocks;

	/* timestamps: seconds followed by nanoseconds */
	unsigned long		st_atime;
	unsigned long		st_atime_nsec;

	unsigned long		st_mtime;
	unsigned long		st_mtime_nsec;

	unsigned long		st_ctime;
	unsigned long		st_ctime_nsec;

	/* full 64-bit inode number */
	unsigned long long	st_ino;
} __attribute__ ((packed,aligned(4)));
116 | 116 | ||
117 | static long cp_oldabi_stat64(struct kstat *stat, | 117 | static long cp_oldabi_stat64(struct kstat *stat, |
118 | struct oldabi_stat64 __user *statbuf) | 118 | struct oldabi_stat64 __user *statbuf) |
119 | { | 119 | { |
120 | struct oldabi_stat64 tmp; | 120 | struct oldabi_stat64 tmp; |
121 | 121 | ||
122 | tmp.st_dev = huge_encode_dev(stat->dev); | 122 | tmp.st_dev = huge_encode_dev(stat->dev); |
123 | tmp.__pad1 = 0; | 123 | tmp.__pad1 = 0; |
124 | tmp.__st_ino = stat->ino; | 124 | tmp.__st_ino = stat->ino; |
125 | tmp.st_mode = stat->mode; | 125 | tmp.st_mode = stat->mode; |
126 | tmp.st_nlink = stat->nlink; | 126 | tmp.st_nlink = stat->nlink; |
127 | tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid); | 127 | tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid); |
128 | tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid); | 128 | tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid); |
129 | tmp.st_rdev = huge_encode_dev(stat->rdev); | 129 | tmp.st_rdev = huge_encode_dev(stat->rdev); |
130 | tmp.st_size = stat->size; | 130 | tmp.st_size = stat->size; |
131 | tmp.st_blocks = stat->blocks; | 131 | tmp.st_blocks = stat->blocks; |
132 | tmp.__pad2 = 0; | 132 | tmp.__pad2 = 0; |
133 | tmp.st_blksize = stat->blksize; | 133 | tmp.st_blksize = stat->blksize; |
134 | tmp.st_atime = stat->atime.tv_sec; | 134 | tmp.st_atime = stat->atime.tv_sec; |
135 | tmp.st_atime_nsec = stat->atime.tv_nsec; | 135 | tmp.st_atime_nsec = stat->atime.tv_nsec; |
136 | tmp.st_mtime = stat->mtime.tv_sec; | 136 | tmp.st_mtime = stat->mtime.tv_sec; |
137 | tmp.st_mtime_nsec = stat->mtime.tv_nsec; | 137 | tmp.st_mtime_nsec = stat->mtime.tv_nsec; |
138 | tmp.st_ctime = stat->ctime.tv_sec; | 138 | tmp.st_ctime = stat->ctime.tv_sec; |
139 | tmp.st_ctime_nsec = stat->ctime.tv_nsec; | 139 | tmp.st_ctime_nsec = stat->ctime.tv_nsec; |
140 | tmp.st_ino = stat->ino; | 140 | tmp.st_ino = stat->ino; |
141 | return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; | 141 | return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; |
142 | } | 142 | } |
143 | 143 | ||
144 | asmlinkage long sys_oabi_stat64(const char __user * filename, | 144 | asmlinkage long sys_oabi_stat64(const char __user * filename, |
145 | struct oldabi_stat64 __user * statbuf) | 145 | struct oldabi_stat64 __user * statbuf) |
146 | { | 146 | { |
147 | struct kstat stat; | 147 | struct kstat stat; |
148 | int error = vfs_stat(filename, &stat); | 148 | int error = vfs_stat(filename, &stat); |
149 | if (!error) | 149 | if (!error) |
150 | error = cp_oldabi_stat64(&stat, statbuf); | 150 | error = cp_oldabi_stat64(&stat, statbuf); |
151 | return error; | 151 | return error; |
152 | } | 152 | } |
153 | 153 | ||
154 | asmlinkage long sys_oabi_lstat64(const char __user * filename, | 154 | asmlinkage long sys_oabi_lstat64(const char __user * filename, |
155 | struct oldabi_stat64 __user * statbuf) | 155 | struct oldabi_stat64 __user * statbuf) |
156 | { | 156 | { |
157 | struct kstat stat; | 157 | struct kstat stat; |
158 | int error = vfs_lstat(filename, &stat); | 158 | int error = vfs_lstat(filename, &stat); |
159 | if (!error) | 159 | if (!error) |
160 | error = cp_oldabi_stat64(&stat, statbuf); | 160 | error = cp_oldabi_stat64(&stat, statbuf); |
161 | return error; | 161 | return error; |
162 | } | 162 | } |
163 | 163 | ||
164 | asmlinkage long sys_oabi_fstat64(unsigned long fd, | 164 | asmlinkage long sys_oabi_fstat64(unsigned long fd, |
165 | struct oldabi_stat64 __user * statbuf) | 165 | struct oldabi_stat64 __user * statbuf) |
166 | { | 166 | { |
167 | struct kstat stat; | 167 | struct kstat stat; |
168 | int error = vfs_fstat(fd, &stat); | 168 | int error = vfs_fstat(fd, &stat); |
169 | if (!error) | 169 | if (!error) |
170 | error = cp_oldabi_stat64(&stat, statbuf); | 170 | error = cp_oldabi_stat64(&stat, statbuf); |
171 | return error; | 171 | return error; |
172 | } | 172 | } |
173 | 173 | ||
174 | asmlinkage long sys_oabi_fstatat64(int dfd, | 174 | asmlinkage long sys_oabi_fstatat64(int dfd, |
175 | const char __user *filename, | 175 | const char __user *filename, |
176 | struct oldabi_stat64 __user *statbuf, | 176 | struct oldabi_stat64 __user *statbuf, |
177 | int flag) | 177 | int flag) |
178 | { | 178 | { |
179 | struct kstat stat; | 179 | struct kstat stat; |
180 | int error; | 180 | int error; |
181 | 181 | ||
182 | error = vfs_fstatat(dfd, filename, &stat, flag); | 182 | error = vfs_fstatat(dfd, filename, &stat, flag); |
183 | if (error) | 183 | if (error) |
184 | return error; | 184 | return error; |
185 | return cp_oldabi_stat64(&stat, statbuf); | 185 | return cp_oldabi_stat64(&stat, statbuf); |
186 | } | 186 | } |
187 | 187 | ||
188 | struct oabi_flock64 { | 188 | struct oabi_flock64 { |
189 | short l_type; | 189 | short l_type; |
190 | short l_whence; | 190 | short l_whence; |
191 | loff_t l_start; | 191 | loff_t l_start; |
192 | loff_t l_len; | 192 | loff_t l_len; |
193 | pid_t l_pid; | 193 | pid_t l_pid; |
194 | } __attribute__ ((packed,aligned(4))); | 194 | } __attribute__ ((packed,aligned(4))); |
195 | 195 | ||
196 | asmlinkage long sys_oabi_fcntl64(unsigned int fd, unsigned int cmd, | 196 | asmlinkage long sys_oabi_fcntl64(unsigned int fd, unsigned int cmd, |
197 | unsigned long arg) | 197 | unsigned long arg) |
198 | { | 198 | { |
199 | struct oabi_flock64 user; | 199 | struct oabi_flock64 user; |
200 | struct flock64 kernel; | 200 | struct flock64 kernel; |
201 | mm_segment_t fs = USER_DS; /* initialized to kill a warning */ | 201 | mm_segment_t fs = USER_DS; /* initialized to kill a warning */ |
202 | unsigned long local_arg = arg; | 202 | unsigned long local_arg = arg; |
203 | int ret; | 203 | int ret; |
204 | 204 | ||
205 | switch (cmd) { | 205 | switch (cmd) { |
206 | case F_GETLKP: | 206 | case F_OFD_GETLK: |
207 | case F_SETLKP: | 207 | case F_OFD_SETLK: |
208 | case F_SETLKPW: | 208 | case F_OFD_SETLKW: |
209 | case F_GETLK64: | 209 | case F_GETLK64: |
210 | case F_SETLK64: | 210 | case F_SETLK64: |
211 | case F_SETLKW64: | 211 | case F_SETLKW64: |
212 | if (copy_from_user(&user, (struct oabi_flock64 __user *)arg, | 212 | if (copy_from_user(&user, (struct oabi_flock64 __user *)arg, |
213 | sizeof(user))) | 213 | sizeof(user))) |
214 | return -EFAULT; | 214 | return -EFAULT; |
215 | kernel.l_type = user.l_type; | 215 | kernel.l_type = user.l_type; |
216 | kernel.l_whence = user.l_whence; | 216 | kernel.l_whence = user.l_whence; |
217 | kernel.l_start = user.l_start; | 217 | kernel.l_start = user.l_start; |
218 | kernel.l_len = user.l_len; | 218 | kernel.l_len = user.l_len; |
219 | kernel.l_pid = user.l_pid; | 219 | kernel.l_pid = user.l_pid; |
220 | local_arg = (unsigned long)&kernel; | 220 | local_arg = (unsigned long)&kernel; |
221 | fs = get_fs(); | 221 | fs = get_fs(); |
222 | set_fs(KERNEL_DS); | 222 | set_fs(KERNEL_DS); |
223 | } | 223 | } |
224 | 224 | ||
225 | ret = sys_fcntl64(fd, cmd, local_arg); | 225 | ret = sys_fcntl64(fd, cmd, local_arg); |
226 | 226 | ||
227 | switch (cmd) { | 227 | switch (cmd) { |
228 | case F_GETLK64: | 228 | case F_GETLK64: |
229 | if (!ret) { | 229 | if (!ret) { |
230 | user.l_type = kernel.l_type; | 230 | user.l_type = kernel.l_type; |
231 | user.l_whence = kernel.l_whence; | 231 | user.l_whence = kernel.l_whence; |
232 | user.l_start = kernel.l_start; | 232 | user.l_start = kernel.l_start; |
233 | user.l_len = kernel.l_len; | 233 | user.l_len = kernel.l_len; |
234 | user.l_pid = kernel.l_pid; | 234 | user.l_pid = kernel.l_pid; |
235 | if (copy_to_user((struct oabi_flock64 __user *)arg, | 235 | if (copy_to_user((struct oabi_flock64 __user *)arg, |
236 | &user, sizeof(user))) | 236 | &user, sizeof(user))) |
237 | ret = -EFAULT; | 237 | ret = -EFAULT; |
238 | } | 238 | } |
239 | case F_SETLK64: | 239 | case F_SETLK64: |
240 | case F_SETLKW64: | 240 | case F_SETLKW64: |
241 | set_fs(fs); | 241 | set_fs(fs); |
242 | } | 242 | } |
243 | 243 | ||
244 | return ret; | 244 | return ret; |
245 | } | 245 | } |
246 | 246 | ||
247 | struct oabi_epoll_event { | 247 | struct oabi_epoll_event { |
248 | __u32 events; | 248 | __u32 events; |
249 | __u64 data; | 249 | __u64 data; |
250 | } __attribute__ ((packed,aligned(4))); | 250 | } __attribute__ ((packed,aligned(4))); |
251 | 251 | ||
252 | asmlinkage long sys_oabi_epoll_ctl(int epfd, int op, int fd, | 252 | asmlinkage long sys_oabi_epoll_ctl(int epfd, int op, int fd, |
253 | struct oabi_epoll_event __user *event) | 253 | struct oabi_epoll_event __user *event) |
254 | { | 254 | { |
255 | struct oabi_epoll_event user; | 255 | struct oabi_epoll_event user; |
256 | struct epoll_event kernel; | 256 | struct epoll_event kernel; |
257 | mm_segment_t fs; | 257 | mm_segment_t fs; |
258 | long ret; | 258 | long ret; |
259 | 259 | ||
260 | if (op == EPOLL_CTL_DEL) | 260 | if (op == EPOLL_CTL_DEL) |
261 | return sys_epoll_ctl(epfd, op, fd, NULL); | 261 | return sys_epoll_ctl(epfd, op, fd, NULL); |
262 | if (copy_from_user(&user, event, sizeof(user))) | 262 | if (copy_from_user(&user, event, sizeof(user))) |
263 | return -EFAULT; | 263 | return -EFAULT; |
264 | kernel.events = user.events; | 264 | kernel.events = user.events; |
265 | kernel.data = user.data; | 265 | kernel.data = user.data; |
266 | fs = get_fs(); | 266 | fs = get_fs(); |
267 | set_fs(KERNEL_DS); | 267 | set_fs(KERNEL_DS); |
268 | ret = sys_epoll_ctl(epfd, op, fd, &kernel); | 268 | ret = sys_epoll_ctl(epfd, op, fd, &kernel); |
269 | set_fs(fs); | 269 | set_fs(fs); |
270 | return ret; | 270 | return ret; |
271 | } | 271 | } |
272 | 272 | ||
273 | asmlinkage long sys_oabi_epoll_wait(int epfd, | 273 | asmlinkage long sys_oabi_epoll_wait(int epfd, |
274 | struct oabi_epoll_event __user *events, | 274 | struct oabi_epoll_event __user *events, |
275 | int maxevents, int timeout) | 275 | int maxevents, int timeout) |
276 | { | 276 | { |
277 | struct epoll_event *kbuf; | 277 | struct epoll_event *kbuf; |
278 | mm_segment_t fs; | 278 | mm_segment_t fs; |
279 | long ret, err, i; | 279 | long ret, err, i; |
280 | 280 | ||
281 | if (maxevents <= 0 || maxevents > (INT_MAX/sizeof(struct epoll_event))) | 281 | if (maxevents <= 0 || maxevents > (INT_MAX/sizeof(struct epoll_event))) |
282 | return -EINVAL; | 282 | return -EINVAL; |
283 | kbuf = kmalloc(sizeof(*kbuf) * maxevents, GFP_KERNEL); | 283 | kbuf = kmalloc(sizeof(*kbuf) * maxevents, GFP_KERNEL); |
284 | if (!kbuf) | 284 | if (!kbuf) |
285 | return -ENOMEM; | 285 | return -ENOMEM; |
286 | fs = get_fs(); | 286 | fs = get_fs(); |
287 | set_fs(KERNEL_DS); | 287 | set_fs(KERNEL_DS); |
288 | ret = sys_epoll_wait(epfd, kbuf, maxevents, timeout); | 288 | ret = sys_epoll_wait(epfd, kbuf, maxevents, timeout); |
289 | set_fs(fs); | 289 | set_fs(fs); |
290 | err = 0; | 290 | err = 0; |
291 | for (i = 0; i < ret; i++) { | 291 | for (i = 0; i < ret; i++) { |
292 | __put_user_error(kbuf[i].events, &events->events, err); | 292 | __put_user_error(kbuf[i].events, &events->events, err); |
293 | __put_user_error(kbuf[i].data, &events->data, err); | 293 | __put_user_error(kbuf[i].data, &events->data, err); |
294 | events++; | 294 | events++; |
295 | } | 295 | } |
296 | kfree(kbuf); | 296 | kfree(kbuf); |
297 | return err ? -EFAULT : ret; | 297 | return err ? -EFAULT : ret; |
298 | } | 298 | } |
299 | 299 | ||
/*
 * sembuf layout as seen by legacy-ABI (OABI) user space: the three
 * 16-bit members are followed by an explicit 16-bit pad, giving an
 * 8-byte element.
 */
struct oabi_sembuf {
	unsigned short	sem_num;	/* semaphore index */
	short		sem_op;		/* operation */
	short		sem_flg;	/* operation flags */
	unsigned short	__pad;		/* explicit padding */
};
306 | 306 | ||
307 | asmlinkage long sys_oabi_semtimedop(int semid, | 307 | asmlinkage long sys_oabi_semtimedop(int semid, |
308 | struct oabi_sembuf __user *tsops, | 308 | struct oabi_sembuf __user *tsops, |
309 | unsigned nsops, | 309 | unsigned nsops, |
310 | const struct timespec __user *timeout) | 310 | const struct timespec __user *timeout) |
311 | { | 311 | { |
312 | struct sembuf *sops; | 312 | struct sembuf *sops; |
313 | struct timespec local_timeout; | 313 | struct timespec local_timeout; |
314 | long err; | 314 | long err; |
315 | int i; | 315 | int i; |
316 | 316 | ||
317 | if (nsops < 1 || nsops > SEMOPM) | 317 | if (nsops < 1 || nsops > SEMOPM) |
318 | return -EINVAL; | 318 | return -EINVAL; |
319 | sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL); | 319 | sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL); |
320 | if (!sops) | 320 | if (!sops) |
321 | return -ENOMEM; | 321 | return -ENOMEM; |
322 | err = 0; | 322 | err = 0; |
323 | for (i = 0; i < nsops; i++) { | 323 | for (i = 0; i < nsops; i++) { |
324 | __get_user_error(sops[i].sem_num, &tsops->sem_num, err); | 324 | __get_user_error(sops[i].sem_num, &tsops->sem_num, err); |
325 | __get_user_error(sops[i].sem_op, &tsops->sem_op, err); | 325 | __get_user_error(sops[i].sem_op, &tsops->sem_op, err); |
326 | __get_user_error(sops[i].sem_flg, &tsops->sem_flg, err); | 326 | __get_user_error(sops[i].sem_flg, &tsops->sem_flg, err); |
327 | tsops++; | 327 | tsops++; |
328 | } | 328 | } |
329 | if (timeout) { | 329 | if (timeout) { |
330 | /* copy this as well before changing domain protection */ | 330 | /* copy this as well before changing domain protection */ |
331 | err |= copy_from_user(&local_timeout, timeout, sizeof(*timeout)); | 331 | err |= copy_from_user(&local_timeout, timeout, sizeof(*timeout)); |
332 | timeout = &local_timeout; | 332 | timeout = &local_timeout; |
333 | } | 333 | } |
334 | if (err) { | 334 | if (err) { |
335 | err = -EFAULT; | 335 | err = -EFAULT; |
336 | } else { | 336 | } else { |
337 | mm_segment_t fs = get_fs(); | 337 | mm_segment_t fs = get_fs(); |
338 | set_fs(KERNEL_DS); | 338 | set_fs(KERNEL_DS); |
339 | err = sys_semtimedop(semid, sops, nsops, timeout); | 339 | err = sys_semtimedop(semid, sops, nsops, timeout); |
340 | set_fs(fs); | 340 | set_fs(fs); |
341 | } | 341 | } |
342 | kfree(sops); | 342 | kfree(sops); |
343 | return err; | 343 | return err; |
344 | } | 344 | } |
345 | 345 | ||
346 | asmlinkage long sys_oabi_semop(int semid, struct oabi_sembuf __user *tsops, | 346 | asmlinkage long sys_oabi_semop(int semid, struct oabi_sembuf __user *tsops, |
347 | unsigned nsops) | 347 | unsigned nsops) |
348 | { | 348 | { |
349 | return sys_oabi_semtimedop(semid, tsops, nsops, NULL); | 349 | return sys_oabi_semtimedop(semid, tsops, nsops, NULL); |
350 | } | 350 | } |
351 | 351 | ||
352 | asmlinkage int sys_oabi_ipc(uint call, int first, int second, int third, | 352 | asmlinkage int sys_oabi_ipc(uint call, int first, int second, int third, |
353 | void __user *ptr, long fifth) | 353 | void __user *ptr, long fifth) |
354 | { | 354 | { |
355 | switch (call & 0xffff) { | 355 | switch (call & 0xffff) { |
356 | case SEMOP: | 356 | case SEMOP: |
357 | return sys_oabi_semtimedop(first, | 357 | return sys_oabi_semtimedop(first, |
358 | (struct oabi_sembuf __user *)ptr, | 358 | (struct oabi_sembuf __user *)ptr, |
359 | second, NULL); | 359 | second, NULL); |
360 | case SEMTIMEDOP: | 360 | case SEMTIMEDOP: |
361 | return sys_oabi_semtimedop(first, | 361 | return sys_oabi_semtimedop(first, |
362 | (struct oabi_sembuf __user *)ptr, | 362 | (struct oabi_sembuf __user *)ptr, |
363 | second, | 363 | second, |
364 | (const struct timespec __user *)fifth); | 364 | (const struct timespec __user *)fifth); |
365 | default: | 365 | default: |
366 | return sys_ipc(call, first, second, third, ptr, fifth); | 366 | return sys_ipc(call, first, second, third, ptr, fifth); |
367 | } | 367 | } |
368 | } | 368 | } |
369 | 369 | ||
370 | asmlinkage long sys_oabi_bind(int fd, struct sockaddr __user *addr, int addrlen) | 370 | asmlinkage long sys_oabi_bind(int fd, struct sockaddr __user *addr, int addrlen) |
371 | { | 371 | { |
372 | sa_family_t sa_family; | 372 | sa_family_t sa_family; |
373 | if (addrlen == 112 && | 373 | if (addrlen == 112 && |
374 | get_user(sa_family, &addr->sa_family) == 0 && | 374 | get_user(sa_family, &addr->sa_family) == 0 && |
375 | sa_family == AF_UNIX) | 375 | sa_family == AF_UNIX) |
376 | addrlen = 110; | 376 | addrlen = 110; |
377 | return sys_bind(fd, addr, addrlen); | 377 | return sys_bind(fd, addr, addrlen); |
378 | } | 378 | } |
379 | 379 | ||
380 | asmlinkage long sys_oabi_connect(int fd, struct sockaddr __user *addr, int addrlen) | 380 | asmlinkage long sys_oabi_connect(int fd, struct sockaddr __user *addr, int addrlen) |
381 | { | 381 | { |
382 | sa_family_t sa_family; | 382 | sa_family_t sa_family; |
383 | if (addrlen == 112 && | 383 | if (addrlen == 112 && |
384 | get_user(sa_family, &addr->sa_family) == 0 && | 384 | get_user(sa_family, &addr->sa_family) == 0 && |
385 | sa_family == AF_UNIX) | 385 | sa_family == AF_UNIX) |
386 | addrlen = 110; | 386 | addrlen = 110; |
387 | return sys_connect(fd, addr, addrlen); | 387 | return sys_connect(fd, addr, addrlen); |
388 | } | 388 | } |
389 | 389 | ||
390 | asmlinkage long sys_oabi_sendto(int fd, void __user *buff, | 390 | asmlinkage long sys_oabi_sendto(int fd, void __user *buff, |
391 | size_t len, unsigned flags, | 391 | size_t len, unsigned flags, |
392 | struct sockaddr __user *addr, | 392 | struct sockaddr __user *addr, |
393 | int addrlen) | 393 | int addrlen) |
394 | { | 394 | { |
395 | sa_family_t sa_family; | 395 | sa_family_t sa_family; |
396 | if (addrlen == 112 && | 396 | if (addrlen == 112 && |
397 | get_user(sa_family, &addr->sa_family) == 0 && | 397 | get_user(sa_family, &addr->sa_family) == 0 && |
398 | sa_family == AF_UNIX) | 398 | sa_family == AF_UNIX) |
399 | addrlen = 110; | 399 | addrlen = 110; |
400 | return sys_sendto(fd, buff, len, flags, addr, addrlen); | 400 | return sys_sendto(fd, buff, len, flags, addr, addrlen); |
401 | } | 401 | } |
402 | 402 | ||
403 | asmlinkage long sys_oabi_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) | 403 | asmlinkage long sys_oabi_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) |
404 | { | 404 | { |
405 | struct sockaddr __user *addr; | 405 | struct sockaddr __user *addr; |
406 | int msg_namelen; | 406 | int msg_namelen; |
407 | sa_family_t sa_family; | 407 | sa_family_t sa_family; |
408 | if (msg && | 408 | if (msg && |
409 | get_user(msg_namelen, &msg->msg_namelen) == 0 && | 409 | get_user(msg_namelen, &msg->msg_namelen) == 0 && |
410 | msg_namelen == 112 && | 410 | msg_namelen == 112 && |
411 | get_user(addr, &msg->msg_name) == 0 && | 411 | get_user(addr, &msg->msg_name) == 0 && |
412 | get_user(sa_family, &addr->sa_family) == 0 && | 412 | get_user(sa_family, &addr->sa_family) == 0 && |
413 | sa_family == AF_UNIX) | 413 | sa_family == AF_UNIX) |
414 | { | 414 | { |
415 | /* | 415 | /* |
416 | * HACK ALERT: there is a limit to how much backward bending | 416 | * HACK ALERT: there is a limit to how much backward bending |
417 | * we should do for what is actually a transitional | 417 | * we should do for what is actually a transitional |
418 | * compatibility layer. This already has known flaws with | 418 | * compatibility layer. This already has known flaws with |
419 | * a few ioctls that we don't intend to fix. Therefore | 419 | * a few ioctls that we don't intend to fix. Therefore |
420 | * consider this blatent hack as another one... and take care | 420 | * consider this blatent hack as another one... and take care |
421 | * to run for cover. In most cases it will "just work fine". | 421 | * to run for cover. In most cases it will "just work fine". |
422 | * If it doesn't, well, tough. | 422 | * If it doesn't, well, tough. |
423 | */ | 423 | */ |
424 | put_user(110, &msg->msg_namelen); | 424 | put_user(110, &msg->msg_namelen); |
425 | } | 425 | } |
426 | return sys_sendmsg(fd, msg, flags); | 426 | return sys_sendmsg(fd, msg, flags); |
427 | } | 427 | } |
428 | 428 | ||
429 | asmlinkage long sys_oabi_socketcall(int call, unsigned long __user *args) | 429 | asmlinkage long sys_oabi_socketcall(int call, unsigned long __user *args) |
430 | { | 430 | { |
431 | unsigned long r = -EFAULT, a[6]; | 431 | unsigned long r = -EFAULT, a[6]; |
432 | 432 | ||
433 | switch (call) { | 433 | switch (call) { |
434 | case SYS_BIND: | 434 | case SYS_BIND: |
435 | if (copy_from_user(a, args, 3 * sizeof(long)) == 0) | 435 | if (copy_from_user(a, args, 3 * sizeof(long)) == 0) |
436 | r = sys_oabi_bind(a[0], (struct sockaddr __user *)a[1], a[2]); | 436 | r = sys_oabi_bind(a[0], (struct sockaddr __user *)a[1], a[2]); |
437 | break; | 437 | break; |
438 | case SYS_CONNECT: | 438 | case SYS_CONNECT: |
439 | if (copy_from_user(a, args, 3 * sizeof(long)) == 0) | 439 | if (copy_from_user(a, args, 3 * sizeof(long)) == 0) |
440 | r = sys_oabi_connect(a[0], (struct sockaddr __user *)a[1], a[2]); | 440 | r = sys_oabi_connect(a[0], (struct sockaddr __user *)a[1], a[2]); |
441 | break; | 441 | break; |
442 | case SYS_SENDTO: | 442 | case SYS_SENDTO: |
443 | if (copy_from_user(a, args, 6 * sizeof(long)) == 0) | 443 | if (copy_from_user(a, args, 6 * sizeof(long)) == 0) |
444 | r = sys_oabi_sendto(a[0], (void __user *)a[1], a[2], a[3], | 444 | r = sys_oabi_sendto(a[0], (void __user *)a[1], a[2], a[3], |
445 | (struct sockaddr __user *)a[4], a[5]); | 445 | (struct sockaddr __user *)a[4], a[5]); |
446 | break; | 446 | break; |
447 | case SYS_SENDMSG: | 447 | case SYS_SENDMSG: |
448 | if (copy_from_user(a, args, 3 * sizeof(long)) == 0) | 448 | if (copy_from_user(a, args, 3 * sizeof(long)) == 0) |
449 | r = sys_oabi_sendmsg(a[0], (struct msghdr __user *)a[1], a[2]); | 449 | r = sys_oabi_sendmsg(a[0], (struct msghdr __user *)a[1], a[2]); |
450 | break; | 450 | break; |
451 | default: | 451 | default: |
452 | r = sys_socketcall(call, args); | 452 | r = sys_socketcall(call, args); |
453 | } | 453 | } |
454 | 454 | ||
455 | return r; | 455 | return r; |
456 | } | 456 | } |
457 | 457 |
fs/compat.c
1 | /* | 1 | /* |
2 | * linux/fs/compat.c | 2 | * linux/fs/compat.c |
3 | * | 3 | * |
4 | * Kernel compatibility routines for e.g. 32 bit syscall support | 4 | * Kernel compatibility routines for e.g. 32 bit syscall support |
5 | * on 64 bit kernels. | 5 | * on 64 bit kernels. |
6 | * | 6 | * |
7 | * Copyright (C) 2002 Stephen Rothwell, IBM Corporation | 7 | * Copyright (C) 2002 Stephen Rothwell, IBM Corporation |
8 | * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com) | 8 | * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com) |
9 | * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) | 9 | * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) |
10 | * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs | 10 | * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs |
11 | * Copyright (C) 2003 Pavel Machek (pavel@ucw.cz) | 11 | * Copyright (C) 2003 Pavel Machek (pavel@ucw.cz) |
12 | * | 12 | * |
13 | * This program is free software; you can redistribute it and/or modify | 13 | * This program is free software; you can redistribute it and/or modify |
14 | * it under the terms of the GNU General Public License version 2 as | 14 | * it under the terms of the GNU General Public License version 2 as |
15 | * published by the Free Software Foundation. | 15 | * published by the Free Software Foundation. |
16 | */ | 16 | */ |
17 | 17 | ||
18 | #include <linux/stddef.h> | 18 | #include <linux/stddef.h> |
19 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
20 | #include <linux/linkage.h> | 20 | #include <linux/linkage.h> |
21 | #include <linux/compat.h> | 21 | #include <linux/compat.h> |
22 | #include <linux/errno.h> | 22 | #include <linux/errno.h> |
23 | #include <linux/time.h> | 23 | #include <linux/time.h> |
24 | #include <linux/fs.h> | 24 | #include <linux/fs.h> |
25 | #include <linux/fcntl.h> | 25 | #include <linux/fcntl.h> |
26 | #include <linux/namei.h> | 26 | #include <linux/namei.h> |
27 | #include <linux/file.h> | 27 | #include <linux/file.h> |
28 | #include <linux/fdtable.h> | 28 | #include <linux/fdtable.h> |
29 | #include <linux/vfs.h> | 29 | #include <linux/vfs.h> |
30 | #include <linux/ioctl.h> | 30 | #include <linux/ioctl.h> |
31 | #include <linux/init.h> | 31 | #include <linux/init.h> |
32 | #include <linux/ncp_mount.h> | 32 | #include <linux/ncp_mount.h> |
33 | #include <linux/nfs4_mount.h> | 33 | #include <linux/nfs4_mount.h> |
34 | #include <linux/syscalls.h> | 34 | #include <linux/syscalls.h> |
35 | #include <linux/ctype.h> | 35 | #include <linux/ctype.h> |
36 | #include <linux/dirent.h> | 36 | #include <linux/dirent.h> |
37 | #include <linux/fsnotify.h> | 37 | #include <linux/fsnotify.h> |
38 | #include <linux/highuid.h> | 38 | #include <linux/highuid.h> |
39 | #include <linux/personality.h> | 39 | #include <linux/personality.h> |
40 | #include <linux/rwsem.h> | 40 | #include <linux/rwsem.h> |
41 | #include <linux/tsacct_kern.h> | 41 | #include <linux/tsacct_kern.h> |
42 | #include <linux/security.h> | 42 | #include <linux/security.h> |
43 | #include <linux/highmem.h> | 43 | #include <linux/highmem.h> |
44 | #include <linux/signal.h> | 44 | #include <linux/signal.h> |
45 | #include <linux/poll.h> | 45 | #include <linux/poll.h> |
46 | #include <linux/mm.h> | 46 | #include <linux/mm.h> |
47 | #include <linux/fs_struct.h> | 47 | #include <linux/fs_struct.h> |
48 | #include <linux/slab.h> | 48 | #include <linux/slab.h> |
49 | #include <linux/pagemap.h> | 49 | #include <linux/pagemap.h> |
50 | #include <linux/aio.h> | 50 | #include <linux/aio.h> |
51 | 51 | ||
52 | #include <asm/uaccess.h> | 52 | #include <asm/uaccess.h> |
53 | #include <asm/mmu_context.h> | 53 | #include <asm/mmu_context.h> |
54 | #include <asm/ioctls.h> | 54 | #include <asm/ioctls.h> |
55 | #include "internal.h" | 55 | #include "internal.h" |
56 | 56 | ||
57 | int compat_log = 1; | 57 | int compat_log = 1; |
58 | 58 | ||
59 | int compat_printk(const char *fmt, ...) | 59 | int compat_printk(const char *fmt, ...) |
60 | { | 60 | { |
61 | va_list ap; | 61 | va_list ap; |
62 | int ret; | 62 | int ret; |
63 | if (!compat_log) | 63 | if (!compat_log) |
64 | return 0; | 64 | return 0; |
65 | va_start(ap, fmt); | 65 | va_start(ap, fmt); |
66 | ret = vprintk(fmt, ap); | 66 | ret = vprintk(fmt, ap); |
67 | va_end(ap); | 67 | va_end(ap); |
68 | return ret; | 68 | return ret; |
69 | } | 69 | } |
70 | 70 | ||
71 | /* | 71 | /* |
72 | * Not all architectures have sys_utime, so implement this in terms | 72 | * Not all architectures have sys_utime, so implement this in terms |
73 | * of sys_utimes. | 73 | * of sys_utimes. |
74 | */ | 74 | */ |
75 | COMPAT_SYSCALL_DEFINE2(utime, const char __user *, filename, | 75 | COMPAT_SYSCALL_DEFINE2(utime, const char __user *, filename, |
76 | struct compat_utimbuf __user *, t) | 76 | struct compat_utimbuf __user *, t) |
77 | { | 77 | { |
78 | struct timespec tv[2]; | 78 | struct timespec tv[2]; |
79 | 79 | ||
80 | if (t) { | 80 | if (t) { |
81 | if (get_user(tv[0].tv_sec, &t->actime) || | 81 | if (get_user(tv[0].tv_sec, &t->actime) || |
82 | get_user(tv[1].tv_sec, &t->modtime)) | 82 | get_user(tv[1].tv_sec, &t->modtime)) |
83 | return -EFAULT; | 83 | return -EFAULT; |
84 | tv[0].tv_nsec = 0; | 84 | tv[0].tv_nsec = 0; |
85 | tv[1].tv_nsec = 0; | 85 | tv[1].tv_nsec = 0; |
86 | } | 86 | } |
87 | return do_utimes(AT_FDCWD, filename, t ? tv : NULL, 0); | 87 | return do_utimes(AT_FDCWD, filename, t ? tv : NULL, 0); |
88 | } | 88 | } |
89 | 89 | ||
90 | COMPAT_SYSCALL_DEFINE4(utimensat, unsigned int, dfd, const char __user *, filename, struct compat_timespec __user *, t, int, flags) | 90 | COMPAT_SYSCALL_DEFINE4(utimensat, unsigned int, dfd, const char __user *, filename, struct compat_timespec __user *, t, int, flags) |
91 | { | 91 | { |
92 | struct timespec tv[2]; | 92 | struct timespec tv[2]; |
93 | 93 | ||
94 | if (t) { | 94 | if (t) { |
95 | if (compat_get_timespec(&tv[0], &t[0]) || | 95 | if (compat_get_timespec(&tv[0], &t[0]) || |
96 | compat_get_timespec(&tv[1], &t[1])) | 96 | compat_get_timespec(&tv[1], &t[1])) |
97 | return -EFAULT; | 97 | return -EFAULT; |
98 | 98 | ||
99 | if (tv[0].tv_nsec == UTIME_OMIT && tv[1].tv_nsec == UTIME_OMIT) | 99 | if (tv[0].tv_nsec == UTIME_OMIT && tv[1].tv_nsec == UTIME_OMIT) |
100 | return 0; | 100 | return 0; |
101 | } | 101 | } |
102 | return do_utimes(dfd, filename, t ? tv : NULL, flags); | 102 | return do_utimes(dfd, filename, t ? tv : NULL, flags); |
103 | } | 103 | } |
104 | 104 | ||
105 | COMPAT_SYSCALL_DEFINE3(futimesat, unsigned int, dfd, const char __user *, filename, struct compat_timeval __user *, t) | 105 | COMPAT_SYSCALL_DEFINE3(futimesat, unsigned int, dfd, const char __user *, filename, struct compat_timeval __user *, t) |
106 | { | 106 | { |
107 | struct timespec tv[2]; | 107 | struct timespec tv[2]; |
108 | 108 | ||
109 | if (t) { | 109 | if (t) { |
110 | if (get_user(tv[0].tv_sec, &t[0].tv_sec) || | 110 | if (get_user(tv[0].tv_sec, &t[0].tv_sec) || |
111 | get_user(tv[0].tv_nsec, &t[0].tv_usec) || | 111 | get_user(tv[0].tv_nsec, &t[0].tv_usec) || |
112 | get_user(tv[1].tv_sec, &t[1].tv_sec) || | 112 | get_user(tv[1].tv_sec, &t[1].tv_sec) || |
113 | get_user(tv[1].tv_nsec, &t[1].tv_usec)) | 113 | get_user(tv[1].tv_nsec, &t[1].tv_usec)) |
114 | return -EFAULT; | 114 | return -EFAULT; |
115 | if (tv[0].tv_nsec >= 1000000 || tv[0].tv_nsec < 0 || | 115 | if (tv[0].tv_nsec >= 1000000 || tv[0].tv_nsec < 0 || |
116 | tv[1].tv_nsec >= 1000000 || tv[1].tv_nsec < 0) | 116 | tv[1].tv_nsec >= 1000000 || tv[1].tv_nsec < 0) |
117 | return -EINVAL; | 117 | return -EINVAL; |
118 | tv[0].tv_nsec *= 1000; | 118 | tv[0].tv_nsec *= 1000; |
119 | tv[1].tv_nsec *= 1000; | 119 | tv[1].tv_nsec *= 1000; |
120 | } | 120 | } |
121 | return do_utimes(dfd, filename, t ? tv : NULL, 0); | 121 | return do_utimes(dfd, filename, t ? tv : NULL, 0); |
122 | } | 122 | } |
123 | 123 | ||
124 | COMPAT_SYSCALL_DEFINE2(utimes, const char __user *, filename, struct compat_timeval __user *, t) | 124 | COMPAT_SYSCALL_DEFINE2(utimes, const char __user *, filename, struct compat_timeval __user *, t) |
125 | { | 125 | { |
126 | return compat_sys_futimesat(AT_FDCWD, filename, t); | 126 | return compat_sys_futimesat(AT_FDCWD, filename, t); |
127 | } | 127 | } |
128 | 128 | ||
129 | static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) | 129 | static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) |
130 | { | 130 | { |
131 | struct compat_stat tmp; | 131 | struct compat_stat tmp; |
132 | 132 | ||
133 | if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev)) | 133 | if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev)) |
134 | return -EOVERFLOW; | 134 | return -EOVERFLOW; |
135 | 135 | ||
136 | memset(&tmp, 0, sizeof(tmp)); | 136 | memset(&tmp, 0, sizeof(tmp)); |
137 | tmp.st_dev = old_encode_dev(stat->dev); | 137 | tmp.st_dev = old_encode_dev(stat->dev); |
138 | tmp.st_ino = stat->ino; | 138 | tmp.st_ino = stat->ino; |
139 | if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) | 139 | if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) |
140 | return -EOVERFLOW; | 140 | return -EOVERFLOW; |
141 | tmp.st_mode = stat->mode; | 141 | tmp.st_mode = stat->mode; |
142 | tmp.st_nlink = stat->nlink; | 142 | tmp.st_nlink = stat->nlink; |
143 | if (tmp.st_nlink != stat->nlink) | 143 | if (tmp.st_nlink != stat->nlink) |
144 | return -EOVERFLOW; | 144 | return -EOVERFLOW; |
145 | SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); | 145 | SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); |
146 | SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); | 146 | SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); |
147 | tmp.st_rdev = old_encode_dev(stat->rdev); | 147 | tmp.st_rdev = old_encode_dev(stat->rdev); |
148 | if ((u64) stat->size > MAX_NON_LFS) | 148 | if ((u64) stat->size > MAX_NON_LFS) |
149 | return -EOVERFLOW; | 149 | return -EOVERFLOW; |
150 | tmp.st_size = stat->size; | 150 | tmp.st_size = stat->size; |
151 | tmp.st_atime = stat->atime.tv_sec; | 151 | tmp.st_atime = stat->atime.tv_sec; |
152 | tmp.st_atime_nsec = stat->atime.tv_nsec; | 152 | tmp.st_atime_nsec = stat->atime.tv_nsec; |
153 | tmp.st_mtime = stat->mtime.tv_sec; | 153 | tmp.st_mtime = stat->mtime.tv_sec; |
154 | tmp.st_mtime_nsec = stat->mtime.tv_nsec; | 154 | tmp.st_mtime_nsec = stat->mtime.tv_nsec; |
155 | tmp.st_ctime = stat->ctime.tv_sec; | 155 | tmp.st_ctime = stat->ctime.tv_sec; |
156 | tmp.st_ctime_nsec = stat->ctime.tv_nsec; | 156 | tmp.st_ctime_nsec = stat->ctime.tv_nsec; |
157 | tmp.st_blocks = stat->blocks; | 157 | tmp.st_blocks = stat->blocks; |
158 | tmp.st_blksize = stat->blksize; | 158 | tmp.st_blksize = stat->blksize; |
159 | return copy_to_user(ubuf, &tmp, sizeof(tmp)) ? -EFAULT : 0; | 159 | return copy_to_user(ubuf, &tmp, sizeof(tmp)) ? -EFAULT : 0; |
160 | } | 160 | } |
161 | 161 | ||
162 | COMPAT_SYSCALL_DEFINE2(newstat, const char __user *, filename, | 162 | COMPAT_SYSCALL_DEFINE2(newstat, const char __user *, filename, |
163 | struct compat_stat __user *, statbuf) | 163 | struct compat_stat __user *, statbuf) |
164 | { | 164 | { |
165 | struct kstat stat; | 165 | struct kstat stat; |
166 | int error; | 166 | int error; |
167 | 167 | ||
168 | error = vfs_stat(filename, &stat); | 168 | error = vfs_stat(filename, &stat); |
169 | if (error) | 169 | if (error) |
170 | return error; | 170 | return error; |
171 | return cp_compat_stat(&stat, statbuf); | 171 | return cp_compat_stat(&stat, statbuf); |
172 | } | 172 | } |
173 | 173 | ||
174 | COMPAT_SYSCALL_DEFINE2(newlstat, const char __user *, filename, | 174 | COMPAT_SYSCALL_DEFINE2(newlstat, const char __user *, filename, |
175 | struct compat_stat __user *, statbuf) | 175 | struct compat_stat __user *, statbuf) |
176 | { | 176 | { |
177 | struct kstat stat; | 177 | struct kstat stat; |
178 | int error; | 178 | int error; |
179 | 179 | ||
180 | error = vfs_lstat(filename, &stat); | 180 | error = vfs_lstat(filename, &stat); |
181 | if (error) | 181 | if (error) |
182 | return error; | 182 | return error; |
183 | return cp_compat_stat(&stat, statbuf); | 183 | return cp_compat_stat(&stat, statbuf); |
184 | } | 184 | } |
185 | 185 | ||
186 | #ifndef __ARCH_WANT_STAT64 | 186 | #ifndef __ARCH_WANT_STAT64 |
187 | COMPAT_SYSCALL_DEFINE4(newfstatat, unsigned int, dfd, | 187 | COMPAT_SYSCALL_DEFINE4(newfstatat, unsigned int, dfd, |
188 | const char __user *, filename, | 188 | const char __user *, filename, |
189 | struct compat_stat __user *, statbuf, int, flag) | 189 | struct compat_stat __user *, statbuf, int, flag) |
190 | { | 190 | { |
191 | struct kstat stat; | 191 | struct kstat stat; |
192 | int error; | 192 | int error; |
193 | 193 | ||
194 | error = vfs_fstatat(dfd, filename, &stat, flag); | 194 | error = vfs_fstatat(dfd, filename, &stat, flag); |
195 | if (error) | 195 | if (error) |
196 | return error; | 196 | return error; |
197 | return cp_compat_stat(&stat, statbuf); | 197 | return cp_compat_stat(&stat, statbuf); |
198 | } | 198 | } |
199 | #endif | 199 | #endif |
200 | 200 | ||
201 | COMPAT_SYSCALL_DEFINE2(newfstat, unsigned int, fd, | 201 | COMPAT_SYSCALL_DEFINE2(newfstat, unsigned int, fd, |
202 | struct compat_stat __user *, statbuf) | 202 | struct compat_stat __user *, statbuf) |
203 | { | 203 | { |
204 | struct kstat stat; | 204 | struct kstat stat; |
205 | int error = vfs_fstat(fd, &stat); | 205 | int error = vfs_fstat(fd, &stat); |
206 | 206 | ||
207 | if (!error) | 207 | if (!error) |
208 | error = cp_compat_stat(&stat, statbuf); | 208 | error = cp_compat_stat(&stat, statbuf); |
209 | return error; | 209 | return error; |
210 | } | 210 | } |
211 | 211 | ||
212 | static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *kbuf) | 212 | static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *kbuf) |
213 | { | 213 | { |
214 | 214 | ||
215 | if (sizeof ubuf->f_blocks == 4) { | 215 | if (sizeof ubuf->f_blocks == 4) { |
216 | if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail | | 216 | if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail | |
217 | kbuf->f_bsize | kbuf->f_frsize) & 0xffffffff00000000ULL) | 217 | kbuf->f_bsize | kbuf->f_frsize) & 0xffffffff00000000ULL) |
218 | return -EOVERFLOW; | 218 | return -EOVERFLOW; |
219 | /* f_files and f_ffree may be -1; it's okay | 219 | /* f_files and f_ffree may be -1; it's okay |
220 | * to stuff that into 32 bits */ | 220 | * to stuff that into 32 bits */ |
221 | if (kbuf->f_files != 0xffffffffffffffffULL | 221 | if (kbuf->f_files != 0xffffffffffffffffULL |
222 | && (kbuf->f_files & 0xffffffff00000000ULL)) | 222 | && (kbuf->f_files & 0xffffffff00000000ULL)) |
223 | return -EOVERFLOW; | 223 | return -EOVERFLOW; |
224 | if (kbuf->f_ffree != 0xffffffffffffffffULL | 224 | if (kbuf->f_ffree != 0xffffffffffffffffULL |
225 | && (kbuf->f_ffree & 0xffffffff00000000ULL)) | 225 | && (kbuf->f_ffree & 0xffffffff00000000ULL)) |
226 | return -EOVERFLOW; | 226 | return -EOVERFLOW; |
227 | } | 227 | } |
228 | if (!access_ok(VERIFY_WRITE, ubuf, sizeof(*ubuf)) || | 228 | if (!access_ok(VERIFY_WRITE, ubuf, sizeof(*ubuf)) || |
229 | __put_user(kbuf->f_type, &ubuf->f_type) || | 229 | __put_user(kbuf->f_type, &ubuf->f_type) || |
230 | __put_user(kbuf->f_bsize, &ubuf->f_bsize) || | 230 | __put_user(kbuf->f_bsize, &ubuf->f_bsize) || |
231 | __put_user(kbuf->f_blocks, &ubuf->f_blocks) || | 231 | __put_user(kbuf->f_blocks, &ubuf->f_blocks) || |
232 | __put_user(kbuf->f_bfree, &ubuf->f_bfree) || | 232 | __put_user(kbuf->f_bfree, &ubuf->f_bfree) || |
233 | __put_user(kbuf->f_bavail, &ubuf->f_bavail) || | 233 | __put_user(kbuf->f_bavail, &ubuf->f_bavail) || |
234 | __put_user(kbuf->f_files, &ubuf->f_files) || | 234 | __put_user(kbuf->f_files, &ubuf->f_files) || |
235 | __put_user(kbuf->f_ffree, &ubuf->f_ffree) || | 235 | __put_user(kbuf->f_ffree, &ubuf->f_ffree) || |
236 | __put_user(kbuf->f_namelen, &ubuf->f_namelen) || | 236 | __put_user(kbuf->f_namelen, &ubuf->f_namelen) || |
237 | __put_user(kbuf->f_fsid.val[0], &ubuf->f_fsid.val[0]) || | 237 | __put_user(kbuf->f_fsid.val[0], &ubuf->f_fsid.val[0]) || |
238 | __put_user(kbuf->f_fsid.val[1], &ubuf->f_fsid.val[1]) || | 238 | __put_user(kbuf->f_fsid.val[1], &ubuf->f_fsid.val[1]) || |
239 | __put_user(kbuf->f_frsize, &ubuf->f_frsize) || | 239 | __put_user(kbuf->f_frsize, &ubuf->f_frsize) || |
240 | __put_user(kbuf->f_flags, &ubuf->f_flags) || | 240 | __put_user(kbuf->f_flags, &ubuf->f_flags) || |
241 | __clear_user(ubuf->f_spare, sizeof(ubuf->f_spare))) | 241 | __clear_user(ubuf->f_spare, sizeof(ubuf->f_spare))) |
242 | return -EFAULT; | 242 | return -EFAULT; |
243 | return 0; | 243 | return 0; |
244 | } | 244 | } |
245 | 245 | ||
246 | /* | 246 | /* |
247 | * The following statfs calls are copies of code from fs/statfs.c and | 247 | * The following statfs calls are copies of code from fs/statfs.c and |
248 | * should be checked against those from time to time | 248 | * should be checked against those from time to time |
249 | */ | 249 | */ |
250 | COMPAT_SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct compat_statfs __user *, buf) | 250 | COMPAT_SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct compat_statfs __user *, buf) |
251 | { | 251 | { |
252 | struct kstatfs tmp; | 252 | struct kstatfs tmp; |
253 | int error = user_statfs(pathname, &tmp); | 253 | int error = user_statfs(pathname, &tmp); |
254 | if (!error) | 254 | if (!error) |
255 | error = put_compat_statfs(buf, &tmp); | 255 | error = put_compat_statfs(buf, &tmp); |
256 | return error; | 256 | return error; |
257 | } | 257 | } |
258 | 258 | ||
259 | COMPAT_SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct compat_statfs __user *, buf) | 259 | COMPAT_SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct compat_statfs __user *, buf) |
260 | { | 260 | { |
261 | struct kstatfs tmp; | 261 | struct kstatfs tmp; |
262 | int error = fd_statfs(fd, &tmp); | 262 | int error = fd_statfs(fd, &tmp); |
263 | if (!error) | 263 | if (!error) |
264 | error = put_compat_statfs(buf, &tmp); | 264 | error = put_compat_statfs(buf, &tmp); |
265 | return error; | 265 | return error; |
266 | } | 266 | } |
267 | 267 | ||
268 | static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstatfs *kbuf) | 268 | static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstatfs *kbuf) |
269 | { | 269 | { |
270 | if (sizeof ubuf->f_blocks == 4) { | 270 | if (sizeof ubuf->f_blocks == 4) { |
271 | if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail | | 271 | if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail | |
272 | kbuf->f_bsize | kbuf->f_frsize) & 0xffffffff00000000ULL) | 272 | kbuf->f_bsize | kbuf->f_frsize) & 0xffffffff00000000ULL) |
273 | return -EOVERFLOW; | 273 | return -EOVERFLOW; |
274 | /* f_files and f_ffree may be -1; it's okay | 274 | /* f_files and f_ffree may be -1; it's okay |
275 | * to stuff that into 32 bits */ | 275 | * to stuff that into 32 bits */ |
276 | if (kbuf->f_files != 0xffffffffffffffffULL | 276 | if (kbuf->f_files != 0xffffffffffffffffULL |
277 | && (kbuf->f_files & 0xffffffff00000000ULL)) | 277 | && (kbuf->f_files & 0xffffffff00000000ULL)) |
278 | return -EOVERFLOW; | 278 | return -EOVERFLOW; |
279 | if (kbuf->f_ffree != 0xffffffffffffffffULL | 279 | if (kbuf->f_ffree != 0xffffffffffffffffULL |
280 | && (kbuf->f_ffree & 0xffffffff00000000ULL)) | 280 | && (kbuf->f_ffree & 0xffffffff00000000ULL)) |
281 | return -EOVERFLOW; | 281 | return -EOVERFLOW; |
282 | } | 282 | } |
283 | if (!access_ok(VERIFY_WRITE, ubuf, sizeof(*ubuf)) || | 283 | if (!access_ok(VERIFY_WRITE, ubuf, sizeof(*ubuf)) || |
284 | __put_user(kbuf->f_type, &ubuf->f_type) || | 284 | __put_user(kbuf->f_type, &ubuf->f_type) || |
285 | __put_user(kbuf->f_bsize, &ubuf->f_bsize) || | 285 | __put_user(kbuf->f_bsize, &ubuf->f_bsize) || |
286 | __put_user(kbuf->f_blocks, &ubuf->f_blocks) || | 286 | __put_user(kbuf->f_blocks, &ubuf->f_blocks) || |
287 | __put_user(kbuf->f_bfree, &ubuf->f_bfree) || | 287 | __put_user(kbuf->f_bfree, &ubuf->f_bfree) || |
288 | __put_user(kbuf->f_bavail, &ubuf->f_bavail) || | 288 | __put_user(kbuf->f_bavail, &ubuf->f_bavail) || |
289 | __put_user(kbuf->f_files, &ubuf->f_files) || | 289 | __put_user(kbuf->f_files, &ubuf->f_files) || |
290 | __put_user(kbuf->f_ffree, &ubuf->f_ffree) || | 290 | __put_user(kbuf->f_ffree, &ubuf->f_ffree) || |
291 | __put_user(kbuf->f_namelen, &ubuf->f_namelen) || | 291 | __put_user(kbuf->f_namelen, &ubuf->f_namelen) || |
292 | __put_user(kbuf->f_fsid.val[0], &ubuf->f_fsid.val[0]) || | 292 | __put_user(kbuf->f_fsid.val[0], &ubuf->f_fsid.val[0]) || |
293 | __put_user(kbuf->f_fsid.val[1], &ubuf->f_fsid.val[1]) || | 293 | __put_user(kbuf->f_fsid.val[1], &ubuf->f_fsid.val[1]) || |
294 | __put_user(kbuf->f_frsize, &ubuf->f_frsize) || | 294 | __put_user(kbuf->f_frsize, &ubuf->f_frsize) || |
295 | __put_user(kbuf->f_flags, &ubuf->f_flags) || | 295 | __put_user(kbuf->f_flags, &ubuf->f_flags) || |
296 | __clear_user(ubuf->f_spare, sizeof(ubuf->f_spare))) | 296 | __clear_user(ubuf->f_spare, sizeof(ubuf->f_spare))) |
297 | return -EFAULT; | 297 | return -EFAULT; |
298 | return 0; | 298 | return 0; |
299 | } | 299 | } |
300 | 300 | ||
301 | COMPAT_SYSCALL_DEFINE3(statfs64, const char __user *, pathname, compat_size_t, sz, struct compat_statfs64 __user *, buf) | 301 | COMPAT_SYSCALL_DEFINE3(statfs64, const char __user *, pathname, compat_size_t, sz, struct compat_statfs64 __user *, buf) |
302 | { | 302 | { |
303 | struct kstatfs tmp; | 303 | struct kstatfs tmp; |
304 | int error; | 304 | int error; |
305 | 305 | ||
306 | if (sz != sizeof(*buf)) | 306 | if (sz != sizeof(*buf)) |
307 | return -EINVAL; | 307 | return -EINVAL; |
308 | 308 | ||
309 | error = user_statfs(pathname, &tmp); | 309 | error = user_statfs(pathname, &tmp); |
310 | if (!error) | 310 | if (!error) |
311 | error = put_compat_statfs64(buf, &tmp); | 311 | error = put_compat_statfs64(buf, &tmp); |
312 | return error; | 312 | return error; |
313 | } | 313 | } |
314 | 314 | ||
315 | COMPAT_SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, compat_size_t, sz, struct compat_statfs64 __user *, buf) | 315 | COMPAT_SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, compat_size_t, sz, struct compat_statfs64 __user *, buf) |
316 | { | 316 | { |
317 | struct kstatfs tmp; | 317 | struct kstatfs tmp; |
318 | int error; | 318 | int error; |
319 | 319 | ||
320 | if (sz != sizeof(*buf)) | 320 | if (sz != sizeof(*buf)) |
321 | return -EINVAL; | 321 | return -EINVAL; |
322 | 322 | ||
323 | error = fd_statfs(fd, &tmp); | 323 | error = fd_statfs(fd, &tmp); |
324 | if (!error) | 324 | if (!error) |
325 | error = put_compat_statfs64(buf, &tmp); | 325 | error = put_compat_statfs64(buf, &tmp); |
326 | return error; | 326 | return error; |
327 | } | 327 | } |
328 | 328 | ||
329 | /* | 329 | /* |
330 | * This is a copy of sys_ustat, just dealing with a structure layout. | 330 | * This is a copy of sys_ustat, just dealing with a structure layout. |
331 | * Given how simple this syscall is that apporach is more maintainable | 331 | * Given how simple this syscall is that apporach is more maintainable |
332 | * than the various conversion hacks. | 332 | * than the various conversion hacks. |
333 | */ | 333 | */ |
334 | COMPAT_SYSCALL_DEFINE2(ustat, unsigned, dev, struct compat_ustat __user *, u) | 334 | COMPAT_SYSCALL_DEFINE2(ustat, unsigned, dev, struct compat_ustat __user *, u) |
335 | { | 335 | { |
336 | struct compat_ustat tmp; | 336 | struct compat_ustat tmp; |
337 | struct kstatfs sbuf; | 337 | struct kstatfs sbuf; |
338 | int err = vfs_ustat(new_decode_dev(dev), &sbuf); | 338 | int err = vfs_ustat(new_decode_dev(dev), &sbuf); |
339 | if (err) | 339 | if (err) |
340 | return err; | 340 | return err; |
341 | 341 | ||
342 | memset(&tmp, 0, sizeof(struct compat_ustat)); | 342 | memset(&tmp, 0, sizeof(struct compat_ustat)); |
343 | tmp.f_tfree = sbuf.f_bfree; | 343 | tmp.f_tfree = sbuf.f_bfree; |
344 | tmp.f_tinode = sbuf.f_ffree; | 344 | tmp.f_tinode = sbuf.f_ffree; |
345 | if (copy_to_user(u, &tmp, sizeof(struct compat_ustat))) | 345 | if (copy_to_user(u, &tmp, sizeof(struct compat_ustat))) |
346 | return -EFAULT; | 346 | return -EFAULT; |
347 | return 0; | 347 | return 0; |
348 | } | 348 | } |
349 | 349 | ||
350 | static int get_compat_flock(struct flock *kfl, struct compat_flock __user *ufl) | 350 | static int get_compat_flock(struct flock *kfl, struct compat_flock __user *ufl) |
351 | { | 351 | { |
352 | if (!access_ok(VERIFY_READ, ufl, sizeof(*ufl)) || | 352 | if (!access_ok(VERIFY_READ, ufl, sizeof(*ufl)) || |
353 | __get_user(kfl->l_type, &ufl->l_type) || | 353 | __get_user(kfl->l_type, &ufl->l_type) || |
354 | __get_user(kfl->l_whence, &ufl->l_whence) || | 354 | __get_user(kfl->l_whence, &ufl->l_whence) || |
355 | __get_user(kfl->l_start, &ufl->l_start) || | 355 | __get_user(kfl->l_start, &ufl->l_start) || |
356 | __get_user(kfl->l_len, &ufl->l_len) || | 356 | __get_user(kfl->l_len, &ufl->l_len) || |
357 | __get_user(kfl->l_pid, &ufl->l_pid)) | 357 | __get_user(kfl->l_pid, &ufl->l_pid)) |
358 | return -EFAULT; | 358 | return -EFAULT; |
359 | return 0; | 359 | return 0; |
360 | } | 360 | } |
361 | 361 | ||
362 | static int put_compat_flock(struct flock *kfl, struct compat_flock __user *ufl) | 362 | static int put_compat_flock(struct flock *kfl, struct compat_flock __user *ufl) |
363 | { | 363 | { |
364 | if (!access_ok(VERIFY_WRITE, ufl, sizeof(*ufl)) || | 364 | if (!access_ok(VERIFY_WRITE, ufl, sizeof(*ufl)) || |
365 | __put_user(kfl->l_type, &ufl->l_type) || | 365 | __put_user(kfl->l_type, &ufl->l_type) || |
366 | __put_user(kfl->l_whence, &ufl->l_whence) || | 366 | __put_user(kfl->l_whence, &ufl->l_whence) || |
367 | __put_user(kfl->l_start, &ufl->l_start) || | 367 | __put_user(kfl->l_start, &ufl->l_start) || |
368 | __put_user(kfl->l_len, &ufl->l_len) || | 368 | __put_user(kfl->l_len, &ufl->l_len) || |
369 | __put_user(kfl->l_pid, &ufl->l_pid)) | 369 | __put_user(kfl->l_pid, &ufl->l_pid)) |
370 | return -EFAULT; | 370 | return -EFAULT; |
371 | return 0; | 371 | return 0; |
372 | } | 372 | } |
373 | 373 | ||
374 | #ifndef HAVE_ARCH_GET_COMPAT_FLOCK64 | 374 | #ifndef HAVE_ARCH_GET_COMPAT_FLOCK64 |
375 | static int get_compat_flock64(struct flock *kfl, struct compat_flock64 __user *ufl) | 375 | static int get_compat_flock64(struct flock *kfl, struct compat_flock64 __user *ufl) |
376 | { | 376 | { |
377 | if (!access_ok(VERIFY_READ, ufl, sizeof(*ufl)) || | 377 | if (!access_ok(VERIFY_READ, ufl, sizeof(*ufl)) || |
378 | __get_user(kfl->l_type, &ufl->l_type) || | 378 | __get_user(kfl->l_type, &ufl->l_type) || |
379 | __get_user(kfl->l_whence, &ufl->l_whence) || | 379 | __get_user(kfl->l_whence, &ufl->l_whence) || |
380 | __get_user(kfl->l_start, &ufl->l_start) || | 380 | __get_user(kfl->l_start, &ufl->l_start) || |
381 | __get_user(kfl->l_len, &ufl->l_len) || | 381 | __get_user(kfl->l_len, &ufl->l_len) || |
382 | __get_user(kfl->l_pid, &ufl->l_pid)) | 382 | __get_user(kfl->l_pid, &ufl->l_pid)) |
383 | return -EFAULT; | 383 | return -EFAULT; |
384 | return 0; | 384 | return 0; |
385 | } | 385 | } |
386 | #endif | 386 | #endif |
387 | 387 | ||
388 | #ifndef HAVE_ARCH_PUT_COMPAT_FLOCK64 | 388 | #ifndef HAVE_ARCH_PUT_COMPAT_FLOCK64 |
389 | static int put_compat_flock64(struct flock *kfl, struct compat_flock64 __user *ufl) | 389 | static int put_compat_flock64(struct flock *kfl, struct compat_flock64 __user *ufl) |
390 | { | 390 | { |
391 | if (!access_ok(VERIFY_WRITE, ufl, sizeof(*ufl)) || | 391 | if (!access_ok(VERIFY_WRITE, ufl, sizeof(*ufl)) || |
392 | __put_user(kfl->l_type, &ufl->l_type) || | 392 | __put_user(kfl->l_type, &ufl->l_type) || |
393 | __put_user(kfl->l_whence, &ufl->l_whence) || | 393 | __put_user(kfl->l_whence, &ufl->l_whence) || |
394 | __put_user(kfl->l_start, &ufl->l_start) || | 394 | __put_user(kfl->l_start, &ufl->l_start) || |
395 | __put_user(kfl->l_len, &ufl->l_len) || | 395 | __put_user(kfl->l_len, &ufl->l_len) || |
396 | __put_user(kfl->l_pid, &ufl->l_pid)) | 396 | __put_user(kfl->l_pid, &ufl->l_pid)) |
397 | return -EFAULT; | 397 | return -EFAULT; |
398 | return 0; | 398 | return 0; |
399 | } | 399 | } |
400 | #endif | 400 | #endif |
401 | 401 | ||
402 | static unsigned int | 402 | static unsigned int |
403 | convert_fcntl_cmd(unsigned int cmd) | 403 | convert_fcntl_cmd(unsigned int cmd) |
404 | { | 404 | { |
405 | switch (cmd) { | 405 | switch (cmd) { |
406 | case F_GETLK64: | 406 | case F_GETLK64: |
407 | return F_GETLK; | 407 | return F_GETLK; |
408 | case F_SETLK64: | 408 | case F_SETLK64: |
409 | return F_SETLK; | 409 | return F_SETLK; |
410 | case F_SETLKW64: | 410 | case F_SETLKW64: |
411 | return F_SETLKW; | 411 | return F_SETLKW; |
412 | } | 412 | } |
413 | 413 | ||
414 | return cmd; | 414 | return cmd; |
415 | } | 415 | } |
416 | 416 | ||
417 | COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, | 417 | COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, |
418 | compat_ulong_t, arg) | 418 | compat_ulong_t, arg) |
419 | { | 419 | { |
420 | mm_segment_t old_fs; | 420 | mm_segment_t old_fs; |
421 | struct flock f; | 421 | struct flock f; |
422 | long ret; | 422 | long ret; |
423 | unsigned int conv_cmd; | 423 | unsigned int conv_cmd; |
424 | 424 | ||
425 | switch (cmd) { | 425 | switch (cmd) { |
426 | case F_GETLK: | 426 | case F_GETLK: |
427 | case F_SETLK: | 427 | case F_SETLK: |
428 | case F_SETLKW: | 428 | case F_SETLKW: |
429 | ret = get_compat_flock(&f, compat_ptr(arg)); | 429 | ret = get_compat_flock(&f, compat_ptr(arg)); |
430 | if (ret != 0) | 430 | if (ret != 0) |
431 | break; | 431 | break; |
432 | old_fs = get_fs(); | 432 | old_fs = get_fs(); |
433 | set_fs(KERNEL_DS); | 433 | set_fs(KERNEL_DS); |
434 | ret = sys_fcntl(fd, cmd, (unsigned long)&f); | 434 | ret = sys_fcntl(fd, cmd, (unsigned long)&f); |
435 | set_fs(old_fs); | 435 | set_fs(old_fs); |
436 | if (cmd == F_GETLK && ret == 0) { | 436 | if (cmd == F_GETLK && ret == 0) { |
437 | /* GETLK was successful and we need to return the data... | 437 | /* GETLK was successful and we need to return the data... |
438 | * but it needs to fit in the compat structure. | 438 | * but it needs to fit in the compat structure. |
439 | * l_start shouldn't be too big, unless the original | 439 | * l_start shouldn't be too big, unless the original |
440 | * start + end is greater than COMPAT_OFF_T_MAX, in which | 440 | * start + end is greater than COMPAT_OFF_T_MAX, in which |
441 | * case the app was asking for trouble, so we return | 441 | * case the app was asking for trouble, so we return |
442 | * -EOVERFLOW in that case. | 442 | * -EOVERFLOW in that case. |
443 | * l_len could be too big, in which case we just truncate it, | 443 | * l_len could be too big, in which case we just truncate it, |
444 | * and only allow the app to see that part of the conflicting | 444 | * and only allow the app to see that part of the conflicting |
445 | * lock that might make sense to it anyway | 445 | * lock that might make sense to it anyway |
446 | */ | 446 | */ |
447 | 447 | ||
448 | if (f.l_start > COMPAT_OFF_T_MAX) | 448 | if (f.l_start > COMPAT_OFF_T_MAX) |
449 | ret = -EOVERFLOW; | 449 | ret = -EOVERFLOW; |
450 | if (f.l_len > COMPAT_OFF_T_MAX) | 450 | if (f.l_len > COMPAT_OFF_T_MAX) |
451 | f.l_len = COMPAT_OFF_T_MAX; | 451 | f.l_len = COMPAT_OFF_T_MAX; |
452 | if (ret == 0) | 452 | if (ret == 0) |
453 | ret = put_compat_flock(&f, compat_ptr(arg)); | 453 | ret = put_compat_flock(&f, compat_ptr(arg)); |
454 | } | 454 | } |
455 | break; | 455 | break; |
456 | 456 | ||
457 | case F_GETLK64: | 457 | case F_GETLK64: |
458 | case F_SETLK64: | 458 | case F_SETLK64: |
459 | case F_SETLKW64: | 459 | case F_SETLKW64: |
460 | case F_GETLKP: | 460 | case F_OFD_GETLK: |
461 | case F_SETLKP: | 461 | case F_OFD_SETLK: |
462 | case F_SETLKPW: | 462 | case F_OFD_SETLKW: |
463 | ret = get_compat_flock64(&f, compat_ptr(arg)); | 463 | ret = get_compat_flock64(&f, compat_ptr(arg)); |
464 | if (ret != 0) | 464 | if (ret != 0) |
465 | break; | 465 | break; |
466 | old_fs = get_fs(); | 466 | old_fs = get_fs(); |
467 | set_fs(KERNEL_DS); | 467 | set_fs(KERNEL_DS); |
468 | conv_cmd = convert_fcntl_cmd(cmd); | 468 | conv_cmd = convert_fcntl_cmd(cmd); |
469 | ret = sys_fcntl(fd, conv_cmd, (unsigned long)&f); | 469 | ret = sys_fcntl(fd, conv_cmd, (unsigned long)&f); |
470 | set_fs(old_fs); | 470 | set_fs(old_fs); |
471 | if ((conv_cmd == F_GETLK || conv_cmd == F_GETLKP) && ret == 0) { | 471 | if ((conv_cmd == F_GETLK || conv_cmd == F_OFD_GETLK) && ret == 0) { |
472 | /* need to return lock information - see above for commentary */ | 472 | /* need to return lock information - see above for commentary */ |
473 | if (f.l_start > COMPAT_LOFF_T_MAX) | 473 | if (f.l_start > COMPAT_LOFF_T_MAX) |
474 | ret = -EOVERFLOW; | 474 | ret = -EOVERFLOW; |
475 | if (f.l_len > COMPAT_LOFF_T_MAX) | 475 | if (f.l_len > COMPAT_LOFF_T_MAX) |
476 | f.l_len = COMPAT_LOFF_T_MAX; | 476 | f.l_len = COMPAT_LOFF_T_MAX; |
477 | if (ret == 0) | 477 | if (ret == 0) |
478 | ret = put_compat_flock64(&f, compat_ptr(arg)); | 478 | ret = put_compat_flock64(&f, compat_ptr(arg)); |
479 | } | 479 | } |
480 | break; | 480 | break; |
481 | 481 | ||
482 | default: | 482 | default: |
483 | ret = sys_fcntl(fd, cmd, arg); | 483 | ret = sys_fcntl(fd, cmd, arg); |
484 | break; | 484 | break; |
485 | } | 485 | } |
486 | return ret; | 486 | return ret; |
487 | } | 487 | } |
488 | 488 | ||
489 | COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, | 489 | COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, |
490 | compat_ulong_t, arg) | 490 | compat_ulong_t, arg) |
491 | { | 491 | { |
492 | switch (cmd) { | 492 | switch (cmd) { |
493 | case F_GETLK64: | 493 | case F_GETLK64: |
494 | case F_SETLK64: | 494 | case F_SETLK64: |
495 | case F_SETLKW64: | 495 | case F_SETLKW64: |
496 | case F_GETLKP: | 496 | case F_OFD_GETLK: |
497 | case F_SETLKP: | 497 | case F_OFD_SETLK: |
498 | case F_SETLKPW: | 498 | case F_OFD_SETLKW: |
499 | return -EINVAL; | 499 | return -EINVAL; |
500 | } | 500 | } |
501 | return compat_sys_fcntl64(fd, cmd, arg); | 501 | return compat_sys_fcntl64(fd, cmd, arg); |
502 | } | 502 | } |
503 | 503 | ||
504 | COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_reqs, u32 __user *, ctx32p) | 504 | COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_reqs, u32 __user *, ctx32p) |
505 | { | 505 | { |
506 | long ret; | 506 | long ret; |
507 | aio_context_t ctx64; | 507 | aio_context_t ctx64; |
508 | 508 | ||
509 | mm_segment_t oldfs = get_fs(); | 509 | mm_segment_t oldfs = get_fs(); |
510 | if (unlikely(get_user(ctx64, ctx32p))) | 510 | if (unlikely(get_user(ctx64, ctx32p))) |
511 | return -EFAULT; | 511 | return -EFAULT; |
512 | 512 | ||
513 | set_fs(KERNEL_DS); | 513 | set_fs(KERNEL_DS); |
514 | /* The __user pointer cast is valid because of the set_fs() */ | 514 | /* The __user pointer cast is valid because of the set_fs() */ |
515 | ret = sys_io_setup(nr_reqs, (aio_context_t __user *) &ctx64); | 515 | ret = sys_io_setup(nr_reqs, (aio_context_t __user *) &ctx64); |
516 | set_fs(oldfs); | 516 | set_fs(oldfs); |
517 | /* truncating is ok because it's a user address */ | 517 | /* truncating is ok because it's a user address */ |
518 | if (!ret) | 518 | if (!ret) |
519 | ret = put_user((u32) ctx64, ctx32p); | 519 | ret = put_user((u32) ctx64, ctx32p); |
520 | return ret; | 520 | return ret; |
521 | } | 521 | } |
522 | 522 | ||
523 | COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id, | 523 | COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id, |
524 | compat_long_t, min_nr, | 524 | compat_long_t, min_nr, |
525 | compat_long_t, nr, | 525 | compat_long_t, nr, |
526 | struct io_event __user *, events, | 526 | struct io_event __user *, events, |
527 | struct compat_timespec __user *, timeout) | 527 | struct compat_timespec __user *, timeout) |
528 | { | 528 | { |
529 | struct timespec t; | 529 | struct timespec t; |
530 | struct timespec __user *ut = NULL; | 530 | struct timespec __user *ut = NULL; |
531 | 531 | ||
532 | if (timeout) { | 532 | if (timeout) { |
533 | if (compat_get_timespec(&t, timeout)) | 533 | if (compat_get_timespec(&t, timeout)) |
534 | return -EFAULT; | 534 | return -EFAULT; |
535 | 535 | ||
536 | ut = compat_alloc_user_space(sizeof(*ut)); | 536 | ut = compat_alloc_user_space(sizeof(*ut)); |
537 | if (copy_to_user(ut, &t, sizeof(t)) ) | 537 | if (copy_to_user(ut, &t, sizeof(t)) ) |
538 | return -EFAULT; | 538 | return -EFAULT; |
539 | } | 539 | } |
540 | return sys_io_getevents(ctx_id, min_nr, nr, events, ut); | 540 | return sys_io_getevents(ctx_id, min_nr, nr, events, ut); |
541 | } | 541 | } |
542 | 542 | ||
543 | /* A write operation does a read from user space and vice versa */ | 543 | /* A write operation does a read from user space and vice versa */ |
544 | #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) | 544 | #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) |
545 | 545 | ||
546 | ssize_t compat_rw_copy_check_uvector(int type, | 546 | ssize_t compat_rw_copy_check_uvector(int type, |
547 | const struct compat_iovec __user *uvector, unsigned long nr_segs, | 547 | const struct compat_iovec __user *uvector, unsigned long nr_segs, |
548 | unsigned long fast_segs, struct iovec *fast_pointer, | 548 | unsigned long fast_segs, struct iovec *fast_pointer, |
549 | struct iovec **ret_pointer) | 549 | struct iovec **ret_pointer) |
550 | { | 550 | { |
551 | compat_ssize_t tot_len; | 551 | compat_ssize_t tot_len; |
552 | struct iovec *iov = *ret_pointer = fast_pointer; | 552 | struct iovec *iov = *ret_pointer = fast_pointer; |
553 | ssize_t ret = 0; | 553 | ssize_t ret = 0; |
554 | int seg; | 554 | int seg; |
555 | 555 | ||
556 | /* | 556 | /* |
557 | * SuS says "The readv() function *may* fail if the iovcnt argument | 557 | * SuS says "The readv() function *may* fail if the iovcnt argument |
558 | * was less than or equal to 0, or greater than {IOV_MAX}. Linux has | 558 | * was less than or equal to 0, or greater than {IOV_MAX}. Linux has |
559 | * traditionally returned zero for zero segments, so... | 559 | * traditionally returned zero for zero segments, so... |
560 | */ | 560 | */ |
561 | if (nr_segs == 0) | 561 | if (nr_segs == 0) |
562 | goto out; | 562 | goto out; |
563 | 563 | ||
564 | ret = -EINVAL; | 564 | ret = -EINVAL; |
565 | if (nr_segs > UIO_MAXIOV || nr_segs < 0) | 565 | if (nr_segs > UIO_MAXIOV || nr_segs < 0) |
566 | goto out; | 566 | goto out; |
567 | if (nr_segs > fast_segs) { | 567 | if (nr_segs > fast_segs) { |
568 | ret = -ENOMEM; | 568 | ret = -ENOMEM; |
569 | iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); | 569 | iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); |
570 | if (iov == NULL) | 570 | if (iov == NULL) |
571 | goto out; | 571 | goto out; |
572 | } | 572 | } |
573 | *ret_pointer = iov; | 573 | *ret_pointer = iov; |
574 | 574 | ||
575 | ret = -EFAULT; | 575 | ret = -EFAULT; |
576 | if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector))) | 576 | if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector))) |
577 | goto out; | 577 | goto out; |
578 | 578 | ||
579 | /* | 579 | /* |
580 | * Single unix specification: | 580 | * Single unix specification: |
581 | * We should -EINVAL if an element length is not >= 0 and fitting an | 581 | * We should -EINVAL if an element length is not >= 0 and fitting an |
582 | * ssize_t. | 582 | * ssize_t. |
583 | * | 583 | * |
584 | * In Linux, the total length is limited to MAX_RW_COUNT, there is | 584 | * In Linux, the total length is limited to MAX_RW_COUNT, there is |
585 | * no overflow possibility. | 585 | * no overflow possibility. |
586 | */ | 586 | */ |
587 | tot_len = 0; | 587 | tot_len = 0; |
588 | ret = -EINVAL; | 588 | ret = -EINVAL; |
589 | for (seg = 0; seg < nr_segs; seg++) { | 589 | for (seg = 0; seg < nr_segs; seg++) { |
590 | compat_uptr_t buf; | 590 | compat_uptr_t buf; |
591 | compat_ssize_t len; | 591 | compat_ssize_t len; |
592 | 592 | ||
593 | if (__get_user(len, &uvector->iov_len) || | 593 | if (__get_user(len, &uvector->iov_len) || |
594 | __get_user(buf, &uvector->iov_base)) { | 594 | __get_user(buf, &uvector->iov_base)) { |
595 | ret = -EFAULT; | 595 | ret = -EFAULT; |
596 | goto out; | 596 | goto out; |
597 | } | 597 | } |
598 | if (len < 0) /* size_t not fitting in compat_ssize_t .. */ | 598 | if (len < 0) /* size_t not fitting in compat_ssize_t .. */ |
599 | goto out; | 599 | goto out; |
600 | if (type >= 0 && | 600 | if (type >= 0 && |
601 | !access_ok(vrfy_dir(type), compat_ptr(buf), len)) { | 601 | !access_ok(vrfy_dir(type), compat_ptr(buf), len)) { |
602 | ret = -EFAULT; | 602 | ret = -EFAULT; |
603 | goto out; | 603 | goto out; |
604 | } | 604 | } |
605 | if (len > MAX_RW_COUNT - tot_len) | 605 | if (len > MAX_RW_COUNT - tot_len) |
606 | len = MAX_RW_COUNT - tot_len; | 606 | len = MAX_RW_COUNT - tot_len; |
607 | tot_len += len; | 607 | tot_len += len; |
608 | iov->iov_base = compat_ptr(buf); | 608 | iov->iov_base = compat_ptr(buf); |
609 | iov->iov_len = (compat_size_t) len; | 609 | iov->iov_len = (compat_size_t) len; |
610 | uvector++; | 610 | uvector++; |
611 | iov++; | 611 | iov++; |
612 | } | 612 | } |
613 | ret = tot_len; | 613 | ret = tot_len; |
614 | 614 | ||
615 | out: | 615 | out: |
616 | return ret; | 616 | return ret; |
617 | } | 617 | } |
618 | 618 | ||
619 | static inline long | 619 | static inline long |
620 | copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64) | 620 | copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64) |
621 | { | 621 | { |
622 | compat_uptr_t uptr; | 622 | compat_uptr_t uptr; |
623 | int i; | 623 | int i; |
624 | 624 | ||
625 | for (i = 0; i < nr; ++i) { | 625 | for (i = 0; i < nr; ++i) { |
626 | if (get_user(uptr, ptr32 + i)) | 626 | if (get_user(uptr, ptr32 + i)) |
627 | return -EFAULT; | 627 | return -EFAULT; |
628 | if (put_user(compat_ptr(uptr), ptr64 + i)) | 628 | if (put_user(compat_ptr(uptr), ptr64 + i)) |
629 | return -EFAULT; | 629 | return -EFAULT; |
630 | } | 630 | } |
631 | return 0; | 631 | return 0; |
632 | } | 632 | } |
633 | 633 | ||
634 | #define MAX_AIO_SUBMITS (PAGE_SIZE/sizeof(struct iocb *)) | 634 | #define MAX_AIO_SUBMITS (PAGE_SIZE/sizeof(struct iocb *)) |
635 | 635 | ||
636 | COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, | 636 | COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, |
637 | int, nr, u32 __user *, iocb) | 637 | int, nr, u32 __user *, iocb) |
638 | { | 638 | { |
639 | struct iocb __user * __user *iocb64; | 639 | struct iocb __user * __user *iocb64; |
640 | long ret; | 640 | long ret; |
641 | 641 | ||
642 | if (unlikely(nr < 0)) | 642 | if (unlikely(nr < 0)) |
643 | return -EINVAL; | 643 | return -EINVAL; |
644 | 644 | ||
645 | if (nr > MAX_AIO_SUBMITS) | 645 | if (nr > MAX_AIO_SUBMITS) |
646 | nr = MAX_AIO_SUBMITS; | 646 | nr = MAX_AIO_SUBMITS; |
647 | 647 | ||
648 | iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64)); | 648 | iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64)); |
649 | ret = copy_iocb(nr, iocb, iocb64); | 649 | ret = copy_iocb(nr, iocb, iocb64); |
650 | if (!ret) | 650 | if (!ret) |
651 | ret = do_io_submit(ctx_id, nr, iocb64, 1); | 651 | ret = do_io_submit(ctx_id, nr, iocb64, 1); |
652 | return ret; | 652 | return ret; |
653 | } | 653 | } |
654 | 654 | ||
655 | struct compat_ncp_mount_data { | 655 | struct compat_ncp_mount_data { |
656 | compat_int_t version; | 656 | compat_int_t version; |
657 | compat_uint_t ncp_fd; | 657 | compat_uint_t ncp_fd; |
658 | __compat_uid_t mounted_uid; | 658 | __compat_uid_t mounted_uid; |
659 | compat_pid_t wdog_pid; | 659 | compat_pid_t wdog_pid; |
660 | unsigned char mounted_vol[NCP_VOLNAME_LEN + 1]; | 660 | unsigned char mounted_vol[NCP_VOLNAME_LEN + 1]; |
661 | compat_uint_t time_out; | 661 | compat_uint_t time_out; |
662 | compat_uint_t retry_count; | 662 | compat_uint_t retry_count; |
663 | compat_uint_t flags; | 663 | compat_uint_t flags; |
664 | __compat_uid_t uid; | 664 | __compat_uid_t uid; |
665 | __compat_gid_t gid; | 665 | __compat_gid_t gid; |
666 | compat_mode_t file_mode; | 666 | compat_mode_t file_mode; |
667 | compat_mode_t dir_mode; | 667 | compat_mode_t dir_mode; |
668 | }; | 668 | }; |
669 | 669 | ||
670 | struct compat_ncp_mount_data_v4 { | 670 | struct compat_ncp_mount_data_v4 { |
671 | compat_int_t version; | 671 | compat_int_t version; |
672 | compat_ulong_t flags; | 672 | compat_ulong_t flags; |
673 | compat_ulong_t mounted_uid; | 673 | compat_ulong_t mounted_uid; |
674 | compat_long_t wdog_pid; | 674 | compat_long_t wdog_pid; |
675 | compat_uint_t ncp_fd; | 675 | compat_uint_t ncp_fd; |
676 | compat_uint_t time_out; | 676 | compat_uint_t time_out; |
677 | compat_uint_t retry_count; | 677 | compat_uint_t retry_count; |
678 | compat_ulong_t uid; | 678 | compat_ulong_t uid; |
679 | compat_ulong_t gid; | 679 | compat_ulong_t gid; |
680 | compat_ulong_t file_mode; | 680 | compat_ulong_t file_mode; |
681 | compat_ulong_t dir_mode; | 681 | compat_ulong_t dir_mode; |
682 | }; | 682 | }; |
683 | 683 | ||
684 | static void *do_ncp_super_data_conv(void *raw_data) | 684 | static void *do_ncp_super_data_conv(void *raw_data) |
685 | { | 685 | { |
686 | int version = *(unsigned int *)raw_data; | 686 | int version = *(unsigned int *)raw_data; |
687 | 687 | ||
688 | if (version == 3) { | 688 | if (version == 3) { |
689 | struct compat_ncp_mount_data *c_n = raw_data; | 689 | struct compat_ncp_mount_data *c_n = raw_data; |
690 | struct ncp_mount_data *n = raw_data; | 690 | struct ncp_mount_data *n = raw_data; |
691 | 691 | ||
692 | n->dir_mode = c_n->dir_mode; | 692 | n->dir_mode = c_n->dir_mode; |
693 | n->file_mode = c_n->file_mode; | 693 | n->file_mode = c_n->file_mode; |
694 | n->gid = c_n->gid; | 694 | n->gid = c_n->gid; |
695 | n->uid = c_n->uid; | 695 | n->uid = c_n->uid; |
696 | memmove (n->mounted_vol, c_n->mounted_vol, (sizeof (c_n->mounted_vol) + 3 * sizeof (unsigned int))); | 696 | memmove (n->mounted_vol, c_n->mounted_vol, (sizeof (c_n->mounted_vol) + 3 * sizeof (unsigned int))); |
697 | n->wdog_pid = c_n->wdog_pid; | 697 | n->wdog_pid = c_n->wdog_pid; |
698 | n->mounted_uid = c_n->mounted_uid; | 698 | n->mounted_uid = c_n->mounted_uid; |
699 | } else if (version == 4) { | 699 | } else if (version == 4) { |
700 | struct compat_ncp_mount_data_v4 *c_n = raw_data; | 700 | struct compat_ncp_mount_data_v4 *c_n = raw_data; |
701 | struct ncp_mount_data_v4 *n = raw_data; | 701 | struct ncp_mount_data_v4 *n = raw_data; |
702 | 702 | ||
703 | n->dir_mode = c_n->dir_mode; | 703 | n->dir_mode = c_n->dir_mode; |
704 | n->file_mode = c_n->file_mode; | 704 | n->file_mode = c_n->file_mode; |
705 | n->gid = c_n->gid; | 705 | n->gid = c_n->gid; |
706 | n->uid = c_n->uid; | 706 | n->uid = c_n->uid; |
707 | n->retry_count = c_n->retry_count; | 707 | n->retry_count = c_n->retry_count; |
708 | n->time_out = c_n->time_out; | 708 | n->time_out = c_n->time_out; |
709 | n->ncp_fd = c_n->ncp_fd; | 709 | n->ncp_fd = c_n->ncp_fd; |
710 | n->wdog_pid = c_n->wdog_pid; | 710 | n->wdog_pid = c_n->wdog_pid; |
711 | n->mounted_uid = c_n->mounted_uid; | 711 | n->mounted_uid = c_n->mounted_uid; |
712 | n->flags = c_n->flags; | 712 | n->flags = c_n->flags; |
713 | } else if (version != 5) { | 713 | } else if (version != 5) { |
714 | return NULL; | 714 | return NULL; |
715 | } | 715 | } |
716 | 716 | ||
717 | return raw_data; | 717 | return raw_data; |
718 | } | 718 | } |
719 | 719 | ||
720 | 720 | ||
721 | struct compat_nfs_string { | 721 | struct compat_nfs_string { |
722 | compat_uint_t len; | 722 | compat_uint_t len; |
723 | compat_uptr_t data; | 723 | compat_uptr_t data; |
724 | }; | 724 | }; |
725 | 725 | ||
726 | static inline void compat_nfs_string(struct nfs_string *dst, | 726 | static inline void compat_nfs_string(struct nfs_string *dst, |
727 | struct compat_nfs_string *src) | 727 | struct compat_nfs_string *src) |
728 | { | 728 | { |
729 | dst->data = compat_ptr(src->data); | 729 | dst->data = compat_ptr(src->data); |
730 | dst->len = src->len; | 730 | dst->len = src->len; |
731 | } | 731 | } |
732 | 732 | ||
733 | struct compat_nfs4_mount_data_v1 { | 733 | struct compat_nfs4_mount_data_v1 { |
734 | compat_int_t version; | 734 | compat_int_t version; |
735 | compat_int_t flags; | 735 | compat_int_t flags; |
736 | compat_int_t rsize; | 736 | compat_int_t rsize; |
737 | compat_int_t wsize; | 737 | compat_int_t wsize; |
738 | compat_int_t timeo; | 738 | compat_int_t timeo; |
739 | compat_int_t retrans; | 739 | compat_int_t retrans; |
740 | compat_int_t acregmin; | 740 | compat_int_t acregmin; |
741 | compat_int_t acregmax; | 741 | compat_int_t acregmax; |
742 | compat_int_t acdirmin; | 742 | compat_int_t acdirmin; |
743 | compat_int_t acdirmax; | 743 | compat_int_t acdirmax; |
744 | struct compat_nfs_string client_addr; | 744 | struct compat_nfs_string client_addr; |
745 | struct compat_nfs_string mnt_path; | 745 | struct compat_nfs_string mnt_path; |
746 | struct compat_nfs_string hostname; | 746 | struct compat_nfs_string hostname; |
747 | compat_uint_t host_addrlen; | 747 | compat_uint_t host_addrlen; |
748 | compat_uptr_t host_addr; | 748 | compat_uptr_t host_addr; |
749 | compat_int_t proto; | 749 | compat_int_t proto; |
750 | compat_int_t auth_flavourlen; | 750 | compat_int_t auth_flavourlen; |
751 | compat_uptr_t auth_flavours; | 751 | compat_uptr_t auth_flavours; |
752 | }; | 752 | }; |
753 | 753 | ||
754 | static int do_nfs4_super_data_conv(void *raw_data) | 754 | static int do_nfs4_super_data_conv(void *raw_data) |
755 | { | 755 | { |
756 | int version = *(compat_uint_t *) raw_data; | 756 | int version = *(compat_uint_t *) raw_data; |
757 | 757 | ||
758 | if (version == 1) { | 758 | if (version == 1) { |
759 | struct compat_nfs4_mount_data_v1 *raw = raw_data; | 759 | struct compat_nfs4_mount_data_v1 *raw = raw_data; |
760 | struct nfs4_mount_data *real = raw_data; | 760 | struct nfs4_mount_data *real = raw_data; |
761 | 761 | ||
762 | /* copy the fields backwards */ | 762 | /* copy the fields backwards */ |
763 | real->auth_flavours = compat_ptr(raw->auth_flavours); | 763 | real->auth_flavours = compat_ptr(raw->auth_flavours); |
764 | real->auth_flavourlen = raw->auth_flavourlen; | 764 | real->auth_flavourlen = raw->auth_flavourlen; |
765 | real->proto = raw->proto; | 765 | real->proto = raw->proto; |
766 | real->host_addr = compat_ptr(raw->host_addr); | 766 | real->host_addr = compat_ptr(raw->host_addr); |
767 | real->host_addrlen = raw->host_addrlen; | 767 | real->host_addrlen = raw->host_addrlen; |
768 | compat_nfs_string(&real->hostname, &raw->hostname); | 768 | compat_nfs_string(&real->hostname, &raw->hostname); |
769 | compat_nfs_string(&real->mnt_path, &raw->mnt_path); | 769 | compat_nfs_string(&real->mnt_path, &raw->mnt_path); |
770 | compat_nfs_string(&real->client_addr, &raw->client_addr); | 770 | compat_nfs_string(&real->client_addr, &raw->client_addr); |
771 | real->acdirmax = raw->acdirmax; | 771 | real->acdirmax = raw->acdirmax; |
772 | real->acdirmin = raw->acdirmin; | 772 | real->acdirmin = raw->acdirmin; |
773 | real->acregmax = raw->acregmax; | 773 | real->acregmax = raw->acregmax; |
774 | real->acregmin = raw->acregmin; | 774 | real->acregmin = raw->acregmin; |
775 | real->retrans = raw->retrans; | 775 | real->retrans = raw->retrans; |
776 | real->timeo = raw->timeo; | 776 | real->timeo = raw->timeo; |
777 | real->wsize = raw->wsize; | 777 | real->wsize = raw->wsize; |
778 | real->rsize = raw->rsize; | 778 | real->rsize = raw->rsize; |
779 | real->flags = raw->flags; | 779 | real->flags = raw->flags; |
780 | real->version = raw->version; | 780 | real->version = raw->version; |
781 | } | 781 | } |
782 | 782 | ||
783 | return 0; | 783 | return 0; |
784 | } | 784 | } |
785 | 785 | ||
786 | #define NCPFS_NAME "ncpfs" | 786 | #define NCPFS_NAME "ncpfs" |
787 | #define NFS4_NAME "nfs4" | 787 | #define NFS4_NAME "nfs4" |
788 | 788 | ||
789 | COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name, | 789 | COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name, |
790 | const char __user *, dir_name, | 790 | const char __user *, dir_name, |
791 | const char __user *, type, compat_ulong_t, flags, | 791 | const char __user *, type, compat_ulong_t, flags, |
792 | const void __user *, data) | 792 | const void __user *, data) |
793 | { | 793 | { |
794 | char *kernel_type; | 794 | char *kernel_type; |
795 | unsigned long data_page; | 795 | unsigned long data_page; |
796 | char *kernel_dev; | 796 | char *kernel_dev; |
797 | struct filename *dir; | 797 | struct filename *dir; |
798 | int retval; | 798 | int retval; |
799 | 799 | ||
800 | retval = copy_mount_string(type, &kernel_type); | 800 | retval = copy_mount_string(type, &kernel_type); |
801 | if (retval < 0) | 801 | if (retval < 0) |
802 | goto out; | 802 | goto out; |
803 | 803 | ||
804 | dir = getname(dir_name); | 804 | dir = getname(dir_name); |
805 | retval = PTR_ERR(dir); | 805 | retval = PTR_ERR(dir); |
806 | if (IS_ERR(dir)) | 806 | if (IS_ERR(dir)) |
807 | goto out1; | 807 | goto out1; |
808 | 808 | ||
809 | retval = copy_mount_string(dev_name, &kernel_dev); | 809 | retval = copy_mount_string(dev_name, &kernel_dev); |
810 | if (retval < 0) | 810 | if (retval < 0) |
811 | goto out2; | 811 | goto out2; |
812 | 812 | ||
813 | retval = copy_mount_options(data, &data_page); | 813 | retval = copy_mount_options(data, &data_page); |
814 | if (retval < 0) | 814 | if (retval < 0) |
815 | goto out3; | 815 | goto out3; |
816 | 816 | ||
817 | retval = -EINVAL; | 817 | retval = -EINVAL; |
818 | 818 | ||
819 | if (kernel_type && data_page) { | 819 | if (kernel_type && data_page) { |
820 | if (!strcmp(kernel_type, NCPFS_NAME)) { | 820 | if (!strcmp(kernel_type, NCPFS_NAME)) { |
821 | do_ncp_super_data_conv((void *)data_page); | 821 | do_ncp_super_data_conv((void *)data_page); |
822 | } else if (!strcmp(kernel_type, NFS4_NAME)) { | 822 | } else if (!strcmp(kernel_type, NFS4_NAME)) { |
823 | if (do_nfs4_super_data_conv((void *) data_page)) | 823 | if (do_nfs4_super_data_conv((void *) data_page)) |
824 | goto out4; | 824 | goto out4; |
825 | } | 825 | } |
826 | } | 826 | } |
827 | 827 | ||
828 | retval = do_mount(kernel_dev, dir->name, kernel_type, | 828 | retval = do_mount(kernel_dev, dir->name, kernel_type, |
829 | flags, (void*)data_page); | 829 | flags, (void*)data_page); |
830 | 830 | ||
831 | out4: | 831 | out4: |
832 | free_page(data_page); | 832 | free_page(data_page); |
833 | out3: | 833 | out3: |
834 | kfree(kernel_dev); | 834 | kfree(kernel_dev); |
835 | out2: | 835 | out2: |
836 | putname(dir); | 836 | putname(dir); |
837 | out1: | 837 | out1: |
838 | kfree(kernel_type); | 838 | kfree(kernel_type); |
839 | out: | 839 | out: |
840 | return retval; | 840 | return retval; |
841 | } | 841 | } |
842 | 842 | ||
843 | struct compat_old_linux_dirent { | 843 | struct compat_old_linux_dirent { |
844 | compat_ulong_t d_ino; | 844 | compat_ulong_t d_ino; |
845 | compat_ulong_t d_offset; | 845 | compat_ulong_t d_offset; |
846 | unsigned short d_namlen; | 846 | unsigned short d_namlen; |
847 | char d_name[1]; | 847 | char d_name[1]; |
848 | }; | 848 | }; |
849 | 849 | ||
850 | struct compat_readdir_callback { | 850 | struct compat_readdir_callback { |
851 | struct dir_context ctx; | 851 | struct dir_context ctx; |
852 | struct compat_old_linux_dirent __user *dirent; | 852 | struct compat_old_linux_dirent __user *dirent; |
853 | int result; | 853 | int result; |
854 | }; | 854 | }; |
855 | 855 | ||
856 | static int compat_fillonedir(void *__buf, const char *name, int namlen, | 856 | static int compat_fillonedir(void *__buf, const char *name, int namlen, |
857 | loff_t offset, u64 ino, unsigned int d_type) | 857 | loff_t offset, u64 ino, unsigned int d_type) |
858 | { | 858 | { |
859 | struct compat_readdir_callback *buf = __buf; | 859 | struct compat_readdir_callback *buf = __buf; |
860 | struct compat_old_linux_dirent __user *dirent; | 860 | struct compat_old_linux_dirent __user *dirent; |
861 | compat_ulong_t d_ino; | 861 | compat_ulong_t d_ino; |
862 | 862 | ||
863 | if (buf->result) | 863 | if (buf->result) |
864 | return -EINVAL; | 864 | return -EINVAL; |
865 | d_ino = ino; | 865 | d_ino = ino; |
866 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { | 866 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { |
867 | buf->result = -EOVERFLOW; | 867 | buf->result = -EOVERFLOW; |
868 | return -EOVERFLOW; | 868 | return -EOVERFLOW; |
869 | } | 869 | } |
870 | buf->result++; | 870 | buf->result++; |
871 | dirent = buf->dirent; | 871 | dirent = buf->dirent; |
872 | if (!access_ok(VERIFY_WRITE, dirent, | 872 | if (!access_ok(VERIFY_WRITE, dirent, |
873 | (unsigned long)(dirent->d_name + namlen + 1) - | 873 | (unsigned long)(dirent->d_name + namlen + 1) - |
874 | (unsigned long)dirent)) | 874 | (unsigned long)dirent)) |
875 | goto efault; | 875 | goto efault; |
876 | if ( __put_user(d_ino, &dirent->d_ino) || | 876 | if ( __put_user(d_ino, &dirent->d_ino) || |
877 | __put_user(offset, &dirent->d_offset) || | 877 | __put_user(offset, &dirent->d_offset) || |
878 | __put_user(namlen, &dirent->d_namlen) || | 878 | __put_user(namlen, &dirent->d_namlen) || |
879 | __copy_to_user(dirent->d_name, name, namlen) || | 879 | __copy_to_user(dirent->d_name, name, namlen) || |
880 | __put_user(0, dirent->d_name + namlen)) | 880 | __put_user(0, dirent->d_name + namlen)) |
881 | goto efault; | 881 | goto efault; |
882 | return 0; | 882 | return 0; |
883 | efault: | 883 | efault: |
884 | buf->result = -EFAULT; | 884 | buf->result = -EFAULT; |
885 | return -EFAULT; | 885 | return -EFAULT; |
886 | } | 886 | } |
887 | 887 | ||
888 | COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd, | 888 | COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd, |
889 | struct compat_old_linux_dirent __user *, dirent, unsigned int, count) | 889 | struct compat_old_linux_dirent __user *, dirent, unsigned int, count) |
890 | { | 890 | { |
891 | int error; | 891 | int error; |
892 | struct fd f = fdget(fd); | 892 | struct fd f = fdget(fd); |
893 | struct compat_readdir_callback buf = { | 893 | struct compat_readdir_callback buf = { |
894 | .ctx.actor = compat_fillonedir, | 894 | .ctx.actor = compat_fillonedir, |
895 | .dirent = dirent | 895 | .dirent = dirent |
896 | }; | 896 | }; |
897 | 897 | ||
898 | if (!f.file) | 898 | if (!f.file) |
899 | return -EBADF; | 899 | return -EBADF; |
900 | 900 | ||
901 | error = iterate_dir(f.file, &buf.ctx); | 901 | error = iterate_dir(f.file, &buf.ctx); |
902 | if (buf.result) | 902 | if (buf.result) |
903 | error = buf.result; | 903 | error = buf.result; |
904 | 904 | ||
905 | fdput(f); | 905 | fdput(f); |
906 | return error; | 906 | return error; |
907 | } | 907 | } |
908 | 908 | ||
909 | struct compat_linux_dirent { | 909 | struct compat_linux_dirent { |
910 | compat_ulong_t d_ino; | 910 | compat_ulong_t d_ino; |
911 | compat_ulong_t d_off; | 911 | compat_ulong_t d_off; |
912 | unsigned short d_reclen; | 912 | unsigned short d_reclen; |
913 | char d_name[1]; | 913 | char d_name[1]; |
914 | }; | 914 | }; |
915 | 915 | ||
916 | struct compat_getdents_callback { | 916 | struct compat_getdents_callback { |
917 | struct dir_context ctx; | 917 | struct dir_context ctx; |
918 | struct compat_linux_dirent __user *current_dir; | 918 | struct compat_linux_dirent __user *current_dir; |
919 | struct compat_linux_dirent __user *previous; | 919 | struct compat_linux_dirent __user *previous; |
920 | int count; | 920 | int count; |
921 | int error; | 921 | int error; |
922 | }; | 922 | }; |
923 | 923 | ||
924 | static int compat_filldir(void *__buf, const char *name, int namlen, | 924 | static int compat_filldir(void *__buf, const char *name, int namlen, |
925 | loff_t offset, u64 ino, unsigned int d_type) | 925 | loff_t offset, u64 ino, unsigned int d_type) |
926 | { | 926 | { |
927 | struct compat_linux_dirent __user * dirent; | 927 | struct compat_linux_dirent __user * dirent; |
928 | struct compat_getdents_callback *buf = __buf; | 928 | struct compat_getdents_callback *buf = __buf; |
929 | compat_ulong_t d_ino; | 929 | compat_ulong_t d_ino; |
930 | int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) + | 930 | int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) + |
931 | namlen + 2, sizeof(compat_long_t)); | 931 | namlen + 2, sizeof(compat_long_t)); |
932 | 932 | ||
933 | buf->error = -EINVAL; /* only used if we fail.. */ | 933 | buf->error = -EINVAL; /* only used if we fail.. */ |
934 | if (reclen > buf->count) | 934 | if (reclen > buf->count) |
935 | return -EINVAL; | 935 | return -EINVAL; |
936 | d_ino = ino; | 936 | d_ino = ino; |
937 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { | 937 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { |
938 | buf->error = -EOVERFLOW; | 938 | buf->error = -EOVERFLOW; |
939 | return -EOVERFLOW; | 939 | return -EOVERFLOW; |
940 | } | 940 | } |
941 | dirent = buf->previous; | 941 | dirent = buf->previous; |
942 | if (dirent) { | 942 | if (dirent) { |
943 | if (__put_user(offset, &dirent->d_off)) | 943 | if (__put_user(offset, &dirent->d_off)) |
944 | goto efault; | 944 | goto efault; |
945 | } | 945 | } |
946 | dirent = buf->current_dir; | 946 | dirent = buf->current_dir; |
947 | if (__put_user(d_ino, &dirent->d_ino)) | 947 | if (__put_user(d_ino, &dirent->d_ino)) |
948 | goto efault; | 948 | goto efault; |
949 | if (__put_user(reclen, &dirent->d_reclen)) | 949 | if (__put_user(reclen, &dirent->d_reclen)) |
950 | goto efault; | 950 | goto efault; |
951 | if (copy_to_user(dirent->d_name, name, namlen)) | 951 | if (copy_to_user(dirent->d_name, name, namlen)) |
952 | goto efault; | 952 | goto efault; |
953 | if (__put_user(0, dirent->d_name + namlen)) | 953 | if (__put_user(0, dirent->d_name + namlen)) |
954 | goto efault; | 954 | goto efault; |
955 | if (__put_user(d_type, (char __user *) dirent + reclen - 1)) | 955 | if (__put_user(d_type, (char __user *) dirent + reclen - 1)) |
956 | goto efault; | 956 | goto efault; |
957 | buf->previous = dirent; | 957 | buf->previous = dirent; |
958 | dirent = (void __user *)dirent + reclen; | 958 | dirent = (void __user *)dirent + reclen; |
959 | buf->current_dir = dirent; | 959 | buf->current_dir = dirent; |
960 | buf->count -= reclen; | 960 | buf->count -= reclen; |
961 | return 0; | 961 | return 0; |
962 | efault: | 962 | efault: |
963 | buf->error = -EFAULT; | 963 | buf->error = -EFAULT; |
964 | return -EFAULT; | 964 | return -EFAULT; |
965 | } | 965 | } |
966 | 966 | ||
967 | COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd, | 967 | COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd, |
968 | struct compat_linux_dirent __user *, dirent, unsigned int, count) | 968 | struct compat_linux_dirent __user *, dirent, unsigned int, count) |
969 | { | 969 | { |
970 | struct fd f; | 970 | struct fd f; |
971 | struct compat_linux_dirent __user * lastdirent; | 971 | struct compat_linux_dirent __user * lastdirent; |
972 | struct compat_getdents_callback buf = { | 972 | struct compat_getdents_callback buf = { |
973 | .ctx.actor = compat_filldir, | 973 | .ctx.actor = compat_filldir, |
974 | .current_dir = dirent, | 974 | .current_dir = dirent, |
975 | .count = count | 975 | .count = count |
976 | }; | 976 | }; |
977 | int error; | 977 | int error; |
978 | 978 | ||
979 | if (!access_ok(VERIFY_WRITE, dirent, count)) | 979 | if (!access_ok(VERIFY_WRITE, dirent, count)) |
980 | return -EFAULT; | 980 | return -EFAULT; |
981 | 981 | ||
982 | f = fdget(fd); | 982 | f = fdget(fd); |
983 | if (!f.file) | 983 | if (!f.file) |
984 | return -EBADF; | 984 | return -EBADF; |
985 | 985 | ||
986 | error = iterate_dir(f.file, &buf.ctx); | 986 | error = iterate_dir(f.file, &buf.ctx); |
987 | if (error >= 0) | 987 | if (error >= 0) |
988 | error = buf.error; | 988 | error = buf.error; |
989 | lastdirent = buf.previous; | 989 | lastdirent = buf.previous; |
990 | if (lastdirent) { | 990 | if (lastdirent) { |
991 | if (put_user(buf.ctx.pos, &lastdirent->d_off)) | 991 | if (put_user(buf.ctx.pos, &lastdirent->d_off)) |
992 | error = -EFAULT; | 992 | error = -EFAULT; |
993 | else | 993 | else |
994 | error = count - buf.count; | 994 | error = count - buf.count; |
995 | } | 995 | } |
996 | fdput(f); | 996 | fdput(f); |
997 | return error; | 997 | return error; |
998 | } | 998 | } |
999 | 999 | ||
1000 | #ifdef __ARCH_WANT_COMPAT_SYS_GETDENTS64 | 1000 | #ifdef __ARCH_WANT_COMPAT_SYS_GETDENTS64 |
1001 | 1001 | ||
1002 | struct compat_getdents_callback64 { | 1002 | struct compat_getdents_callback64 { |
1003 | struct dir_context ctx; | 1003 | struct dir_context ctx; |
1004 | struct linux_dirent64 __user *current_dir; | 1004 | struct linux_dirent64 __user *current_dir; |
1005 | struct linux_dirent64 __user *previous; | 1005 | struct linux_dirent64 __user *previous; |
1006 | int count; | 1006 | int count; |
1007 | int error; | 1007 | int error; |
1008 | }; | 1008 | }; |
1009 | 1009 | ||
1010 | static int compat_filldir64(void * __buf, const char * name, int namlen, loff_t offset, | 1010 | static int compat_filldir64(void * __buf, const char * name, int namlen, loff_t offset, |
1011 | u64 ino, unsigned int d_type) | 1011 | u64 ino, unsigned int d_type) |
1012 | { | 1012 | { |
1013 | struct linux_dirent64 __user *dirent; | 1013 | struct linux_dirent64 __user *dirent; |
1014 | struct compat_getdents_callback64 *buf = __buf; | 1014 | struct compat_getdents_callback64 *buf = __buf; |
1015 | int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1, | 1015 | int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1, |
1016 | sizeof(u64)); | 1016 | sizeof(u64)); |
1017 | u64 off; | 1017 | u64 off; |
1018 | 1018 | ||
1019 | buf->error = -EINVAL; /* only used if we fail.. */ | 1019 | buf->error = -EINVAL; /* only used if we fail.. */ |
1020 | if (reclen > buf->count) | 1020 | if (reclen > buf->count) |
1021 | return -EINVAL; | 1021 | return -EINVAL; |
1022 | dirent = buf->previous; | 1022 | dirent = buf->previous; |
1023 | 1023 | ||
1024 | if (dirent) { | 1024 | if (dirent) { |
1025 | if (__put_user_unaligned(offset, &dirent->d_off)) | 1025 | if (__put_user_unaligned(offset, &dirent->d_off)) |
1026 | goto efault; | 1026 | goto efault; |
1027 | } | 1027 | } |
1028 | dirent = buf->current_dir; | 1028 | dirent = buf->current_dir; |
1029 | if (__put_user_unaligned(ino, &dirent->d_ino)) | 1029 | if (__put_user_unaligned(ino, &dirent->d_ino)) |
1030 | goto efault; | 1030 | goto efault; |
1031 | off = 0; | 1031 | off = 0; |
1032 | if (__put_user_unaligned(off, &dirent->d_off)) | 1032 | if (__put_user_unaligned(off, &dirent->d_off)) |
1033 | goto efault; | 1033 | goto efault; |
1034 | if (__put_user(reclen, &dirent->d_reclen)) | 1034 | if (__put_user(reclen, &dirent->d_reclen)) |
1035 | goto efault; | 1035 | goto efault; |
1036 | if (__put_user(d_type, &dirent->d_type)) | 1036 | if (__put_user(d_type, &dirent->d_type)) |
1037 | goto efault; | 1037 | goto efault; |
1038 | if (copy_to_user(dirent->d_name, name, namlen)) | 1038 | if (copy_to_user(dirent->d_name, name, namlen)) |
1039 | goto efault; | 1039 | goto efault; |
1040 | if (__put_user(0, dirent->d_name + namlen)) | 1040 | if (__put_user(0, dirent->d_name + namlen)) |
1041 | goto efault; | 1041 | goto efault; |
1042 | buf->previous = dirent; | 1042 | buf->previous = dirent; |
1043 | dirent = (void __user *)dirent + reclen; | 1043 | dirent = (void __user *)dirent + reclen; |
1044 | buf->current_dir = dirent; | 1044 | buf->current_dir = dirent; |
1045 | buf->count -= reclen; | 1045 | buf->count -= reclen; |
1046 | return 0; | 1046 | return 0; |
1047 | efault: | 1047 | efault: |
1048 | buf->error = -EFAULT; | 1048 | buf->error = -EFAULT; |
1049 | return -EFAULT; | 1049 | return -EFAULT; |
1050 | } | 1050 | } |
1051 | 1051 | ||
1052 | COMPAT_SYSCALL_DEFINE3(getdents64, unsigned int, fd, | 1052 | COMPAT_SYSCALL_DEFINE3(getdents64, unsigned int, fd, |
1053 | struct linux_dirent64 __user *, dirent, unsigned int, count) | 1053 | struct linux_dirent64 __user *, dirent, unsigned int, count) |
1054 | { | 1054 | { |
1055 | struct fd f; | 1055 | struct fd f; |
1056 | struct linux_dirent64 __user * lastdirent; | 1056 | struct linux_dirent64 __user * lastdirent; |
1057 | struct compat_getdents_callback64 buf = { | 1057 | struct compat_getdents_callback64 buf = { |
1058 | .ctx.actor = compat_filldir64, | 1058 | .ctx.actor = compat_filldir64, |
1059 | .current_dir = dirent, | 1059 | .current_dir = dirent, |
1060 | .count = count | 1060 | .count = count |
1061 | }; | 1061 | }; |
1062 | int error; | 1062 | int error; |
1063 | 1063 | ||
1064 | if (!access_ok(VERIFY_WRITE, dirent, count)) | 1064 | if (!access_ok(VERIFY_WRITE, dirent, count)) |
1065 | return -EFAULT; | 1065 | return -EFAULT; |
1066 | 1066 | ||
1067 | f = fdget(fd); | 1067 | f = fdget(fd); |
1068 | if (!f.file) | 1068 | if (!f.file) |
1069 | return -EBADF; | 1069 | return -EBADF; |
1070 | 1070 | ||
1071 | error = iterate_dir(f.file, &buf.ctx); | 1071 | error = iterate_dir(f.file, &buf.ctx); |
1072 | if (error >= 0) | 1072 | if (error >= 0) |
1073 | error = buf.error; | 1073 | error = buf.error; |
1074 | lastdirent = buf.previous; | 1074 | lastdirent = buf.previous; |
1075 | if (lastdirent) { | 1075 | if (lastdirent) { |
1076 | typeof(lastdirent->d_off) d_off = buf.ctx.pos; | 1076 | typeof(lastdirent->d_off) d_off = buf.ctx.pos; |
1077 | if (__put_user_unaligned(d_off, &lastdirent->d_off)) | 1077 | if (__put_user_unaligned(d_off, &lastdirent->d_off)) |
1078 | error = -EFAULT; | 1078 | error = -EFAULT; |
1079 | else | 1079 | else |
1080 | error = count - buf.count; | 1080 | error = count - buf.count; |
1081 | } | 1081 | } |
1082 | fdput(f); | 1082 | fdput(f); |
1083 | return error; | 1083 | return error; |
1084 | } | 1084 | } |
1085 | #endif /* __ARCH_WANT_COMPAT_SYS_GETDENTS64 */ | 1085 | #endif /* __ARCH_WANT_COMPAT_SYS_GETDENTS64 */ |
1086 | 1086 | ||
1087 | /* | 1087 | /* |
1088 | * Exactly like fs/open.c:sys_open(), except that it doesn't set the | 1088 | * Exactly like fs/open.c:sys_open(), except that it doesn't set the |
1089 | * O_LARGEFILE flag. | 1089 | * O_LARGEFILE flag. |
1090 | */ | 1090 | */ |
1091 | COMPAT_SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode) | 1091 | COMPAT_SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode) |
1092 | { | 1092 | { |
1093 | return do_sys_open(AT_FDCWD, filename, flags, mode); | 1093 | return do_sys_open(AT_FDCWD, filename, flags, mode); |
1094 | } | 1094 | } |
1095 | 1095 | ||
1096 | /* | 1096 | /* |
1097 | * Exactly like fs/open.c:sys_openat(), except that it doesn't set the | 1097 | * Exactly like fs/open.c:sys_openat(), except that it doesn't set the |
1098 | * O_LARGEFILE flag. | 1098 | * O_LARGEFILE flag. |
1099 | */ | 1099 | */ |
1100 | COMPAT_SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, umode_t, mode) | 1100 | COMPAT_SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, umode_t, mode) |
1101 | { | 1101 | { |
1102 | return do_sys_open(dfd, filename, flags, mode); | 1102 | return do_sys_open(dfd, filename, flags, mode); |
1103 | } | 1103 | } |
1104 | 1104 | ||
1105 | #define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t)) | 1105 | #define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t)) |
1106 | 1106 | ||
1107 | static int poll_select_copy_remaining(struct timespec *end_time, void __user *p, | 1107 | static int poll_select_copy_remaining(struct timespec *end_time, void __user *p, |
1108 | int timeval, int ret) | 1108 | int timeval, int ret) |
1109 | { | 1109 | { |
1110 | struct timespec ts; | 1110 | struct timespec ts; |
1111 | 1111 | ||
1112 | if (!p) | 1112 | if (!p) |
1113 | return ret; | 1113 | return ret; |
1114 | 1114 | ||
1115 | if (current->personality & STICKY_TIMEOUTS) | 1115 | if (current->personality & STICKY_TIMEOUTS) |
1116 | goto sticky; | 1116 | goto sticky; |
1117 | 1117 | ||
1118 | /* No update for zero timeout */ | 1118 | /* No update for zero timeout */ |
1119 | if (!end_time->tv_sec && !end_time->tv_nsec) | 1119 | if (!end_time->tv_sec && !end_time->tv_nsec) |
1120 | return ret; | 1120 | return ret; |
1121 | 1121 | ||
1122 | ktime_get_ts(&ts); | 1122 | ktime_get_ts(&ts); |
1123 | ts = timespec_sub(*end_time, ts); | 1123 | ts = timespec_sub(*end_time, ts); |
1124 | if (ts.tv_sec < 0) | 1124 | if (ts.tv_sec < 0) |
1125 | ts.tv_sec = ts.tv_nsec = 0; | 1125 | ts.tv_sec = ts.tv_nsec = 0; |
1126 | 1126 | ||
1127 | if (timeval) { | 1127 | if (timeval) { |
1128 | struct compat_timeval rtv; | 1128 | struct compat_timeval rtv; |
1129 | 1129 | ||
1130 | rtv.tv_sec = ts.tv_sec; | 1130 | rtv.tv_sec = ts.tv_sec; |
1131 | rtv.tv_usec = ts.tv_nsec / NSEC_PER_USEC; | 1131 | rtv.tv_usec = ts.tv_nsec / NSEC_PER_USEC; |
1132 | 1132 | ||
1133 | if (!copy_to_user(p, &rtv, sizeof(rtv))) | 1133 | if (!copy_to_user(p, &rtv, sizeof(rtv))) |
1134 | return ret; | 1134 | return ret; |
1135 | } else { | 1135 | } else { |
1136 | struct compat_timespec rts; | 1136 | struct compat_timespec rts; |
1137 | 1137 | ||
1138 | rts.tv_sec = ts.tv_sec; | 1138 | rts.tv_sec = ts.tv_sec; |
1139 | rts.tv_nsec = ts.tv_nsec; | 1139 | rts.tv_nsec = ts.tv_nsec; |
1140 | 1140 | ||
1141 | if (!copy_to_user(p, &rts, sizeof(rts))) | 1141 | if (!copy_to_user(p, &rts, sizeof(rts))) |
1142 | return ret; | 1142 | return ret; |
1143 | } | 1143 | } |
1144 | /* | 1144 | /* |
1145 | * If an application puts its timeval in read-only memory, we | 1145 | * If an application puts its timeval in read-only memory, we |
1146 | * don't want the Linux-specific update to the timeval to | 1146 | * don't want the Linux-specific update to the timeval to |
1147 | * cause a fault after the select has completed | 1147 | * cause a fault after the select has completed |
1148 | * successfully. However, because we're not updating the | 1148 | * successfully. However, because we're not updating the |
1149 | * timeval, we can't restart the system call. | 1149 | * timeval, we can't restart the system call. |
1150 | */ | 1150 | */ |
1151 | 1151 | ||
1152 | sticky: | 1152 | sticky: |
1153 | if (ret == -ERESTARTNOHAND) | 1153 | if (ret == -ERESTARTNOHAND) |
1154 | ret = -EINTR; | 1154 | ret = -EINTR; |
1155 | return ret; | 1155 | return ret; |
1156 | } | 1156 | } |
1157 | 1157 | ||
1158 | /* | 1158 | /* |
1159 | * Ooo, nasty. We need here to frob 32-bit unsigned longs to | 1159 | * Ooo, nasty. We need here to frob 32-bit unsigned longs to |
1160 | * 64-bit unsigned longs. | 1160 | * 64-bit unsigned longs. |
1161 | */ | 1161 | */ |
1162 | static | 1162 | static |
1163 | int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, | 1163 | int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, |
1164 | unsigned long *fdset) | 1164 | unsigned long *fdset) |
1165 | { | 1165 | { |
1166 | nr = DIV_ROUND_UP(nr, __COMPAT_NFDBITS); | 1166 | nr = DIV_ROUND_UP(nr, __COMPAT_NFDBITS); |
1167 | if (ufdset) { | 1167 | if (ufdset) { |
1168 | unsigned long odd; | 1168 | unsigned long odd; |
1169 | 1169 | ||
1170 | if (!access_ok(VERIFY_WRITE, ufdset, nr*sizeof(compat_ulong_t))) | 1170 | if (!access_ok(VERIFY_WRITE, ufdset, nr*sizeof(compat_ulong_t))) |
1171 | return -EFAULT; | 1171 | return -EFAULT; |
1172 | 1172 | ||
1173 | odd = nr & 1UL; | 1173 | odd = nr & 1UL; |
1174 | nr &= ~1UL; | 1174 | nr &= ~1UL; |
1175 | while (nr) { | 1175 | while (nr) { |
1176 | unsigned long h, l; | 1176 | unsigned long h, l; |
1177 | if (__get_user(l, ufdset) || __get_user(h, ufdset+1)) | 1177 | if (__get_user(l, ufdset) || __get_user(h, ufdset+1)) |
1178 | return -EFAULT; | 1178 | return -EFAULT; |
1179 | ufdset += 2; | 1179 | ufdset += 2; |
1180 | *fdset++ = h << 32 | l; | 1180 | *fdset++ = h << 32 | l; |
1181 | nr -= 2; | 1181 | nr -= 2; |
1182 | } | 1182 | } |
1183 | if (odd && __get_user(*fdset, ufdset)) | 1183 | if (odd && __get_user(*fdset, ufdset)) |
1184 | return -EFAULT; | 1184 | return -EFAULT; |
1185 | } else { | 1185 | } else { |
1186 | /* Tricky, must clear full unsigned long in the | 1186 | /* Tricky, must clear full unsigned long in the |
1187 | * kernel fdset at the end, this makes sure that | 1187 | * kernel fdset at the end, this makes sure that |
1188 | * actually happens. | 1188 | * actually happens. |
1189 | */ | 1189 | */ |
1190 | memset(fdset, 0, ((nr + 1) & ~1)*sizeof(compat_ulong_t)); | 1190 | memset(fdset, 0, ((nr + 1) & ~1)*sizeof(compat_ulong_t)); |
1191 | } | 1191 | } |
1192 | return 0; | 1192 | return 0; |
1193 | } | 1193 | } |
1194 | 1194 | ||
1195 | static | 1195 | static |
1196 | int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, | 1196 | int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, |
1197 | unsigned long *fdset) | 1197 | unsigned long *fdset) |
1198 | { | 1198 | { |
1199 | unsigned long odd; | 1199 | unsigned long odd; |
1200 | nr = DIV_ROUND_UP(nr, __COMPAT_NFDBITS); | 1200 | nr = DIV_ROUND_UP(nr, __COMPAT_NFDBITS); |
1201 | 1201 | ||
1202 | if (!ufdset) | 1202 | if (!ufdset) |
1203 | return 0; | 1203 | return 0; |
1204 | 1204 | ||
1205 | odd = nr & 1UL; | 1205 | odd = nr & 1UL; |
1206 | nr &= ~1UL; | 1206 | nr &= ~1UL; |
1207 | while (nr) { | 1207 | while (nr) { |
1208 | unsigned long h, l; | 1208 | unsigned long h, l; |
1209 | l = *fdset++; | 1209 | l = *fdset++; |
1210 | h = l >> 32; | 1210 | h = l >> 32; |
1211 | if (__put_user(l, ufdset) || __put_user(h, ufdset+1)) | 1211 | if (__put_user(l, ufdset) || __put_user(h, ufdset+1)) |
1212 | return -EFAULT; | 1212 | return -EFAULT; |
1213 | ufdset += 2; | 1213 | ufdset += 2; |
1214 | nr -= 2; | 1214 | nr -= 2; |
1215 | } | 1215 | } |
1216 | if (odd && __put_user(*fdset, ufdset)) | 1216 | if (odd && __put_user(*fdset, ufdset)) |
1217 | return -EFAULT; | 1217 | return -EFAULT; |
1218 | return 0; | 1218 | return 0; |
1219 | } | 1219 | } |
1220 | 1220 | ||
1221 | 1221 | ||
1222 | /* | 1222 | /* |
1223 | * This is a virtual copy of sys_select from fs/select.c and probably | 1223 | * This is a virtual copy of sys_select from fs/select.c and probably |
1224 | * should be compared to it from time to time | 1224 | * should be compared to it from time to time |
1225 | */ | 1225 | */ |
1226 | 1226 | ||
1227 | /* | 1227 | /* |
1228 | * We can actually return ERESTARTSYS instead of EINTR, but I'd | 1228 | * We can actually return ERESTARTSYS instead of EINTR, but I'd |
1229 | * like to be certain this leads to no problems. So I return | 1229 | * like to be certain this leads to no problems. So I return |
1230 | * EINTR just for safety. | 1230 | * EINTR just for safety. |
1231 | * | 1231 | * |
1232 | * Update: ERESTARTSYS breaks at least the xview clock binary, so | 1232 | * Update: ERESTARTSYS breaks at least the xview clock binary, so |
1233 | * I'm trying ERESTARTNOHAND which restart only when you want to. | 1233 | * I'm trying ERESTARTNOHAND which restart only when you want to. |
1234 | */ | 1234 | */ |
1235 | int compat_core_sys_select(int n, compat_ulong_t __user *inp, | 1235 | int compat_core_sys_select(int n, compat_ulong_t __user *inp, |
1236 | compat_ulong_t __user *outp, compat_ulong_t __user *exp, | 1236 | compat_ulong_t __user *outp, compat_ulong_t __user *exp, |
1237 | struct timespec *end_time) | 1237 | struct timespec *end_time) |
1238 | { | 1238 | { |
1239 | fd_set_bits fds; | 1239 | fd_set_bits fds; |
1240 | void *bits; | 1240 | void *bits; |
1241 | int size, max_fds, ret = -EINVAL; | 1241 | int size, max_fds, ret = -EINVAL; |
1242 | struct fdtable *fdt; | 1242 | struct fdtable *fdt; |
1243 | long stack_fds[SELECT_STACK_ALLOC/sizeof(long)]; | 1243 | long stack_fds[SELECT_STACK_ALLOC/sizeof(long)]; |
1244 | 1244 | ||
1245 | if (n < 0) | 1245 | if (n < 0) |
1246 | goto out_nofds; | 1246 | goto out_nofds; |
1247 | 1247 | ||
1248 | /* max_fds can increase, so grab it once to avoid race */ | 1248 | /* max_fds can increase, so grab it once to avoid race */ |
1249 | rcu_read_lock(); | 1249 | rcu_read_lock(); |
1250 | fdt = files_fdtable(current->files); | 1250 | fdt = files_fdtable(current->files); |
1251 | max_fds = fdt->max_fds; | 1251 | max_fds = fdt->max_fds; |
1252 | rcu_read_unlock(); | 1252 | rcu_read_unlock(); |
1253 | if (n > max_fds) | 1253 | if (n > max_fds) |
1254 | n = max_fds; | 1254 | n = max_fds; |
1255 | 1255 | ||
1256 | /* | 1256 | /* |
1257 | * We need 6 bitmaps (in/out/ex for both incoming and outgoing), | 1257 | * We need 6 bitmaps (in/out/ex for both incoming and outgoing), |
1258 | * since we used fdset we need to allocate memory in units of | 1258 | * since we used fdset we need to allocate memory in units of |
1259 | * long-words. | 1259 | * long-words. |
1260 | */ | 1260 | */ |
1261 | size = FDS_BYTES(n); | 1261 | size = FDS_BYTES(n); |
1262 | bits = stack_fds; | 1262 | bits = stack_fds; |
1263 | if (size > sizeof(stack_fds) / 6) { | 1263 | if (size > sizeof(stack_fds) / 6) { |
1264 | bits = kmalloc(6 * size, GFP_KERNEL); | 1264 | bits = kmalloc(6 * size, GFP_KERNEL); |
1265 | ret = -ENOMEM; | 1265 | ret = -ENOMEM; |
1266 | if (!bits) | 1266 | if (!bits) |
1267 | goto out_nofds; | 1267 | goto out_nofds; |
1268 | } | 1268 | } |
1269 | fds.in = (unsigned long *) bits; | 1269 | fds.in = (unsigned long *) bits; |
1270 | fds.out = (unsigned long *) (bits + size); | 1270 | fds.out = (unsigned long *) (bits + size); |
1271 | fds.ex = (unsigned long *) (bits + 2*size); | 1271 | fds.ex = (unsigned long *) (bits + 2*size); |
1272 | fds.res_in = (unsigned long *) (bits + 3*size); | 1272 | fds.res_in = (unsigned long *) (bits + 3*size); |
1273 | fds.res_out = (unsigned long *) (bits + 4*size); | 1273 | fds.res_out = (unsigned long *) (bits + 4*size); |
1274 | fds.res_ex = (unsigned long *) (bits + 5*size); | 1274 | fds.res_ex = (unsigned long *) (bits + 5*size); |
1275 | 1275 | ||
1276 | if ((ret = compat_get_fd_set(n, inp, fds.in)) || | 1276 | if ((ret = compat_get_fd_set(n, inp, fds.in)) || |
1277 | (ret = compat_get_fd_set(n, outp, fds.out)) || | 1277 | (ret = compat_get_fd_set(n, outp, fds.out)) || |
1278 | (ret = compat_get_fd_set(n, exp, fds.ex))) | 1278 | (ret = compat_get_fd_set(n, exp, fds.ex))) |
1279 | goto out; | 1279 | goto out; |
1280 | zero_fd_set(n, fds.res_in); | 1280 | zero_fd_set(n, fds.res_in); |
1281 | zero_fd_set(n, fds.res_out); | 1281 | zero_fd_set(n, fds.res_out); |
1282 | zero_fd_set(n, fds.res_ex); | 1282 | zero_fd_set(n, fds.res_ex); |
1283 | 1283 | ||
1284 | ret = do_select(n, &fds, end_time); | 1284 | ret = do_select(n, &fds, end_time); |
1285 | 1285 | ||
1286 | if (ret < 0) | 1286 | if (ret < 0) |
1287 | goto out; | 1287 | goto out; |
1288 | if (!ret) { | 1288 | if (!ret) { |
1289 | ret = -ERESTARTNOHAND; | 1289 | ret = -ERESTARTNOHAND; |
1290 | if (signal_pending(current)) | 1290 | if (signal_pending(current)) |
1291 | goto out; | 1291 | goto out; |
1292 | ret = 0; | 1292 | ret = 0; |
1293 | } | 1293 | } |
1294 | 1294 | ||
1295 | if (compat_set_fd_set(n, inp, fds.res_in) || | 1295 | if (compat_set_fd_set(n, inp, fds.res_in) || |
1296 | compat_set_fd_set(n, outp, fds.res_out) || | 1296 | compat_set_fd_set(n, outp, fds.res_out) || |
1297 | compat_set_fd_set(n, exp, fds.res_ex)) | 1297 | compat_set_fd_set(n, exp, fds.res_ex)) |
1298 | ret = -EFAULT; | 1298 | ret = -EFAULT; |
1299 | out: | 1299 | out: |
1300 | if (bits != stack_fds) | 1300 | if (bits != stack_fds) |
1301 | kfree(bits); | 1301 | kfree(bits); |
1302 | out_nofds: | 1302 | out_nofds: |
1303 | return ret; | 1303 | return ret; |
1304 | } | 1304 | } |
1305 | 1305 | ||
1306 | COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp, | 1306 | COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp, |
1307 | compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, | 1307 | compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, |
1308 | struct compat_timeval __user *, tvp) | 1308 | struct compat_timeval __user *, tvp) |
1309 | { | 1309 | { |
1310 | struct timespec end_time, *to = NULL; | 1310 | struct timespec end_time, *to = NULL; |
1311 | struct compat_timeval tv; | 1311 | struct compat_timeval tv; |
1312 | int ret; | 1312 | int ret; |
1313 | 1313 | ||
1314 | if (tvp) { | 1314 | if (tvp) { |
1315 | if (copy_from_user(&tv, tvp, sizeof(tv))) | 1315 | if (copy_from_user(&tv, tvp, sizeof(tv))) |
1316 | return -EFAULT; | 1316 | return -EFAULT; |
1317 | 1317 | ||
1318 | to = &end_time; | 1318 | to = &end_time; |
1319 | if (poll_select_set_timeout(to, | 1319 | if (poll_select_set_timeout(to, |
1320 | tv.tv_sec + (tv.tv_usec / USEC_PER_SEC), | 1320 | tv.tv_sec + (tv.tv_usec / USEC_PER_SEC), |
1321 | (tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC)) | 1321 | (tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC)) |
1322 | return -EINVAL; | 1322 | return -EINVAL; |
1323 | } | 1323 | } |
1324 | 1324 | ||
1325 | ret = compat_core_sys_select(n, inp, outp, exp, to); | 1325 | ret = compat_core_sys_select(n, inp, outp, exp, to); |
1326 | ret = poll_select_copy_remaining(&end_time, tvp, 1, ret); | 1326 | ret = poll_select_copy_remaining(&end_time, tvp, 1, ret); |
1327 | 1327 | ||
1328 | return ret; | 1328 | return ret; |
1329 | } | 1329 | } |
1330 | 1330 | ||
1331 | struct compat_sel_arg_struct { | 1331 | struct compat_sel_arg_struct { |
1332 | compat_ulong_t n; | 1332 | compat_ulong_t n; |
1333 | compat_uptr_t inp; | 1333 | compat_uptr_t inp; |
1334 | compat_uptr_t outp; | 1334 | compat_uptr_t outp; |
1335 | compat_uptr_t exp; | 1335 | compat_uptr_t exp; |
1336 | compat_uptr_t tvp; | 1336 | compat_uptr_t tvp; |
1337 | }; | 1337 | }; |
1338 | 1338 | ||
1339 | COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg) | 1339 | COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg) |
1340 | { | 1340 | { |
1341 | struct compat_sel_arg_struct a; | 1341 | struct compat_sel_arg_struct a; |
1342 | 1342 | ||
1343 | if (copy_from_user(&a, arg, sizeof(a))) | 1343 | if (copy_from_user(&a, arg, sizeof(a))) |
1344 | return -EFAULT; | 1344 | return -EFAULT; |
1345 | return compat_sys_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp), | 1345 | return compat_sys_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp), |
1346 | compat_ptr(a.exp), compat_ptr(a.tvp)); | 1346 | compat_ptr(a.exp), compat_ptr(a.tvp)); |
1347 | } | 1347 | } |
1348 | 1348 | ||
1349 | static long do_compat_pselect(int n, compat_ulong_t __user *inp, | 1349 | static long do_compat_pselect(int n, compat_ulong_t __user *inp, |
1350 | compat_ulong_t __user *outp, compat_ulong_t __user *exp, | 1350 | compat_ulong_t __user *outp, compat_ulong_t __user *exp, |
1351 | struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask, | 1351 | struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask, |
1352 | compat_size_t sigsetsize) | 1352 | compat_size_t sigsetsize) |
1353 | { | 1353 | { |
1354 | compat_sigset_t ss32; | 1354 | compat_sigset_t ss32; |
1355 | sigset_t ksigmask, sigsaved; | 1355 | sigset_t ksigmask, sigsaved; |
1356 | struct compat_timespec ts; | 1356 | struct compat_timespec ts; |
1357 | struct timespec end_time, *to = NULL; | 1357 | struct timespec end_time, *to = NULL; |
1358 | int ret; | 1358 | int ret; |
1359 | 1359 | ||
1360 | if (tsp) { | 1360 | if (tsp) { |
1361 | if (copy_from_user(&ts, tsp, sizeof(ts))) | 1361 | if (copy_from_user(&ts, tsp, sizeof(ts))) |
1362 | return -EFAULT; | 1362 | return -EFAULT; |
1363 | 1363 | ||
1364 | to = &end_time; | 1364 | to = &end_time; |
1365 | if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) | 1365 | if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) |
1366 | return -EINVAL; | 1366 | return -EINVAL; |
1367 | } | 1367 | } |
1368 | 1368 | ||
1369 | if (sigmask) { | 1369 | if (sigmask) { |
1370 | if (sigsetsize != sizeof(compat_sigset_t)) | 1370 | if (sigsetsize != sizeof(compat_sigset_t)) |
1371 | return -EINVAL; | 1371 | return -EINVAL; |
1372 | if (copy_from_user(&ss32, sigmask, sizeof(ss32))) | 1372 | if (copy_from_user(&ss32, sigmask, sizeof(ss32))) |
1373 | return -EFAULT; | 1373 | return -EFAULT; |
1374 | sigset_from_compat(&ksigmask, &ss32); | 1374 | sigset_from_compat(&ksigmask, &ss32); |
1375 | 1375 | ||
1376 | sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); | 1376 | sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); |
1377 | sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); | 1377 | sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); |
1378 | } | 1378 | } |
1379 | 1379 | ||
1380 | ret = compat_core_sys_select(n, inp, outp, exp, to); | 1380 | ret = compat_core_sys_select(n, inp, outp, exp, to); |
1381 | ret = poll_select_copy_remaining(&end_time, tsp, 0, ret); | 1381 | ret = poll_select_copy_remaining(&end_time, tsp, 0, ret); |
1382 | 1382 | ||
1383 | if (ret == -ERESTARTNOHAND) { | 1383 | if (ret == -ERESTARTNOHAND) { |
1384 | /* | 1384 | /* |
1385 | * Don't restore the signal mask yet. Let do_signal() deliver | 1385 | * Don't restore the signal mask yet. Let do_signal() deliver |
1386 | * the signal on the way back to userspace, before the signal | 1386 | * the signal on the way back to userspace, before the signal |
1387 | * mask is restored. | 1387 | * mask is restored. |
1388 | */ | 1388 | */ |
1389 | if (sigmask) { | 1389 | if (sigmask) { |
1390 | memcpy(&current->saved_sigmask, &sigsaved, | 1390 | memcpy(&current->saved_sigmask, &sigsaved, |
1391 | sizeof(sigsaved)); | 1391 | sizeof(sigsaved)); |
1392 | set_restore_sigmask(); | 1392 | set_restore_sigmask(); |
1393 | } | 1393 | } |
1394 | } else if (sigmask) | 1394 | } else if (sigmask) |
1395 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 1395 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); |
1396 | 1396 | ||
1397 | return ret; | 1397 | return ret; |
1398 | } | 1398 | } |
1399 | 1399 | ||
1400 | COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp, | 1400 | COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp, |
1401 | compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, | 1401 | compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, |
1402 | struct compat_timespec __user *, tsp, void __user *, sig) | 1402 | struct compat_timespec __user *, tsp, void __user *, sig) |
1403 | { | 1403 | { |
1404 | compat_size_t sigsetsize = 0; | 1404 | compat_size_t sigsetsize = 0; |
1405 | compat_uptr_t up = 0; | 1405 | compat_uptr_t up = 0; |
1406 | 1406 | ||
1407 | if (sig) { | 1407 | if (sig) { |
1408 | if (!access_ok(VERIFY_READ, sig, | 1408 | if (!access_ok(VERIFY_READ, sig, |
1409 | sizeof(compat_uptr_t)+sizeof(compat_size_t)) || | 1409 | sizeof(compat_uptr_t)+sizeof(compat_size_t)) || |
1410 | __get_user(up, (compat_uptr_t __user *)sig) || | 1410 | __get_user(up, (compat_uptr_t __user *)sig) || |
1411 | __get_user(sigsetsize, | 1411 | __get_user(sigsetsize, |
1412 | (compat_size_t __user *)(sig+sizeof(up)))) | 1412 | (compat_size_t __user *)(sig+sizeof(up)))) |
1413 | return -EFAULT; | 1413 | return -EFAULT; |
1414 | } | 1414 | } |
1415 | return do_compat_pselect(n, inp, outp, exp, tsp, compat_ptr(up), | 1415 | return do_compat_pselect(n, inp, outp, exp, tsp, compat_ptr(up), |
1416 | sigsetsize); | 1416 | sigsetsize); |
1417 | } | 1417 | } |
1418 | 1418 | ||
1419 | COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, | 1419 | COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, |
1420 | unsigned int, nfds, struct compat_timespec __user *, tsp, | 1420 | unsigned int, nfds, struct compat_timespec __user *, tsp, |
1421 | const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) | 1421 | const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) |
1422 | { | 1422 | { |
1423 | compat_sigset_t ss32; | 1423 | compat_sigset_t ss32; |
1424 | sigset_t ksigmask, sigsaved; | 1424 | sigset_t ksigmask, sigsaved; |
1425 | struct compat_timespec ts; | 1425 | struct compat_timespec ts; |
1426 | struct timespec end_time, *to = NULL; | 1426 | struct timespec end_time, *to = NULL; |
1427 | int ret; | 1427 | int ret; |
1428 | 1428 | ||
1429 | if (tsp) { | 1429 | if (tsp) { |
1430 | if (copy_from_user(&ts, tsp, sizeof(ts))) | 1430 | if (copy_from_user(&ts, tsp, sizeof(ts))) |
1431 | return -EFAULT; | 1431 | return -EFAULT; |
1432 | 1432 | ||
1433 | to = &end_time; | 1433 | to = &end_time; |
1434 | if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) | 1434 | if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) |
1435 | return -EINVAL; | 1435 | return -EINVAL; |
1436 | } | 1436 | } |
1437 | 1437 | ||
1438 | if (sigmask) { | 1438 | if (sigmask) { |
1439 | if (sigsetsize != sizeof(compat_sigset_t)) | 1439 | if (sigsetsize != sizeof(compat_sigset_t)) |
1440 | return -EINVAL; | 1440 | return -EINVAL; |
1441 | if (copy_from_user(&ss32, sigmask, sizeof(ss32))) | 1441 | if (copy_from_user(&ss32, sigmask, sizeof(ss32))) |
1442 | return -EFAULT; | 1442 | return -EFAULT; |
1443 | sigset_from_compat(&ksigmask, &ss32); | 1443 | sigset_from_compat(&ksigmask, &ss32); |
1444 | 1444 | ||
1445 | sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); | 1445 | sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); |
1446 | sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); | 1446 | sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); |
1447 | } | 1447 | } |
1448 | 1448 | ||
1449 | ret = do_sys_poll(ufds, nfds, to); | 1449 | ret = do_sys_poll(ufds, nfds, to); |
1450 | 1450 | ||
1451 | /* We can restart this syscall, usually */ | 1451 | /* We can restart this syscall, usually */ |
1452 | if (ret == -EINTR) { | 1452 | if (ret == -EINTR) { |
1453 | /* | 1453 | /* |
1454 | * Don't restore the signal mask yet. Let do_signal() deliver | 1454 | * Don't restore the signal mask yet. Let do_signal() deliver |
1455 | * the signal on the way back to userspace, before the signal | 1455 | * the signal on the way back to userspace, before the signal |
1456 | * mask is restored. | 1456 | * mask is restored. |
1457 | */ | 1457 | */ |
1458 | if (sigmask) { | 1458 | if (sigmask) { |
1459 | memcpy(&current->saved_sigmask, &sigsaved, | 1459 | memcpy(&current->saved_sigmask, &sigsaved, |
1460 | sizeof(sigsaved)); | 1460 | sizeof(sigsaved)); |
1461 | set_restore_sigmask(); | 1461 | set_restore_sigmask(); |
1462 | } | 1462 | } |
1463 | ret = -ERESTARTNOHAND; | 1463 | ret = -ERESTARTNOHAND; |
1464 | } else if (sigmask) | 1464 | } else if (sigmask) |
1465 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 1465 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); |
1466 | 1466 | ||
1467 | ret = poll_select_copy_remaining(&end_time, tsp, 0, ret); | 1467 | ret = poll_select_copy_remaining(&end_time, tsp, 0, ret); |
1468 | 1468 | ||
1469 | return ret; | 1469 | return ret; |
1470 | } | 1470 | } |
1471 | 1471 | ||
1472 | #ifdef CONFIG_FHANDLE | 1472 | #ifdef CONFIG_FHANDLE |
1473 | /* | 1473 | /* |
1474 | * Exactly like fs/open.c:sys_open_by_handle_at(), except that it | 1474 | * Exactly like fs/open.c:sys_open_by_handle_at(), except that it |
1475 | * doesn't set the O_LARGEFILE flag. | 1475 | * doesn't set the O_LARGEFILE flag. |
1476 | */ | 1476 | */ |
1477 | COMPAT_SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd, | 1477 | COMPAT_SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd, |
1478 | struct file_handle __user *, handle, int, flags) | 1478 | struct file_handle __user *, handle, int, flags) |
1479 | { | 1479 | { |
1480 | return do_handle_open(mountdirfd, handle, flags); | 1480 | return do_handle_open(mountdirfd, handle, flags); |
1481 | } | 1481 | } |
1482 | #endif | 1482 | #endif |
1483 | 1483 |
fs/fcntl.c
1 | /* | 1 | /* |
2 | * linux/fs/fcntl.c | 2 | * linux/fs/fcntl.c |
3 | * | 3 | * |
4 | * Copyright (C) 1991, 1992 Linus Torvalds | 4 | * Copyright (C) 1991, 1992 Linus Torvalds |
5 | */ | 5 | */ |
6 | 6 | ||
7 | #include <linux/syscalls.h> | 7 | #include <linux/syscalls.h> |
8 | #include <linux/init.h> | 8 | #include <linux/init.h> |
9 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
10 | #include <linux/fs.h> | 10 | #include <linux/fs.h> |
11 | #include <linux/file.h> | 11 | #include <linux/file.h> |
12 | #include <linux/fdtable.h> | 12 | #include <linux/fdtable.h> |
13 | #include <linux/capability.h> | 13 | #include <linux/capability.h> |
14 | #include <linux/dnotify.h> | 14 | #include <linux/dnotify.h> |
15 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/pipe_fs_i.h> | 17 | #include <linux/pipe_fs_i.h> |
18 | #include <linux/security.h> | 18 | #include <linux/security.h> |
19 | #include <linux/ptrace.h> | 19 | #include <linux/ptrace.h> |
20 | #include <linux/signal.h> | 20 | #include <linux/signal.h> |
21 | #include <linux/rcupdate.h> | 21 | #include <linux/rcupdate.h> |
22 | #include <linux/pid_namespace.h> | 22 | #include <linux/pid_namespace.h> |
23 | #include <linux/user_namespace.h> | 23 | #include <linux/user_namespace.h> |
24 | 24 | ||
25 | #include <asm/poll.h> | 25 | #include <asm/poll.h> |
26 | #include <asm/siginfo.h> | 26 | #include <asm/siginfo.h> |
27 | #include <asm/uaccess.h> | 27 | #include <asm/uaccess.h> |
28 | 28 | ||
29 | #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME) | 29 | #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME) |
30 | 30 | ||
31 | static int setfl(int fd, struct file * filp, unsigned long arg) | 31 | static int setfl(int fd, struct file * filp, unsigned long arg) |
32 | { | 32 | { |
33 | struct inode * inode = file_inode(filp); | 33 | struct inode * inode = file_inode(filp); |
34 | int error = 0; | 34 | int error = 0; |
35 | 35 | ||
36 | /* | 36 | /* |
37 | * O_APPEND cannot be cleared if the file is marked as append-only | 37 | * O_APPEND cannot be cleared if the file is marked as append-only |
38 | * and the file is open for write. | 38 | * and the file is open for write. |
39 | */ | 39 | */ |
40 | if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode)) | 40 | if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode)) |
41 | return -EPERM; | 41 | return -EPERM; |
42 | 42 | ||
43 | /* O_NOATIME can only be set by the owner or superuser */ | 43 | /* O_NOATIME can only be set by the owner or superuser */ |
44 | if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME)) | 44 | if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME)) |
45 | if (!inode_owner_or_capable(inode)) | 45 | if (!inode_owner_or_capable(inode)) |
46 | return -EPERM; | 46 | return -EPERM; |
47 | 47 | ||
48 | /* required for strict SunOS emulation */ | 48 | /* required for strict SunOS emulation */ |
49 | if (O_NONBLOCK != O_NDELAY) | 49 | if (O_NONBLOCK != O_NDELAY) |
50 | if (arg & O_NDELAY) | 50 | if (arg & O_NDELAY) |
51 | arg |= O_NONBLOCK; | 51 | arg |= O_NONBLOCK; |
52 | 52 | ||
53 | if (arg & O_DIRECT) { | 53 | if (arg & O_DIRECT) { |
54 | if (!filp->f_mapping || !filp->f_mapping->a_ops || | 54 | if (!filp->f_mapping || !filp->f_mapping->a_ops || |
55 | !filp->f_mapping->a_ops->direct_IO) | 55 | !filp->f_mapping->a_ops->direct_IO) |
56 | return -EINVAL; | 56 | return -EINVAL; |
57 | } | 57 | } |
58 | 58 | ||
59 | if (filp->f_op->check_flags) | 59 | if (filp->f_op->check_flags) |
60 | error = filp->f_op->check_flags(arg); | 60 | error = filp->f_op->check_flags(arg); |
61 | if (error) | 61 | if (error) |
62 | return error; | 62 | return error; |
63 | 63 | ||
64 | /* | 64 | /* |
65 | * ->fasync() is responsible for setting the FASYNC bit. | 65 | * ->fasync() is responsible for setting the FASYNC bit. |
66 | */ | 66 | */ |
67 | if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) { | 67 | if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) { |
68 | error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); | 68 | error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); |
69 | if (error < 0) | 69 | if (error < 0) |
70 | goto out; | 70 | goto out; |
71 | if (error > 0) | 71 | if (error > 0) |
72 | error = 0; | 72 | error = 0; |
73 | } | 73 | } |
74 | spin_lock(&filp->f_lock); | 74 | spin_lock(&filp->f_lock); |
75 | filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK); | 75 | filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK); |
76 | spin_unlock(&filp->f_lock); | 76 | spin_unlock(&filp->f_lock); |
77 | 77 | ||
78 | out: | 78 | out: |
79 | return error; | 79 | return error; |
80 | } | 80 | } |
81 | 81 | ||
82 | static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, | 82 | static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, |
83 | int force) | 83 | int force) |
84 | { | 84 | { |
85 | write_lock_irq(&filp->f_owner.lock); | 85 | write_lock_irq(&filp->f_owner.lock); |
86 | if (force || !filp->f_owner.pid) { | 86 | if (force || !filp->f_owner.pid) { |
87 | put_pid(filp->f_owner.pid); | 87 | put_pid(filp->f_owner.pid); |
88 | filp->f_owner.pid = get_pid(pid); | 88 | filp->f_owner.pid = get_pid(pid); |
89 | filp->f_owner.pid_type = type; | 89 | filp->f_owner.pid_type = type; |
90 | 90 | ||
91 | if (pid) { | 91 | if (pid) { |
92 | const struct cred *cred = current_cred(); | 92 | const struct cred *cred = current_cred(); |
93 | filp->f_owner.uid = cred->uid; | 93 | filp->f_owner.uid = cred->uid; |
94 | filp->f_owner.euid = cred->euid; | 94 | filp->f_owner.euid = cred->euid; |
95 | } | 95 | } |
96 | } | 96 | } |
97 | write_unlock_irq(&filp->f_owner.lock); | 97 | write_unlock_irq(&filp->f_owner.lock); |
98 | } | 98 | } |
99 | 99 | ||
100 | int __f_setown(struct file *filp, struct pid *pid, enum pid_type type, | 100 | int __f_setown(struct file *filp, struct pid *pid, enum pid_type type, |
101 | int force) | 101 | int force) |
102 | { | 102 | { |
103 | int err; | 103 | int err; |
104 | 104 | ||
105 | err = security_file_set_fowner(filp); | 105 | err = security_file_set_fowner(filp); |
106 | if (err) | 106 | if (err) |
107 | return err; | 107 | return err; |
108 | 108 | ||
109 | f_modown(filp, pid, type, force); | 109 | f_modown(filp, pid, type, force); |
110 | return 0; | 110 | return 0; |
111 | } | 111 | } |
112 | EXPORT_SYMBOL(__f_setown); | 112 | EXPORT_SYMBOL(__f_setown); |
113 | 113 | ||
114 | int f_setown(struct file *filp, unsigned long arg, int force) | 114 | int f_setown(struct file *filp, unsigned long arg, int force) |
115 | { | 115 | { |
116 | enum pid_type type; | 116 | enum pid_type type; |
117 | struct pid *pid; | 117 | struct pid *pid; |
118 | int who = arg; | 118 | int who = arg; |
119 | int result; | 119 | int result; |
120 | type = PIDTYPE_PID; | 120 | type = PIDTYPE_PID; |
121 | if (who < 0) { | 121 | if (who < 0) { |
122 | type = PIDTYPE_PGID; | 122 | type = PIDTYPE_PGID; |
123 | who = -who; | 123 | who = -who; |
124 | } | 124 | } |
125 | rcu_read_lock(); | 125 | rcu_read_lock(); |
126 | pid = find_vpid(who); | 126 | pid = find_vpid(who); |
127 | result = __f_setown(filp, pid, type, force); | 127 | result = __f_setown(filp, pid, type, force); |
128 | rcu_read_unlock(); | 128 | rcu_read_unlock(); |
129 | return result; | 129 | return result; |
130 | } | 130 | } |
131 | EXPORT_SYMBOL(f_setown); | 131 | EXPORT_SYMBOL(f_setown); |
132 | 132 | ||
133 | void f_delown(struct file *filp) | 133 | void f_delown(struct file *filp) |
134 | { | 134 | { |
135 | f_modown(filp, NULL, PIDTYPE_PID, 1); | 135 | f_modown(filp, NULL, PIDTYPE_PID, 1); |
136 | } | 136 | } |
137 | 137 | ||
138 | pid_t f_getown(struct file *filp) | 138 | pid_t f_getown(struct file *filp) |
139 | { | 139 | { |
140 | pid_t pid; | 140 | pid_t pid; |
141 | read_lock(&filp->f_owner.lock); | 141 | read_lock(&filp->f_owner.lock); |
142 | pid = pid_vnr(filp->f_owner.pid); | 142 | pid = pid_vnr(filp->f_owner.pid); |
143 | if (filp->f_owner.pid_type == PIDTYPE_PGID) | 143 | if (filp->f_owner.pid_type == PIDTYPE_PGID) |
144 | pid = -pid; | 144 | pid = -pid; |
145 | read_unlock(&filp->f_owner.lock); | 145 | read_unlock(&filp->f_owner.lock); |
146 | return pid; | 146 | return pid; |
147 | } | 147 | } |
148 | 148 | ||
149 | static int f_setown_ex(struct file *filp, unsigned long arg) | 149 | static int f_setown_ex(struct file *filp, unsigned long arg) |
150 | { | 150 | { |
151 | struct f_owner_ex __user *owner_p = (void __user *)arg; | 151 | struct f_owner_ex __user *owner_p = (void __user *)arg; |
152 | struct f_owner_ex owner; | 152 | struct f_owner_ex owner; |
153 | struct pid *pid; | 153 | struct pid *pid; |
154 | int type; | 154 | int type; |
155 | int ret; | 155 | int ret; |
156 | 156 | ||
157 | ret = copy_from_user(&owner, owner_p, sizeof(owner)); | 157 | ret = copy_from_user(&owner, owner_p, sizeof(owner)); |
158 | if (ret) | 158 | if (ret) |
159 | return -EFAULT; | 159 | return -EFAULT; |
160 | 160 | ||
161 | switch (owner.type) { | 161 | switch (owner.type) { |
162 | case F_OWNER_TID: | 162 | case F_OWNER_TID: |
163 | type = PIDTYPE_MAX; | 163 | type = PIDTYPE_MAX; |
164 | break; | 164 | break; |
165 | 165 | ||
166 | case F_OWNER_PID: | 166 | case F_OWNER_PID: |
167 | type = PIDTYPE_PID; | 167 | type = PIDTYPE_PID; |
168 | break; | 168 | break; |
169 | 169 | ||
170 | case F_OWNER_PGRP: | 170 | case F_OWNER_PGRP: |
171 | type = PIDTYPE_PGID; | 171 | type = PIDTYPE_PGID; |
172 | break; | 172 | break; |
173 | 173 | ||
174 | default: | 174 | default: |
175 | return -EINVAL; | 175 | return -EINVAL; |
176 | } | 176 | } |
177 | 177 | ||
178 | rcu_read_lock(); | 178 | rcu_read_lock(); |
179 | pid = find_vpid(owner.pid); | 179 | pid = find_vpid(owner.pid); |
180 | if (owner.pid && !pid) | 180 | if (owner.pid && !pid) |
181 | ret = -ESRCH; | 181 | ret = -ESRCH; |
182 | else | 182 | else |
183 | ret = __f_setown(filp, pid, type, 1); | 183 | ret = __f_setown(filp, pid, type, 1); |
184 | rcu_read_unlock(); | 184 | rcu_read_unlock(); |
185 | 185 | ||
186 | return ret; | 186 | return ret; |
187 | } | 187 | } |
188 | 188 | ||
189 | static int f_getown_ex(struct file *filp, unsigned long arg) | 189 | static int f_getown_ex(struct file *filp, unsigned long arg) |
190 | { | 190 | { |
191 | struct f_owner_ex __user *owner_p = (void __user *)arg; | 191 | struct f_owner_ex __user *owner_p = (void __user *)arg; |
192 | struct f_owner_ex owner; | 192 | struct f_owner_ex owner; |
193 | int ret = 0; | 193 | int ret = 0; |
194 | 194 | ||
195 | read_lock(&filp->f_owner.lock); | 195 | read_lock(&filp->f_owner.lock); |
196 | owner.pid = pid_vnr(filp->f_owner.pid); | 196 | owner.pid = pid_vnr(filp->f_owner.pid); |
197 | switch (filp->f_owner.pid_type) { | 197 | switch (filp->f_owner.pid_type) { |
198 | case PIDTYPE_MAX: | 198 | case PIDTYPE_MAX: |
199 | owner.type = F_OWNER_TID; | 199 | owner.type = F_OWNER_TID; |
200 | break; | 200 | break; |
201 | 201 | ||
202 | case PIDTYPE_PID: | 202 | case PIDTYPE_PID: |
203 | owner.type = F_OWNER_PID; | 203 | owner.type = F_OWNER_PID; |
204 | break; | 204 | break; |
205 | 205 | ||
206 | case PIDTYPE_PGID: | 206 | case PIDTYPE_PGID: |
207 | owner.type = F_OWNER_PGRP; | 207 | owner.type = F_OWNER_PGRP; |
208 | break; | 208 | break; |
209 | 209 | ||
210 | default: | 210 | default: |
211 | WARN_ON(1); | 211 | WARN_ON(1); |
212 | ret = -EINVAL; | 212 | ret = -EINVAL; |
213 | break; | 213 | break; |
214 | } | 214 | } |
215 | read_unlock(&filp->f_owner.lock); | 215 | read_unlock(&filp->f_owner.lock); |
216 | 216 | ||
217 | if (!ret) { | 217 | if (!ret) { |
218 | ret = copy_to_user(owner_p, &owner, sizeof(owner)); | 218 | ret = copy_to_user(owner_p, &owner, sizeof(owner)); |
219 | if (ret) | 219 | if (ret) |
220 | ret = -EFAULT; | 220 | ret = -EFAULT; |
221 | } | 221 | } |
222 | return ret; | 222 | return ret; |
223 | } | 223 | } |
224 | 224 | ||
225 | #ifdef CONFIG_CHECKPOINT_RESTORE | 225 | #ifdef CONFIG_CHECKPOINT_RESTORE |
226 | static int f_getowner_uids(struct file *filp, unsigned long arg) | 226 | static int f_getowner_uids(struct file *filp, unsigned long arg) |
227 | { | 227 | { |
228 | struct user_namespace *user_ns = current_user_ns(); | 228 | struct user_namespace *user_ns = current_user_ns(); |
229 | uid_t __user *dst = (void __user *)arg; | 229 | uid_t __user *dst = (void __user *)arg; |
230 | uid_t src[2]; | 230 | uid_t src[2]; |
231 | int err; | 231 | int err; |
232 | 232 | ||
233 | read_lock(&filp->f_owner.lock); | 233 | read_lock(&filp->f_owner.lock); |
234 | src[0] = from_kuid(user_ns, filp->f_owner.uid); | 234 | src[0] = from_kuid(user_ns, filp->f_owner.uid); |
235 | src[1] = from_kuid(user_ns, filp->f_owner.euid); | 235 | src[1] = from_kuid(user_ns, filp->f_owner.euid); |
236 | read_unlock(&filp->f_owner.lock); | 236 | read_unlock(&filp->f_owner.lock); |
237 | 237 | ||
238 | err = put_user(src[0], &dst[0]); | 238 | err = put_user(src[0], &dst[0]); |
239 | err |= put_user(src[1], &dst[1]); | 239 | err |= put_user(src[1], &dst[1]); |
240 | 240 | ||
241 | return err; | 241 | return err; |
242 | } | 242 | } |
243 | #else | 243 | #else |
244 | static int f_getowner_uids(struct file *filp, unsigned long arg) | 244 | static int f_getowner_uids(struct file *filp, unsigned long arg) |
245 | { | 245 | { |
246 | return -EINVAL; | 246 | return -EINVAL; |
247 | } | 247 | } |
248 | #endif | 248 | #endif |
249 | 249 | ||
250 | static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, | 250 | static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, |
251 | struct file *filp) | 251 | struct file *filp) |
252 | { | 252 | { |
253 | long err = -EINVAL; | 253 | long err = -EINVAL; |
254 | 254 | ||
255 | switch (cmd) { | 255 | switch (cmd) { |
256 | case F_DUPFD: | 256 | case F_DUPFD: |
257 | err = f_dupfd(arg, filp, 0); | 257 | err = f_dupfd(arg, filp, 0); |
258 | break; | 258 | break; |
259 | case F_DUPFD_CLOEXEC: | 259 | case F_DUPFD_CLOEXEC: |
260 | err = f_dupfd(arg, filp, O_CLOEXEC); | 260 | err = f_dupfd(arg, filp, O_CLOEXEC); |
261 | break; | 261 | break; |
262 | case F_GETFD: | 262 | case F_GETFD: |
263 | err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; | 263 | err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; |
264 | break; | 264 | break; |
265 | case F_SETFD: | 265 | case F_SETFD: |
266 | err = 0; | 266 | err = 0; |
267 | set_close_on_exec(fd, arg & FD_CLOEXEC); | 267 | set_close_on_exec(fd, arg & FD_CLOEXEC); |
268 | break; | 268 | break; |
269 | case F_GETFL: | 269 | case F_GETFL: |
270 | err = filp->f_flags; | 270 | err = filp->f_flags; |
271 | break; | 271 | break; |
272 | case F_SETFL: | 272 | case F_SETFL: |
273 | err = setfl(fd, filp, arg); | 273 | err = setfl(fd, filp, arg); |
274 | break; | 274 | break; |
275 | #if BITS_PER_LONG != 32 | 275 | #if BITS_PER_LONG != 32 |
276 | /* 32-bit arches must use fcntl64() */ | 276 | /* 32-bit arches must use fcntl64() */ |
277 | case F_GETLKP: | 277 | case F_OFD_GETLK: |
278 | #endif | 278 | #endif |
279 | case F_GETLK: | 279 | case F_GETLK: |
280 | err = fcntl_getlk(filp, cmd, (struct flock __user *) arg); | 280 | err = fcntl_getlk(filp, cmd, (struct flock __user *) arg); |
281 | break; | 281 | break; |
282 | #if BITS_PER_LONG != 32 | 282 | #if BITS_PER_LONG != 32 |
283 | /* 32-bit arches must use fcntl64() */ | 283 | /* 32-bit arches must use fcntl64() */ |
284 | case F_SETLKP: | 284 | case F_OFD_SETLK: |
285 | case F_SETLKPW: | 285 | case F_OFD_SETLKW: |
286 | #endif | 286 | #endif |
287 | /* Fallthrough */ | 287 | /* Fallthrough */ |
288 | case F_SETLK: | 288 | case F_SETLK: |
289 | case F_SETLKW: | 289 | case F_SETLKW: |
290 | err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg); | 290 | err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg); |
291 | break; | 291 | break; |
292 | case F_GETOWN: | 292 | case F_GETOWN: |
293 | /* | 293 | /* |
294 | * XXX If f_owner is a process group, the | 294 | * XXX If f_owner is a process group, the |
295 | * negative return value will get converted | 295 | * negative return value will get converted |
296 | * into an error. Oops. If we keep the | 296 | * into an error. Oops. If we keep the |
297 | * current syscall conventions, the only way | 297 | * current syscall conventions, the only way |
298 | * to fix this will be in libc. | 298 | * to fix this will be in libc. |
299 | */ | 299 | */ |
300 | err = f_getown(filp); | 300 | err = f_getown(filp); |
301 | force_successful_syscall_return(); | 301 | force_successful_syscall_return(); |
302 | break; | 302 | break; |
303 | case F_SETOWN: | 303 | case F_SETOWN: |
304 | err = f_setown(filp, arg, 1); | 304 | err = f_setown(filp, arg, 1); |
305 | break; | 305 | break; |
306 | case F_GETOWN_EX: | 306 | case F_GETOWN_EX: |
307 | err = f_getown_ex(filp, arg); | 307 | err = f_getown_ex(filp, arg); |
308 | break; | 308 | break; |
309 | case F_SETOWN_EX: | 309 | case F_SETOWN_EX: |
310 | err = f_setown_ex(filp, arg); | 310 | err = f_setown_ex(filp, arg); |
311 | break; | 311 | break; |
312 | case F_GETOWNER_UIDS: | 312 | case F_GETOWNER_UIDS: |
313 | err = f_getowner_uids(filp, arg); | 313 | err = f_getowner_uids(filp, arg); |
314 | break; | 314 | break; |
315 | case F_GETSIG: | 315 | case F_GETSIG: |
316 | err = filp->f_owner.signum; | 316 | err = filp->f_owner.signum; |
317 | break; | 317 | break; |
318 | case F_SETSIG: | 318 | case F_SETSIG: |
319 | /* arg == 0 restores default behaviour. */ | 319 | /* arg == 0 restores default behaviour. */ |
320 | if (!valid_signal(arg)) { | 320 | if (!valid_signal(arg)) { |
321 | break; | 321 | break; |
322 | } | 322 | } |
323 | err = 0; | 323 | err = 0; |
324 | filp->f_owner.signum = arg; | 324 | filp->f_owner.signum = arg; |
325 | break; | 325 | break; |
326 | case F_GETLEASE: | 326 | case F_GETLEASE: |
327 | err = fcntl_getlease(filp); | 327 | err = fcntl_getlease(filp); |
328 | break; | 328 | break; |
329 | case F_SETLEASE: | 329 | case F_SETLEASE: |
330 | err = fcntl_setlease(fd, filp, arg); | 330 | err = fcntl_setlease(fd, filp, arg); |
331 | break; | 331 | break; |
332 | case F_NOTIFY: | 332 | case F_NOTIFY: |
333 | err = fcntl_dirnotify(fd, filp, arg); | 333 | err = fcntl_dirnotify(fd, filp, arg); |
334 | break; | 334 | break; |
335 | case F_SETPIPE_SZ: | 335 | case F_SETPIPE_SZ: |
336 | case F_GETPIPE_SZ: | 336 | case F_GETPIPE_SZ: |
337 | err = pipe_fcntl(filp, cmd, arg); | 337 | err = pipe_fcntl(filp, cmd, arg); |
338 | break; | 338 | break; |
339 | default: | 339 | default: |
340 | break; | 340 | break; |
341 | } | 341 | } |
342 | return err; | 342 | return err; |
343 | } | 343 | } |
344 | 344 | ||
/*
 * Return 1 if @cmd is one of the commands the fcntl syscalls still allow
 * on a file opened with FMODE_PATH, 0 otherwise.
 */
static int check_fcntl_cmd(unsigned cmd)
{
	return cmd == F_DUPFD ||
	       cmd == F_DUPFD_CLOEXEC ||
	       cmd == F_GETFD ||
	       cmd == F_SETFD ||
	       cmd == F_GETFL;
}
357 | 357 | ||
358 | SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) | 358 | SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) |
359 | { | 359 | { |
360 | struct fd f = fdget_raw(fd); | 360 | struct fd f = fdget_raw(fd); |
361 | long err = -EBADF; | 361 | long err = -EBADF; |
362 | 362 | ||
363 | if (!f.file) | 363 | if (!f.file) |
364 | goto out; | 364 | goto out; |
365 | 365 | ||
366 | if (unlikely(f.file->f_mode & FMODE_PATH)) { | 366 | if (unlikely(f.file->f_mode & FMODE_PATH)) { |
367 | if (!check_fcntl_cmd(cmd)) | 367 | if (!check_fcntl_cmd(cmd)) |
368 | goto out1; | 368 | goto out1; |
369 | } | 369 | } |
370 | 370 | ||
371 | err = security_file_fcntl(f.file, cmd, arg); | 371 | err = security_file_fcntl(f.file, cmd, arg); |
372 | if (!err) | 372 | if (!err) |
373 | err = do_fcntl(fd, cmd, arg, f.file); | 373 | err = do_fcntl(fd, cmd, arg, f.file); |
374 | 374 | ||
375 | out1: | 375 | out1: |
376 | fdput(f); | 376 | fdput(f); |
377 | out: | 377 | out: |
378 | return err; | 378 | return err; |
379 | } | 379 | } |
380 | 380 | ||
#if BITS_PER_LONG == 32
/*
 * fcntl64(2) entry point, built only when BITS_PER_LONG == 32.
 *
 * Performs the same descriptor, FMODE_PATH and security checks as
 * fcntl(), handles the struct flock64 based locking commands itself and
 * passes every other command to do_fcntl().
 */
SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
		unsigned long, arg)
{
	struct fd f = fdget_raw(fd);
	long err;

	if (!f.file)
		return -EBADF;

	err = -EBADF;
	if (unlikely(f.file->f_mode & FMODE_PATH) && !check_fcntl_cmd(cmd))
		goto put;

	err = security_file_fcntl(f.file, cmd, arg);
	if (err)
		goto put;

	if (cmd == F_GETLK64 || cmd == F_OFD_GETLK) {
		err = fcntl_getlk64(f.file, cmd, (struct flock64 __user *) arg);
	} else if (cmd == F_SETLK64 || cmd == F_SETLKW64 ||
		   cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
		err = fcntl_setlk64(fd, f.file, cmd,
				(struct flock64 __user *) arg);
	} else {
		err = do_fcntl(fd, cmd, arg, f.file);
	}
put:
	fdput(f);
	return err;
}
#endif
422 | 422 | ||
423 | /* Table to convert sigio signal codes into poll band bitmaps */ | 423 | /* Table to convert sigio signal codes into poll band bitmaps */ |
424 | 424 | ||
425 | static const long band_table[NSIGPOLL] = { | 425 | static const long band_table[NSIGPOLL] = { |
426 | POLLIN | POLLRDNORM, /* POLL_IN */ | 426 | POLLIN | POLLRDNORM, /* POLL_IN */ |
427 | POLLOUT | POLLWRNORM | POLLWRBAND, /* POLL_OUT */ | 427 | POLLOUT | POLLWRNORM | POLLWRBAND, /* POLL_OUT */ |
428 | POLLIN | POLLRDNORM | POLLMSG, /* POLL_MSG */ | 428 | POLLIN | POLLRDNORM | POLLMSG, /* POLL_MSG */ |
429 | POLLERR, /* POLL_ERR */ | 429 | POLLERR, /* POLL_ERR */ |
430 | POLLPRI | POLLRDBAND, /* POLL_PRI */ | 430 | POLLPRI | POLLRDBAND, /* POLL_PRI */ |
431 | POLLHUP | POLLERR /* POLL_HUP */ | 431 | POLLHUP | POLLERR /* POLL_HUP */ |
432 | }; | 432 | }; |
433 | 433 | ||
434 | static inline int sigio_perm(struct task_struct *p, | 434 | static inline int sigio_perm(struct task_struct *p, |
435 | struct fown_struct *fown, int sig) | 435 | struct fown_struct *fown, int sig) |
436 | { | 436 | { |
437 | const struct cred *cred; | 437 | const struct cred *cred; |
438 | int ret; | 438 | int ret; |
439 | 439 | ||
440 | rcu_read_lock(); | 440 | rcu_read_lock(); |
441 | cred = __task_cred(p); | 441 | cred = __task_cred(p); |
442 | ret = ((uid_eq(fown->euid, GLOBAL_ROOT_UID) || | 442 | ret = ((uid_eq(fown->euid, GLOBAL_ROOT_UID) || |
443 | uid_eq(fown->euid, cred->suid) || uid_eq(fown->euid, cred->uid) || | 443 | uid_eq(fown->euid, cred->suid) || uid_eq(fown->euid, cred->uid) || |
444 | uid_eq(fown->uid, cred->suid) || uid_eq(fown->uid, cred->uid)) && | 444 | uid_eq(fown->uid, cred->suid) || uid_eq(fown->uid, cred->uid)) && |
445 | !security_file_send_sigiotask(p, fown, sig)); | 445 | !security_file_send_sigiotask(p, fown, sig)); |
446 | rcu_read_unlock(); | 446 | rcu_read_unlock(); |
447 | return ret; | 447 | return ret; |
448 | } | 448 | } |
449 | 449 | ||
450 | static void send_sigio_to_task(struct task_struct *p, | 450 | static void send_sigio_to_task(struct task_struct *p, |
451 | struct fown_struct *fown, | 451 | struct fown_struct *fown, |
452 | int fd, int reason, int group) | 452 | int fd, int reason, int group) |
453 | { | 453 | { |
454 | /* | 454 | /* |
455 | * F_SETSIG can change ->signum lockless in parallel, make | 455 | * F_SETSIG can change ->signum lockless in parallel, make |
456 | * sure we read it once and use the same value throughout. | 456 | * sure we read it once and use the same value throughout. |
457 | */ | 457 | */ |
458 | int signum = ACCESS_ONCE(fown->signum); | 458 | int signum = ACCESS_ONCE(fown->signum); |
459 | 459 | ||
460 | if (!sigio_perm(p, fown, signum)) | 460 | if (!sigio_perm(p, fown, signum)) |
461 | return; | 461 | return; |
462 | 462 | ||
463 | switch (signum) { | 463 | switch (signum) { |
464 | siginfo_t si; | 464 | siginfo_t si; |
465 | default: | 465 | default: |
466 | /* Queue a rt signal with the appropriate fd as its | 466 | /* Queue a rt signal with the appropriate fd as its |
467 | value. We use SI_SIGIO as the source, not | 467 | value. We use SI_SIGIO as the source, not |
468 | SI_KERNEL, since kernel signals always get | 468 | SI_KERNEL, since kernel signals always get |
469 | delivered even if we can't queue. Failure to | 469 | delivered even if we can't queue. Failure to |
470 | queue in this case _should_ be reported; we fall | 470 | queue in this case _should_ be reported; we fall |
471 | back to SIGIO in that case. --sct */ | 471 | back to SIGIO in that case. --sct */ |
472 | si.si_signo = signum; | 472 | si.si_signo = signum; |
473 | si.si_errno = 0; | 473 | si.si_errno = 0; |
474 | si.si_code = reason; | 474 | si.si_code = reason; |
475 | /* Make sure we are called with one of the POLL_* | 475 | /* Make sure we are called with one of the POLL_* |
476 | reasons, otherwise we could leak kernel stack into | 476 | reasons, otherwise we could leak kernel stack into |
477 | userspace. */ | 477 | userspace. */ |
478 | BUG_ON((reason & __SI_MASK) != __SI_POLL); | 478 | BUG_ON((reason & __SI_MASK) != __SI_POLL); |
479 | if (reason - POLL_IN >= NSIGPOLL) | 479 | if (reason - POLL_IN >= NSIGPOLL) |
480 | si.si_band = ~0L; | 480 | si.si_band = ~0L; |
481 | else | 481 | else |
482 | si.si_band = band_table[reason - POLL_IN]; | 482 | si.si_band = band_table[reason - POLL_IN]; |
483 | si.si_fd = fd; | 483 | si.si_fd = fd; |
484 | if (!do_send_sig_info(signum, &si, p, group)) | 484 | if (!do_send_sig_info(signum, &si, p, group)) |
485 | break; | 485 | break; |
486 | /* fall-through: fall back on the old plain SIGIO signal */ | 486 | /* fall-through: fall back on the old plain SIGIO signal */ |
487 | case 0: | 487 | case 0: |
488 | do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, group); | 488 | do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, group); |
489 | } | 489 | } |
490 | } | 490 | } |
491 | 491 | ||
492 | void send_sigio(struct fown_struct *fown, int fd, int band) | 492 | void send_sigio(struct fown_struct *fown, int fd, int band) |
493 | { | 493 | { |
494 | struct task_struct *p; | 494 | struct task_struct *p; |
495 | enum pid_type type; | 495 | enum pid_type type; |
496 | struct pid *pid; | 496 | struct pid *pid; |
497 | int group = 1; | 497 | int group = 1; |
498 | 498 | ||
499 | read_lock(&fown->lock); | 499 | read_lock(&fown->lock); |
500 | 500 | ||
501 | type = fown->pid_type; | 501 | type = fown->pid_type; |
502 | if (type == PIDTYPE_MAX) { | 502 | if (type == PIDTYPE_MAX) { |
503 | group = 0; | 503 | group = 0; |
504 | type = PIDTYPE_PID; | 504 | type = PIDTYPE_PID; |
505 | } | 505 | } |
506 | 506 | ||
507 | pid = fown->pid; | 507 | pid = fown->pid; |
508 | if (!pid) | 508 | if (!pid) |
509 | goto out_unlock_fown; | 509 | goto out_unlock_fown; |
510 | 510 | ||
511 | read_lock(&tasklist_lock); | 511 | read_lock(&tasklist_lock); |
512 | do_each_pid_task(pid, type, p) { | 512 | do_each_pid_task(pid, type, p) { |
513 | send_sigio_to_task(p, fown, fd, band, group); | 513 | send_sigio_to_task(p, fown, fd, band, group); |
514 | } while_each_pid_task(pid, type, p); | 514 | } while_each_pid_task(pid, type, p); |
515 | read_unlock(&tasklist_lock); | 515 | read_unlock(&tasklist_lock); |
516 | out_unlock_fown: | 516 | out_unlock_fown: |
517 | read_unlock(&fown->lock); | 517 | read_unlock(&fown->lock); |
518 | } | 518 | } |
519 | 519 | ||
520 | static void send_sigurg_to_task(struct task_struct *p, | 520 | static void send_sigurg_to_task(struct task_struct *p, |
521 | struct fown_struct *fown, int group) | 521 | struct fown_struct *fown, int group) |
522 | { | 522 | { |
523 | if (sigio_perm(p, fown, SIGURG)) | 523 | if (sigio_perm(p, fown, SIGURG)) |
524 | do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, group); | 524 | do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, group); |
525 | } | 525 | } |
526 | 526 | ||
527 | int send_sigurg(struct fown_struct *fown) | 527 | int send_sigurg(struct fown_struct *fown) |
528 | { | 528 | { |
529 | struct task_struct *p; | 529 | struct task_struct *p; |
530 | enum pid_type type; | 530 | enum pid_type type; |
531 | struct pid *pid; | 531 | struct pid *pid; |
532 | int group = 1; | 532 | int group = 1; |
533 | int ret = 0; | 533 | int ret = 0; |
534 | 534 | ||
535 | read_lock(&fown->lock); | 535 | read_lock(&fown->lock); |
536 | 536 | ||
537 | type = fown->pid_type; | 537 | type = fown->pid_type; |
538 | if (type == PIDTYPE_MAX) { | 538 | if (type == PIDTYPE_MAX) { |
539 | group = 0; | 539 | group = 0; |
540 | type = PIDTYPE_PID; | 540 | type = PIDTYPE_PID; |
541 | } | 541 | } |
542 | 542 | ||
543 | pid = fown->pid; | 543 | pid = fown->pid; |
544 | if (!pid) | 544 | if (!pid) |
545 | goto out_unlock_fown; | 545 | goto out_unlock_fown; |
546 | 546 | ||
547 | ret = 1; | 547 | ret = 1; |
548 | 548 | ||
549 | read_lock(&tasklist_lock); | 549 | read_lock(&tasklist_lock); |
550 | do_each_pid_task(pid, type, p) { | 550 | do_each_pid_task(pid, type, p) { |
551 | send_sigurg_to_task(p, fown, group); | 551 | send_sigurg_to_task(p, fown, group); |
552 | } while_each_pid_task(pid, type, p); | 552 | } while_each_pid_task(pid, type, p); |
553 | read_unlock(&tasklist_lock); | 553 | read_unlock(&tasklist_lock); |
554 | out_unlock_fown: | 554 | out_unlock_fown: |
555 | read_unlock(&fown->lock); | 555 | read_unlock(&fown->lock); |
556 | return ret; | 556 | return ret; |
557 | } | 557 | } |
558 | 558 | ||
559 | static DEFINE_SPINLOCK(fasync_lock); | 559 | static DEFINE_SPINLOCK(fasync_lock); |
560 | static struct kmem_cache *fasync_cache __read_mostly; | 560 | static struct kmem_cache *fasync_cache __read_mostly; |
561 | 561 | ||
562 | static void fasync_free_rcu(struct rcu_head *head) | 562 | static void fasync_free_rcu(struct rcu_head *head) |
563 | { | 563 | { |
564 | kmem_cache_free(fasync_cache, | 564 | kmem_cache_free(fasync_cache, |
565 | container_of(head, struct fasync_struct, fa_rcu)); | 565 | container_of(head, struct fasync_struct, fa_rcu)); |
566 | } | 566 | } |
567 | 567 | ||
568 | /* | 568 | /* |
569 | * Remove a fasync entry. If successfully removed, return | 569 | * Remove a fasync entry. If successfully removed, return |
570 | * positive and clear the FASYNC flag. If no entry exists, | 570 | * positive and clear the FASYNC flag. If no entry exists, |
571 | * do nothing and return 0. | 571 | * do nothing and return 0. |
572 | * | 572 | * |
573 | * NOTE! It is very important that the FASYNC flag always | 573 | * NOTE! It is very important that the FASYNC flag always |
574 | * match the state "is the filp on a fasync list". | 574 | * match the state "is the filp on a fasync list". |
575 | * | 575 | * |
576 | */ | 576 | */ |
577 | int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) | 577 | int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) |
578 | { | 578 | { |
579 | struct fasync_struct *fa, **fp; | 579 | struct fasync_struct *fa, **fp; |
580 | int result = 0; | 580 | int result = 0; |
581 | 581 | ||
582 | spin_lock(&filp->f_lock); | 582 | spin_lock(&filp->f_lock); |
583 | spin_lock(&fasync_lock); | 583 | spin_lock(&fasync_lock); |
584 | for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { | 584 | for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { |
585 | if (fa->fa_file != filp) | 585 | if (fa->fa_file != filp) |
586 | continue; | 586 | continue; |
587 | 587 | ||
588 | spin_lock_irq(&fa->fa_lock); | 588 | spin_lock_irq(&fa->fa_lock); |
589 | fa->fa_file = NULL; | 589 | fa->fa_file = NULL; |
590 | spin_unlock_irq(&fa->fa_lock); | 590 | spin_unlock_irq(&fa->fa_lock); |
591 | 591 | ||
592 | *fp = fa->fa_next; | 592 | *fp = fa->fa_next; |
593 | call_rcu(&fa->fa_rcu, fasync_free_rcu); | 593 | call_rcu(&fa->fa_rcu, fasync_free_rcu); |
594 | filp->f_flags &= ~FASYNC; | 594 | filp->f_flags &= ~FASYNC; |
595 | result = 1; | 595 | result = 1; |
596 | break; | 596 | break; |
597 | } | 597 | } |
598 | spin_unlock(&fasync_lock); | 598 | spin_unlock(&fasync_lock); |
599 | spin_unlock(&filp->f_lock); | 599 | spin_unlock(&filp->f_lock); |
600 | return result; | 600 | return result; |
601 | } | 601 | } |
602 | 602 | ||
603 | struct fasync_struct *fasync_alloc(void) | 603 | struct fasync_struct *fasync_alloc(void) |
604 | { | 604 | { |
605 | return kmem_cache_alloc(fasync_cache, GFP_KERNEL); | 605 | return kmem_cache_alloc(fasync_cache, GFP_KERNEL); |
606 | } | 606 | } |
607 | 607 | ||
608 | /* | 608 | /* |
609 | * NOTE! This can be used only for unused fasync entries: | 609 | * NOTE! This can be used only for unused fasync entries: |
610 | * entries that actually got inserted on the fasync list | 610 | * entries that actually got inserted on the fasync list |
611 | * need to be released by rcu - see fasync_remove_entry. | 611 | * need to be released by rcu - see fasync_remove_entry. |
612 | */ | 612 | */ |
613 | void fasync_free(struct fasync_struct *new) | 613 | void fasync_free(struct fasync_struct *new) |
614 | { | 614 | { |
615 | kmem_cache_free(fasync_cache, new); | 615 | kmem_cache_free(fasync_cache, new); |
616 | } | 616 | } |
617 | 617 | ||
618 | /* | 618 | /* |
619 | * Insert a new entry into the fasync list. Return the pointer to the | 619 | * Insert a new entry into the fasync list. Return the pointer to the |
620 | * old one if we didn't use the new one. | 620 | * old one if we didn't use the new one. |
621 | * | 621 | * |
622 | * NOTE! It is very important that the FASYNC flag always | 622 | * NOTE! It is very important that the FASYNC flag always |
623 | * match the state "is the filp on a fasync list". | 623 | * match the state "is the filp on a fasync list". |
624 | */ | 624 | */ |
625 | struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new) | 625 | struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new) |
626 | { | 626 | { |
627 | struct fasync_struct *fa, **fp; | 627 | struct fasync_struct *fa, **fp; |
628 | 628 | ||
629 | spin_lock(&filp->f_lock); | 629 | spin_lock(&filp->f_lock); |
630 | spin_lock(&fasync_lock); | 630 | spin_lock(&fasync_lock); |
631 | for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { | 631 | for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { |
632 | if (fa->fa_file != filp) | 632 | if (fa->fa_file != filp) |
633 | continue; | 633 | continue; |
634 | 634 | ||
635 | spin_lock_irq(&fa->fa_lock); | 635 | spin_lock_irq(&fa->fa_lock); |
636 | fa->fa_fd = fd; | 636 | fa->fa_fd = fd; |
637 | spin_unlock_irq(&fa->fa_lock); | 637 | spin_unlock_irq(&fa->fa_lock); |
638 | goto out; | 638 | goto out; |
639 | } | 639 | } |
640 | 640 | ||
641 | spin_lock_init(&new->fa_lock); | 641 | spin_lock_init(&new->fa_lock); |
642 | new->magic = FASYNC_MAGIC; | 642 | new->magic = FASYNC_MAGIC; |
643 | new->fa_file = filp; | 643 | new->fa_file = filp; |
644 | new->fa_fd = fd; | 644 | new->fa_fd = fd; |
645 | new->fa_next = *fapp; | 645 | new->fa_next = *fapp; |
646 | rcu_assign_pointer(*fapp, new); | 646 | rcu_assign_pointer(*fapp, new); |
647 | filp->f_flags |= FASYNC; | 647 | filp->f_flags |= FASYNC; |
648 | 648 | ||
649 | out: | 649 | out: |
650 | spin_unlock(&fasync_lock); | 650 | spin_unlock(&fasync_lock); |
651 | spin_unlock(&filp->f_lock); | 651 | spin_unlock(&filp->f_lock); |
652 | return fa; | 652 | return fa; |
653 | } | 653 | } |
654 | 654 | ||
655 | /* | 655 | /* |
656 | * Add a fasync entry. Return negative on error, positive if | 656 | * Add a fasync entry. Return negative on error, positive if |
657 | * added, and zero if did nothing but change an existing one. | 657 | * added, and zero if did nothing but change an existing one. |
658 | */ | 658 | */ |
659 | static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp) | 659 | static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp) |
660 | { | 660 | { |
661 | struct fasync_struct *new; | 661 | struct fasync_struct *new; |
662 | 662 | ||
663 | new = fasync_alloc(); | 663 | new = fasync_alloc(); |
664 | if (!new) | 664 | if (!new) |
665 | return -ENOMEM; | 665 | return -ENOMEM; |
666 | 666 | ||
667 | /* | 667 | /* |
668 | * fasync_insert_entry() returns the old (update) entry if | 668 | * fasync_insert_entry() returns the old (update) entry if |
669 | * it existed. | 669 | * it existed. |
670 | * | 670 | * |
671 | * So free the (unused) new entry and return 0 to let the | 671 | * So free the (unused) new entry and return 0 to let the |
672 | * caller know that we didn't add any new fasync entries. | 672 | * caller know that we didn't add any new fasync entries. |
673 | */ | 673 | */ |
674 | if (fasync_insert_entry(fd, filp, fapp, new)) { | 674 | if (fasync_insert_entry(fd, filp, fapp, new)) { |
675 | fasync_free(new); | 675 | fasync_free(new); |
676 | return 0; | 676 | return 0; |
677 | } | 677 | } |
678 | 678 | ||
679 | return 1; | 679 | return 1; |
680 | } | 680 | } |
681 | 681 | ||
/*
 * fasync_helper() is the common entry point for maintaining a fasync
 * queue: almost all character device drivers use it, and the file
 * lease code uses it for regular files.
 *
 * A non-zero @on adds (or updates) filp's entry, zero removes it.
 * Returns negative on error, 0 if nothing changed, and positive if
 * an entry was added or deleted.
 */
int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
{
	return on ? fasync_add_entry(fd, filp, fapp)
		  : fasync_remove_entry(filp, fapp);
}

EXPORT_SYMBOL(fasync_helper);
696 | 696 | ||
697 | /* | 697 | /* |
698 | * rcu_read_lock() is held | 698 | * rcu_read_lock() is held |
699 | */ | 699 | */ |
700 | static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band) | 700 | static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band) |
701 | { | 701 | { |
702 | while (fa) { | 702 | while (fa) { |
703 | struct fown_struct *fown; | 703 | struct fown_struct *fown; |
704 | unsigned long flags; | 704 | unsigned long flags; |
705 | 705 | ||
706 | if (fa->magic != FASYNC_MAGIC) { | 706 | if (fa->magic != FASYNC_MAGIC) { |
707 | printk(KERN_ERR "kill_fasync: bad magic number in " | 707 | printk(KERN_ERR "kill_fasync: bad magic number in " |
708 | "fasync_struct!\n"); | 708 | "fasync_struct!\n"); |
709 | return; | 709 | return; |
710 | } | 710 | } |
711 | spin_lock_irqsave(&fa->fa_lock, flags); | 711 | spin_lock_irqsave(&fa->fa_lock, flags); |
712 | if (fa->fa_file) { | 712 | if (fa->fa_file) { |
713 | fown = &fa->fa_file->f_owner; | 713 | fown = &fa->fa_file->f_owner; |
714 | /* Don't send SIGURG to processes which have not set a | 714 | /* Don't send SIGURG to processes which have not set a |
715 | queued signum: SIGURG has its own default signalling | 715 | queued signum: SIGURG has its own default signalling |
716 | mechanism. */ | 716 | mechanism. */ |
717 | if (!(sig == SIGURG && fown->signum == 0)) | 717 | if (!(sig == SIGURG && fown->signum == 0)) |
718 | send_sigio(fown, fa->fa_fd, band); | 718 | send_sigio(fown, fa->fa_fd, band); |
719 | } | 719 | } |
720 | spin_unlock_irqrestore(&fa->fa_lock, flags); | 720 | spin_unlock_irqrestore(&fa->fa_lock, flags); |
721 | fa = rcu_dereference(fa->fa_next); | 721 | fa = rcu_dereference(fa->fa_next); |
722 | } | 722 | } |
723 | } | 723 | } |
724 | 724 | ||
/*
 * Deliver @sig / @band to everything registered on the fasync list
 * whose head pointer is *fp.
 */
void kill_fasync(struct fasync_struct **fp, int sig, int band)
{
	/* Cheap unlocked peek first: the list is usually empty. */
	if (!*fp)
		return;

	rcu_read_lock();
	kill_fasync_rcu(rcu_dereference(*fp), sig, band);
	rcu_read_unlock();
}
EXPORT_SYMBOL(kill_fasync);
737 | 737 | ||
738 | static int __init fcntl_init(void) | 738 | static int __init fcntl_init(void) |
739 | { | 739 | { |
740 | /* | 740 | /* |
741 | * Please add new bits here to ensure allocation uniqueness. | 741 | * Please add new bits here to ensure allocation uniqueness. |
742 | * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY | 742 | * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY |
743 | * is defined as O_NONBLOCK on some platforms and not on others. | 743 | * is defined as O_NONBLOCK on some platforms and not on others. |
744 | */ | 744 | */ |
745 | BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( | 745 | BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( |
746 | O_RDONLY | O_WRONLY | O_RDWR | | 746 | O_RDONLY | O_WRONLY | O_RDWR | |
747 | O_CREAT | O_EXCL | O_NOCTTY | | 747 | O_CREAT | O_EXCL | O_NOCTTY | |
748 | O_TRUNC | O_APPEND | /* O_NONBLOCK | */ | 748 | O_TRUNC | O_APPEND | /* O_NONBLOCK | */ |
749 | __O_SYNC | O_DSYNC | FASYNC | | 749 | __O_SYNC | O_DSYNC | FASYNC | |
750 | O_DIRECT | O_LARGEFILE | O_DIRECTORY | | 750 | O_DIRECT | O_LARGEFILE | O_DIRECTORY | |
751 | O_NOFOLLOW | O_NOATIME | O_CLOEXEC | | 751 | O_NOFOLLOW | O_NOATIME | O_CLOEXEC | |
752 | __FMODE_EXEC | O_PATH | __O_TMPFILE | 752 | __FMODE_EXEC | O_PATH | __O_TMPFILE |
753 | )); | 753 | )); |
754 | 754 | ||
755 | fasync_cache = kmem_cache_create("fasync_cache", | 755 | fasync_cache = kmem_cache_create("fasync_cache", |
756 | sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL); | 756 | sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL); |
757 | return 0; | 757 | return 0; |
758 | } | 758 | } |
759 | 759 | ||
760 | module_init(fcntl_init) | 760 | module_init(fcntl_init) |
761 | 761 |
fs/locks.c
1 | /* | 1 | /* |
2 | * linux/fs/locks.c | 2 | * linux/fs/locks.c |
3 | * | 3 | * |
4 | * Provide support for fcntl()'s F_GETLK, F_SETLK, and F_SETLKW calls. | 4 | * Provide support for fcntl()'s F_GETLK, F_SETLK, and F_SETLKW calls. |
5 | * Doug Evans (dje@spiff.uucp), August 07, 1992 | 5 | * Doug Evans (dje@spiff.uucp), August 07, 1992 |
6 | * | 6 | * |
7 | * Deadlock detection added. | 7 | * Deadlock detection added. |
8 | * FIXME: one thing isn't handled yet: | 8 | * FIXME: one thing isn't handled yet: |
9 | * - mandatory locks (requires lots of changes elsewhere) | 9 | * - mandatory locks (requires lots of changes elsewhere) |
10 | * Kelly Carmichael (kelly@[142.24.8.65]), September 17, 1994. | 10 | * Kelly Carmichael (kelly@[142.24.8.65]), September 17, 1994. |
11 | * | 11 | * |
12 | * Miscellaneous edits, and a total rewrite of posix_lock_file() code. | 12 | * Miscellaneous edits, and a total rewrite of posix_lock_file() code. |
13 | * Kai Petzke (wpp@marie.physik.tu-berlin.de), 1994 | 13 | * Kai Petzke (wpp@marie.physik.tu-berlin.de), 1994 |
14 | * | 14 | * |
15 | * Converted file_lock_table to a linked list from an array, which eliminates | 15 | * Converted file_lock_table to a linked list from an array, which eliminates |
16 | * the limits on how many active file locks are open. | 16 | * the limits on how many active file locks are open. |
17 | * Chad Page (pageone@netcom.com), November 27, 1994 | 17 | * Chad Page (pageone@netcom.com), November 27, 1994 |
18 | * | 18 | * |
19 | * Removed dependency on file descriptors. dup()'ed file descriptors now | 19 | * Removed dependency on file descriptors. dup()'ed file descriptors now |
20 | * get the same locks as the original file descriptors, and a close() on | 20 | * get the same locks as the original file descriptors, and a close() on |
21 | * any file descriptor removes ALL the locks on the file for the current | 21 | * any file descriptor removes ALL the locks on the file for the current |
22 | * process. Since locks still depend on the process id, locks are inherited | 22 | * process. Since locks still depend on the process id, locks are inherited |
23 | * after an exec() but not after a fork(). This agrees with POSIX, and both | 23 | * after an exec() but not after a fork(). This agrees with POSIX, and both |
24 | * BSD and SVR4 practice. | 24 | * BSD and SVR4 practice. |
25 | * Andy Walker (andy@lysaker.kvaerner.no), February 14, 1995 | 25 | * Andy Walker (andy@lysaker.kvaerner.no), February 14, 1995 |
26 | * | 26 | * |
27 | * Scrapped free list which is redundant now that we allocate locks | 27 | * Scrapped free list which is redundant now that we allocate locks |
28 | * dynamically with kmalloc()/kfree(). | 28 | * dynamically with kmalloc()/kfree(). |
29 | * Andy Walker (andy@lysaker.kvaerner.no), February 21, 1995 | 29 | * Andy Walker (andy@lysaker.kvaerner.no), February 21, 1995 |
30 | * | 30 | * |
31 | * Implemented two lock personalities - FL_FLOCK and FL_POSIX. | 31 | * Implemented two lock personalities - FL_FLOCK and FL_POSIX. |
32 | * | 32 | * |
33 | * FL_POSIX locks are created with calls to fcntl() and lockf() through the | 33 | * FL_POSIX locks are created with calls to fcntl() and lockf() through the |
34 | * fcntl() system call. They have the semantics described above. | 34 | * fcntl() system call. They have the semantics described above. |
35 | * | 35 | * |
36 | * FL_FLOCK locks are created with calls to flock(), through the flock() | 36 | * FL_FLOCK locks are created with calls to flock(), through the flock() |
37 | * system call, which is new. Old C libraries implement flock() via fcntl() | 37 | * system call, which is new. Old C libraries implement flock() via fcntl() |
38 | * and will continue to use the old, broken implementation. | 38 | * and will continue to use the old, broken implementation. |
39 | * | 39 | * |
40 | * FL_FLOCK locks follow the 4.4 BSD flock() semantics. They are associated | 40 | * FL_FLOCK locks follow the 4.4 BSD flock() semantics. They are associated |
41 | * with a file pointer (filp). As a result they can be shared by a parent | 41 | * with a file pointer (filp). As a result they can be shared by a parent |
42 | * process and its children after a fork(). They are removed when the last | 42 | * process and its children after a fork(). They are removed when the last |
43 | * file descriptor referring to the file pointer is closed (unless explicitly | 43 | * file descriptor referring to the file pointer is closed (unless explicitly |
44 | * unlocked). | 44 | * unlocked). |
45 | * | 45 | * |
46 | * FL_FLOCK locks never deadlock, an existing lock is always removed before | 46 | * FL_FLOCK locks never deadlock, an existing lock is always removed before |
47 | * upgrading from shared to exclusive (or vice versa). When this happens | 47 | * upgrading from shared to exclusive (or vice versa). When this happens |
48 | * any processes blocked by the current lock are woken up and allowed to | 48 | * any processes blocked by the current lock are woken up and allowed to |
49 | * run before the new lock is applied. | 49 | * run before the new lock is applied. |
50 | * Andy Walker (andy@lysaker.kvaerner.no), June 09, 1995 | 50 | * Andy Walker (andy@lysaker.kvaerner.no), June 09, 1995 |
51 | * | 51 | * |
52 | * Removed some race conditions in flock_lock_file(), marked other possible | 52 | * Removed some race conditions in flock_lock_file(), marked other possible |
53 | * races. Just grep for FIXME to see them. | 53 | * races. Just grep for FIXME to see them. |
54 | * Dmitry Gorodchanin (pgmdsg@ibi.com), February 09, 1996. | 54 | * Dmitry Gorodchanin (pgmdsg@ibi.com), February 09, 1996. |
55 | * | 55 | * |
56 | * Addressed Dmitry's concerns. Deadlock checking no longer recursive. | 56 | * Addressed Dmitry's concerns. Deadlock checking no longer recursive. |
57 | * Lock allocation changed to GFP_ATOMIC as we can't afford to sleep | 57 | * Lock allocation changed to GFP_ATOMIC as we can't afford to sleep |
58 | * once we've checked for blocking and deadlocking. | 58 | * once we've checked for blocking and deadlocking. |
59 | * Andy Walker (andy@lysaker.kvaerner.no), April 03, 1996. | 59 | * Andy Walker (andy@lysaker.kvaerner.no), April 03, 1996. |
60 | * | 60 | * |
61 | * Initial implementation of mandatory locks. SunOS turned out to be | 61 | * Initial implementation of mandatory locks. SunOS turned out to be |
62 | * a rotten model, so I implemented the "obvious" semantics. | 62 | * a rotten model, so I implemented the "obvious" semantics. |
63 | * See 'Documentation/filesystems/mandatory-locking.txt' for details. | 63 | * See 'Documentation/filesystems/mandatory-locking.txt' for details. |
64 | * Andy Walker (andy@lysaker.kvaerner.no), April 06, 1996. | 64 | * Andy Walker (andy@lysaker.kvaerner.no), April 06, 1996. |
65 | * | 65 | * |
66 | * Don't allow mandatory locks on mmap()'ed files. Added simple functions to | 66 | * Don't allow mandatory locks on mmap()'ed files. Added simple functions to |
67 | * check if a file has mandatory locks, used by mmap(), open() and creat() to | 67 | * check if a file has mandatory locks, used by mmap(), open() and creat() to |
68 | * see if system call should be rejected. Ref. HP-UX/SunOS/Solaris Reference | 68 | * see if system call should be rejected. Ref. HP-UX/SunOS/Solaris Reference |
69 | * Manual, Section 2. | 69 | * Manual, Section 2. |
70 | * Andy Walker (andy@lysaker.kvaerner.no), April 09, 1996. | 70 | * Andy Walker (andy@lysaker.kvaerner.no), April 09, 1996. |
71 | * | 71 | * |
72 | * Tidied up block list handling. Added '/proc/locks' interface. | 72 | * Tidied up block list handling. Added '/proc/locks' interface. |
73 | * Andy Walker (andy@lysaker.kvaerner.no), April 24, 1996. | 73 | * Andy Walker (andy@lysaker.kvaerner.no), April 24, 1996. |
74 | * | 74 | * |
75 | * Fixed deadlock condition for pathological code that mixes calls to | 75 | * Fixed deadlock condition for pathological code that mixes calls to |
76 | * flock() and fcntl(). | 76 | * flock() and fcntl(). |
77 | * Andy Walker (andy@lysaker.kvaerner.no), April 29, 1996. | 77 | * Andy Walker (andy@lysaker.kvaerner.no), April 29, 1996. |
78 | * | 78 | * |
79 | * Allow only one type of locking scheme (FL_POSIX or FL_FLOCK) to be in use | 79 | * Allow only one type of locking scheme (FL_POSIX or FL_FLOCK) to be in use |
80 | * for a given file at a time. Changed the CONFIG_LOCK_MANDATORY scheme to | 80 | * for a given file at a time. Changed the CONFIG_LOCK_MANDATORY scheme to |
81 | * guarantee sensible behaviour in the case where file system modules might | 81 | * guarantee sensible behaviour in the case where file system modules might |
82 | * be compiled with different options than the kernel itself. | 82 | * be compiled with different options than the kernel itself. |
83 | * Andy Walker (andy@lysaker.kvaerner.no), May 15, 1996. | 83 | * Andy Walker (andy@lysaker.kvaerner.no), May 15, 1996. |
84 | * | 84 | * |
85 | * Added a couple of missing wake_up() calls. Thanks to Thomas Meckel | 85 | * Added a couple of missing wake_up() calls. Thanks to Thomas Meckel |
86 | * (Thomas.Meckel@mni.fh-giessen.de) for spotting this. | 86 | * (Thomas.Meckel@mni.fh-giessen.de) for spotting this. |
87 | * Andy Walker (andy@lysaker.kvaerner.no), May 15, 1996. | 87 | * Andy Walker (andy@lysaker.kvaerner.no), May 15, 1996. |
88 | * | 88 | * |
89 | * Changed FL_POSIX locks to use the block list in the same way as FL_FLOCK | 89 | * Changed FL_POSIX locks to use the block list in the same way as FL_FLOCK |
90 | * locks. Changed process synchronisation to avoid dereferencing locks that | 90 | * locks. Changed process synchronisation to avoid dereferencing locks that |
91 | * have already been freed. | 91 | * have already been freed. |
92 | * Andy Walker (andy@lysaker.kvaerner.no), Sep 21, 1996. | 92 | * Andy Walker (andy@lysaker.kvaerner.no), Sep 21, 1996. |
93 | * | 93 | * |
94 | * Made the block list a circular list to minimise searching in the list. | 94 | * Made the block list a circular list to minimise searching in the list. |
95 | * Andy Walker (andy@lysaker.kvaerner.no), Sep 25, 1996. | 95 | * Andy Walker (andy@lysaker.kvaerner.no), Sep 25, 1996. |
96 | * | 96 | * |
97 | * Made mandatory locking a mount option. Default is not to allow mandatory | 97 | * Made mandatory locking a mount option. Default is not to allow mandatory |
98 | * locking. | 98 | * locking. |
99 | * Andy Walker (andy@lysaker.kvaerner.no), Oct 04, 1996. | 99 | * Andy Walker (andy@lysaker.kvaerner.no), Oct 04, 1996. |
100 | * | 100 | * |
101 | * Some adaptations for NFS support. | 101 | * Some adaptations for NFS support. |
102 | * Olaf Kirch (okir@monad.swb.de), Dec 1996, | 102 | * Olaf Kirch (okir@monad.swb.de), Dec 1996, |
103 | * | 103 | * |
104 | * Fixed /proc/locks interface so that we can't overrun the buffer we are handed. | 104 | * Fixed /proc/locks interface so that we can't overrun the buffer we are handed. |
105 | * Andy Walker (andy@lysaker.kvaerner.no), May 12, 1997. | 105 | * Andy Walker (andy@lysaker.kvaerner.no), May 12, 1997. |
106 | * | 106 | * |
107 | * Use slab allocator instead of kmalloc/kfree. | 107 | * Use slab allocator instead of kmalloc/kfree. |
108 | * Use generic list implementation from <linux/list.h>. | 108 | * Use generic list implementation from <linux/list.h>. |
109 | * Sped up posix_locks_deadlock by only considering blocked locks. | 109 | * Sped up posix_locks_deadlock by only considering blocked locks. |
110 | * Matthew Wilcox <willy@debian.org>, March, 2000. | 110 | * Matthew Wilcox <willy@debian.org>, March, 2000. |
111 | * | 111 | * |
112 | * Leases and LOCK_MAND | 112 | * Leases and LOCK_MAND |
113 | * Matthew Wilcox <willy@debian.org>, June, 2000. | 113 | * Matthew Wilcox <willy@debian.org>, June, 2000. |
114 | * Stephen Rothwell <sfr@canb.auug.org.au>, June, 2000. | 114 | * Stephen Rothwell <sfr@canb.auug.org.au>, June, 2000. |
115 | */ | 115 | */ |
116 | 116 | ||
117 | #include <linux/capability.h> | 117 | #include <linux/capability.h> |
118 | #include <linux/file.h> | 118 | #include <linux/file.h> |
119 | #include <linux/fdtable.h> | 119 | #include <linux/fdtable.h> |
120 | #include <linux/fs.h> | 120 | #include <linux/fs.h> |
121 | #include <linux/init.h> | 121 | #include <linux/init.h> |
122 | #include <linux/module.h> | 122 | #include <linux/module.h> |
123 | #include <linux/security.h> | 123 | #include <linux/security.h> |
124 | #include <linux/slab.h> | 124 | #include <linux/slab.h> |
125 | #include <linux/syscalls.h> | 125 | #include <linux/syscalls.h> |
126 | #include <linux/time.h> | 126 | #include <linux/time.h> |
127 | #include <linux/rcupdate.h> | 127 | #include <linux/rcupdate.h> |
128 | #include <linux/pid_namespace.h> | 128 | #include <linux/pid_namespace.h> |
129 | #include <linux/hashtable.h> | 129 | #include <linux/hashtable.h> |
130 | #include <linux/percpu.h> | 130 | #include <linux/percpu.h> |
131 | #include <linux/lglock.h> | 131 | #include <linux/lglock.h> |
132 | 132 | ||
133 | #include <asm/uaccess.h> | 133 | #include <asm/uaccess.h> |
134 | 134 | ||
/*
 * Lock "personality" tests: each evaluates to non-zero when the
 * corresponding FL_* bit(s) are set in fl->fl_flags.
 *
 * The argument is parenthesised so that any pointer-valued expression
 * (e.g. "p + 1" or "cond ? a : b") can be passed safely.
 */
#define IS_POSIX(fl)	((fl)->fl_flags & FL_POSIX)
#define IS_FLOCK(fl)	((fl)->fl_flags & FL_FLOCK)
#define IS_LEASE(fl)	((fl)->fl_flags & (FL_LEASE|FL_DELEG))
#define IS_OFDLCK(fl)	((fl)->fl_flags & FL_OFDLCK)
139 | 139 | ||
140 | static bool lease_breaking(struct file_lock *fl) | 140 | static bool lease_breaking(struct file_lock *fl) |
141 | { | 141 | { |
142 | return fl->fl_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING); | 142 | return fl->fl_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING); |
143 | } | 143 | } |
144 | 144 | ||
145 | static int target_leasetype(struct file_lock *fl) | 145 | static int target_leasetype(struct file_lock *fl) |
146 | { | 146 | { |
147 | if (fl->fl_flags & FL_UNLOCK_PENDING) | 147 | if (fl->fl_flags & FL_UNLOCK_PENDING) |
148 | return F_UNLCK; | 148 | return F_UNLCK; |
149 | if (fl->fl_flags & FL_DOWNGRADE_PENDING) | 149 | if (fl->fl_flags & FL_DOWNGRADE_PENDING) |
150 | return F_RDLCK; | 150 | return F_RDLCK; |
151 | return fl->fl_type; | 151 | return fl->fl_type; |
152 | } | 152 | } |
153 | 153 | ||
/*
 * Lease tunables.
 * NOTE(review): presumably exposed as sysctls, with lease_break_time
 * expressed in seconds - confirm against the users of these variables.
 */
int leases_enable = 1;
int lease_break_time = 45;

/*
 * Walk the lock list hanging off inode->i_flock.
 *
 * @lockp is a "struct file_lock **" cursor that points at the link
 * leading to the current lock, so *lockp is the lock being visited.
 * Both arguments are parenthesised so arbitrary expressions may be
 * passed.
 */
#define for_each_lock(inode, lockp) \
	for ((lockp) = &(inode)->i_flock; *(lockp) != NULL; (lockp) = &(*(lockp))->fl_next)
159 | 159 | ||
160 | /* | 160 | /* |
161 | * The global file_lock_list is only used for displaying /proc/locks, so we | 161 | * The global file_lock_list is only used for displaying /proc/locks, so we |
162 | * keep a list on each CPU, with each list protected by its own spinlock via | 162 | * keep a list on each CPU, with each list protected by its own spinlock via |
163 | * the file_lock_lglock. Note that alterations to the list also require that | 163 | * the file_lock_lglock. Note that alterations to the list also require that |
164 | * the relevant i_lock is held. | 164 | * the relevant i_lock is held. |
165 | */ | 165 | */ |
166 | DEFINE_STATIC_LGLOCK(file_lock_lglock); | 166 | DEFINE_STATIC_LGLOCK(file_lock_lglock); |
167 | static DEFINE_PER_CPU(struct hlist_head, file_lock_list); | 167 | static DEFINE_PER_CPU(struct hlist_head, file_lock_list); |
168 | 168 | ||
169 | /* | 169 | /* |
170 | * The blocked_hash is used to find POSIX lock loops for deadlock detection. | 170 | * The blocked_hash is used to find POSIX lock loops for deadlock detection. |
171 | * It is protected by blocked_lock_lock. | 171 | * It is protected by blocked_lock_lock. |
172 | * | 172 | * |
173 | * We hash locks by lockowner in order to optimize searching for the lock a | 173 | * We hash locks by lockowner in order to optimize searching for the lock a |
174 | * particular lockowner is waiting on. | 174 | * particular lockowner is waiting on. |
175 | * | 175 | * |
176 | * FIXME: make this value scale via some heuristic? We generally will want more | 176 | * FIXME: make this value scale via some heuristic? We generally will want more |
177 | * buckets when we have more lockowners holding locks, but that's a little | 177 | * buckets when we have more lockowners holding locks, but that's a little |
178 | * difficult to determine without knowing what the workload will look like. | 178 | * difficult to determine without knowing what the workload will look like. |
179 | */ | 179 | */ |
180 | #define BLOCKED_HASH_BITS 7 | 180 | #define BLOCKED_HASH_BITS 7 |
181 | static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS); | 181 | static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS); |
182 | 182 | ||
183 | /* | 183 | /* |
184 | * This lock protects the blocked_hash. Generally, if you're accessing it, you | 184 | * This lock protects the blocked_hash. Generally, if you're accessing it, you |
185 | * want to be holding this lock. | 185 | * want to be holding this lock. |
186 | * | 186 | * |
187 | * In addition, it also protects the fl->fl_block list, and the fl->fl_next | 187 | * In addition, it also protects the fl->fl_block list, and the fl->fl_next |
188 | * pointer for file_lock structures that are acting as lock requests (in | 188 | * pointer for file_lock structures that are acting as lock requests (in |
189 | * contrast to those that are acting as records of acquired locks). | 189 | * contrast to those that are acting as records of acquired locks). |
190 | * | 190 | * |
191 | * Note that when we acquire this lock in order to change the above fields, | 191 | * Note that when we acquire this lock in order to change the above fields, |
192 | * we often hold the i_lock as well. In certain cases, when reading the fields | 192 | * we often hold the i_lock as well. In certain cases, when reading the fields |
193 | * protected by this lock, we can skip acquiring it iff we already hold the | 193 | * protected by this lock, we can skip acquiring it iff we already hold the |
194 | * i_lock. | 194 | * i_lock. |
195 | * | 195 | * |
196 | * In particular, adding an entry to the fl_block list requires that you hold | 196 | * In particular, adding an entry to the fl_block list requires that you hold |
197 | * both the i_lock and the blocked_lock_lock (acquired in that order). Deleting | 197 | * both the i_lock and the blocked_lock_lock (acquired in that order). Deleting |
198 | * an entry from the list however only requires the file_lock_lock. | 198 | * an entry from the list however only requires the file_lock_lock. |
199 | */ | 199 | */ |
200 | static DEFINE_SPINLOCK(blocked_lock_lock); | 200 | static DEFINE_SPINLOCK(blocked_lock_lock); |
201 | 201 | ||
202 | static struct kmem_cache *filelock_cache __read_mostly; | 202 | static struct kmem_cache *filelock_cache __read_mostly; |
203 | 203 | ||
204 | static void locks_init_lock_heads(struct file_lock *fl) | 204 | static void locks_init_lock_heads(struct file_lock *fl) |
205 | { | 205 | { |
206 | INIT_HLIST_NODE(&fl->fl_link); | 206 | INIT_HLIST_NODE(&fl->fl_link); |
207 | INIT_LIST_HEAD(&fl->fl_block); | 207 | INIT_LIST_HEAD(&fl->fl_block); |
208 | init_waitqueue_head(&fl->fl_wait); | 208 | init_waitqueue_head(&fl->fl_wait); |
209 | } | 209 | } |
210 | 210 | ||
211 | /* Allocate an empty lock structure. */ | 211 | /* Allocate an empty lock structure. */ |
212 | struct file_lock *locks_alloc_lock(void) | 212 | struct file_lock *locks_alloc_lock(void) |
213 | { | 213 | { |
214 | struct file_lock *fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL); | 214 | struct file_lock *fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL); |
215 | 215 | ||
216 | if (fl) | 216 | if (fl) |
217 | locks_init_lock_heads(fl); | 217 | locks_init_lock_heads(fl); |
218 | 218 | ||
219 | return fl; | 219 | return fl; |
220 | } | 220 | } |
221 | EXPORT_SYMBOL_GPL(locks_alloc_lock); | 221 | EXPORT_SYMBOL_GPL(locks_alloc_lock); |
222 | 222 | ||
223 | void locks_release_private(struct file_lock *fl) | 223 | void locks_release_private(struct file_lock *fl) |
224 | { | 224 | { |
225 | if (fl->fl_ops) { | 225 | if (fl->fl_ops) { |
226 | if (fl->fl_ops->fl_release_private) | 226 | if (fl->fl_ops->fl_release_private) |
227 | fl->fl_ops->fl_release_private(fl); | 227 | fl->fl_ops->fl_release_private(fl); |
228 | fl->fl_ops = NULL; | 228 | fl->fl_ops = NULL; |
229 | } | 229 | } |
230 | fl->fl_lmops = NULL; | 230 | fl->fl_lmops = NULL; |
231 | 231 | ||
232 | } | 232 | } |
233 | EXPORT_SYMBOL_GPL(locks_release_private); | 233 | EXPORT_SYMBOL_GPL(locks_release_private); |
234 | 234 | ||
235 | /* Free a lock which is not in use. */ | 235 | /* Free a lock which is not in use. */ |
236 | void locks_free_lock(struct file_lock *fl) | 236 | void locks_free_lock(struct file_lock *fl) |
237 | { | 237 | { |
238 | BUG_ON(waitqueue_active(&fl->fl_wait)); | 238 | BUG_ON(waitqueue_active(&fl->fl_wait)); |
239 | BUG_ON(!list_empty(&fl->fl_block)); | 239 | BUG_ON(!list_empty(&fl->fl_block)); |
240 | BUG_ON(!hlist_unhashed(&fl->fl_link)); | 240 | BUG_ON(!hlist_unhashed(&fl->fl_link)); |
241 | 241 | ||
242 | locks_release_private(fl); | 242 | locks_release_private(fl); |
243 | kmem_cache_free(filelock_cache, fl); | 243 | kmem_cache_free(filelock_cache, fl); |
244 | } | 244 | } |
245 | EXPORT_SYMBOL(locks_free_lock); | 245 | EXPORT_SYMBOL(locks_free_lock); |
246 | 246 | ||
247 | void locks_init_lock(struct file_lock *fl) | 247 | void locks_init_lock(struct file_lock *fl) |
248 | { | 248 | { |
249 | memset(fl, 0, sizeof(struct file_lock)); | 249 | memset(fl, 0, sizeof(struct file_lock)); |
250 | locks_init_lock_heads(fl); | 250 | locks_init_lock_heads(fl); |
251 | } | 251 | } |
252 | 252 | ||
253 | EXPORT_SYMBOL(locks_init_lock); | 253 | EXPORT_SYMBOL(locks_init_lock); |
254 | 254 | ||
255 | static void locks_copy_private(struct file_lock *new, struct file_lock *fl) | 255 | static void locks_copy_private(struct file_lock *new, struct file_lock *fl) |
256 | { | 256 | { |
257 | if (fl->fl_ops) { | 257 | if (fl->fl_ops) { |
258 | if (fl->fl_ops->fl_copy_lock) | 258 | if (fl->fl_ops->fl_copy_lock) |
259 | fl->fl_ops->fl_copy_lock(new, fl); | 259 | fl->fl_ops->fl_copy_lock(new, fl); |
260 | new->fl_ops = fl->fl_ops; | 260 | new->fl_ops = fl->fl_ops; |
261 | } | 261 | } |
262 | if (fl->fl_lmops) | 262 | if (fl->fl_lmops) |
263 | new->fl_lmops = fl->fl_lmops; | 263 | new->fl_lmops = fl->fl_lmops; |
264 | } | 264 | } |
265 | 265 | ||
266 | /* | 266 | /* |
267 | * Initialize a new lock from an existing file_lock structure. | 267 | * Initialize a new lock from an existing file_lock structure. |
268 | */ | 268 | */ |
269 | void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl) | 269 | void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl) |
270 | { | 270 | { |
271 | new->fl_owner = fl->fl_owner; | 271 | new->fl_owner = fl->fl_owner; |
272 | new->fl_pid = fl->fl_pid; | 272 | new->fl_pid = fl->fl_pid; |
273 | new->fl_file = NULL; | 273 | new->fl_file = NULL; |
274 | new->fl_flags = fl->fl_flags; | 274 | new->fl_flags = fl->fl_flags; |
275 | new->fl_type = fl->fl_type; | 275 | new->fl_type = fl->fl_type; |
276 | new->fl_start = fl->fl_start; | 276 | new->fl_start = fl->fl_start; |
277 | new->fl_end = fl->fl_end; | 277 | new->fl_end = fl->fl_end; |
278 | new->fl_ops = NULL; | 278 | new->fl_ops = NULL; |
279 | new->fl_lmops = NULL; | 279 | new->fl_lmops = NULL; |
280 | } | 280 | } |
281 | EXPORT_SYMBOL(__locks_copy_lock); | 281 | EXPORT_SYMBOL(__locks_copy_lock); |
282 | 282 | ||
283 | void locks_copy_lock(struct file_lock *new, struct file_lock *fl) | 283 | void locks_copy_lock(struct file_lock *new, struct file_lock *fl) |
284 | { | 284 | { |
285 | locks_release_private(new); | 285 | locks_release_private(new); |
286 | 286 | ||
287 | __locks_copy_lock(new, fl); | 287 | __locks_copy_lock(new, fl); |
288 | new->fl_file = fl->fl_file; | 288 | new->fl_file = fl->fl_file; |
289 | new->fl_ops = fl->fl_ops; | 289 | new->fl_ops = fl->fl_ops; |
290 | new->fl_lmops = fl->fl_lmops; | 290 | new->fl_lmops = fl->fl_lmops; |
291 | 291 | ||
292 | locks_copy_private(new, fl); | 292 | locks_copy_private(new, fl); |
293 | } | 293 | } |
294 | 294 | ||
295 | EXPORT_SYMBOL(locks_copy_lock); | 295 | EXPORT_SYMBOL(locks_copy_lock); |
296 | 296 | ||
297 | static inline int flock_translate_cmd(int cmd) { | 297 | static inline int flock_translate_cmd(int cmd) { |
298 | if (cmd & LOCK_MAND) | 298 | if (cmd & LOCK_MAND) |
299 | return cmd & (LOCK_MAND | LOCK_RW); | 299 | return cmd & (LOCK_MAND | LOCK_RW); |
300 | switch (cmd) { | 300 | switch (cmd) { |
301 | case LOCK_SH: | 301 | case LOCK_SH: |
302 | return F_RDLCK; | 302 | return F_RDLCK; |
303 | case LOCK_EX: | 303 | case LOCK_EX: |
304 | return F_WRLCK; | 304 | return F_WRLCK; |
305 | case LOCK_UN: | 305 | case LOCK_UN: |
306 | return F_UNLCK; | 306 | return F_UNLCK; |
307 | } | 307 | } |
308 | return -EINVAL; | 308 | return -EINVAL; |
309 | } | 309 | } |
310 | 310 | ||
311 | /* Fill in a file_lock structure with an appropriate FLOCK lock. */ | 311 | /* Fill in a file_lock structure with an appropriate FLOCK lock. */ |
312 | static int flock_make_lock(struct file *filp, struct file_lock **lock, | 312 | static int flock_make_lock(struct file *filp, struct file_lock **lock, |
313 | unsigned int cmd) | 313 | unsigned int cmd) |
314 | { | 314 | { |
315 | struct file_lock *fl; | 315 | struct file_lock *fl; |
316 | int type = flock_translate_cmd(cmd); | 316 | int type = flock_translate_cmd(cmd); |
317 | if (type < 0) | 317 | if (type < 0) |
318 | return type; | 318 | return type; |
319 | 319 | ||
320 | fl = locks_alloc_lock(); | 320 | fl = locks_alloc_lock(); |
321 | if (fl == NULL) | 321 | if (fl == NULL) |
322 | return -ENOMEM; | 322 | return -ENOMEM; |
323 | 323 | ||
324 | fl->fl_file = filp; | 324 | fl->fl_file = filp; |
325 | fl->fl_pid = current->tgid; | 325 | fl->fl_pid = current->tgid; |
326 | fl->fl_flags = FL_FLOCK; | 326 | fl->fl_flags = FL_FLOCK; |
327 | fl->fl_type = type; | 327 | fl->fl_type = type; |
328 | fl->fl_end = OFFSET_MAX; | 328 | fl->fl_end = OFFSET_MAX; |
329 | 329 | ||
330 | *lock = fl; | 330 | *lock = fl; |
331 | return 0; | 331 | return 0; |
332 | } | 332 | } |
333 | 333 | ||
334 | static int assign_type(struct file_lock *fl, long type) | 334 | static int assign_type(struct file_lock *fl, long type) |
335 | { | 335 | { |
336 | switch (type) { | 336 | switch (type) { |
337 | case F_RDLCK: | 337 | case F_RDLCK: |
338 | case F_WRLCK: | 338 | case F_WRLCK: |
339 | case F_UNLCK: | 339 | case F_UNLCK: |
340 | fl->fl_type = type; | 340 | fl->fl_type = type; |
341 | break; | 341 | break; |
342 | default: | 342 | default: |
343 | return -EINVAL; | 343 | return -EINVAL; |
344 | } | 344 | } |
345 | return 0; | 345 | return 0; |
346 | } | 346 | } |
347 | 347 | ||
348 | static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl, | 348 | static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl, |
349 | struct flock64 *l) | 349 | struct flock64 *l) |
350 | { | 350 | { |
351 | switch (l->l_whence) { | 351 | switch (l->l_whence) { |
352 | case SEEK_SET: | 352 | case SEEK_SET: |
353 | fl->fl_start = 0; | 353 | fl->fl_start = 0; |
354 | break; | 354 | break; |
355 | case SEEK_CUR: | 355 | case SEEK_CUR: |
356 | fl->fl_start = filp->f_pos; | 356 | fl->fl_start = filp->f_pos; |
357 | break; | 357 | break; |
358 | case SEEK_END: | 358 | case SEEK_END: |
359 | fl->fl_start = i_size_read(file_inode(filp)); | 359 | fl->fl_start = i_size_read(file_inode(filp)); |
360 | break; | 360 | break; |
361 | default: | 361 | default: |
362 | return -EINVAL; | 362 | return -EINVAL; |
363 | } | 363 | } |
364 | if (l->l_start > OFFSET_MAX - fl->fl_start) | 364 | if (l->l_start > OFFSET_MAX - fl->fl_start) |
365 | return -EOVERFLOW; | 365 | return -EOVERFLOW; |
366 | fl->fl_start += l->l_start; | 366 | fl->fl_start += l->l_start; |
367 | if (fl->fl_start < 0) | 367 | if (fl->fl_start < 0) |
368 | return -EINVAL; | 368 | return -EINVAL; |
369 | 369 | ||
370 | /* POSIX-1996 leaves the case l->l_len < 0 undefined; | 370 | /* POSIX-1996 leaves the case l->l_len < 0 undefined; |
371 | POSIX-2001 defines it. */ | 371 | POSIX-2001 defines it. */ |
372 | if (l->l_len > 0) { | 372 | if (l->l_len > 0) { |
373 | if (l->l_len - 1 > OFFSET_MAX - fl->fl_start) | 373 | if (l->l_len - 1 > OFFSET_MAX - fl->fl_start) |
374 | return -EOVERFLOW; | 374 | return -EOVERFLOW; |
375 | fl->fl_end = fl->fl_start + l->l_len - 1; | 375 | fl->fl_end = fl->fl_start + l->l_len - 1; |
376 | 376 | ||
377 | } else if (l->l_len < 0) { | 377 | } else if (l->l_len < 0) { |
378 | if (fl->fl_start + l->l_len < 0) | 378 | if (fl->fl_start + l->l_len < 0) |
379 | return -EINVAL; | 379 | return -EINVAL; |
380 | fl->fl_end = fl->fl_start - 1; | 380 | fl->fl_end = fl->fl_start - 1; |
381 | fl->fl_start += l->l_len; | 381 | fl->fl_start += l->l_len; |
382 | } else | 382 | } else |
383 | fl->fl_end = OFFSET_MAX; | 383 | fl->fl_end = OFFSET_MAX; |
384 | 384 | ||
385 | fl->fl_owner = current->files; | 385 | fl->fl_owner = current->files; |
386 | fl->fl_pid = current->tgid; | 386 | fl->fl_pid = current->tgid; |
387 | fl->fl_file = filp; | 387 | fl->fl_file = filp; |
388 | fl->fl_flags = FL_POSIX; | 388 | fl->fl_flags = FL_POSIX; |
389 | fl->fl_ops = NULL; | 389 | fl->fl_ops = NULL; |
390 | fl->fl_lmops = NULL; | 390 | fl->fl_lmops = NULL; |
391 | 391 | ||
392 | /* Ensure that fl->fl_file has compatible f_mode */ | 392 | /* Ensure that fl->fl_file has compatible f_mode */ |
393 | switch (l->l_type) { | 393 | switch (l->l_type) { |
394 | case F_RDLCK: | 394 | case F_RDLCK: |
395 | if (!(filp->f_mode & FMODE_READ)) | 395 | if (!(filp->f_mode & FMODE_READ)) |
396 | return -EBADF; | 396 | return -EBADF; |
397 | break; | 397 | break; |
398 | case F_WRLCK: | 398 | case F_WRLCK: |
399 | if (!(filp->f_mode & FMODE_WRITE)) | 399 | if (!(filp->f_mode & FMODE_WRITE)) |
400 | return -EBADF; | 400 | return -EBADF; |
401 | break; | 401 | break; |
402 | } | 402 | } |
403 | 403 | ||
404 | return assign_type(fl, l->l_type); | 404 | return assign_type(fl, l->l_type); |
405 | } | 405 | } |
406 | 406 | ||
407 | /* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX | 407 | /* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX |
408 | * style lock. | 408 | * style lock. |
409 | */ | 409 | */ |
410 | static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, | 410 | static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, |
411 | struct flock *l) | 411 | struct flock *l) |
412 | { | 412 | { |
413 | struct flock64 ll = { | 413 | struct flock64 ll = { |
414 | .l_type = l->l_type, | 414 | .l_type = l->l_type, |
415 | .l_whence = l->l_whence, | 415 | .l_whence = l->l_whence, |
416 | .l_start = l->l_start, | 416 | .l_start = l->l_start, |
417 | .l_len = l->l_len, | 417 | .l_len = l->l_len, |
418 | }; | 418 | }; |
419 | 419 | ||
420 | return flock64_to_posix_lock(filp, fl, &ll); | 420 | return flock64_to_posix_lock(filp, fl, &ll); |
421 | } | 421 | } |
422 | 422 | ||
423 | /* default lease lock manager operations */ | 423 | /* default lease lock manager operations */ |
424 | static void lease_break_callback(struct file_lock *fl) | 424 | static void lease_break_callback(struct file_lock *fl) |
425 | { | 425 | { |
426 | kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG); | 426 | kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG); |
427 | } | 427 | } |
428 | 428 | ||
429 | static const struct lock_manager_operations lease_manager_ops = { | 429 | static const struct lock_manager_operations lease_manager_ops = { |
430 | .lm_break = lease_break_callback, | 430 | .lm_break = lease_break_callback, |
431 | .lm_change = lease_modify, | 431 | .lm_change = lease_modify, |
432 | }; | 432 | }; |
433 | 433 | ||
434 | /* | 434 | /* |
435 | * Initialize a lease, use the default lock manager operations | 435 | * Initialize a lease, use the default lock manager operations |
436 | */ | 436 | */ |
437 | static int lease_init(struct file *filp, long type, struct file_lock *fl) | 437 | static int lease_init(struct file *filp, long type, struct file_lock *fl) |
438 | { | 438 | { |
439 | if (assign_type(fl, type) != 0) | 439 | if (assign_type(fl, type) != 0) |
440 | return -EINVAL; | 440 | return -EINVAL; |
441 | 441 | ||
442 | fl->fl_owner = current->files; | 442 | fl->fl_owner = current->files; |
443 | fl->fl_pid = current->tgid; | 443 | fl->fl_pid = current->tgid; |
444 | 444 | ||
445 | fl->fl_file = filp; | 445 | fl->fl_file = filp; |
446 | fl->fl_flags = FL_LEASE; | 446 | fl->fl_flags = FL_LEASE; |
447 | fl->fl_start = 0; | 447 | fl->fl_start = 0; |
448 | fl->fl_end = OFFSET_MAX; | 448 | fl->fl_end = OFFSET_MAX; |
449 | fl->fl_ops = NULL; | 449 | fl->fl_ops = NULL; |
450 | fl->fl_lmops = &lease_manager_ops; | 450 | fl->fl_lmops = &lease_manager_ops; |
451 | return 0; | 451 | return 0; |
452 | } | 452 | } |
453 | 453 | ||
454 | /* Allocate a file_lock initialised to this type of lease */ | 454 | /* Allocate a file_lock initialised to this type of lease */ |
455 | static struct file_lock *lease_alloc(struct file *filp, long type) | 455 | static struct file_lock *lease_alloc(struct file *filp, long type) |
456 | { | 456 | { |
457 | struct file_lock *fl = locks_alloc_lock(); | 457 | struct file_lock *fl = locks_alloc_lock(); |
458 | int error = -ENOMEM; | 458 | int error = -ENOMEM; |
459 | 459 | ||
460 | if (fl == NULL) | 460 | if (fl == NULL) |
461 | return ERR_PTR(error); | 461 | return ERR_PTR(error); |
462 | 462 | ||
463 | error = lease_init(filp, type, fl); | 463 | error = lease_init(filp, type, fl); |
464 | if (error) { | 464 | if (error) { |
465 | locks_free_lock(fl); | 465 | locks_free_lock(fl); |
466 | return ERR_PTR(error); | 466 | return ERR_PTR(error); |
467 | } | 467 | } |
468 | return fl; | 468 | return fl; |
469 | } | 469 | } |
470 | 470 | ||
471 | /* Check if two locks overlap each other. | 471 | /* Check if two locks overlap each other. |
472 | */ | 472 | */ |
473 | static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2) | 473 | static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2) |
474 | { | 474 | { |
475 | return ((fl1->fl_end >= fl2->fl_start) && | 475 | return ((fl1->fl_end >= fl2->fl_start) && |
476 | (fl2->fl_end >= fl1->fl_start)); | 476 | (fl2->fl_end >= fl1->fl_start)); |
477 | } | 477 | } |
478 | 478 | ||
479 | /* | 479 | /* |
480 | * Check whether two locks have the same owner. | 480 | * Check whether two locks have the same owner. |
481 | */ | 481 | */ |
482 | static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2) | 482 | static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2) |
483 | { | 483 | { |
484 | if (fl1->fl_lmops && fl1->fl_lmops->lm_compare_owner) | 484 | if (fl1->fl_lmops && fl1->fl_lmops->lm_compare_owner) |
485 | return fl2->fl_lmops == fl1->fl_lmops && | 485 | return fl2->fl_lmops == fl1->fl_lmops && |
486 | fl1->fl_lmops->lm_compare_owner(fl1, fl2); | 486 | fl1->fl_lmops->lm_compare_owner(fl1, fl2); |
487 | return fl1->fl_owner == fl2->fl_owner; | 487 | return fl1->fl_owner == fl2->fl_owner; |
488 | } | 488 | } |
489 | 489 | ||
490 | /* Must be called with the i_lock held! */ | 490 | /* Must be called with the i_lock held! */ |
491 | static void locks_insert_global_locks(struct file_lock *fl) | 491 | static void locks_insert_global_locks(struct file_lock *fl) |
492 | { | 492 | { |
493 | lg_local_lock(&file_lock_lglock); | 493 | lg_local_lock(&file_lock_lglock); |
494 | fl->fl_link_cpu = smp_processor_id(); | 494 | fl->fl_link_cpu = smp_processor_id(); |
495 | hlist_add_head(&fl->fl_link, this_cpu_ptr(&file_lock_list)); | 495 | hlist_add_head(&fl->fl_link, this_cpu_ptr(&file_lock_list)); |
496 | lg_local_unlock(&file_lock_lglock); | 496 | lg_local_unlock(&file_lock_lglock); |
497 | } | 497 | } |
498 | 498 | ||
499 | /* Must be called with the i_lock held! */ | 499 | /* Must be called with the i_lock held! */ |
500 | static void locks_delete_global_locks(struct file_lock *fl) | 500 | static void locks_delete_global_locks(struct file_lock *fl) |
501 | { | 501 | { |
502 | /* | 502 | /* |
503 | * Avoid taking lock if already unhashed. This is safe since this check | 503 | * Avoid taking lock if already unhashed. This is safe since this check |
504 | * is done while holding the i_lock, and new insertions into the list | 504 | * is done while holding the i_lock, and new insertions into the list |
505 | * also require that it be held. | 505 | * also require that it be held. |
506 | */ | 506 | */ |
507 | if (hlist_unhashed(&fl->fl_link)) | 507 | if (hlist_unhashed(&fl->fl_link)) |
508 | return; | 508 | return; |
509 | lg_local_lock_cpu(&file_lock_lglock, fl->fl_link_cpu); | 509 | lg_local_lock_cpu(&file_lock_lglock, fl->fl_link_cpu); |
510 | hlist_del_init(&fl->fl_link); | 510 | hlist_del_init(&fl->fl_link); |
511 | lg_local_unlock_cpu(&file_lock_lglock, fl->fl_link_cpu); | 511 | lg_local_unlock_cpu(&file_lock_lglock, fl->fl_link_cpu); |
512 | } | 512 | } |
513 | 513 | ||
514 | static unsigned long | 514 | static unsigned long |
515 | posix_owner_key(struct file_lock *fl) | 515 | posix_owner_key(struct file_lock *fl) |
516 | { | 516 | { |
517 | if (fl->fl_lmops && fl->fl_lmops->lm_owner_key) | 517 | if (fl->fl_lmops && fl->fl_lmops->lm_owner_key) |
518 | return fl->fl_lmops->lm_owner_key(fl); | 518 | return fl->fl_lmops->lm_owner_key(fl); |
519 | return (unsigned long)fl->fl_owner; | 519 | return (unsigned long)fl->fl_owner; |
520 | } | 520 | } |
521 | 521 | ||
522 | static void locks_insert_global_blocked(struct file_lock *waiter) | 522 | static void locks_insert_global_blocked(struct file_lock *waiter) |
523 | { | 523 | { |
524 | hash_add(blocked_hash, &waiter->fl_link, posix_owner_key(waiter)); | 524 | hash_add(blocked_hash, &waiter->fl_link, posix_owner_key(waiter)); |
525 | } | 525 | } |
526 | 526 | ||
527 | static void locks_delete_global_blocked(struct file_lock *waiter) | 527 | static void locks_delete_global_blocked(struct file_lock *waiter) |
528 | { | 528 | { |
529 | hash_del(&waiter->fl_link); | 529 | hash_del(&waiter->fl_link); |
530 | } | 530 | } |
531 | 531 | ||
532 | /* Remove waiter from blocker's block list. | 532 | /* Remove waiter from blocker's block list. |
533 | * When blocker ends up pointing to itself then the list is empty. | 533 | * When blocker ends up pointing to itself then the list is empty. |
534 | * | 534 | * |
535 | * Must be called with blocked_lock_lock held. | 535 | * Must be called with blocked_lock_lock held. |
536 | */ | 536 | */ |
537 | static void __locks_delete_block(struct file_lock *waiter) | 537 | static void __locks_delete_block(struct file_lock *waiter) |
538 | { | 538 | { |
539 | locks_delete_global_blocked(waiter); | 539 | locks_delete_global_blocked(waiter); |
540 | list_del_init(&waiter->fl_block); | 540 | list_del_init(&waiter->fl_block); |
541 | waiter->fl_next = NULL; | 541 | waiter->fl_next = NULL; |
542 | } | 542 | } |
543 | 543 | ||
544 | static void locks_delete_block(struct file_lock *waiter) | 544 | static void locks_delete_block(struct file_lock *waiter) |
545 | { | 545 | { |
546 | spin_lock(&blocked_lock_lock); | 546 | spin_lock(&blocked_lock_lock); |
547 | __locks_delete_block(waiter); | 547 | __locks_delete_block(waiter); |
548 | spin_unlock(&blocked_lock_lock); | 548 | spin_unlock(&blocked_lock_lock); |
549 | } | 549 | } |
550 | 550 | ||
551 | /* Insert waiter into blocker's block list. | 551 | /* Insert waiter into blocker's block list. |
552 | * We use a circular list so that processes can be easily woken up in | 552 | * We use a circular list so that processes can be easily woken up in |
553 | * the order they blocked. The documentation doesn't require this but | 553 | * the order they blocked. The documentation doesn't require this but |
554 | * it seems like the reasonable thing to do. | 554 | * it seems like the reasonable thing to do. |
555 | * | 555 | * |
556 | * Must be called with both the i_lock and blocked_lock_lock held. The fl_block | 556 | * Must be called with both the i_lock and blocked_lock_lock held. The fl_block |
557 | * list itself is protected by the blocked_lock_lock, but by ensuring that the | 557 | * list itself is protected by the blocked_lock_lock, but by ensuring that the |
558 | * i_lock is also held on insertions we can avoid taking the blocked_lock_lock | 558 | * i_lock is also held on insertions we can avoid taking the blocked_lock_lock |
559 | * in some cases when we see that the fl_block list is empty. | 559 | * in some cases when we see that the fl_block list is empty. |
560 | */ | 560 | */ |
561 | static void __locks_insert_block(struct file_lock *blocker, | 561 | static void __locks_insert_block(struct file_lock *blocker, |
562 | struct file_lock *waiter) | 562 | struct file_lock *waiter) |
563 | { | 563 | { |
564 | BUG_ON(!list_empty(&waiter->fl_block)); | 564 | BUG_ON(!list_empty(&waiter->fl_block)); |
565 | waiter->fl_next = blocker; | 565 | waiter->fl_next = blocker; |
566 | list_add_tail(&waiter->fl_block, &blocker->fl_block); | 566 | list_add_tail(&waiter->fl_block, &blocker->fl_block); |
567 | if (IS_POSIX(blocker) && !IS_FILE_PVT(blocker)) | 567 | if (IS_POSIX(blocker) && !IS_OFDLCK(blocker)) |
568 | locks_insert_global_blocked(waiter); | 568 | locks_insert_global_blocked(waiter); |
569 | } | 569 | } |
570 | 570 | ||
571 | /* Must be called with i_lock held. */ | 571 | /* Must be called with i_lock held. */ |
572 | static void locks_insert_block(struct file_lock *blocker, | 572 | static void locks_insert_block(struct file_lock *blocker, |
573 | struct file_lock *waiter) | 573 | struct file_lock *waiter) |
574 | { | 574 | { |
575 | spin_lock(&blocked_lock_lock); | 575 | spin_lock(&blocked_lock_lock); |
576 | __locks_insert_block(blocker, waiter); | 576 | __locks_insert_block(blocker, waiter); |
577 | spin_unlock(&blocked_lock_lock); | 577 | spin_unlock(&blocked_lock_lock); |
578 | } | 578 | } |
579 | 579 | ||
580 | /* | 580 | /* |
581 | * Wake up processes blocked waiting for blocker. | 581 | * Wake up processes blocked waiting for blocker. |
582 | * | 582 | * |
583 | * Must be called with the inode->i_lock held! | 583 | * Must be called with the inode->i_lock held! |
584 | */ | 584 | */ |
585 | static void locks_wake_up_blocks(struct file_lock *blocker) | 585 | static void locks_wake_up_blocks(struct file_lock *blocker) |
586 | { | 586 | { |
587 | /* | 587 | /* |
588 | * Avoid taking global lock if list is empty. This is safe since new | 588 | * Avoid taking global lock if list is empty. This is safe since new |
589 | * blocked requests are only added to the list under the i_lock, and | 589 | * blocked requests are only added to the list under the i_lock, and |
590 | * the i_lock is always held here. Note that removal from the fl_block | 590 | * the i_lock is always held here. Note that removal from the fl_block |
591 | * list does not require the i_lock, so we must recheck list_empty() | 591 | * list does not require the i_lock, so we must recheck list_empty() |
592 | * after acquiring the blocked_lock_lock. | 592 | * after acquiring the blocked_lock_lock. |
593 | */ | 593 | */ |
594 | if (list_empty(&blocker->fl_block)) | 594 | if (list_empty(&blocker->fl_block)) |
595 | return; | 595 | return; |
596 | 596 | ||
597 | spin_lock(&blocked_lock_lock); | 597 | spin_lock(&blocked_lock_lock); |
598 | while (!list_empty(&blocker->fl_block)) { | 598 | while (!list_empty(&blocker->fl_block)) { |
599 | struct file_lock *waiter; | 599 | struct file_lock *waiter; |
600 | 600 | ||
601 | waiter = list_first_entry(&blocker->fl_block, | 601 | waiter = list_first_entry(&blocker->fl_block, |
602 | struct file_lock, fl_block); | 602 | struct file_lock, fl_block); |
603 | __locks_delete_block(waiter); | 603 | __locks_delete_block(waiter); |
604 | if (waiter->fl_lmops && waiter->fl_lmops->lm_notify) | 604 | if (waiter->fl_lmops && waiter->fl_lmops->lm_notify) |
605 | waiter->fl_lmops->lm_notify(waiter); | 605 | waiter->fl_lmops->lm_notify(waiter); |
606 | else | 606 | else |
607 | wake_up(&waiter->fl_wait); | 607 | wake_up(&waiter->fl_wait); |
608 | } | 608 | } |
609 | spin_unlock(&blocked_lock_lock); | 609 | spin_unlock(&blocked_lock_lock); |
610 | } | 610 | } |
611 | 611 | ||
612 | /* Insert file lock fl into an inode's lock list at the position indicated | 612 | /* Insert file lock fl into an inode's lock list at the position indicated |
613 | * by pos. At the same time add the lock to the global file lock list. | 613 | * by pos. At the same time add the lock to the global file lock list. |
614 | * | 614 | * |
615 | * Must be called with the i_lock held! | 615 | * Must be called with the i_lock held! |
616 | */ | 616 | */ |
617 | static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) | 617 | static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) |
618 | { | 618 | { |
619 | fl->fl_nspid = get_pid(task_tgid(current)); | 619 | fl->fl_nspid = get_pid(task_tgid(current)); |
620 | 620 | ||
621 | /* insert into file's list */ | 621 | /* insert into file's list */ |
622 | fl->fl_next = *pos; | 622 | fl->fl_next = *pos; |
623 | *pos = fl; | 623 | *pos = fl; |
624 | 624 | ||
625 | locks_insert_global_locks(fl); | 625 | locks_insert_global_locks(fl); |
626 | } | 626 | } |
627 | 627 | ||
628 | /** | 628 | /** |
629 | * locks_delete_lock - Delete a lock and then free it. | 629 | * locks_delete_lock - Delete a lock and then free it. |
630 | * @thisfl_p: pointer that points to the fl_next field of the previous | 630 | * @thisfl_p: pointer that points to the fl_next field of the previous |
631 | * inode->i_flock list entry | 631 | * inode->i_flock list entry |
632 | * | 632 | * |
633 | * Unlink a lock from all lists and free the namespace reference, but don't | 633 | * Unlink a lock from all lists and free the namespace reference, but don't |
634 | * free it yet. Wake up processes that are blocked waiting for this lock and | 634 | * free it yet. Wake up processes that are blocked waiting for this lock and |
635 | * notify the FS that the lock has been cleared. | 635 | * notify the FS that the lock has been cleared. |
636 | * | 636 | * |
637 | * Must be called with the i_lock held! | 637 | * Must be called with the i_lock held! |
638 | */ | 638 | */ |
639 | static void locks_unlink_lock(struct file_lock **thisfl_p) | 639 | static void locks_unlink_lock(struct file_lock **thisfl_p) |
640 | { | 640 | { |
641 | struct file_lock *fl = *thisfl_p; | 641 | struct file_lock *fl = *thisfl_p; |
642 | 642 | ||
643 | locks_delete_global_locks(fl); | 643 | locks_delete_global_locks(fl); |
644 | 644 | ||
645 | *thisfl_p = fl->fl_next; | 645 | *thisfl_p = fl->fl_next; |
646 | fl->fl_next = NULL; | 646 | fl->fl_next = NULL; |
647 | 647 | ||
648 | if (fl->fl_nspid) { | 648 | if (fl->fl_nspid) { |
649 | put_pid(fl->fl_nspid); | 649 | put_pid(fl->fl_nspid); |
650 | fl->fl_nspid = NULL; | 650 | fl->fl_nspid = NULL; |
651 | } | 651 | } |
652 | 652 | ||
653 | locks_wake_up_blocks(fl); | 653 | locks_wake_up_blocks(fl); |
654 | } | 654 | } |
655 | 655 | ||
656 | /* | 656 | /* |
657 | * Unlink a lock from all lists and free it. | 657 | * Unlink a lock from all lists and free it. |
658 | * | 658 | * |
659 | * Must be called with i_lock held! | 659 | * Must be called with i_lock held! |
660 | */ | 660 | */ |
661 | static void locks_delete_lock(struct file_lock **thisfl_p) | 661 | static void locks_delete_lock(struct file_lock **thisfl_p) |
662 | { | 662 | { |
663 | struct file_lock *fl = *thisfl_p; | 663 | struct file_lock *fl = *thisfl_p; |
664 | 664 | ||
665 | locks_unlink_lock(thisfl_p); | 665 | locks_unlink_lock(thisfl_p); |
666 | locks_free_lock(fl); | 666 | locks_free_lock(fl); |
667 | } | 667 | } |
668 | 668 | ||
669 | /* Determine if lock sys_fl blocks lock caller_fl. Common functionality | 669 | /* Determine if lock sys_fl blocks lock caller_fl. Common functionality |
670 | * checks for shared/exclusive status of overlapping locks. | 670 | * checks for shared/exclusive status of overlapping locks. |
671 | */ | 671 | */ |
672 | static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) | 672 | static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) |
673 | { | 673 | { |
674 | if (sys_fl->fl_type == F_WRLCK) | 674 | if (sys_fl->fl_type == F_WRLCK) |
675 | return 1; | 675 | return 1; |
676 | if (caller_fl->fl_type == F_WRLCK) | 676 | if (caller_fl->fl_type == F_WRLCK) |
677 | return 1; | 677 | return 1; |
678 | return 0; | 678 | return 0; |
679 | } | 679 | } |
680 | 680 | ||
681 | /* Determine if lock sys_fl blocks lock caller_fl. POSIX specific | 681 | /* Determine if lock sys_fl blocks lock caller_fl. POSIX specific |
682 | * checking before calling the locks_conflict(). | 682 | * checking before calling the locks_conflict(). |
683 | */ | 683 | */ |
684 | static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) | 684 | static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) |
685 | { | 685 | { |
686 | /* POSIX locks owned by the same process do not conflict with | 686 | /* POSIX locks owned by the same process do not conflict with |
687 | * each other. | 687 | * each other. |
688 | */ | 688 | */ |
689 | if (!IS_POSIX(sys_fl) || posix_same_owner(caller_fl, sys_fl)) | 689 | if (!IS_POSIX(sys_fl) || posix_same_owner(caller_fl, sys_fl)) |
690 | return (0); | 690 | return (0); |
691 | 691 | ||
692 | /* Check whether they overlap */ | 692 | /* Check whether they overlap */ |
693 | if (!locks_overlap(caller_fl, sys_fl)) | 693 | if (!locks_overlap(caller_fl, sys_fl)) |
694 | return 0; | 694 | return 0; |
695 | 695 | ||
696 | return (locks_conflict(caller_fl, sys_fl)); | 696 | return (locks_conflict(caller_fl, sys_fl)); |
697 | } | 697 | } |
698 | 698 | ||
699 | /* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific | 699 | /* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific |
700 | * checking before calling the locks_conflict(). | 700 | * checking before calling the locks_conflict(). |
701 | */ | 701 | */ |
702 | static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) | 702 | static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) |
703 | { | 703 | { |
704 | /* FLOCK locks referring to the same filp do not conflict with | 704 | /* FLOCK locks referring to the same filp do not conflict with |
705 | * each other. | 705 | * each other. |
706 | */ | 706 | */ |
707 | if (!IS_FLOCK(sys_fl) || (caller_fl->fl_file == sys_fl->fl_file)) | 707 | if (!IS_FLOCK(sys_fl) || (caller_fl->fl_file == sys_fl->fl_file)) |
708 | return (0); | 708 | return (0); |
709 | if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND)) | 709 | if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND)) |
710 | return 0; | 710 | return 0; |
711 | 711 | ||
712 | return (locks_conflict(caller_fl, sys_fl)); | 712 | return (locks_conflict(caller_fl, sys_fl)); |
713 | } | 713 | } |
714 | 714 | ||
715 | void | 715 | void |
716 | posix_test_lock(struct file *filp, struct file_lock *fl) | 716 | posix_test_lock(struct file *filp, struct file_lock *fl) |
717 | { | 717 | { |
718 | struct file_lock *cfl; | 718 | struct file_lock *cfl; |
719 | struct inode *inode = file_inode(filp); | 719 | struct inode *inode = file_inode(filp); |
720 | 720 | ||
721 | spin_lock(&inode->i_lock); | 721 | spin_lock(&inode->i_lock); |
722 | for (cfl = file_inode(filp)->i_flock; cfl; cfl = cfl->fl_next) { | 722 | for (cfl = file_inode(filp)->i_flock; cfl; cfl = cfl->fl_next) { |
723 | if (!IS_POSIX(cfl)) | 723 | if (!IS_POSIX(cfl)) |
724 | continue; | 724 | continue; |
725 | if (posix_locks_conflict(fl, cfl)) | 725 | if (posix_locks_conflict(fl, cfl)) |
726 | break; | 726 | break; |
727 | } | 727 | } |
728 | if (cfl) { | 728 | if (cfl) { |
729 | __locks_copy_lock(fl, cfl); | 729 | __locks_copy_lock(fl, cfl); |
730 | if (cfl->fl_nspid) | 730 | if (cfl->fl_nspid) |
731 | fl->fl_pid = pid_vnr(cfl->fl_nspid); | 731 | fl->fl_pid = pid_vnr(cfl->fl_nspid); |
732 | } else | 732 | } else |
733 | fl->fl_type = F_UNLCK; | 733 | fl->fl_type = F_UNLCK; |
734 | spin_unlock(&inode->i_lock); | 734 | spin_unlock(&inode->i_lock); |
735 | return; | 735 | return; |
736 | } | 736 | } |
737 | EXPORT_SYMBOL(posix_test_lock); | 737 | EXPORT_SYMBOL(posix_test_lock); |
738 | 738 | ||
739 | /* | 739 | /* |
740 | * Deadlock detection: | 740 | * Deadlock detection: |
741 | * | 741 | * |
742 | * We attempt to detect deadlocks that are due purely to posix file | 742 | * We attempt to detect deadlocks that are due purely to posix file |
743 | * locks. | 743 | * locks. |
744 | * | 744 | * |
745 | * We assume that a task can be waiting for at most one lock at a time. | 745 | * We assume that a task can be waiting for at most one lock at a time. |
746 | * So for any acquired lock, the process holding that lock may be | 746 | * So for any acquired lock, the process holding that lock may be |
747 | * waiting on at most one other lock. That lock in turn may be held by | 747 | * waiting on at most one other lock. That lock in turn may be held by |
748 | * someone waiting for at most one other lock. Given a requested lock | 748 | * someone waiting for at most one other lock. Given a requested lock |
749 | * caller_fl which is about to wait for a conflicting lock block_fl, we | 749 | * caller_fl which is about to wait for a conflicting lock block_fl, we |
750 | * follow this chain of waiters to ensure we are not about to create a | 750 | * follow this chain of waiters to ensure we are not about to create a |
751 | * cycle. | 751 | * cycle. |
752 | * | 752 | * |
753 | * Since we do this before we ever put a process to sleep on a lock, we | 753 | * Since we do this before we ever put a process to sleep on a lock, we |
754 | * are ensured that there is never a cycle; that is what guarantees that | 754 | * are ensured that there is never a cycle; that is what guarantees that |
755 | * the while() loop in posix_locks_deadlock() eventually completes. | 755 | * the while() loop in posix_locks_deadlock() eventually completes. |
756 | * | 756 | * |
757 | * Note: the above assumption may not be true when handling lock | 757 | * Note: the above assumption may not be true when handling lock |
758 | * requests from a broken NFS client. It may also fail in the presence | 758 | * requests from a broken NFS client. It may also fail in the presence |
759 | * of tasks (such as posix threads) sharing the same open file table. | 759 | * of tasks (such as posix threads) sharing the same open file table. |
760 | * To handle those cases, we just bail out after a few iterations. | 760 | * To handle those cases, we just bail out after a few iterations. |
761 | * | 761 | * |
762 | * For FL_FILE_PVT locks, the owner is the filp, not the files_struct. | 762 | * For FL_OFDLCK locks, the owner is the filp, not the files_struct. |
763 | * Because the owner is not even nominally tied to a thread of | 763 | * Because the owner is not even nominally tied to a thread of |
764 | * execution, the deadlock detection below can't reasonably work well. Just | 764 | * execution, the deadlock detection below can't reasonably work well. Just |
765 | * skip it for those. | 765 | * skip it for those. |
766 | * | 766 | * |
767 | * In principle, we could do a more limited deadlock detection on FL_FILE_PVT | 767 | * In principle, we could do a more limited deadlock detection on FL_OFDLCK |
768 | * locks that just checks for the case where two tasks are attempting to | 768 | * locks that just checks for the case where two tasks are attempting to |
769 | * upgrade from read to write locks on the same inode. | 769 | * upgrade from read to write locks on the same inode. |
770 | */ | 770 | */ |
771 | 771 | ||
/*
 * Bound on how far posix_locks_deadlock() follows the chain of waiters
 * before it gives up and reports "no deadlock".
 */
#define MAX_DEADLK_ITERATIONS 10
773 | 773 | ||
774 | /* Find a lock that the owner of the given block_fl is blocking on. */ | 774 | /* Find a lock that the owner of the given block_fl is blocking on. */ |
775 | static struct file_lock *what_owner_is_waiting_for(struct file_lock *block_fl) | 775 | static struct file_lock *what_owner_is_waiting_for(struct file_lock *block_fl) |
776 | { | 776 | { |
777 | struct file_lock *fl; | 777 | struct file_lock *fl; |
778 | 778 | ||
779 | hash_for_each_possible(blocked_hash, fl, fl_link, posix_owner_key(block_fl)) { | 779 | hash_for_each_possible(blocked_hash, fl, fl_link, posix_owner_key(block_fl)) { |
780 | if (posix_same_owner(fl, block_fl)) | 780 | if (posix_same_owner(fl, block_fl)) |
781 | return fl->fl_next; | 781 | return fl->fl_next; |
782 | } | 782 | } |
783 | return NULL; | 783 | return NULL; |
784 | } | 784 | } |
785 | 785 | ||
786 | /* Must be called with the blocked_lock_lock held! */ | 786 | /* Must be called with the blocked_lock_lock held! */ |
787 | static int posix_locks_deadlock(struct file_lock *caller_fl, | 787 | static int posix_locks_deadlock(struct file_lock *caller_fl, |
788 | struct file_lock *block_fl) | 788 | struct file_lock *block_fl) |
789 | { | 789 | { |
790 | int i = 0; | 790 | int i = 0; |
791 | 791 | ||
792 | /* | 792 | /* |
793 | * This deadlock detector can't reasonably detect deadlocks with | 793 | * This deadlock detector can't reasonably detect deadlocks with |
794 | * FL_FILE_PVT locks, since they aren't owned by a process, per-se. | 794 | * FL_OFDLCK locks, since they aren't owned by a process, per-se. |
795 | */ | 795 | */ |
796 | if (IS_FILE_PVT(caller_fl)) | 796 | if (IS_OFDLCK(caller_fl)) |
797 | return 0; | 797 | return 0; |
798 | 798 | ||
799 | while ((block_fl = what_owner_is_waiting_for(block_fl))) { | 799 | while ((block_fl = what_owner_is_waiting_for(block_fl))) { |
800 | if (i++ > MAX_DEADLK_ITERATIONS) | 800 | if (i++ > MAX_DEADLK_ITERATIONS) |
801 | return 0; | 801 | return 0; |
802 | if (posix_same_owner(caller_fl, block_fl)) | 802 | if (posix_same_owner(caller_fl, block_fl)) |
803 | return 1; | 803 | return 1; |
804 | } | 804 | } |
805 | return 0; | 805 | return 0; |
806 | } | 806 | } |
807 | 807 | ||
808 | /* Try to create a FLOCK lock on filp. We always insert new FLOCK locks | 808 | /* Try to create a FLOCK lock on filp. We always insert new FLOCK locks |
809 | * after any leases, but before any posix locks. | 809 | * after any leases, but before any posix locks. |
810 | * | 810 | * |
811 | * Note that if called with an FL_EXISTS argument, the caller may determine | 811 | * Note that if called with an FL_EXISTS argument, the caller may determine |
812 | * whether or not a lock was successfully freed by testing the return | 812 | * whether or not a lock was successfully freed by testing the return |
813 | * value for -ENOENT. | 813 | * value for -ENOENT. |
814 | */ | 814 | */ |
815 | static int flock_lock_file(struct file *filp, struct file_lock *request) | 815 | static int flock_lock_file(struct file *filp, struct file_lock *request) |
816 | { | 816 | { |
817 | struct file_lock *new_fl = NULL; | 817 | struct file_lock *new_fl = NULL; |
818 | struct file_lock **before; | 818 | struct file_lock **before; |
819 | struct inode * inode = file_inode(filp); | 819 | struct inode * inode = file_inode(filp); |
820 | int error = 0; | 820 | int error = 0; |
821 | int found = 0; | 821 | int found = 0; |
822 | 822 | ||
823 | if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) { | 823 | if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) { |
824 | new_fl = locks_alloc_lock(); | 824 | new_fl = locks_alloc_lock(); |
825 | if (!new_fl) | 825 | if (!new_fl) |
826 | return -ENOMEM; | 826 | return -ENOMEM; |
827 | } | 827 | } |
828 | 828 | ||
829 | spin_lock(&inode->i_lock); | 829 | spin_lock(&inode->i_lock); |
830 | if (request->fl_flags & FL_ACCESS) | 830 | if (request->fl_flags & FL_ACCESS) |
831 | goto find_conflict; | 831 | goto find_conflict; |
832 | 832 | ||
833 | for_each_lock(inode, before) { | 833 | for_each_lock(inode, before) { |
834 | struct file_lock *fl = *before; | 834 | struct file_lock *fl = *before; |
835 | if (IS_POSIX(fl)) | 835 | if (IS_POSIX(fl)) |
836 | break; | 836 | break; |
837 | if (IS_LEASE(fl)) | 837 | if (IS_LEASE(fl)) |
838 | continue; | 838 | continue; |
839 | if (filp != fl->fl_file) | 839 | if (filp != fl->fl_file) |
840 | continue; | 840 | continue; |
841 | if (request->fl_type == fl->fl_type) | 841 | if (request->fl_type == fl->fl_type) |
842 | goto out; | 842 | goto out; |
843 | found = 1; | 843 | found = 1; |
844 | locks_delete_lock(before); | 844 | locks_delete_lock(before); |
845 | break; | 845 | break; |
846 | } | 846 | } |
847 | 847 | ||
848 | if (request->fl_type == F_UNLCK) { | 848 | if (request->fl_type == F_UNLCK) { |
849 | if ((request->fl_flags & FL_EXISTS) && !found) | 849 | if ((request->fl_flags & FL_EXISTS) && !found) |
850 | error = -ENOENT; | 850 | error = -ENOENT; |
851 | goto out; | 851 | goto out; |
852 | } | 852 | } |
853 | 853 | ||
854 | /* | 854 | /* |
855 | * If a higher-priority process was blocked on the old file lock, | 855 | * If a higher-priority process was blocked on the old file lock, |
856 | * give it the opportunity to lock the file. | 856 | * give it the opportunity to lock the file. |
857 | */ | 857 | */ |
858 | if (found) { | 858 | if (found) { |
859 | spin_unlock(&inode->i_lock); | 859 | spin_unlock(&inode->i_lock); |
860 | cond_resched(); | 860 | cond_resched(); |
861 | spin_lock(&inode->i_lock); | 861 | spin_lock(&inode->i_lock); |
862 | } | 862 | } |
863 | 863 | ||
864 | find_conflict: | 864 | find_conflict: |
865 | for_each_lock(inode, before) { | 865 | for_each_lock(inode, before) { |
866 | struct file_lock *fl = *before; | 866 | struct file_lock *fl = *before; |
867 | if (IS_POSIX(fl)) | 867 | if (IS_POSIX(fl)) |
868 | break; | 868 | break; |
869 | if (IS_LEASE(fl)) | 869 | if (IS_LEASE(fl)) |
870 | continue; | 870 | continue; |
871 | if (!flock_locks_conflict(request, fl)) | 871 | if (!flock_locks_conflict(request, fl)) |
872 | continue; | 872 | continue; |
873 | error = -EAGAIN; | 873 | error = -EAGAIN; |
874 | if (!(request->fl_flags & FL_SLEEP)) | 874 | if (!(request->fl_flags & FL_SLEEP)) |
875 | goto out; | 875 | goto out; |
876 | error = FILE_LOCK_DEFERRED; | 876 | error = FILE_LOCK_DEFERRED; |
877 | locks_insert_block(fl, request); | 877 | locks_insert_block(fl, request); |
878 | goto out; | 878 | goto out; |
879 | } | 879 | } |
880 | if (request->fl_flags & FL_ACCESS) | 880 | if (request->fl_flags & FL_ACCESS) |
881 | goto out; | 881 | goto out; |
882 | locks_copy_lock(new_fl, request); | 882 | locks_copy_lock(new_fl, request); |
883 | locks_insert_lock(before, new_fl); | 883 | locks_insert_lock(before, new_fl); |
884 | new_fl = NULL; | 884 | new_fl = NULL; |
885 | error = 0; | 885 | error = 0; |
886 | 886 | ||
887 | out: | 887 | out: |
888 | spin_unlock(&inode->i_lock); | 888 | spin_unlock(&inode->i_lock); |
889 | if (new_fl) | 889 | if (new_fl) |
890 | locks_free_lock(new_fl); | 890 | locks_free_lock(new_fl); |
891 | return error; | 891 | return error; |
892 | } | 892 | } |
893 | 893 | ||
/*
 * Core POSIX byte-range lock routine: apply, remove or (with FL_ACCESS)
 * merely probe @request against the lock list of @inode.
 *
 * @inode:    inode whose i_flock list is examined and updated
 * @request:  the lock being requested; its range may be widened in place
 *            when it is merged with an existing lock of the same type
 * @conflock: if non-NULL, receives a copy of the first conflicting lock
 *
 * Returns 0 on success (or "no conflict" for FL_ACCESS), -EAGAIN on a
 * conflict when FL_SLEEP is not set, -EDEADLK when posix_locks_deadlock()
 * reports a cycle, FILE_LOCK_DEFERRED when the request was queued behind
 * the conflicting lock, -ENOLCK when a preallocated file_lock that turns
 * out to be needed is missing, and -ENOENT when an FL_EXISTS unlock did
 * not touch any existing lock.
 *
 * The whole list walk runs under inode->i_lock.
 */
static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
{
	struct file_lock *fl;
	struct file_lock *new_fl = NULL;
	struct file_lock *new_fl2 = NULL;
	/* Existing lock that starts before the request and overlaps it. */
	struct file_lock *left = NULL;
	/* Existing lock that overlaps the request and ends after it. */
	struct file_lock *right = NULL;
	struct file_lock **before;
	int error;
	/* Set once the request has been absorbed into an existing lock. */
	bool added = false;

	/*
	 * We may need two file_lock structures for this operation,
	 * so we get them in advance to avoid races.
	 *
	 * In some cases we can be sure, that no new locks will be needed
	 *
	 * Allocation results are not checked here; the code below bails
	 * out with -ENOLCK only if a missing structure is actually needed.
	 */
	if (!(request->fl_flags & FL_ACCESS) &&
	    (request->fl_type != F_UNLCK ||
	     request->fl_start != 0 || request->fl_end != OFFSET_MAX)) {
		new_fl = locks_alloc_lock();
		new_fl2 = locks_alloc_lock();
	}

	spin_lock(&inode->i_lock);
	/*
	 * New lock request. Walk all POSIX locks and look for conflicts. If
	 * there are any, either return error or put the request on the
	 * blocker's list of waiters and the global blocked_hash.
	 */
	if (request->fl_type != F_UNLCK) {
		for_each_lock(inode, before) {
			fl = *before;
			if (!IS_POSIX(fl))
				continue;
			if (!posix_locks_conflict(request, fl))
				continue;
			if (conflock)
				__locks_copy_lock(conflock, fl);
			error = -EAGAIN;
			if (!(request->fl_flags & FL_SLEEP))
				goto out;
			/*
			 * Deadlock detection and insertion into the blocked
			 * locks list must be done while holding the same lock!
			 */
			error = -EDEADLK;
			spin_lock(&blocked_lock_lock);
			if (likely(!posix_locks_deadlock(request, fl))) {
				error = FILE_LOCK_DEFERRED;
				__locks_insert_block(fl, request);
			}
			spin_unlock(&blocked_lock_lock);
			goto out;
		}
	}

	/* If we're just looking for a conflict, we're done. */
	error = 0;
	if (request->fl_flags & FL_ACCESS)
		goto out;

	/*
	 * Find the first old lock with the same owner as the new lock.
	 */

	before = &inode->i_flock;

	/* First skip locks owned by other processes.  */
	while ((fl = *before) && (!IS_POSIX(fl) ||
				  !posix_same_owner(request, fl))) {
		before = &fl->fl_next;
	}

	/* Process locks with this owner. */
	while ((fl = *before) && posix_same_owner(request, fl)) {
		/* Detect adjacent or overlapping regions (if same lock type)
		 */
		if (request->fl_type == fl->fl_type) {
			/* In all comparisons of start vs end, use
			 * "start - 1" rather than "end + 1". If end
			 * is OFFSET_MAX, end + 1 will become negative.
			 */
			if (fl->fl_end < request->fl_start - 1)
				goto next_lock;
			/* If the next lock in the list has entirely bigger
			 * addresses than the new one, insert the lock here.
			 */
			if (fl->fl_start - 1 > request->fl_end)
				break;

			/* If we come here, the new and old lock are of the
			 * same type and adjacent or overlapping. Make one
			 * lock yielding from the lower start address of both
			 * locks to the higher end address.
			 */
			if (fl->fl_start > request->fl_start)
				fl->fl_start = request->fl_start;
			else
				request->fl_start = fl->fl_start;
			if (fl->fl_end < request->fl_end)
				fl->fl_end = request->fl_end;
			else
				request->fl_end = fl->fl_end;
			/* Already merged into an earlier lock: this one is redundant. */
			if (added) {
				locks_delete_lock(before);
				continue;
			}
			/* From here on the merged existing lock stands in for the request. */
			request = fl;
			added = true;
		}
		else {
			/* Processing for different lock types is a bit
			 * more complex.
			 */
			if (fl->fl_end < request->fl_start)
				goto next_lock;
			if (fl->fl_start > request->fl_end)
				break;
			/* An unlock never inserts a lock of its own. */
			if (request->fl_type == F_UNLCK)
				added = true;
			if (fl->fl_start < request->fl_start)
				left = fl;
			/* If the next lock in the list has a higher end
			 * address than the new one, insert the new one here.
			 */
			if (fl->fl_end > request->fl_end) {
				right = fl;
				break;
			}
			if (fl->fl_start >= request->fl_start) {
				/* The new lock completely replaces an old
				 * one (This may happen several times).
				 */
				if (added) {
					locks_delete_lock(before);
					continue;
				}
				/* Replace the old lock with the new one.
				 * Wake up anybody waiting for the old one,
				 * as the change in lock type might satisfy
				 * their needs.
				 */
				locks_wake_up_blocks(fl);
				fl->fl_start = request->fl_start;
				fl->fl_end = request->fl_end;
				fl->fl_type = request->fl_type;
				locks_release_private(fl);
				locks_copy_private(fl, request);
				request = fl;
				added = true;
			}
		}
		/* Go on to next lock.
		 */
	next_lock:
		before = &fl->fl_next;
	}

	/*
	 * The above code only modifies existing locks in case of merging or
	 * replacing. If new lock(s) need to be inserted all modifications are
	 * done below this, so it's safe yet to bail out.
	 */
	error = -ENOLCK; /* "no luck" */
	/* Splitting one lock in two needs the second preallocated structure. */
	if (right && left == right && !new_fl2)
		goto out;

	error = 0;
	if (!added) {
		if (request->fl_type == F_UNLCK) {
			if (request->fl_flags & FL_EXISTS)
				error = -ENOENT;
			goto out;
		}

		if (!new_fl) {
			error = -ENOLCK;
			goto out;
		}
		locks_copy_lock(new_fl, request);
		locks_insert_lock(before, new_fl);
		/* Ownership passed to the list; don't free it at "out". */
		new_fl = NULL;
	}
	if (right) {
		if (left == right) {
			/* The new lock breaks the old one in two pieces,
			 * so we have to use the second new lock.
			 */
			left = new_fl2;
			new_fl2 = NULL;
			locks_copy_lock(left, right);
			locks_insert_lock(before, left);
		}
		/* Trim the right-hand remainder so it begins just past the request. */
		right->fl_start = request->fl_end + 1;
		locks_wake_up_blocks(right);
	}
	if (left) {
		/* Trim the left-hand remainder so it ends just before the request. */
		left->fl_end = request->fl_start - 1;
		locks_wake_up_blocks(left);
	}
 out:
	spin_unlock(&inode->i_lock);
	/*
	 * Free any unused locks.
	 */
	if (new_fl)
		locks_free_lock(new_fl);
	if (new_fl2)
		locks_free_lock(new_fl2);
	return error;
}
1106 | 1106 | ||
/**
 * posix_lock_file - Apply a POSIX-style lock to a file
 * @filp: The file to apply the lock to
 * @fl: The lock to be applied
 * @conflock: Place to return a copy of the conflicting lock, if found.
 *
 * Thin wrapper that resolves @filp to its inode and hands the request to
 * __posix_lock_file().  Adjacent and overlapping locks are merged whenever
 * possible; POSIX locks are kept sorted by owner task, then by starting
 * address.
 *
 * When called with FL_EXISTS set, a return value of -ENOENT tells the
 * caller that no lock was actually freed.
 */
int posix_lock_file(struct file *filp, struct file_lock *fl,
			struct file_lock *conflock)
{
	struct inode *inode = file_inode(filp);

	return __posix_lock_file(inode, fl, conflock);
}
EXPORT_SYMBOL(posix_lock_file);
1127 | 1127 | ||
1128 | /** | 1128 | /** |
1129 | * posix_lock_file_wait - Apply a POSIX-style lock to a file | 1129 | * posix_lock_file_wait - Apply a POSIX-style lock to a file |
1130 | * @filp: The file to apply the lock to | 1130 | * @filp: The file to apply the lock to |
1131 | * @fl: The lock to be applied | 1131 | * @fl: The lock to be applied |
1132 | * | 1132 | * |
1133 | * Add a POSIX style lock to a file. | 1133 | * Add a POSIX style lock to a file. |
1134 | * We merge adjacent & overlapping locks whenever possible. | 1134 | * We merge adjacent & overlapping locks whenever possible. |
1135 | * POSIX locks are sorted by owner task, then by starting address | 1135 | * POSIX locks are sorted by owner task, then by starting address |
1136 | */ | 1136 | */ |
1137 | int posix_lock_file_wait(struct file *filp, struct file_lock *fl) | 1137 | int posix_lock_file_wait(struct file *filp, struct file_lock *fl) |
1138 | { | 1138 | { |
1139 | int error; | 1139 | int error; |
1140 | might_sleep (); | 1140 | might_sleep (); |
1141 | for (;;) { | 1141 | for (;;) { |
1142 | error = posix_lock_file(filp, fl, NULL); | 1142 | error = posix_lock_file(filp, fl, NULL); |
1143 | if (error != FILE_LOCK_DEFERRED) | 1143 | if (error != FILE_LOCK_DEFERRED) |
1144 | break; | 1144 | break; |
1145 | error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); | 1145 | error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); |
1146 | if (!error) | 1146 | if (!error) |
1147 | continue; | 1147 | continue; |
1148 | 1148 | ||
1149 | locks_delete_block(fl); | 1149 | locks_delete_block(fl); |
1150 | break; | 1150 | break; |
1151 | } | 1151 | } |
1152 | return error; | 1152 | return error; |
1153 | } | 1153 | } |
1154 | EXPORT_SYMBOL(posix_lock_file_wait); | 1154 | EXPORT_SYMBOL(posix_lock_file_wait); |
1155 | 1155 | ||
1156 | /** | 1156 | /** |
1157 | * locks_mandatory_locked - Check for an active lock | 1157 | * locks_mandatory_locked - Check for an active lock |
1158 | * @file: the file to check | 1158 | * @file: the file to check |
1159 | * | 1159 | * |
1160 | * Searches the inode's list of locks to find any POSIX locks which conflict. | 1160 | * Searches the inode's list of locks to find any POSIX locks which conflict. |
1161 | * This function is called from locks_verify_locked() only. | 1161 | * This function is called from locks_verify_locked() only. |
1162 | */ | 1162 | */ |
1163 | int locks_mandatory_locked(struct file *file) | 1163 | int locks_mandatory_locked(struct file *file) |
1164 | { | 1164 | { |
1165 | struct inode *inode = file_inode(file); | 1165 | struct inode *inode = file_inode(file); |
1166 | fl_owner_t owner = current->files; | 1166 | fl_owner_t owner = current->files; |
1167 | struct file_lock *fl; | 1167 | struct file_lock *fl; |
1168 | 1168 | ||
1169 | /* | 1169 | /* |
1170 | * Search the lock list for this inode for any POSIX locks. | 1170 | * Search the lock list for this inode for any POSIX locks. |
1171 | */ | 1171 | */ |
1172 | spin_lock(&inode->i_lock); | 1172 | spin_lock(&inode->i_lock); |
1173 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { | 1173 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { |
1174 | if (!IS_POSIX(fl)) | 1174 | if (!IS_POSIX(fl)) |
1175 | continue; | 1175 | continue; |
1176 | if (fl->fl_owner != owner && fl->fl_owner != (fl_owner_t)file) | 1176 | if (fl->fl_owner != owner && fl->fl_owner != (fl_owner_t)file) |
1177 | break; | 1177 | break; |
1178 | } | 1178 | } |
1179 | spin_unlock(&inode->i_lock); | 1179 | spin_unlock(&inode->i_lock); |
1180 | return fl ? -EAGAIN : 0; | 1180 | return fl ? -EAGAIN : 0; |
1181 | } | 1181 | } |
1182 | 1182 | ||
1183 | /** | 1183 | /** |
1184 | * locks_mandatory_area - Check for a conflicting lock | 1184 | * locks_mandatory_area - Check for a conflicting lock |
1185 | * @read_write: %FLOCK_VERIFY_WRITE for exclusive access, %FLOCK_VERIFY_READ | 1185 | * @read_write: %FLOCK_VERIFY_WRITE for exclusive access, %FLOCK_VERIFY_READ |
1186 | * for shared | 1186 | * for shared |
1187 | * @inode: the file to check | 1187 | * @inode: the file to check |
1188 | * @filp: how the file was opened (if it was) | 1188 | * @filp: how the file was opened (if it was) |
1189 | * @offset: start of area to check | 1189 | * @offset: start of area to check |
1190 | * @count: length of area to check | 1190 | * @count: length of area to check |
1191 | * | 1191 | * |
1192 | * Searches the inode's list of locks to find any POSIX locks which conflict. | 1192 | * Searches the inode's list of locks to find any POSIX locks which conflict. |
1193 | * This function is called from rw_verify_area() and | 1193 | * This function is called from rw_verify_area() and |
1194 | * locks_verify_truncate(). | 1194 | * locks_verify_truncate(). |
1195 | */ | 1195 | */ |
1196 | int locks_mandatory_area(int read_write, struct inode *inode, | 1196 | int locks_mandatory_area(int read_write, struct inode *inode, |
1197 | struct file *filp, loff_t offset, | 1197 | struct file *filp, loff_t offset, |
1198 | size_t count) | 1198 | size_t count) |
1199 | { | 1199 | { |
1200 | struct file_lock fl; | 1200 | struct file_lock fl; |
1201 | int error; | 1201 | int error; |
1202 | bool sleep = false; | 1202 | bool sleep = false; |
1203 | 1203 | ||
1204 | locks_init_lock(&fl); | 1204 | locks_init_lock(&fl); |
1205 | fl.fl_pid = current->tgid; | 1205 | fl.fl_pid = current->tgid; |
1206 | fl.fl_file = filp; | 1206 | fl.fl_file = filp; |
1207 | fl.fl_flags = FL_POSIX | FL_ACCESS; | 1207 | fl.fl_flags = FL_POSIX | FL_ACCESS; |
1208 | if (filp && !(filp->f_flags & O_NONBLOCK)) | 1208 | if (filp && !(filp->f_flags & O_NONBLOCK)) |
1209 | sleep = true; | 1209 | sleep = true; |
1210 | fl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK; | 1210 | fl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK; |
1211 | fl.fl_start = offset; | 1211 | fl.fl_start = offset; |
1212 | fl.fl_end = offset + count - 1; | 1212 | fl.fl_end = offset + count - 1; |
1213 | 1213 | ||
1214 | for (;;) { | 1214 | for (;;) { |
1215 | if (filp) { | 1215 | if (filp) { |
1216 | fl.fl_owner = (fl_owner_t)filp; | 1216 | fl.fl_owner = (fl_owner_t)filp; |
1217 | fl.fl_flags &= ~FL_SLEEP; | 1217 | fl.fl_flags &= ~FL_SLEEP; |
1218 | error = __posix_lock_file(inode, &fl, NULL); | 1218 | error = __posix_lock_file(inode, &fl, NULL); |
1219 | if (!error) | 1219 | if (!error) |
1220 | break; | 1220 | break; |
1221 | } | 1221 | } |
1222 | 1222 | ||
1223 | if (sleep) | 1223 | if (sleep) |
1224 | fl.fl_flags |= FL_SLEEP; | 1224 | fl.fl_flags |= FL_SLEEP; |
1225 | fl.fl_owner = current->files; | 1225 | fl.fl_owner = current->files; |
1226 | error = __posix_lock_file(inode, &fl, NULL); | 1226 | error = __posix_lock_file(inode, &fl, NULL); |
1227 | if (error != FILE_LOCK_DEFERRED) | 1227 | if (error != FILE_LOCK_DEFERRED) |
1228 | break; | 1228 | break; |
1229 | error = wait_event_interruptible(fl.fl_wait, !fl.fl_next); | 1229 | error = wait_event_interruptible(fl.fl_wait, !fl.fl_next); |
1230 | if (!error) { | 1230 | if (!error) { |
1231 | /* | 1231 | /* |
1232 | * If we've been sleeping someone might have | 1232 | * If we've been sleeping someone might have |
1233 | * changed the permissions behind our back. | 1233 | * changed the permissions behind our back. |
1234 | */ | 1234 | */ |
1235 | if (__mandatory_lock(inode)) | 1235 | if (__mandatory_lock(inode)) |
1236 | continue; | 1236 | continue; |
1237 | } | 1237 | } |
1238 | 1238 | ||
1239 | locks_delete_block(&fl); | 1239 | locks_delete_block(&fl); |
1240 | break; | 1240 | break; |
1241 | } | 1241 | } |
1242 | 1242 | ||
1243 | return error; | 1243 | return error; |
1244 | } | 1244 | } |
1245 | 1245 | ||
1246 | EXPORT_SYMBOL(locks_mandatory_area); | 1246 | EXPORT_SYMBOL(locks_mandatory_area); |
1247 | 1247 | ||
1248 | static void lease_clear_pending(struct file_lock *fl, int arg) | 1248 | static void lease_clear_pending(struct file_lock *fl, int arg) |
1249 | { | 1249 | { |
1250 | switch (arg) { | 1250 | switch (arg) { |
1251 | case F_UNLCK: | 1251 | case F_UNLCK: |
1252 | fl->fl_flags &= ~FL_UNLOCK_PENDING; | 1252 | fl->fl_flags &= ~FL_UNLOCK_PENDING; |
1253 | /* fall through: */ | 1253 | /* fall through: */ |
1254 | case F_RDLCK: | 1254 | case F_RDLCK: |
1255 | fl->fl_flags &= ~FL_DOWNGRADE_PENDING; | 1255 | fl->fl_flags &= ~FL_DOWNGRADE_PENDING; |
1256 | } | 1256 | } |
1257 | } | 1257 | } |
1258 | 1258 | ||
1259 | /* We already had a lease on this file; just change its type */ | 1259 | /* We already had a lease on this file; just change its type */ |
1260 | int lease_modify(struct file_lock **before, int arg) | 1260 | int lease_modify(struct file_lock **before, int arg) |
1261 | { | 1261 | { |
1262 | struct file_lock *fl = *before; | 1262 | struct file_lock *fl = *before; |
1263 | int error = assign_type(fl, arg); | 1263 | int error = assign_type(fl, arg); |
1264 | 1264 | ||
1265 | if (error) | 1265 | if (error) |
1266 | return error; | 1266 | return error; |
1267 | lease_clear_pending(fl, arg); | 1267 | lease_clear_pending(fl, arg); |
1268 | locks_wake_up_blocks(fl); | 1268 | locks_wake_up_blocks(fl); |
1269 | if (arg == F_UNLCK) { | 1269 | if (arg == F_UNLCK) { |
1270 | struct file *filp = fl->fl_file; | 1270 | struct file *filp = fl->fl_file; |
1271 | 1271 | ||
1272 | f_delown(filp); | 1272 | f_delown(filp); |
1273 | filp->f_owner.signum = 0; | 1273 | filp->f_owner.signum = 0; |
1274 | fasync_helper(0, fl->fl_file, 0, &fl->fl_fasync); | 1274 | fasync_helper(0, fl->fl_file, 0, &fl->fl_fasync); |
1275 | if (fl->fl_fasync != NULL) { | 1275 | if (fl->fl_fasync != NULL) { |
1276 | printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync); | 1276 | printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync); |
1277 | fl->fl_fasync = NULL; | 1277 | fl->fl_fasync = NULL; |
1278 | } | 1278 | } |
1279 | locks_delete_lock(before); | 1279 | locks_delete_lock(before); |
1280 | } | 1280 | } |
1281 | return 0; | 1281 | return 0; |
1282 | } | 1282 | } |
1283 | 1283 | ||
1284 | EXPORT_SYMBOL(lease_modify); | 1284 | EXPORT_SYMBOL(lease_modify); |
1285 | 1285 | ||
1286 | static bool past_time(unsigned long then) | 1286 | static bool past_time(unsigned long then) |
1287 | { | 1287 | { |
1288 | if (!then) | 1288 | if (!then) |
1289 | /* 0 is a special value meaning "this never expires": */ | 1289 | /* 0 is a special value meaning "this never expires": */ |
1290 | return false; | 1290 | return false; |
1291 | return time_after(jiffies, then); | 1291 | return time_after(jiffies, then); |
1292 | } | 1292 | } |
1293 | 1293 | ||
1294 | static void time_out_leases(struct inode *inode) | 1294 | static void time_out_leases(struct inode *inode) |
1295 | { | 1295 | { |
1296 | struct file_lock **before; | 1296 | struct file_lock **before; |
1297 | struct file_lock *fl; | 1297 | struct file_lock *fl; |
1298 | 1298 | ||
1299 | before = &inode->i_flock; | 1299 | before = &inode->i_flock; |
1300 | while ((fl = *before) && IS_LEASE(fl) && lease_breaking(fl)) { | 1300 | while ((fl = *before) && IS_LEASE(fl) && lease_breaking(fl)) { |
1301 | if (past_time(fl->fl_downgrade_time)) | 1301 | if (past_time(fl->fl_downgrade_time)) |
1302 | lease_modify(before, F_RDLCK); | 1302 | lease_modify(before, F_RDLCK); |
1303 | if (past_time(fl->fl_break_time)) | 1303 | if (past_time(fl->fl_break_time)) |
1304 | lease_modify(before, F_UNLCK); | 1304 | lease_modify(before, F_UNLCK); |
1305 | if (fl == *before) /* lease_modify may have freed fl */ | 1305 | if (fl == *before) /* lease_modify may have freed fl */ |
1306 | before = &fl->fl_next; | 1306 | before = &fl->fl_next; |
1307 | } | 1307 | } |
1308 | } | 1308 | } |
1309 | 1309 | ||
1310 | static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker) | 1310 | static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker) |
1311 | { | 1311 | { |
1312 | if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) | 1312 | if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) |
1313 | return false; | 1313 | return false; |
1314 | return locks_conflict(breaker, lease); | 1314 | return locks_conflict(breaker, lease); |
1315 | } | 1315 | } |
1316 | 1316 | ||
1317 | /** | 1317 | /** |
1318 | * __break_lease - revoke all outstanding leases on file | 1318 | * __break_lease - revoke all outstanding leases on file |
1319 | * @inode: the inode of the file to return | 1319 | * @inode: the inode of the file to return |
1320 | * @mode: O_RDONLY: break only write leases; O_WRONLY or O_RDWR: | 1320 | * @mode: O_RDONLY: break only write leases; O_WRONLY or O_RDWR: |
1321 | * break all leases | 1321 | * break all leases |
1322 | * @type: FL_LEASE: break leases and delegations; FL_DELEG: break | 1322 | * @type: FL_LEASE: break leases and delegations; FL_DELEG: break |
1323 | * only delegations | 1323 | * only delegations |
1324 | * | 1324 | * |
1325 | * break_lease (inlined for speed) has checked there already is at least | 1325 | * break_lease (inlined for speed) has checked there already is at least |
1326 | * some kind of lock (maybe a lease) on this file. Leases are broken on | 1326 | * some kind of lock (maybe a lease) on this file. Leases are broken on |
1327 | * a call to open() or truncate(). This function can sleep unless you | 1327 | * a call to open() or truncate(). This function can sleep unless you |
1328 | * specified %O_NONBLOCK to your open(). | 1328 | * specified %O_NONBLOCK to your open(). |
1329 | */ | 1329 | */ |
1330 | int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) | 1330 | int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) |
1331 | { | 1331 | { |
1332 | int error = 0; | 1332 | int error = 0; |
1333 | struct file_lock *new_fl, *flock; | 1333 | struct file_lock *new_fl, *flock; |
1334 | struct file_lock *fl; | 1334 | struct file_lock *fl; |
1335 | unsigned long break_time; | 1335 | unsigned long break_time; |
1336 | int i_have_this_lease = 0; | 1336 | int i_have_this_lease = 0; |
1337 | bool lease_conflict = false; | 1337 | bool lease_conflict = false; |
1338 | int want_write = (mode & O_ACCMODE) != O_RDONLY; | 1338 | int want_write = (mode & O_ACCMODE) != O_RDONLY; |
1339 | 1339 | ||
1340 | new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK); | 1340 | new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK); |
1341 | if (IS_ERR(new_fl)) | 1341 | if (IS_ERR(new_fl)) |
1342 | return PTR_ERR(new_fl); | 1342 | return PTR_ERR(new_fl); |
1343 | new_fl->fl_flags = type; | 1343 | new_fl->fl_flags = type; |
1344 | 1344 | ||
1345 | spin_lock(&inode->i_lock); | 1345 | spin_lock(&inode->i_lock); |
1346 | 1346 | ||
1347 | time_out_leases(inode); | 1347 | time_out_leases(inode); |
1348 | 1348 | ||
1349 | flock = inode->i_flock; | 1349 | flock = inode->i_flock; |
1350 | if ((flock == NULL) || !IS_LEASE(flock)) | 1350 | if ((flock == NULL) || !IS_LEASE(flock)) |
1351 | goto out; | 1351 | goto out; |
1352 | 1352 | ||
1353 | for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) { | 1353 | for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) { |
1354 | if (leases_conflict(fl, new_fl)) { | 1354 | if (leases_conflict(fl, new_fl)) { |
1355 | lease_conflict = true; | 1355 | lease_conflict = true; |
1356 | if (fl->fl_owner == current->files) | 1356 | if (fl->fl_owner == current->files) |
1357 | i_have_this_lease = 1; | 1357 | i_have_this_lease = 1; |
1358 | } | 1358 | } |
1359 | } | 1359 | } |
1360 | if (!lease_conflict) | 1360 | if (!lease_conflict) |
1361 | goto out; | 1361 | goto out; |
1362 | 1362 | ||
1363 | break_time = 0; | 1363 | break_time = 0; |
1364 | if (lease_break_time > 0) { | 1364 | if (lease_break_time > 0) { |
1365 | break_time = jiffies + lease_break_time * HZ; | 1365 | break_time = jiffies + lease_break_time * HZ; |
1366 | if (break_time == 0) | 1366 | if (break_time == 0) |
1367 | break_time++; /* so that 0 means no break time */ | 1367 | break_time++; /* so that 0 means no break time */ |
1368 | } | 1368 | } |
1369 | 1369 | ||
1370 | for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) { | 1370 | for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) { |
1371 | if (!leases_conflict(fl, new_fl)) | 1371 | if (!leases_conflict(fl, new_fl)) |
1372 | continue; | 1372 | continue; |
1373 | if (want_write) { | 1373 | if (want_write) { |
1374 | if (fl->fl_flags & FL_UNLOCK_PENDING) | 1374 | if (fl->fl_flags & FL_UNLOCK_PENDING) |
1375 | continue; | 1375 | continue; |
1376 | fl->fl_flags |= FL_UNLOCK_PENDING; | 1376 | fl->fl_flags |= FL_UNLOCK_PENDING; |
1377 | fl->fl_break_time = break_time; | 1377 | fl->fl_break_time = break_time; |
1378 | } else { | 1378 | } else { |
1379 | if (lease_breaking(flock)) | 1379 | if (lease_breaking(flock)) |
1380 | continue; | 1380 | continue; |
1381 | fl->fl_flags |= FL_DOWNGRADE_PENDING; | 1381 | fl->fl_flags |= FL_DOWNGRADE_PENDING; |
1382 | fl->fl_downgrade_time = break_time; | 1382 | fl->fl_downgrade_time = break_time; |
1383 | } | 1383 | } |
1384 | fl->fl_lmops->lm_break(fl); | 1384 | fl->fl_lmops->lm_break(fl); |
1385 | } | 1385 | } |
1386 | 1386 | ||
1387 | if (i_have_this_lease || (mode & O_NONBLOCK)) { | 1387 | if (i_have_this_lease || (mode & O_NONBLOCK)) { |
1388 | error = -EWOULDBLOCK; | 1388 | error = -EWOULDBLOCK; |
1389 | goto out; | 1389 | goto out; |
1390 | } | 1390 | } |
1391 | 1391 | ||
1392 | restart: | 1392 | restart: |
1393 | break_time = flock->fl_break_time; | 1393 | break_time = flock->fl_break_time; |
1394 | if (break_time != 0) | 1394 | if (break_time != 0) |
1395 | break_time -= jiffies; | 1395 | break_time -= jiffies; |
1396 | if (break_time == 0) | 1396 | if (break_time == 0) |
1397 | break_time++; | 1397 | break_time++; |
1398 | locks_insert_block(flock, new_fl); | 1398 | locks_insert_block(flock, new_fl); |
1399 | spin_unlock(&inode->i_lock); | 1399 | spin_unlock(&inode->i_lock); |
1400 | error = wait_event_interruptible_timeout(new_fl->fl_wait, | 1400 | error = wait_event_interruptible_timeout(new_fl->fl_wait, |
1401 | !new_fl->fl_next, break_time); | 1401 | !new_fl->fl_next, break_time); |
1402 | spin_lock(&inode->i_lock); | 1402 | spin_lock(&inode->i_lock); |
1403 | locks_delete_block(new_fl); | 1403 | locks_delete_block(new_fl); |
1404 | if (error >= 0) { | 1404 | if (error >= 0) { |
1405 | if (error == 0) | 1405 | if (error == 0) |
1406 | time_out_leases(inode); | 1406 | time_out_leases(inode); |
1407 | /* | 1407 | /* |
1408 | * Wait for the next conflicting lease that has not been | 1408 | * Wait for the next conflicting lease that has not been |
1409 | * broken yet | 1409 | * broken yet |
1410 | */ | 1410 | */ |
1411 | for (flock = inode->i_flock; flock && IS_LEASE(flock); | 1411 | for (flock = inode->i_flock; flock && IS_LEASE(flock); |
1412 | flock = flock->fl_next) { | 1412 | flock = flock->fl_next) { |
1413 | if (leases_conflict(new_fl, flock)) | 1413 | if (leases_conflict(new_fl, flock)) |
1414 | goto restart; | 1414 | goto restart; |
1415 | } | 1415 | } |
1416 | error = 0; | 1416 | error = 0; |
1417 | } | 1417 | } |
1418 | 1418 | ||
1419 | out: | 1419 | out: |
1420 | spin_unlock(&inode->i_lock); | 1420 | spin_unlock(&inode->i_lock); |
1421 | locks_free_lock(new_fl); | 1421 | locks_free_lock(new_fl); |
1422 | return error; | 1422 | return error; |
1423 | } | 1423 | } |
1424 | 1424 | ||
1425 | EXPORT_SYMBOL(__break_lease); | 1425 | EXPORT_SYMBOL(__break_lease); |
1426 | 1426 | ||
1427 | /** | 1427 | /** |
1428 | * lease_get_mtime - get the last modified time of an inode | 1428 | * lease_get_mtime - get the last modified time of an inode |
1429 | * @inode: the inode | 1429 | * @inode: the inode |
1430 | * @time: pointer to a timespec which will contain the last modified time | 1430 | * @time: pointer to a timespec which will contain the last modified time |
1431 | * | 1431 | * |
1432 | * This is to force NFS clients to flush their caches for files with | 1432 | * This is to force NFS clients to flush their caches for files with |
1433 | * exclusive leases. The justification is that if someone has an | 1433 | * exclusive leases. The justification is that if someone has an |
1434 | * exclusive lease, then they could be modifying it. | 1434 | * exclusive lease, then they could be modifying it. |
1435 | */ | 1435 | */ |
1436 | void lease_get_mtime(struct inode *inode, struct timespec *time) | 1436 | void lease_get_mtime(struct inode *inode, struct timespec *time) |
1437 | { | 1437 | { |
1438 | struct file_lock *flock = inode->i_flock; | 1438 | struct file_lock *flock = inode->i_flock; |
1439 | if (flock && IS_LEASE(flock) && (flock->fl_type == F_WRLCK)) | 1439 | if (flock && IS_LEASE(flock) && (flock->fl_type == F_WRLCK)) |
1440 | *time = current_fs_time(inode->i_sb); | 1440 | *time = current_fs_time(inode->i_sb); |
1441 | else | 1441 | else |
1442 | *time = inode->i_mtime; | 1442 | *time = inode->i_mtime; |
1443 | } | 1443 | } |
1444 | 1444 | ||
1445 | EXPORT_SYMBOL(lease_get_mtime); | 1445 | EXPORT_SYMBOL(lease_get_mtime); |
1446 | 1446 | ||
1447 | /** | 1447 | /** |
1448 | * fcntl_getlease - Enquire what lease is currently active | 1448 | * fcntl_getlease - Enquire what lease is currently active |
1449 | * @filp: the file | 1449 | * @filp: the file |
1450 | * | 1450 | * |
1451 | * The value returned by this function will be one of | 1451 | * The value returned by this function will be one of |
1452 | * (if no lease break is pending): | 1452 | * (if no lease break is pending): |
1453 | * | 1453 | * |
1454 | * %F_RDLCK to indicate a shared lease is held. | 1454 | * %F_RDLCK to indicate a shared lease is held. |
1455 | * | 1455 | * |
1456 | * %F_WRLCK to indicate an exclusive lease is held. | 1456 | * %F_WRLCK to indicate an exclusive lease is held. |
1457 | * | 1457 | * |
1458 | * %F_UNLCK to indicate no lease is held. | 1458 | * %F_UNLCK to indicate no lease is held. |
1459 | * | 1459 | * |
1460 | * (if a lease break is pending): | 1460 | * (if a lease break is pending): |
1461 | * | 1461 | * |
1462 | * %F_RDLCK to indicate an exclusive lease needs to be | 1462 | * %F_RDLCK to indicate an exclusive lease needs to be |
1463 | * changed to a shared lease (or removed). | 1463 | * changed to a shared lease (or removed). |
1464 | * | 1464 | * |
1465 | * %F_UNLCK to indicate the lease needs to be removed. | 1465 | * %F_UNLCK to indicate the lease needs to be removed. |
1466 | * | 1466 | * |
1467 | * XXX: sfr & willy disagree over whether F_INPROGRESS | 1467 | * XXX: sfr & willy disagree over whether F_INPROGRESS |
1468 | * should be returned to userspace. | 1468 | * should be returned to userspace. |
1469 | */ | 1469 | */ |
1470 | int fcntl_getlease(struct file *filp) | 1470 | int fcntl_getlease(struct file *filp) |
1471 | { | 1471 | { |
1472 | struct file_lock *fl; | 1472 | struct file_lock *fl; |
1473 | struct inode *inode = file_inode(filp); | 1473 | struct inode *inode = file_inode(filp); |
1474 | int type = F_UNLCK; | 1474 | int type = F_UNLCK; |
1475 | 1475 | ||
1476 | spin_lock(&inode->i_lock); | 1476 | spin_lock(&inode->i_lock); |
1477 | time_out_leases(file_inode(filp)); | 1477 | time_out_leases(file_inode(filp)); |
1478 | for (fl = file_inode(filp)->i_flock; fl && IS_LEASE(fl); | 1478 | for (fl = file_inode(filp)->i_flock; fl && IS_LEASE(fl); |
1479 | fl = fl->fl_next) { | 1479 | fl = fl->fl_next) { |
1480 | if (fl->fl_file == filp) { | 1480 | if (fl->fl_file == filp) { |
1481 | type = target_leasetype(fl); | 1481 | type = target_leasetype(fl); |
1482 | break; | 1482 | break; |
1483 | } | 1483 | } |
1484 | } | 1484 | } |
1485 | spin_unlock(&inode->i_lock); | 1485 | spin_unlock(&inode->i_lock); |
1486 | return type; | 1486 | return type; |
1487 | } | 1487 | } |
1488 | 1488 | ||
1489 | /** | 1489 | /** |
1490 | * check_conflicting_open - see if the given dentry points to a file that has | 1490 | * check_conflicting_open - see if the given dentry points to a file that has |
1491 | * an existing open that would conflict with the | 1491 | * an existing open that would conflict with the |
1492 | * desired lease. | 1492 | * desired lease. |
1493 | * @dentry: dentry to check | 1493 | * @dentry: dentry to check |
1494 | * @arg: type of lease that we're trying to acquire | 1494 | * @arg: type of lease that we're trying to acquire |
1495 | * | 1495 | * |
1496 | * Check to see if there's an existing open fd on this file that would | 1496 | * Check to see if there's an existing open fd on this file that would |
1497 | * conflict with the lease we're trying to set. | 1497 | * conflict with the lease we're trying to set. |
1498 | */ | 1498 | */ |
1499 | static int | 1499 | static int |
1500 | check_conflicting_open(const struct dentry *dentry, const long arg) | 1500 | check_conflicting_open(const struct dentry *dentry, const long arg) |
1501 | { | 1501 | { |
1502 | int ret = 0; | 1502 | int ret = 0; |
1503 | struct inode *inode = dentry->d_inode; | 1503 | struct inode *inode = dentry->d_inode; |
1504 | 1504 | ||
1505 | if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) | 1505 | if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) |
1506 | return -EAGAIN; | 1506 | return -EAGAIN; |
1507 | 1507 | ||
1508 | if ((arg == F_WRLCK) && ((d_count(dentry) > 1) || | 1508 | if ((arg == F_WRLCK) && ((d_count(dentry) > 1) || |
1509 | (atomic_read(&inode->i_count) > 1))) | 1509 | (atomic_read(&inode->i_count) > 1))) |
1510 | ret = -EAGAIN; | 1510 | ret = -EAGAIN; |
1511 | 1511 | ||
1512 | return ret; | 1512 | return ret; |
1513 | } | 1513 | } |
1514 | 1514 | ||
1515 | static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp) | 1515 | static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp) |
1516 | { | 1516 | { |
1517 | struct file_lock *fl, **before, **my_before = NULL, *lease; | 1517 | struct file_lock *fl, **before, **my_before = NULL, *lease; |
1518 | struct dentry *dentry = filp->f_path.dentry; | 1518 | struct dentry *dentry = filp->f_path.dentry; |
1519 | struct inode *inode = dentry->d_inode; | 1519 | struct inode *inode = dentry->d_inode; |
1520 | bool is_deleg = (*flp)->fl_flags & FL_DELEG; | 1520 | bool is_deleg = (*flp)->fl_flags & FL_DELEG; |
1521 | int error; | 1521 | int error; |
1522 | 1522 | ||
1523 | lease = *flp; | 1523 | lease = *flp; |
1524 | /* | 1524 | /* |
1525 | * In the delegation case we need mutual exclusion with | 1525 | * In the delegation case we need mutual exclusion with |
1526 | * a number of operations that take the i_mutex. We trylock | 1526 | * a number of operations that take the i_mutex. We trylock |
1527 | * because delegations are an optional optimization, and if | 1527 | * because delegations are an optional optimization, and if |
1528 | * there's some chance of a conflict--we'd rather not | 1528 | * there's some chance of a conflict--we'd rather not |
1529 | * bother, maybe that's a sign this just isn't a good file to | 1529 | * bother, maybe that's a sign this just isn't a good file to |
1530 | * hand out a delegation on. | 1530 | * hand out a delegation on. |
1531 | */ | 1531 | */ |
1532 | if (is_deleg && !mutex_trylock(&inode->i_mutex)) | 1532 | if (is_deleg && !mutex_trylock(&inode->i_mutex)) |
1533 | return -EAGAIN; | 1533 | return -EAGAIN; |
1534 | 1534 | ||
1535 | if (is_deleg && arg == F_WRLCK) { | 1535 | if (is_deleg && arg == F_WRLCK) { |
1536 | /* Write delegations are not currently supported: */ | 1536 | /* Write delegations are not currently supported: */ |
1537 | mutex_unlock(&inode->i_mutex); | 1537 | mutex_unlock(&inode->i_mutex); |
1538 | WARN_ON_ONCE(1); | 1538 | WARN_ON_ONCE(1); |
1539 | return -EINVAL; | 1539 | return -EINVAL; |
1540 | } | 1540 | } |
1541 | 1541 | ||
1542 | error = check_conflicting_open(dentry, arg); | 1542 | error = check_conflicting_open(dentry, arg); |
1543 | if (error) | 1543 | if (error) |
1544 | goto out; | 1544 | goto out; |
1545 | 1545 | ||
1546 | /* | 1546 | /* |
1547 | * At this point, we know that if there is an exclusive | 1547 | * At this point, we know that if there is an exclusive |
1548 | * lease on this file, then we hold it on this filp | 1548 | * lease on this file, then we hold it on this filp |
1549 | * (otherwise our open of this file would have blocked). | 1549 | * (otherwise our open of this file would have blocked). |
1550 | * And if we are trying to acquire an exclusive lease, | 1550 | * And if we are trying to acquire an exclusive lease, |
1551 | * then the file is not open by anyone (including us) | 1551 | * then the file is not open by anyone (including us) |
1552 | * except for this filp. | 1552 | * except for this filp. |
1553 | */ | 1553 | */ |
1554 | error = -EAGAIN; | 1554 | error = -EAGAIN; |
1555 | for (before = &inode->i_flock; | 1555 | for (before = &inode->i_flock; |
1556 | ((fl = *before) != NULL) && IS_LEASE(fl); | 1556 | ((fl = *before) != NULL) && IS_LEASE(fl); |
1557 | before = &fl->fl_next) { | 1557 | before = &fl->fl_next) { |
1558 | if (fl->fl_file == filp) { | 1558 | if (fl->fl_file == filp) { |
1559 | my_before = before; | 1559 | my_before = before; |
1560 | continue; | 1560 | continue; |
1561 | } | 1561 | } |
1562 | /* | 1562 | /* |
1563 | * No exclusive leases if someone else has a lease on | 1563 | * No exclusive leases if someone else has a lease on |
1564 | * this file: | 1564 | * this file: |
1565 | */ | 1565 | */ |
1566 | if (arg == F_WRLCK) | 1566 | if (arg == F_WRLCK) |
1567 | goto out; | 1567 | goto out; |
1568 | /* | 1568 | /* |
1569 | * Modifying our existing lease is OK, but no getting a | 1569 | * Modifying our existing lease is OK, but no getting a |
1570 | * new lease if someone else is opening for write: | 1570 | * new lease if someone else is opening for write: |
1571 | */ | 1571 | */ |
1572 | if (fl->fl_flags & FL_UNLOCK_PENDING) | 1572 | if (fl->fl_flags & FL_UNLOCK_PENDING) |
1573 | goto out; | 1573 | goto out; |
1574 | } | 1574 | } |
1575 | 1575 | ||
1576 | if (my_before != NULL) { | 1576 | if (my_before != NULL) { |
1577 | error = lease->fl_lmops->lm_change(my_before, arg); | 1577 | error = lease->fl_lmops->lm_change(my_before, arg); |
1578 | if (!error) | 1578 | if (!error) |
1579 | *flp = *my_before; | 1579 | *flp = *my_before; |
1580 | goto out; | 1580 | goto out; |
1581 | } | 1581 | } |
1582 | 1582 | ||
1583 | error = -EINVAL; | 1583 | error = -EINVAL; |
1584 | if (!leases_enable) | 1584 | if (!leases_enable) |
1585 | goto out; | 1585 | goto out; |
1586 | 1586 | ||
1587 | locks_insert_lock(before, lease); | 1587 | locks_insert_lock(before, lease); |
1588 | /* | 1588 | /* |
1589 | * The check in break_lease() is lockless. It's possible for another | 1589 | * The check in break_lease() is lockless. It's possible for another |
1590 | * open to race in after we did the earlier check for a conflicting | 1590 | * open to race in after we did the earlier check for a conflicting |
1591 | * open but before the lease was inserted. Check again for a | 1591 | * open but before the lease was inserted. Check again for a |
1592 | * conflicting open and cancel the lease if there is one. | 1592 | * conflicting open and cancel the lease if there is one. |
1593 | * | 1593 | * |
1594 | * We also add a barrier here to ensure that the insertion of the lock | 1594 | * We also add a barrier here to ensure that the insertion of the lock |
1595 | * precedes these checks. | 1595 | * precedes these checks. |
1596 | */ | 1596 | */ |
1597 | smp_mb(); | 1597 | smp_mb(); |
1598 | error = check_conflicting_open(dentry, arg); | 1598 | error = check_conflicting_open(dentry, arg); |
1599 | if (error) | 1599 | if (error) |
1600 | locks_unlink_lock(flp); | 1600 | locks_unlink_lock(flp); |
1601 | out: | 1601 | out: |
1602 | if (is_deleg) | 1602 | if (is_deleg) |
1603 | mutex_unlock(&inode->i_mutex); | 1603 | mutex_unlock(&inode->i_mutex); |
1604 | return error; | 1604 | return error; |
1605 | } | 1605 | } |
1606 | 1606 | ||
1607 | static int generic_delete_lease(struct file *filp, struct file_lock **flp) | 1607 | static int generic_delete_lease(struct file *filp, struct file_lock **flp) |
1608 | { | 1608 | { |
1609 | struct file_lock *fl, **before; | 1609 | struct file_lock *fl, **before; |
1610 | struct dentry *dentry = filp->f_path.dentry; | 1610 | struct dentry *dentry = filp->f_path.dentry; |
1611 | struct inode *inode = dentry->d_inode; | 1611 | struct inode *inode = dentry->d_inode; |
1612 | 1612 | ||
1613 | for (before = &inode->i_flock; | 1613 | for (before = &inode->i_flock; |
1614 | ((fl = *before) != NULL) && IS_LEASE(fl); | 1614 | ((fl = *before) != NULL) && IS_LEASE(fl); |
1615 | before = &fl->fl_next) { | 1615 | before = &fl->fl_next) { |
1616 | if (fl->fl_file != filp) | 1616 | if (fl->fl_file != filp) |
1617 | continue; | 1617 | continue; |
1618 | return (*flp)->fl_lmops->lm_change(before, F_UNLCK); | 1618 | return (*flp)->fl_lmops->lm_change(before, F_UNLCK); |
1619 | } | 1619 | } |
1620 | return -EAGAIN; | 1620 | return -EAGAIN; |
1621 | } | 1621 | } |
1622 | 1622 | ||
1623 | /** | 1623 | /** |
1624 | * generic_setlease - sets a lease on an open file | 1624 | * generic_setlease - sets a lease on an open file |
1625 | * @filp: file pointer | 1625 | * @filp: file pointer |
1626 | * @arg: type of lease to obtain | 1626 | * @arg: type of lease to obtain |
1627 | * @flp: input - file_lock to use, output - file_lock inserted | 1627 | * @flp: input - file_lock to use, output - file_lock inserted |
1628 | * | 1628 | * |
1629 | * The (input) flp->fl_lmops->lm_break function is required | 1629 | * The (input) flp->fl_lmops->lm_break function is required |
1630 | * by break_lease(). | 1630 | * by break_lease(). |
1631 | * | 1631 | * |
1632 | * Called with inode->i_lock held. | 1632 | * Called with inode->i_lock held. |
1633 | */ | 1633 | */ |
1634 | int generic_setlease(struct file *filp, long arg, struct file_lock **flp) | 1634 | int generic_setlease(struct file *filp, long arg, struct file_lock **flp) |
1635 | { | 1635 | { |
1636 | struct dentry *dentry = filp->f_path.dentry; | 1636 | struct dentry *dentry = filp->f_path.dentry; |
1637 | struct inode *inode = dentry->d_inode; | 1637 | struct inode *inode = dentry->d_inode; |
1638 | int error; | 1638 | int error; |
1639 | 1639 | ||
1640 | if ((!uid_eq(current_fsuid(), inode->i_uid)) && !capable(CAP_LEASE)) | 1640 | if ((!uid_eq(current_fsuid(), inode->i_uid)) && !capable(CAP_LEASE)) |
1641 | return -EACCES; | 1641 | return -EACCES; |
1642 | if (!S_ISREG(inode->i_mode)) | 1642 | if (!S_ISREG(inode->i_mode)) |
1643 | return -EINVAL; | 1643 | return -EINVAL; |
1644 | error = security_file_lock(filp, arg); | 1644 | error = security_file_lock(filp, arg); |
1645 | if (error) | 1645 | if (error) |
1646 | return error; | 1646 | return error; |
1647 | 1647 | ||
1648 | time_out_leases(inode); | 1648 | time_out_leases(inode); |
1649 | 1649 | ||
1650 | BUG_ON(!(*flp)->fl_lmops->lm_break); | 1650 | BUG_ON(!(*flp)->fl_lmops->lm_break); |
1651 | 1651 | ||
1652 | switch (arg) { | 1652 | switch (arg) { |
1653 | case F_UNLCK: | 1653 | case F_UNLCK: |
1654 | return generic_delete_lease(filp, flp); | 1654 | return generic_delete_lease(filp, flp); |
1655 | case F_RDLCK: | 1655 | case F_RDLCK: |
1656 | case F_WRLCK: | 1656 | case F_WRLCK: |
1657 | return generic_add_lease(filp, arg, flp); | 1657 | return generic_add_lease(filp, arg, flp); |
1658 | default: | 1658 | default: |
1659 | return -EINVAL; | 1659 | return -EINVAL; |
1660 | } | 1660 | } |
1661 | } | 1661 | } |
1662 | EXPORT_SYMBOL(generic_setlease); | 1662 | EXPORT_SYMBOL(generic_setlease); |
1663 | 1663 | ||
1664 | static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease) | 1664 | static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease) |
1665 | { | 1665 | { |
1666 | if (filp->f_op->setlease) | 1666 | if (filp->f_op->setlease) |
1667 | return filp->f_op->setlease(filp, arg, lease); | 1667 | return filp->f_op->setlease(filp, arg, lease); |
1668 | else | 1668 | else |
1669 | return generic_setlease(filp, arg, lease); | 1669 | return generic_setlease(filp, arg, lease); |
1670 | } | 1670 | } |
1671 | 1671 | ||
1672 | /** | 1672 | /** |
1673 | * vfs_setlease - sets a lease on an open file | 1673 | * vfs_setlease - sets a lease on an open file |
1674 | * @filp: file pointer | 1674 | * @filp: file pointer |
1675 | * @arg: type of lease to obtain | 1675 | * @arg: type of lease to obtain |
1676 | * @lease: file_lock to use | 1676 | * @lease: file_lock to use |
1677 | * | 1677 | * |
1678 | * Call this to establish a lease on the file. | 1678 | * Call this to establish a lease on the file. |
1679 | * The (*lease)->fl_lmops->lm_break operation must be set; if not, | 1679 | * The (*lease)->fl_lmops->lm_break operation must be set; if not, |
1680 | * break_lease will oops! | 1680 | * break_lease will oops! |
1681 | * | 1681 | * |
1682 | * This will call the filesystem's setlease file method, if | 1682 | * This will call the filesystem's setlease file method, if |
1683 | * defined. Note that there is no getlease method; instead, the | 1683 | * defined. Note that there is no getlease method; instead, the |
1684 | * filesystem setlease method should call back to setlease() to | 1684 | * filesystem setlease method should call back to setlease() to |
1685 | * add a lease to the inode's lease list, where fcntl_getlease() can | 1685 | * add a lease to the inode's lease list, where fcntl_getlease() can |
1686 | * find it. Since fcntl_getlease() only reports whether the current | 1686 | * find it. Since fcntl_getlease() only reports whether the current |
1687 | * task holds a lease, a cluster filesystem need only do this for | 1687 | * task holds a lease, a cluster filesystem need only do this for |
1688 | * leases held by processes on this node. | 1688 | * leases held by processes on this node. |
1689 | * | 1689 | * |
1690 | * There is also no break_lease method; filesystems that | 1690 | * There is also no break_lease method; filesystems that |
1691 | * handle their own leases should break leases themselves from the | 1691 | * handle their own leases should break leases themselves from the |
1692 | * filesystem's open, create, and (on truncate) setattr methods. | 1692 | * filesystem's open, create, and (on truncate) setattr methods. |
1693 | * | 1693 | * |
1694 | * Warning: the only current setlease methods exist only to disable | 1694 | * Warning: the only current setlease methods exist only to disable |
1695 | * leases in certain cases. More vfs changes may be required to | 1695 | * leases in certain cases. More vfs changes may be required to |
1696 | * allow a full filesystem lease implementation. | 1696 | * allow a full filesystem lease implementation. |
1697 | */ | 1697 | */ |
1698 | 1698 | ||
1699 | int vfs_setlease(struct file *filp, long arg, struct file_lock **lease) | 1699 | int vfs_setlease(struct file *filp, long arg, struct file_lock **lease) |
1700 | { | 1700 | { |
1701 | struct inode *inode = file_inode(filp); | 1701 | struct inode *inode = file_inode(filp); |
1702 | int error; | 1702 | int error; |
1703 | 1703 | ||
1704 | spin_lock(&inode->i_lock); | 1704 | spin_lock(&inode->i_lock); |
1705 | error = __vfs_setlease(filp, arg, lease); | 1705 | error = __vfs_setlease(filp, arg, lease); |
1706 | spin_unlock(&inode->i_lock); | 1706 | spin_unlock(&inode->i_lock); |
1707 | 1707 | ||
1708 | return error; | 1708 | return error; |
1709 | } | 1709 | } |
1710 | EXPORT_SYMBOL_GPL(vfs_setlease); | 1710 | EXPORT_SYMBOL_GPL(vfs_setlease); |
1711 | 1711 | ||
1712 | static int do_fcntl_delete_lease(struct file *filp) | 1712 | static int do_fcntl_delete_lease(struct file *filp) |
1713 | { | 1713 | { |
1714 | struct file_lock fl, *flp = &fl; | 1714 | struct file_lock fl, *flp = &fl; |
1715 | 1715 | ||
1716 | lease_init(filp, F_UNLCK, flp); | 1716 | lease_init(filp, F_UNLCK, flp); |
1717 | 1717 | ||
1718 | return vfs_setlease(filp, F_UNLCK, &flp); | 1718 | return vfs_setlease(filp, F_UNLCK, &flp); |
1719 | } | 1719 | } |
1720 | 1720 | ||
1721 | static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg) | 1721 | static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg) |
1722 | { | 1722 | { |
1723 | struct file_lock *fl, *ret; | 1723 | struct file_lock *fl, *ret; |
1724 | struct inode *inode = file_inode(filp); | 1724 | struct inode *inode = file_inode(filp); |
1725 | struct fasync_struct *new; | 1725 | struct fasync_struct *new; |
1726 | int error; | 1726 | int error; |
1727 | 1727 | ||
1728 | fl = lease_alloc(filp, arg); | 1728 | fl = lease_alloc(filp, arg); |
1729 | if (IS_ERR(fl)) | 1729 | if (IS_ERR(fl)) |
1730 | return PTR_ERR(fl); | 1730 | return PTR_ERR(fl); |
1731 | 1731 | ||
1732 | new = fasync_alloc(); | 1732 | new = fasync_alloc(); |
1733 | if (!new) { | 1733 | if (!new) { |
1734 | locks_free_lock(fl); | 1734 | locks_free_lock(fl); |
1735 | return -ENOMEM; | 1735 | return -ENOMEM; |
1736 | } | 1736 | } |
1737 | ret = fl; | 1737 | ret = fl; |
1738 | spin_lock(&inode->i_lock); | 1738 | spin_lock(&inode->i_lock); |
1739 | error = __vfs_setlease(filp, arg, &ret); | 1739 | error = __vfs_setlease(filp, arg, &ret); |
1740 | if (error) { | 1740 | if (error) { |
1741 | spin_unlock(&inode->i_lock); | 1741 | spin_unlock(&inode->i_lock); |
1742 | locks_free_lock(fl); | 1742 | locks_free_lock(fl); |
1743 | goto out_free_fasync; | 1743 | goto out_free_fasync; |
1744 | } | 1744 | } |
1745 | if (ret != fl) | 1745 | if (ret != fl) |
1746 | locks_free_lock(fl); | 1746 | locks_free_lock(fl); |
1747 | 1747 | ||
1748 | /* | 1748 | /* |
1749 | * fasync_insert_entry() returns the old entry if any. | 1749 | * fasync_insert_entry() returns the old entry if any. |
1750 | * If there was no old entry, then it used 'new' and | 1750 | * If there was no old entry, then it used 'new' and |
1751 | * inserted it into the fasync list. Clear new so that | 1751 | * inserted it into the fasync list. Clear new so that |
1752 | * we don't release it here. | 1752 | * we don't release it here. |
1753 | */ | 1753 | */ |
1754 | if (!fasync_insert_entry(fd, filp, &ret->fl_fasync, new)) | 1754 | if (!fasync_insert_entry(fd, filp, &ret->fl_fasync, new)) |
1755 | new = NULL; | 1755 | new = NULL; |
1756 | 1756 | ||
1757 | error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); | 1757 | error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); |
1758 | spin_unlock(&inode->i_lock); | 1758 | spin_unlock(&inode->i_lock); |
1759 | 1759 | ||
1760 | out_free_fasync: | 1760 | out_free_fasync: |
1761 | if (new) | 1761 | if (new) |
1762 | fasync_free(new); | 1762 | fasync_free(new); |
1763 | return error; | 1763 | return error; |
1764 | } | 1764 | } |
1765 | 1765 | ||
1766 | /** | 1766 | /** |
1767 | * fcntl_setlease - sets a lease on an open file | 1767 | * fcntl_setlease - sets a lease on an open file |
1768 | * @fd: open file descriptor | 1768 | * @fd: open file descriptor |
1769 | * @filp: file pointer | 1769 | * @filp: file pointer |
1770 | * @arg: type of lease to obtain | 1770 | * @arg: type of lease to obtain |
1771 | * | 1771 | * |
1772 | * Call this fcntl to establish a lease on the file. | 1772 | * Call this fcntl to establish a lease on the file. |
1773 | * Note that you also need to call %F_SETSIG to | 1773 | * Note that you also need to call %F_SETSIG to |
1774 | * receive a signal when the lease is broken. | 1774 | * receive a signal when the lease is broken. |
1775 | */ | 1775 | */ |
1776 | int fcntl_setlease(unsigned int fd, struct file *filp, long arg) | 1776 | int fcntl_setlease(unsigned int fd, struct file *filp, long arg) |
1777 | { | 1777 | { |
1778 | if (arg == F_UNLCK) | 1778 | if (arg == F_UNLCK) |
1779 | return do_fcntl_delete_lease(filp); | 1779 | return do_fcntl_delete_lease(filp); |
1780 | return do_fcntl_add_lease(fd, filp, arg); | 1780 | return do_fcntl_add_lease(fd, filp, arg); |
1781 | } | 1781 | } |
1782 | 1782 | ||
1783 | /** | 1783 | /** |
1784 | * flock_lock_file_wait - Apply a FLOCK-style lock to a file | 1784 | * flock_lock_file_wait - Apply a FLOCK-style lock to a file |
1785 | * @filp: The file to apply the lock to | 1785 | * @filp: The file to apply the lock to |
1786 | * @fl: The lock to be applied | 1786 | * @fl: The lock to be applied |
1787 | * | 1787 | * |
1788 | * Add a FLOCK style lock to a file. | 1788 | * Add a FLOCK style lock to a file. |
1789 | */ | 1789 | */ |
1790 | int flock_lock_file_wait(struct file *filp, struct file_lock *fl) | 1790 | int flock_lock_file_wait(struct file *filp, struct file_lock *fl) |
1791 | { | 1791 | { |
1792 | int error; | 1792 | int error; |
1793 | might_sleep(); | 1793 | might_sleep(); |
1794 | for (;;) { | 1794 | for (;;) { |
1795 | error = flock_lock_file(filp, fl); | 1795 | error = flock_lock_file(filp, fl); |
1796 | if (error != FILE_LOCK_DEFERRED) | 1796 | if (error != FILE_LOCK_DEFERRED) |
1797 | break; | 1797 | break; |
1798 | error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); | 1798 | error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); |
1799 | if (!error) | 1799 | if (!error) |
1800 | continue; | 1800 | continue; |
1801 | 1801 | ||
1802 | locks_delete_block(fl); | 1802 | locks_delete_block(fl); |
1803 | break; | 1803 | break; |
1804 | } | 1804 | } |
1805 | return error; | 1805 | return error; |
1806 | } | 1806 | } |
1807 | 1807 | ||
1808 | EXPORT_SYMBOL(flock_lock_file_wait); | 1808 | EXPORT_SYMBOL(flock_lock_file_wait); |
1809 | 1809 | ||
1810 | /** | 1810 | /** |
1811 | * sys_flock: - flock() system call. | 1811 | * sys_flock: - flock() system call. |
1812 | * @fd: the file descriptor to lock. | 1812 | * @fd: the file descriptor to lock. |
1813 | * @cmd: the type of lock to apply. | 1813 | * @cmd: the type of lock to apply. |
1814 | * | 1814 | * |
1815 | * Apply a %FL_FLOCK style lock to an open file descriptor. | 1815 | * Apply a %FL_FLOCK style lock to an open file descriptor. |
1816 | * The @cmd can be one of | 1816 | * The @cmd can be one of |
1817 | * | 1817 | * |
1818 | * %LOCK_SH -- a shared lock. | 1818 | * %LOCK_SH -- a shared lock. |
1819 | * | 1819 | * |
1820 | * %LOCK_EX -- an exclusive lock. | 1820 | * %LOCK_EX -- an exclusive lock. |
1821 | * | 1821 | * |
1822 | * %LOCK_UN -- remove an existing lock. | 1822 | * %LOCK_UN -- remove an existing lock. |
1823 | * | 1823 | * |
1824 | * %LOCK_MAND -- a `mandatory' flock. This exists to emulate Windows Share Modes. | 1824 | * %LOCK_MAND -- a `mandatory' flock. This exists to emulate Windows Share Modes. |
1825 | * | 1825 | * |
1826 | * %LOCK_MAND can be combined with %LOCK_READ or %LOCK_WRITE to allow other | 1826 | * %LOCK_MAND can be combined with %LOCK_READ or %LOCK_WRITE to allow other |
1827 | * processes read and write access respectively. | 1827 | * processes read and write access respectively. |
1828 | */ | 1828 | */ |
1829 | SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) | 1829 | SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) |
1830 | { | 1830 | { |
1831 | struct fd f = fdget(fd); | 1831 | struct fd f = fdget(fd); |
1832 | struct file_lock *lock; | 1832 | struct file_lock *lock; |
1833 | int can_sleep, unlock; | 1833 | int can_sleep, unlock; |
1834 | int error; | 1834 | int error; |
1835 | 1835 | ||
1836 | error = -EBADF; | 1836 | error = -EBADF; |
1837 | if (!f.file) | 1837 | if (!f.file) |
1838 | goto out; | 1838 | goto out; |
1839 | 1839 | ||
1840 | can_sleep = !(cmd & LOCK_NB); | 1840 | can_sleep = !(cmd & LOCK_NB); |
1841 | cmd &= ~LOCK_NB; | 1841 | cmd &= ~LOCK_NB; |
1842 | unlock = (cmd == LOCK_UN); | 1842 | unlock = (cmd == LOCK_UN); |
1843 | 1843 | ||
1844 | if (!unlock && !(cmd & LOCK_MAND) && | 1844 | if (!unlock && !(cmd & LOCK_MAND) && |
1845 | !(f.file->f_mode & (FMODE_READ|FMODE_WRITE))) | 1845 | !(f.file->f_mode & (FMODE_READ|FMODE_WRITE))) |
1846 | goto out_putf; | 1846 | goto out_putf; |
1847 | 1847 | ||
1848 | error = flock_make_lock(f.file, &lock, cmd); | 1848 | error = flock_make_lock(f.file, &lock, cmd); |
1849 | if (error) | 1849 | if (error) |
1850 | goto out_putf; | 1850 | goto out_putf; |
1851 | if (can_sleep) | 1851 | if (can_sleep) |
1852 | lock->fl_flags |= FL_SLEEP; | 1852 | lock->fl_flags |= FL_SLEEP; |
1853 | 1853 | ||
1854 | error = security_file_lock(f.file, lock->fl_type); | 1854 | error = security_file_lock(f.file, lock->fl_type); |
1855 | if (error) | 1855 | if (error) |
1856 | goto out_free; | 1856 | goto out_free; |
1857 | 1857 | ||
1858 | if (f.file->f_op->flock) | 1858 | if (f.file->f_op->flock) |
1859 | error = f.file->f_op->flock(f.file, | 1859 | error = f.file->f_op->flock(f.file, |
1860 | (can_sleep) ? F_SETLKW : F_SETLK, | 1860 | (can_sleep) ? F_SETLKW : F_SETLK, |
1861 | lock); | 1861 | lock); |
1862 | else | 1862 | else |
1863 | error = flock_lock_file_wait(f.file, lock); | 1863 | error = flock_lock_file_wait(f.file, lock); |
1864 | 1864 | ||
1865 | out_free: | 1865 | out_free: |
1866 | locks_free_lock(lock); | 1866 | locks_free_lock(lock); |
1867 | 1867 | ||
1868 | out_putf: | 1868 | out_putf: |
1869 | fdput(f); | 1869 | fdput(f); |
1870 | out: | 1870 | out: |
1871 | return error; | 1871 | return error; |
1872 | } | 1872 | } |
1873 | 1873 | ||
1874 | /** | 1874 | /** |
1875 | * vfs_test_lock - test file byte range lock | 1875 | * vfs_test_lock - test file byte range lock |
1876 | * @filp: The file to test lock for | 1876 | * @filp: The file to test lock for |
1877 | * @fl: The lock to test; also used to hold result | 1877 | * @fl: The lock to test; also used to hold result |
1878 | * | 1878 | * |
1879 | * Returns -ERRNO on failure. Indicates presence of conflicting lock by | 1879 | * Returns -ERRNO on failure. Indicates presence of conflicting lock by |
1880 | * setting conf->fl_type to something other than F_UNLCK. | 1880 | * setting conf->fl_type to something other than F_UNLCK. |
1881 | */ | 1881 | */ |
1882 | int vfs_test_lock(struct file *filp, struct file_lock *fl) | 1882 | int vfs_test_lock(struct file *filp, struct file_lock *fl) |
1883 | { | 1883 | { |
1884 | if (filp->f_op->lock) | 1884 | if (filp->f_op->lock) |
1885 | return filp->f_op->lock(filp, F_GETLK, fl); | 1885 | return filp->f_op->lock(filp, F_GETLK, fl); |
1886 | posix_test_lock(filp, fl); | 1886 | posix_test_lock(filp, fl); |
1887 | return 0; | 1887 | return 0; |
1888 | } | 1888 | } |
1889 | EXPORT_SYMBOL_GPL(vfs_test_lock); | 1889 | EXPORT_SYMBOL_GPL(vfs_test_lock); |
1890 | 1890 | ||
1891 | static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl) | 1891 | static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl) |
1892 | { | 1892 | { |
1893 | flock->l_pid = IS_FILE_PVT(fl) ? -1 : fl->fl_pid; | 1893 | flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid; |
1894 | #if BITS_PER_LONG == 32 | 1894 | #if BITS_PER_LONG == 32 |
1895 | /* | 1895 | /* |
1896 | * Make sure we can represent the posix lock via | 1896 | * Make sure we can represent the posix lock via |
1897 | * legacy 32bit flock. | 1897 | * legacy 32bit flock. |
1898 | */ | 1898 | */ |
1899 | if (fl->fl_start > OFFT_OFFSET_MAX) | 1899 | if (fl->fl_start > OFFT_OFFSET_MAX) |
1900 | return -EOVERFLOW; | 1900 | return -EOVERFLOW; |
1901 | if (fl->fl_end != OFFSET_MAX && fl->fl_end > OFFT_OFFSET_MAX) | 1901 | if (fl->fl_end != OFFSET_MAX && fl->fl_end > OFFT_OFFSET_MAX) |
1902 | return -EOVERFLOW; | 1902 | return -EOVERFLOW; |
1903 | #endif | 1903 | #endif |
1904 | flock->l_start = fl->fl_start; | 1904 | flock->l_start = fl->fl_start; |
1905 | flock->l_len = fl->fl_end == OFFSET_MAX ? 0 : | 1905 | flock->l_len = fl->fl_end == OFFSET_MAX ? 0 : |
1906 | fl->fl_end - fl->fl_start + 1; | 1906 | fl->fl_end - fl->fl_start + 1; |
1907 | flock->l_whence = 0; | 1907 | flock->l_whence = 0; |
1908 | flock->l_type = fl->fl_type; | 1908 | flock->l_type = fl->fl_type; |
1909 | return 0; | 1909 | return 0; |
1910 | } | 1910 | } |
1911 | 1911 | ||
1912 | #if BITS_PER_LONG == 32 | 1912 | #if BITS_PER_LONG == 32 |
1913 | static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl) | 1913 | static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl) |
1914 | { | 1914 | { |
1915 | flock->l_pid = IS_FILE_PVT(fl) ? -1 : fl->fl_pid; | 1915 | flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid; |
1916 | flock->l_start = fl->fl_start; | 1916 | flock->l_start = fl->fl_start; |
1917 | flock->l_len = fl->fl_end == OFFSET_MAX ? 0 : | 1917 | flock->l_len = fl->fl_end == OFFSET_MAX ? 0 : |
1918 | fl->fl_end - fl->fl_start + 1; | 1918 | fl->fl_end - fl->fl_start + 1; |
1919 | flock->l_whence = 0; | 1919 | flock->l_whence = 0; |
1920 | flock->l_type = fl->fl_type; | 1920 | flock->l_type = fl->fl_type; |
1921 | } | 1921 | } |
1922 | #endif | 1922 | #endif |
1923 | 1923 | ||
1924 | /* Report the first existing lock that would conflict with l. | 1924 | /* Report the first existing lock that would conflict with l. |
1925 | * This implements the F_GETLK command of fcntl(). | 1925 | * This implements the F_GETLK command of fcntl(). |
1926 | */ | 1926 | */ |
1927 | int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l) | 1927 | int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l) |
1928 | { | 1928 | { |
1929 | struct file_lock file_lock; | 1929 | struct file_lock file_lock; |
1930 | struct flock flock; | 1930 | struct flock flock; |
1931 | int error; | 1931 | int error; |
1932 | 1932 | ||
1933 | error = -EFAULT; | 1933 | error = -EFAULT; |
1934 | if (copy_from_user(&flock, l, sizeof(flock))) | 1934 | if (copy_from_user(&flock, l, sizeof(flock))) |
1935 | goto out; | 1935 | goto out; |
1936 | error = -EINVAL; | 1936 | error = -EINVAL; |
1937 | if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK)) | 1937 | if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK)) |
1938 | goto out; | 1938 | goto out; |
1939 | 1939 | ||
1940 | error = flock_to_posix_lock(filp, &file_lock, &flock); | 1940 | error = flock_to_posix_lock(filp, &file_lock, &flock); |
1941 | if (error) | 1941 | if (error) |
1942 | goto out; | 1942 | goto out; |
1943 | 1943 | ||
1944 | if (cmd == F_GETLKP) { | 1944 | if (cmd == F_OFD_GETLK) { |
1945 | error = -EINVAL; | 1945 | error = -EINVAL; |
1946 | if (flock.l_pid != 0) | 1946 | if (flock.l_pid != 0) |
1947 | goto out; | 1947 | goto out; |
1948 | 1948 | ||
1949 | cmd = F_GETLK; | 1949 | cmd = F_GETLK; |
1950 | file_lock.fl_flags |= FL_FILE_PVT; | 1950 | file_lock.fl_flags |= FL_OFDLCK; |
1951 | file_lock.fl_owner = (fl_owner_t)filp; | 1951 | file_lock.fl_owner = (fl_owner_t)filp; |
1952 | } | 1952 | } |
1953 | 1953 | ||
1954 | error = vfs_test_lock(filp, &file_lock); | 1954 | error = vfs_test_lock(filp, &file_lock); |
1955 | if (error) | 1955 | if (error) |
1956 | goto out; | 1956 | goto out; |
1957 | 1957 | ||
1958 | flock.l_type = file_lock.fl_type; | 1958 | flock.l_type = file_lock.fl_type; |
1959 | if (file_lock.fl_type != F_UNLCK) { | 1959 | if (file_lock.fl_type != F_UNLCK) { |
1960 | error = posix_lock_to_flock(&flock, &file_lock); | 1960 | error = posix_lock_to_flock(&flock, &file_lock); |
1961 | if (error) | 1961 | if (error) |
1962 | goto out; | 1962 | goto out; |
1963 | } | 1963 | } |
1964 | error = -EFAULT; | 1964 | error = -EFAULT; |
1965 | if (!copy_to_user(l, &flock, sizeof(flock))) | 1965 | if (!copy_to_user(l, &flock, sizeof(flock))) |
1966 | error = 0; | 1966 | error = 0; |
1967 | out: | 1967 | out: |
1968 | return error; | 1968 | return error; |
1969 | } | 1969 | } |
1970 | 1970 | ||
1971 | /** | 1971 | /** |
1972 | * vfs_lock_file - file byte range lock | 1972 | * vfs_lock_file - file byte range lock |
1973 | * @filp: The file to apply the lock to | 1973 | * @filp: The file to apply the lock to |
1974 | * @cmd: type of locking operation (F_SETLK, F_GETLK, etc.) | 1974 | * @cmd: type of locking operation (F_SETLK, F_GETLK, etc.) |
1975 | * @fl: The lock to be applied | 1975 | * @fl: The lock to be applied |
1976 | * @conf: Place to return a copy of the conflicting lock, if found. | 1976 | * @conf: Place to return a copy of the conflicting lock, if found. |
1977 | * | 1977 | * |
1978 | * A caller that doesn't care about the conflicting lock may pass NULL | 1978 | * A caller that doesn't care about the conflicting lock may pass NULL |
1979 | * as the final argument. | 1979 | * as the final argument. |
1980 | * | 1980 | * |
1981 | * If the filesystem defines a private ->lock() method, then @conf will | 1981 | * If the filesystem defines a private ->lock() method, then @conf will |
1982 | * be left unchanged; so a caller that cares should initialize it to | 1982 | * be left unchanged; so a caller that cares should initialize it to |
1983 | * some acceptable default. | 1983 | * some acceptable default. |
1984 | * | 1984 | * |
1985 | * To avoid blocking kernel daemons, such as lockd, that need to acquire POSIX | 1985 | * To avoid blocking kernel daemons, such as lockd, that need to acquire POSIX |
1986 | * locks, the ->lock() interface may return asynchronously, before the lock has | 1986 | * locks, the ->lock() interface may return asynchronously, before the lock has |
1987 | * been granted or denied by the underlying filesystem, if (and only if) | 1987 | * been granted or denied by the underlying filesystem, if (and only if) |
1988 | * lm_grant is set. Callers expecting ->lock() to return asynchronously | 1988 | * lm_grant is set. Callers expecting ->lock() to return asynchronously |
1989 | * will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if) | 1989 | * will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if) |
1990 | * the request is for a blocking lock. When ->lock() does return asynchronously, | 1990 | * the request is for a blocking lock. When ->lock() does return asynchronously, |
1991 | * it must return FILE_LOCK_DEFERRED, and call ->lm_grant() when the lock | 1991 | * it must return FILE_LOCK_DEFERRED, and call ->lm_grant() when the lock |
1992 | * request completes. | 1992 | * request completes. |
1993 | * If the request is for non-blocking lock the file system should return | 1993 | * If the request is for non-blocking lock the file system should return |
1994 | * FILE_LOCK_DEFERRED then try to get the lock and call the callback routine | 1994 | * FILE_LOCK_DEFERRED then try to get the lock and call the callback routine |
1995 | * with the result. If the request timed out the callback routine will return a | 1995 | * with the result. If the request timed out the callback routine will return a |
1996 | * nonzero return code and the file system should release the lock. The file | 1996 | * nonzero return code and the file system should release the lock. The file |
1997 | * system is also responsible to keep a corresponding posix lock when it | 1997 | * system is also responsible to keep a corresponding posix lock when it |
1998 | * grants a lock so the VFS can find out which locks are locally held and do | 1998 | * grants a lock so the VFS can find out which locks are locally held and do |
1999 | * the correct lock cleanup when required. | 1999 | * the correct lock cleanup when required. |
2000 | * The underlying filesystem must not drop the kernel lock or call | 2000 | * The underlying filesystem must not drop the kernel lock or call |
2001 | * ->lm_grant() before returning to the caller with a FILE_LOCK_DEFERRED | 2001 | * ->lm_grant() before returning to the caller with a FILE_LOCK_DEFERRED |
2002 | * return code. | 2002 | * return code. |
2003 | */ | 2003 | */ |
2004 | int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf) | 2004 | int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf) |
2005 | { | 2005 | { |
2006 | if (filp->f_op->lock) | 2006 | if (filp->f_op->lock) |
2007 | return filp->f_op->lock(filp, cmd, fl); | 2007 | return filp->f_op->lock(filp, cmd, fl); |
2008 | else | 2008 | else |
2009 | return posix_lock_file(filp, fl, conf); | 2009 | return posix_lock_file(filp, fl, conf); |
2010 | } | 2010 | } |
2011 | EXPORT_SYMBOL_GPL(vfs_lock_file); | 2011 | EXPORT_SYMBOL_GPL(vfs_lock_file); |
2012 | 2012 | ||
2013 | static int do_lock_file_wait(struct file *filp, unsigned int cmd, | 2013 | static int do_lock_file_wait(struct file *filp, unsigned int cmd, |
2014 | struct file_lock *fl) | 2014 | struct file_lock *fl) |
2015 | { | 2015 | { |
2016 | int error; | 2016 | int error; |
2017 | 2017 | ||
2018 | error = security_file_lock(filp, fl->fl_type); | 2018 | error = security_file_lock(filp, fl->fl_type); |
2019 | if (error) | 2019 | if (error) |
2020 | return error; | 2020 | return error; |
2021 | 2021 | ||
2022 | for (;;) { | 2022 | for (;;) { |
2023 | error = vfs_lock_file(filp, cmd, fl, NULL); | 2023 | error = vfs_lock_file(filp, cmd, fl, NULL); |
2024 | if (error != FILE_LOCK_DEFERRED) | 2024 | if (error != FILE_LOCK_DEFERRED) |
2025 | break; | 2025 | break; |
2026 | error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); | 2026 | error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); |
2027 | if (!error) | 2027 | if (!error) |
2028 | continue; | 2028 | continue; |
2029 | 2029 | ||
2030 | locks_delete_block(fl); | 2030 | locks_delete_block(fl); |
2031 | break; | 2031 | break; |
2032 | } | 2032 | } |
2033 | 2033 | ||
2034 | return error; | 2034 | return error; |
2035 | } | 2035 | } |
2036 | 2036 | ||
2037 | /* Apply the lock described by l to an open file descriptor. | 2037 | /* Apply the lock described by l to an open file descriptor. |
2038 | * This implements both the F_SETLK and F_SETLKW commands of fcntl(). | 2038 | * This implements both the F_SETLK and F_SETLKW commands of fcntl(). |
2039 | */ | 2039 | */ |
2040 | int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, | 2040 | int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, |
2041 | struct flock __user *l) | 2041 | struct flock __user *l) |
2042 | { | 2042 | { |
2043 | struct file_lock *file_lock = locks_alloc_lock(); | 2043 | struct file_lock *file_lock = locks_alloc_lock(); |
2044 | struct flock flock; | 2044 | struct flock flock; |
2045 | struct inode *inode; | 2045 | struct inode *inode; |
2046 | struct file *f; | 2046 | struct file *f; |
2047 | int error; | 2047 | int error; |
2048 | 2048 | ||
2049 | if (file_lock == NULL) | 2049 | if (file_lock == NULL) |
2050 | return -ENOLCK; | 2050 | return -ENOLCK; |
2051 | 2051 | ||
2052 | /* | 2052 | /* |
2053 | * This might block, so we do it before checking the inode. | 2053 | * This might block, so we do it before checking the inode. |
2054 | */ | 2054 | */ |
2055 | error = -EFAULT; | 2055 | error = -EFAULT; |
2056 | if (copy_from_user(&flock, l, sizeof(flock))) | 2056 | if (copy_from_user(&flock, l, sizeof(flock))) |
2057 | goto out; | 2057 | goto out; |
2058 | 2058 | ||
2059 | inode = file_inode(filp); | 2059 | inode = file_inode(filp); |
2060 | 2060 | ||
2061 | /* Don't allow mandatory locks on files that may be memory mapped | 2061 | /* Don't allow mandatory locks on files that may be memory mapped |
2062 | * and shared. | 2062 | * and shared. |
2063 | */ | 2063 | */ |
2064 | if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) { | 2064 | if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) { |
2065 | error = -EAGAIN; | 2065 | error = -EAGAIN; |
2066 | goto out; | 2066 | goto out; |
2067 | } | 2067 | } |
2068 | 2068 | ||
2069 | again: | 2069 | again: |
2070 | error = flock_to_posix_lock(filp, file_lock, &flock); | 2070 | error = flock_to_posix_lock(filp, file_lock, &flock); |
2071 | if (error) | 2071 | if (error) |
2072 | goto out; | 2072 | goto out; |
2073 | 2073 | ||
2074 | /* | 2074 | /* |
2075 | * If the cmd is requesting file-private locks, then set the | 2075 | * If the cmd is requesting file-private locks, then set the |
2076 | * FL_FILE_PVT flag and override the owner. | 2076 | * FL_OFDLCK flag and override the owner. |
2077 | */ | 2077 | */ |
2078 | switch (cmd) { | 2078 | switch (cmd) { |
2079 | case F_SETLKP: | 2079 | case F_OFD_SETLK: |
2080 | error = -EINVAL; | 2080 | error = -EINVAL; |
2081 | if (flock.l_pid != 0) | 2081 | if (flock.l_pid != 0) |
2082 | goto out; | 2082 | goto out; |
2083 | 2083 | ||
2084 | cmd = F_SETLK; | 2084 | cmd = F_SETLK; |
2085 | file_lock->fl_flags |= FL_FILE_PVT; | 2085 | file_lock->fl_flags |= FL_OFDLCK; |
2086 | file_lock->fl_owner = (fl_owner_t)filp; | 2086 | file_lock->fl_owner = (fl_owner_t)filp; |
2087 | break; | 2087 | break; |
2088 | case F_SETLKPW: | 2088 | case F_OFD_SETLKW: |
2089 | error = -EINVAL; | 2089 | error = -EINVAL; |
2090 | if (flock.l_pid != 0) | 2090 | if (flock.l_pid != 0) |
2091 | goto out; | 2091 | goto out; |
2092 | 2092 | ||
2093 | cmd = F_SETLKW; | 2093 | cmd = F_SETLKW; |
2094 | file_lock->fl_flags |= FL_FILE_PVT; | 2094 | file_lock->fl_flags |= FL_OFDLCK; |
2095 | file_lock->fl_owner = (fl_owner_t)filp; | 2095 | file_lock->fl_owner = (fl_owner_t)filp; |
2096 | /* Fallthrough */ | 2096 | /* Fallthrough */ |
2097 | case F_SETLKW: | 2097 | case F_SETLKW: |
2098 | file_lock->fl_flags |= FL_SLEEP; | 2098 | file_lock->fl_flags |= FL_SLEEP; |
2099 | } | 2099 | } |
2100 | 2100 | ||
2101 | error = do_lock_file_wait(filp, cmd, file_lock); | 2101 | error = do_lock_file_wait(filp, cmd, file_lock); |
2102 | 2102 | ||
2103 | /* | 2103 | /* |
2104 | * Attempt to detect a close/fcntl race and recover by | 2104 | * Attempt to detect a close/fcntl race and recover by |
2105 | * releasing the lock that was just acquired. | 2105 | * releasing the lock that was just acquired. |
2106 | */ | 2106 | */ |
2107 | /* | 2107 | /* |
2108 | * we need that spin_lock here - it prevents reordering between | 2108 | * we need that spin_lock here - it prevents reordering between |
2109 | * update of inode->i_flock and check for it done in close(). | 2109 | * update of inode->i_flock and check for it done in close(). |
2110 | * rcu_read_lock() wouldn't do. | 2110 | * rcu_read_lock() wouldn't do. |
2111 | */ | 2111 | */ |
2112 | spin_lock(&current->files->file_lock); | 2112 | spin_lock(&current->files->file_lock); |
2113 | f = fcheck(fd); | 2113 | f = fcheck(fd); |
2114 | spin_unlock(&current->files->file_lock); | 2114 | spin_unlock(&current->files->file_lock); |
2115 | if (!error && f != filp && flock.l_type != F_UNLCK) { | 2115 | if (!error && f != filp && flock.l_type != F_UNLCK) { |
2116 | flock.l_type = F_UNLCK; | 2116 | flock.l_type = F_UNLCK; |
2117 | goto again; | 2117 | goto again; |
2118 | } | 2118 | } |
2119 | 2119 | ||
2120 | out: | 2120 | out: |
2121 | locks_free_lock(file_lock); | 2121 | locks_free_lock(file_lock); |
2122 | return error; | 2122 | return error; |
2123 | } | 2123 | } |
2124 | 2124 | ||
2125 | #if BITS_PER_LONG == 32 | 2125 | #if BITS_PER_LONG == 32 |
2126 | /* Report the first existing lock that would conflict with l. | 2126 | /* Report the first existing lock that would conflict with l. |
2127 | * This implements the F_GETLK command of fcntl(). | 2127 | * This implements the F_GETLK command of fcntl(). |
2128 | */ | 2128 | */ |
2129 | int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l) | 2129 | int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l) |
2130 | { | 2130 | { |
2131 | struct file_lock file_lock; | 2131 | struct file_lock file_lock; |
2132 | struct flock64 flock; | 2132 | struct flock64 flock; |
2133 | int error; | 2133 | int error; |
2134 | 2134 | ||
2135 | error = -EFAULT; | 2135 | error = -EFAULT; |
2136 | if (copy_from_user(&flock, l, sizeof(flock))) | 2136 | if (copy_from_user(&flock, l, sizeof(flock))) |
2137 | goto out; | 2137 | goto out; |
2138 | error = -EINVAL; | 2138 | error = -EINVAL; |
2139 | if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK)) | 2139 | if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK)) |
2140 | goto out; | 2140 | goto out; |
2141 | 2141 | ||
2142 | error = flock64_to_posix_lock(filp, &file_lock, &flock); | 2142 | error = flock64_to_posix_lock(filp, &file_lock, &flock); |
2143 | if (error) | 2143 | if (error) |
2144 | goto out; | 2144 | goto out; |
2145 | 2145 | ||
2146 | if (cmd == F_GETLKP) { | 2146 | if (cmd == F_OFD_GETLK) { |
2147 | error = -EINVAL; | 2147 | error = -EINVAL; |
2148 | if (flock.l_pid != 0) | 2148 | if (flock.l_pid != 0) |
2149 | goto out; | 2149 | goto out; |
2150 | 2150 | ||
2151 | cmd = F_GETLK64; | 2151 | cmd = F_GETLK64; |
2152 | file_lock.fl_flags |= FL_FILE_PVT; | 2152 | file_lock.fl_flags |= FL_OFDLCK; |
2153 | file_lock.fl_owner = (fl_owner_t)filp; | 2153 | file_lock.fl_owner = (fl_owner_t)filp; |
2154 | } | 2154 | } |
2155 | 2155 | ||
2156 | error = vfs_test_lock(filp, &file_lock); | 2156 | error = vfs_test_lock(filp, &file_lock); |
2157 | if (error) | 2157 | if (error) |
2158 | goto out; | 2158 | goto out; |
2159 | 2159 | ||
2160 | flock.l_type = file_lock.fl_type; | 2160 | flock.l_type = file_lock.fl_type; |
2161 | if (file_lock.fl_type != F_UNLCK) | 2161 | if (file_lock.fl_type != F_UNLCK) |
2162 | posix_lock_to_flock64(&flock, &file_lock); | 2162 | posix_lock_to_flock64(&flock, &file_lock); |
2163 | 2163 | ||
2164 | error = -EFAULT; | 2164 | error = -EFAULT; |
2165 | if (!copy_to_user(l, &flock, sizeof(flock))) | 2165 | if (!copy_to_user(l, &flock, sizeof(flock))) |
2166 | error = 0; | 2166 | error = 0; |
2167 | 2167 | ||
2168 | out: | 2168 | out: |
2169 | return error; | 2169 | return error; |
2170 | } | 2170 | } |
2171 | 2171 | ||
2172 | /* Apply the lock described by l to an open file descriptor. | 2172 | /* Apply the lock described by l to an open file descriptor. |
2173 | * This implements both the F_SETLK and F_SETLKW commands of fcntl(). | 2173 | * This implements both the F_SETLK and F_SETLKW commands of fcntl(). |
2174 | */ | 2174 | */ |
2175 | int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, | 2175 | int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, |
2176 | struct flock64 __user *l) | 2176 | struct flock64 __user *l) |
2177 | { | 2177 | { |
2178 | struct file_lock *file_lock = locks_alloc_lock(); | 2178 | struct file_lock *file_lock = locks_alloc_lock(); |
2179 | struct flock64 flock; | 2179 | struct flock64 flock; |
2180 | struct inode *inode; | 2180 | struct inode *inode; |
2181 | struct file *f; | 2181 | struct file *f; |
2182 | int error; | 2182 | int error; |
2183 | 2183 | ||
2184 | if (file_lock == NULL) | 2184 | if (file_lock == NULL) |
2185 | return -ENOLCK; | 2185 | return -ENOLCK; |
2186 | 2186 | ||
2187 | /* | 2187 | /* |
2188 | * This might block, so we do it before checking the inode. | 2188 | * This might block, so we do it before checking the inode. |
2189 | */ | 2189 | */ |
2190 | error = -EFAULT; | 2190 | error = -EFAULT; |
2191 | if (copy_from_user(&flock, l, sizeof(flock))) | 2191 | if (copy_from_user(&flock, l, sizeof(flock))) |
2192 | goto out; | 2192 | goto out; |
2193 | 2193 | ||
2194 | inode = file_inode(filp); | 2194 | inode = file_inode(filp); |
2195 | 2195 | ||
2196 | /* Don't allow mandatory locks on files that may be memory mapped | 2196 | /* Don't allow mandatory locks on files that may be memory mapped |
2197 | * and shared. | 2197 | * and shared. |
2198 | */ | 2198 | */ |
2199 | if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) { | 2199 | if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) { |
2200 | error = -EAGAIN; | 2200 | error = -EAGAIN; |
2201 | goto out; | 2201 | goto out; |
2202 | } | 2202 | } |
2203 | 2203 | ||
2204 | again: | 2204 | again: |
2205 | error = flock64_to_posix_lock(filp, file_lock, &flock); | 2205 | error = flock64_to_posix_lock(filp, file_lock, &flock); |
2206 | if (error) | 2206 | if (error) |
2207 | goto out; | 2207 | goto out; |
2208 | 2208 | ||
2209 | /* | 2209 | /* |
2210 | * If the cmd is requesting file-private locks, then set the | 2210 | * If the cmd is requesting file-private locks, then set the |
2211 | * FL_FILE_PVT flag and override the owner. | 2211 | * FL_OFDLCK flag and override the owner. |
2212 | */ | 2212 | */ |
2213 | switch (cmd) { | 2213 | switch (cmd) { |
2214 | case F_SETLKP: | 2214 | case F_OFD_SETLK: |
2215 | error = -EINVAL; | 2215 | error = -EINVAL; |
2216 | if (flock.l_pid != 0) | 2216 | if (flock.l_pid != 0) |
2217 | goto out; | 2217 | goto out; |
2218 | 2218 | ||
2219 | cmd = F_SETLK64; | 2219 | cmd = F_SETLK64; |
2220 | file_lock->fl_flags |= FL_FILE_PVT; | 2220 | file_lock->fl_flags |= FL_OFDLCK; |
2221 | file_lock->fl_owner = (fl_owner_t)filp; | 2221 | file_lock->fl_owner = (fl_owner_t)filp; |
2222 | break; | 2222 | break; |
2223 | case F_SETLKPW: | 2223 | case F_OFD_SETLKW: |
2224 | error = -EINVAL; | 2224 | error = -EINVAL; |
2225 | if (flock.l_pid != 0) | 2225 | if (flock.l_pid != 0) |
2226 | goto out; | 2226 | goto out; |
2227 | 2227 | ||
2228 | cmd = F_SETLKW64; | 2228 | cmd = F_SETLKW64; |
2229 | file_lock->fl_flags |= FL_FILE_PVT; | 2229 | file_lock->fl_flags |= FL_OFDLCK; |
2230 | file_lock->fl_owner = (fl_owner_t)filp; | 2230 | file_lock->fl_owner = (fl_owner_t)filp; |
2231 | /* Fallthrough */ | 2231 | /* Fallthrough */ |
2232 | case F_SETLKW64: | 2232 | case F_SETLKW64: |
2233 | file_lock->fl_flags |= FL_SLEEP; | 2233 | file_lock->fl_flags |= FL_SLEEP; |
2234 | } | 2234 | } |
2235 | 2235 | ||
2236 | error = do_lock_file_wait(filp, cmd, file_lock); | 2236 | error = do_lock_file_wait(filp, cmd, file_lock); |
2237 | 2237 | ||
2238 | /* | 2238 | /* |
2239 | * Attempt to detect a close/fcntl race and recover by | 2239 | * Attempt to detect a close/fcntl race and recover by |
2240 | * releasing the lock that was just acquired. | 2240 | * releasing the lock that was just acquired. |
2241 | */ | 2241 | */ |
2242 | spin_lock(&current->files->file_lock); | 2242 | spin_lock(&current->files->file_lock); |
2243 | f = fcheck(fd); | 2243 | f = fcheck(fd); |
2244 | spin_unlock(&current->files->file_lock); | 2244 | spin_unlock(&current->files->file_lock); |
2245 | if (!error && f != filp && flock.l_type != F_UNLCK) { | 2245 | if (!error && f != filp && flock.l_type != F_UNLCK) { |
2246 | flock.l_type = F_UNLCK; | 2246 | flock.l_type = F_UNLCK; |
2247 | goto again; | 2247 | goto again; |
2248 | } | 2248 | } |
2249 | 2249 | ||
2250 | out: | 2250 | out: |
2251 | locks_free_lock(file_lock); | 2251 | locks_free_lock(file_lock); |
2252 | return error; | 2252 | return error; |
2253 | } | 2253 | } |
2254 | #endif /* BITS_PER_LONG == 32 */ | 2254 | #endif /* BITS_PER_LONG == 32 */ |
2255 | 2255 | ||
2256 | /* | 2256 | /* |
2257 | * This function is called when the file is being removed | 2257 | * This function is called when the file is being removed |
2258 | * from the task's fd array. POSIX locks belonging to this task | 2258 | * from the task's fd array. POSIX locks belonging to this task |
2259 | * are deleted at this time. | 2259 | * are deleted at this time. |
2260 | */ | 2260 | */ |
2261 | void locks_remove_posix(struct file *filp, fl_owner_t owner) | 2261 | void locks_remove_posix(struct file *filp, fl_owner_t owner) |
2262 | { | 2262 | { |
2263 | struct file_lock lock; | 2263 | struct file_lock lock; |
2264 | 2264 | ||
2265 | /* | 2265 | /* |
2266 | * If there are no locks held on this file, we don't need to call | 2266 | * If there are no locks held on this file, we don't need to call |
2267 | * posix_lock_file(). Another process could be setting a lock on this | 2267 | * posix_lock_file(). Another process could be setting a lock on this |
2268 | * file at the same time, but we wouldn't remove that lock anyway. | 2268 | * file at the same time, but we wouldn't remove that lock anyway. |
2269 | */ | 2269 | */ |
2270 | if (!file_inode(filp)->i_flock) | 2270 | if (!file_inode(filp)->i_flock) |
2271 | return; | 2271 | return; |
2272 | 2272 | ||
2273 | lock.fl_type = F_UNLCK; | 2273 | lock.fl_type = F_UNLCK; |
2274 | lock.fl_flags = FL_POSIX | FL_CLOSE; | 2274 | lock.fl_flags = FL_POSIX | FL_CLOSE; |
2275 | lock.fl_start = 0; | 2275 | lock.fl_start = 0; |
2276 | lock.fl_end = OFFSET_MAX; | 2276 | lock.fl_end = OFFSET_MAX; |
2277 | lock.fl_owner = owner; | 2277 | lock.fl_owner = owner; |
2278 | lock.fl_pid = current->tgid; | 2278 | lock.fl_pid = current->tgid; |
2279 | lock.fl_file = filp; | 2279 | lock.fl_file = filp; |
2280 | lock.fl_ops = NULL; | 2280 | lock.fl_ops = NULL; |
2281 | lock.fl_lmops = NULL; | 2281 | lock.fl_lmops = NULL; |
2282 | 2282 | ||
2283 | vfs_lock_file(filp, F_SETLK, &lock, NULL); | 2283 | vfs_lock_file(filp, F_SETLK, &lock, NULL); |
2284 | 2284 | ||
2285 | if (lock.fl_ops && lock.fl_ops->fl_release_private) | 2285 | if (lock.fl_ops && lock.fl_ops->fl_release_private) |
2286 | lock.fl_ops->fl_release_private(&lock); | 2286 | lock.fl_ops->fl_release_private(&lock); |
2287 | } | 2287 | } |
2288 | 2288 | ||
2289 | EXPORT_SYMBOL(locks_remove_posix); | 2289 | EXPORT_SYMBOL(locks_remove_posix); |
2290 | 2290 | ||
2291 | /* | 2291 | /* |
2292 | * This function is called on the last close of an open file. | 2292 | * This function is called on the last close of an open file. |
2293 | */ | 2293 | */ |
2294 | void locks_remove_file(struct file *filp) | 2294 | void locks_remove_file(struct file *filp) |
2295 | { | 2295 | { |
2296 | struct inode * inode = file_inode(filp); | 2296 | struct inode * inode = file_inode(filp); |
2297 | struct file_lock *fl; | 2297 | struct file_lock *fl; |
2298 | struct file_lock **before; | 2298 | struct file_lock **before; |
2299 | 2299 | ||
2300 | if (!inode->i_flock) | 2300 | if (!inode->i_flock) |
2301 | return; | 2301 | return; |
2302 | 2302 | ||
2303 | locks_remove_posix(filp, (fl_owner_t)filp); | 2303 | locks_remove_posix(filp, (fl_owner_t)filp); |
2304 | 2304 | ||
2305 | if (filp->f_op->flock) { | 2305 | if (filp->f_op->flock) { |
2306 | struct file_lock fl = { | 2306 | struct file_lock fl = { |
2307 | .fl_pid = current->tgid, | 2307 | .fl_pid = current->tgid, |
2308 | .fl_file = filp, | 2308 | .fl_file = filp, |
2309 | .fl_flags = FL_FLOCK, | 2309 | .fl_flags = FL_FLOCK, |
2310 | .fl_type = F_UNLCK, | 2310 | .fl_type = F_UNLCK, |
2311 | .fl_end = OFFSET_MAX, | 2311 | .fl_end = OFFSET_MAX, |
2312 | }; | 2312 | }; |
2313 | filp->f_op->flock(filp, F_SETLKW, &fl); | 2313 | filp->f_op->flock(filp, F_SETLKW, &fl); |
2314 | if (fl.fl_ops && fl.fl_ops->fl_release_private) | 2314 | if (fl.fl_ops && fl.fl_ops->fl_release_private) |
2315 | fl.fl_ops->fl_release_private(&fl); | 2315 | fl.fl_ops->fl_release_private(&fl); |
2316 | } | 2316 | } |
2317 | 2317 | ||
2318 | spin_lock(&inode->i_lock); | 2318 | spin_lock(&inode->i_lock); |
2319 | before = &inode->i_flock; | 2319 | before = &inode->i_flock; |
2320 | 2320 | ||
2321 | while ((fl = *before) != NULL) { | 2321 | while ((fl = *before) != NULL) { |
2322 | if (fl->fl_file == filp) { | 2322 | if (fl->fl_file == filp) { |
2323 | if (IS_LEASE(fl)) { | 2323 | if (IS_LEASE(fl)) { |
2324 | lease_modify(before, F_UNLCK); | 2324 | lease_modify(before, F_UNLCK); |
2325 | continue; | 2325 | continue; |
2326 | } | 2326 | } |
2327 | 2327 | ||
2328 | /* | 2328 | /* |
2329 | * There's a leftover lock on the list of a type that | 2329 | * There's a leftover lock on the list of a type that |
2330 | * we didn't expect to see. Most likely a classic | 2330 | * we didn't expect to see. Most likely a classic |
2331 | * POSIX lock that ended up not getting released | 2331 | * POSIX lock that ended up not getting released |
2332 | * properly, or that raced onto the list somehow. Log | 2332 | * properly, or that raced onto the list somehow. Log |
2333 | * some info about it and then just remove it from | 2333 | * some info about it and then just remove it from |
2334 | * the list. | 2334 | * the list. |
2335 | */ | 2335 | */ |
2336 | WARN(!IS_FLOCK(fl), | 2336 | WARN(!IS_FLOCK(fl), |
2337 | "leftover lock: dev=%u:%u ino=%lu type=%hhd flags=0x%x start=%lld end=%lld\n", | 2337 | "leftover lock: dev=%u:%u ino=%lu type=%hhd flags=0x%x start=%lld end=%lld\n", |
2338 | MAJOR(inode->i_sb->s_dev), | 2338 | MAJOR(inode->i_sb->s_dev), |
2339 | MINOR(inode->i_sb->s_dev), inode->i_ino, | 2339 | MINOR(inode->i_sb->s_dev), inode->i_ino, |
2340 | fl->fl_type, fl->fl_flags, | 2340 | fl->fl_type, fl->fl_flags, |
2341 | fl->fl_start, fl->fl_end); | 2341 | fl->fl_start, fl->fl_end); |
2342 | 2342 | ||
2343 | locks_delete_lock(before); | 2343 | locks_delete_lock(before); |
2344 | continue; | 2344 | continue; |
2345 | } | 2345 | } |
2346 | before = &fl->fl_next; | 2346 | before = &fl->fl_next; |
2347 | } | 2347 | } |
2348 | spin_unlock(&inode->i_lock); | 2348 | spin_unlock(&inode->i_lock); |
2349 | } | 2349 | } |
2350 | 2350 | ||
2351 | /** | 2351 | /** |
2352 | * posix_unblock_lock - stop waiting for a file lock | 2352 | * posix_unblock_lock - stop waiting for a file lock |
2353 | * @waiter: the lock which was waiting | 2353 | * @waiter: the lock which was waiting |
2354 | * | 2354 | * |
2355 | * lockd needs to block waiting for locks. | 2355 | * lockd needs to block waiting for locks. |
2356 | */ | 2356 | */ |
2357 | int | 2357 | int |
2358 | posix_unblock_lock(struct file_lock *waiter) | 2358 | posix_unblock_lock(struct file_lock *waiter) |
2359 | { | 2359 | { |
2360 | int status = 0; | 2360 | int status = 0; |
2361 | 2361 | ||
2362 | spin_lock(&blocked_lock_lock); | 2362 | spin_lock(&blocked_lock_lock); |
2363 | if (waiter->fl_next) | 2363 | if (waiter->fl_next) |
2364 | __locks_delete_block(waiter); | 2364 | __locks_delete_block(waiter); |
2365 | else | 2365 | else |
2366 | status = -ENOENT; | 2366 | status = -ENOENT; |
2367 | spin_unlock(&blocked_lock_lock); | 2367 | spin_unlock(&blocked_lock_lock); |
2368 | return status; | 2368 | return status; |
2369 | } | 2369 | } |
2370 | EXPORT_SYMBOL(posix_unblock_lock); | 2370 | EXPORT_SYMBOL(posix_unblock_lock); |
2371 | 2371 | ||
2372 | /** | 2372 | /** |
2373 | * vfs_cancel_lock - file byte range unblock lock | 2373 | * vfs_cancel_lock - file byte range unblock lock |
2374 | * @filp: The file to apply the unblock to | 2374 | * @filp: The file to apply the unblock to |
2375 | * @fl: The lock to be unblocked | 2375 | * @fl: The lock to be unblocked |
2376 | * | 2376 | * |
2377 | * Used by lock managers to cancel blocked requests | 2377 | * Used by lock managers to cancel blocked requests |
2378 | */ | 2378 | */ |
2379 | int vfs_cancel_lock(struct file *filp, struct file_lock *fl) | 2379 | int vfs_cancel_lock(struct file *filp, struct file_lock *fl) |
2380 | { | 2380 | { |
2381 | if (filp->f_op->lock) | 2381 | if (filp->f_op->lock) |
2382 | return filp->f_op->lock(filp, F_CANCELLK, fl); | 2382 | return filp->f_op->lock(filp, F_CANCELLK, fl); |
2383 | return 0; | 2383 | return 0; |
2384 | } | 2384 | } |
2385 | 2385 | ||
2386 | EXPORT_SYMBOL_GPL(vfs_cancel_lock); | 2386 | EXPORT_SYMBOL_GPL(vfs_cancel_lock); |
2387 | 2387 | ||
2388 | #ifdef CONFIG_PROC_FS | 2388 | #ifdef CONFIG_PROC_FS |
2389 | #include <linux/proc_fs.h> | 2389 | #include <linux/proc_fs.h> |
2390 | #include <linux/seq_file.h> | 2390 | #include <linux/seq_file.h> |
2391 | 2391 | ||
2392 | struct locks_iterator { | 2392 | struct locks_iterator { |
2393 | int li_cpu; | 2393 | int li_cpu; |
2394 | loff_t li_pos; | 2394 | loff_t li_pos; |
2395 | }; | 2395 | }; |
2396 | 2396 | ||
2397 | static void lock_get_status(struct seq_file *f, struct file_lock *fl, | 2397 | static void lock_get_status(struct seq_file *f, struct file_lock *fl, |
2398 | loff_t id, char *pfx) | 2398 | loff_t id, char *pfx) |
2399 | { | 2399 | { |
2400 | struct inode *inode = NULL; | 2400 | struct inode *inode = NULL; |
2401 | unsigned int fl_pid; | 2401 | unsigned int fl_pid; |
2402 | 2402 | ||
2403 | if (fl->fl_nspid) | 2403 | if (fl->fl_nspid) |
2404 | fl_pid = pid_vnr(fl->fl_nspid); | 2404 | fl_pid = pid_vnr(fl->fl_nspid); |
2405 | else | 2405 | else |
2406 | fl_pid = fl->fl_pid; | 2406 | fl_pid = fl->fl_pid; |
2407 | 2407 | ||
2408 | if (fl->fl_file != NULL) | 2408 | if (fl->fl_file != NULL) |
2409 | inode = file_inode(fl->fl_file); | 2409 | inode = file_inode(fl->fl_file); |
2410 | 2410 | ||
2411 | seq_printf(f, "%lld:%s ", id, pfx); | 2411 | seq_printf(f, "%lld:%s ", id, pfx); |
2412 | if (IS_POSIX(fl)) { | 2412 | if (IS_POSIX(fl)) { |
2413 | if (fl->fl_flags & FL_ACCESS) | 2413 | if (fl->fl_flags & FL_ACCESS) |
2414 | seq_printf(f, "ACCESS"); | 2414 | seq_printf(f, "ACCESS"); |
2415 | else if (IS_FILE_PVT(fl)) | 2415 | else if (IS_OFDLCK(fl)) |
2416 | seq_printf(f, "FLPVT "); | 2416 | seq_printf(f, "OFDLCK"); |
2417 | else | 2417 | else |
2418 | seq_printf(f, "POSIX "); | 2418 | seq_printf(f, "POSIX "); |
2419 | 2419 | ||
2420 | seq_printf(f, " %s ", | 2420 | seq_printf(f, " %s ", |
2421 | (inode == NULL) ? "*NOINODE*" : | 2421 | (inode == NULL) ? "*NOINODE*" : |
2422 | mandatory_lock(inode) ? "MANDATORY" : "ADVISORY "); | 2422 | mandatory_lock(inode) ? "MANDATORY" : "ADVISORY "); |
2423 | } else if (IS_FLOCK(fl)) { | 2423 | } else if (IS_FLOCK(fl)) { |
2424 | if (fl->fl_type & LOCK_MAND) { | 2424 | if (fl->fl_type & LOCK_MAND) { |
2425 | seq_printf(f, "FLOCK MSNFS "); | 2425 | seq_printf(f, "FLOCK MSNFS "); |
2426 | } else { | 2426 | } else { |
2427 | seq_printf(f, "FLOCK ADVISORY "); | 2427 | seq_printf(f, "FLOCK ADVISORY "); |
2428 | } | 2428 | } |
2429 | } else if (IS_LEASE(fl)) { | 2429 | } else if (IS_LEASE(fl)) { |
2430 | seq_printf(f, "LEASE "); | 2430 | seq_printf(f, "LEASE "); |
2431 | if (lease_breaking(fl)) | 2431 | if (lease_breaking(fl)) |
2432 | seq_printf(f, "BREAKING "); | 2432 | seq_printf(f, "BREAKING "); |
2433 | else if (fl->fl_file) | 2433 | else if (fl->fl_file) |
2434 | seq_printf(f, "ACTIVE "); | 2434 | seq_printf(f, "ACTIVE "); |
2435 | else | 2435 | else |
2436 | seq_printf(f, "BREAKER "); | 2436 | seq_printf(f, "BREAKER "); |
2437 | } else { | 2437 | } else { |
2438 | seq_printf(f, "UNKNOWN UNKNOWN "); | 2438 | seq_printf(f, "UNKNOWN UNKNOWN "); |
2439 | } | 2439 | } |
2440 | if (fl->fl_type & LOCK_MAND) { | 2440 | if (fl->fl_type & LOCK_MAND) { |
2441 | seq_printf(f, "%s ", | 2441 | seq_printf(f, "%s ", |
2442 | (fl->fl_type & LOCK_READ) | 2442 | (fl->fl_type & LOCK_READ) |
2443 | ? (fl->fl_type & LOCK_WRITE) ? "RW " : "READ " | 2443 | ? (fl->fl_type & LOCK_WRITE) ? "RW " : "READ " |
2444 | : (fl->fl_type & LOCK_WRITE) ? "WRITE" : "NONE "); | 2444 | : (fl->fl_type & LOCK_WRITE) ? "WRITE" : "NONE "); |
2445 | } else { | 2445 | } else { |
2446 | seq_printf(f, "%s ", | 2446 | seq_printf(f, "%s ", |
2447 | (lease_breaking(fl)) | 2447 | (lease_breaking(fl)) |
2448 | ? (fl->fl_type == F_UNLCK) ? "UNLCK" : "READ " | 2448 | ? (fl->fl_type == F_UNLCK) ? "UNLCK" : "READ " |
2449 | : (fl->fl_type == F_WRLCK) ? "WRITE" : "READ "); | 2449 | : (fl->fl_type == F_WRLCK) ? "WRITE" : "READ "); |
2450 | } | 2450 | } |
2451 | if (inode) { | 2451 | if (inode) { |
2452 | #ifdef WE_CAN_BREAK_LSLK_NOW | 2452 | #ifdef WE_CAN_BREAK_LSLK_NOW |
2453 | seq_printf(f, "%d %s:%ld ", fl_pid, | 2453 | seq_printf(f, "%d %s:%ld ", fl_pid, |
2454 | inode->i_sb->s_id, inode->i_ino); | 2454 | inode->i_sb->s_id, inode->i_ino); |
2455 | #else | 2455 | #else |
2456 | /* userspace relies on this representation of dev_t ;-( */ | 2456 | /* userspace relies on this representation of dev_t ;-( */ |
2457 | seq_printf(f, "%d %02x:%02x:%ld ", fl_pid, | 2457 | seq_printf(f, "%d %02x:%02x:%ld ", fl_pid, |
2458 | MAJOR(inode->i_sb->s_dev), | 2458 | MAJOR(inode->i_sb->s_dev), |
2459 | MINOR(inode->i_sb->s_dev), inode->i_ino); | 2459 | MINOR(inode->i_sb->s_dev), inode->i_ino); |
2460 | #endif | 2460 | #endif |
2461 | } else { | 2461 | } else { |
2462 | seq_printf(f, "%d <none>:0 ", fl_pid); | 2462 | seq_printf(f, "%d <none>:0 ", fl_pid); |
2463 | } | 2463 | } |
2464 | if (IS_POSIX(fl)) { | 2464 | if (IS_POSIX(fl)) { |
2465 | if (fl->fl_end == OFFSET_MAX) | 2465 | if (fl->fl_end == OFFSET_MAX) |
2466 | seq_printf(f, "%Ld EOF\n", fl->fl_start); | 2466 | seq_printf(f, "%Ld EOF\n", fl->fl_start); |
2467 | else | 2467 | else |
2468 | seq_printf(f, "%Ld %Ld\n", fl->fl_start, fl->fl_end); | 2468 | seq_printf(f, "%Ld %Ld\n", fl->fl_start, fl->fl_end); |
2469 | } else { | 2469 | } else { |
2470 | seq_printf(f, "0 EOF\n"); | 2470 | seq_printf(f, "0 EOF\n"); |
2471 | } | 2471 | } |
2472 | } | 2472 | } |
2473 | 2473 | ||
2474 | static int locks_show(struct seq_file *f, void *v) | 2474 | static int locks_show(struct seq_file *f, void *v) |
2475 | { | 2475 | { |
2476 | struct locks_iterator *iter = f->private; | 2476 | struct locks_iterator *iter = f->private; |
2477 | struct file_lock *fl, *bfl; | 2477 | struct file_lock *fl, *bfl; |
2478 | 2478 | ||
2479 | fl = hlist_entry(v, struct file_lock, fl_link); | 2479 | fl = hlist_entry(v, struct file_lock, fl_link); |
2480 | 2480 | ||
2481 | lock_get_status(f, fl, iter->li_pos, ""); | 2481 | lock_get_status(f, fl, iter->li_pos, ""); |
2482 | 2482 | ||
2483 | list_for_each_entry(bfl, &fl->fl_block, fl_block) | 2483 | list_for_each_entry(bfl, &fl->fl_block, fl_block) |
2484 | lock_get_status(f, bfl, iter->li_pos, " ->"); | 2484 | lock_get_status(f, bfl, iter->li_pos, " ->"); |
2485 | 2485 | ||
2486 | return 0; | 2486 | return 0; |
2487 | } | 2487 | } |
2488 | 2488 | ||
2489 | static void *locks_start(struct seq_file *f, loff_t *pos) | 2489 | static void *locks_start(struct seq_file *f, loff_t *pos) |
2490 | __acquires(&blocked_lock_lock) | 2490 | __acquires(&blocked_lock_lock) |
2491 | { | 2491 | { |
2492 | struct locks_iterator *iter = f->private; | 2492 | struct locks_iterator *iter = f->private; |
2493 | 2493 | ||
2494 | iter->li_pos = *pos + 1; | 2494 | iter->li_pos = *pos + 1; |
2495 | lg_global_lock(&file_lock_lglock); | 2495 | lg_global_lock(&file_lock_lglock); |
2496 | spin_lock(&blocked_lock_lock); | 2496 | spin_lock(&blocked_lock_lock); |
2497 | return seq_hlist_start_percpu(&file_lock_list, &iter->li_cpu, *pos); | 2497 | return seq_hlist_start_percpu(&file_lock_list, &iter->li_cpu, *pos); |
2498 | } | 2498 | } |
2499 | 2499 | ||
2500 | static void *locks_next(struct seq_file *f, void *v, loff_t *pos) | 2500 | static void *locks_next(struct seq_file *f, void *v, loff_t *pos) |
2501 | { | 2501 | { |
2502 | struct locks_iterator *iter = f->private; | 2502 | struct locks_iterator *iter = f->private; |
2503 | 2503 | ||
2504 | ++iter->li_pos; | 2504 | ++iter->li_pos; |
2505 | return seq_hlist_next_percpu(v, &file_lock_list, &iter->li_cpu, pos); | 2505 | return seq_hlist_next_percpu(v, &file_lock_list, &iter->li_cpu, pos); |
2506 | } | 2506 | } |
2507 | 2507 | ||
2508 | static void locks_stop(struct seq_file *f, void *v) | 2508 | static void locks_stop(struct seq_file *f, void *v) |
2509 | __releases(&blocked_lock_lock) | 2509 | __releases(&blocked_lock_lock) |
2510 | { | 2510 | { |
2511 | spin_unlock(&blocked_lock_lock); | 2511 | spin_unlock(&blocked_lock_lock); |
2512 | lg_global_unlock(&file_lock_lglock); | 2512 | lg_global_unlock(&file_lock_lglock); |
2513 | } | 2513 | } |
2514 | 2514 | ||
2515 | static const struct seq_operations locks_seq_operations = { | 2515 | static const struct seq_operations locks_seq_operations = { |
2516 | .start = locks_start, | 2516 | .start = locks_start, |
2517 | .next = locks_next, | 2517 | .next = locks_next, |
2518 | .stop = locks_stop, | 2518 | .stop = locks_stop, |
2519 | .show = locks_show, | 2519 | .show = locks_show, |
2520 | }; | 2520 | }; |
2521 | 2521 | ||
2522 | static int locks_open(struct inode *inode, struct file *filp) | 2522 | static int locks_open(struct inode *inode, struct file *filp) |
2523 | { | 2523 | { |
2524 | return seq_open_private(filp, &locks_seq_operations, | 2524 | return seq_open_private(filp, &locks_seq_operations, |
2525 | sizeof(struct locks_iterator)); | 2525 | sizeof(struct locks_iterator)); |
2526 | } | 2526 | } |
2527 | 2527 | ||
2528 | static const struct file_operations proc_locks_operations = { | 2528 | static const struct file_operations proc_locks_operations = { |
2529 | .open = locks_open, | 2529 | .open = locks_open, |
2530 | .read = seq_read, | 2530 | .read = seq_read, |
2531 | .llseek = seq_lseek, | 2531 | .llseek = seq_lseek, |
2532 | .release = seq_release_private, | 2532 | .release = seq_release_private, |
2533 | }; | 2533 | }; |
2534 | 2534 | ||
2535 | static int __init proc_locks_init(void) | 2535 | static int __init proc_locks_init(void) |
2536 | { | 2536 | { |
2537 | proc_create("locks", 0, NULL, &proc_locks_operations); | 2537 | proc_create("locks", 0, NULL, &proc_locks_operations); |
2538 | return 0; | 2538 | return 0; |
2539 | } | 2539 | } |
2540 | module_init(proc_locks_init); | 2540 | module_init(proc_locks_init); |
2541 | #endif | 2541 | #endif |
2542 | 2542 | ||
2543 | /** | 2543 | /** |
2544 | * lock_may_read - checks that the region is free of locks | 2544 | * lock_may_read - checks that the region is free of locks |
2545 | * @inode: the inode that is being read | 2545 | * @inode: the inode that is being read |
2546 | * @start: the first byte to read | 2546 | * @start: the first byte to read |
2547 | * @len: the number of bytes to read | 2547 | * @len: the number of bytes to read |
2548 | * | 2548 | * |
2549 | * Emulates Windows locking requirements. Whole-file | 2549 | * Emulates Windows locking requirements. Whole-file |
2550 | * mandatory locks (share modes) can prohibit a read and | 2550 | * mandatory locks (share modes) can prohibit a read and |
2551 | * byte-range POSIX locks can prohibit a read if they overlap. | 2551 | * byte-range POSIX locks can prohibit a read if they overlap. |
2552 | * | 2552 | * |
2553 | * N.B. this function is only ever called | 2553 | * N.B. this function is only ever called |
2554 | * from knfsd and ownership of locks is never checked. | 2554 | * from knfsd and ownership of locks is never checked. |
2555 | */ | 2555 | */ |
2556 | int lock_may_read(struct inode *inode, loff_t start, unsigned long len) | 2556 | int lock_may_read(struct inode *inode, loff_t start, unsigned long len) |
2557 | { | 2557 | { |
2558 | struct file_lock *fl; | 2558 | struct file_lock *fl; |
2559 | int result = 1; | 2559 | int result = 1; |
2560 | 2560 | ||
2561 | spin_lock(&inode->i_lock); | 2561 | spin_lock(&inode->i_lock); |
2562 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { | 2562 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { |
2563 | if (IS_POSIX(fl)) { | 2563 | if (IS_POSIX(fl)) { |
2564 | if (fl->fl_type == F_RDLCK) | 2564 | if (fl->fl_type == F_RDLCK) |
2565 | continue; | 2565 | continue; |
2566 | if ((fl->fl_end < start) || (fl->fl_start > (start + len))) | 2566 | if ((fl->fl_end < start) || (fl->fl_start > (start + len))) |
2567 | continue; | 2567 | continue; |
2568 | } else if (IS_FLOCK(fl)) { | 2568 | } else if (IS_FLOCK(fl)) { |
2569 | if (!(fl->fl_type & LOCK_MAND)) | 2569 | if (!(fl->fl_type & LOCK_MAND)) |
2570 | continue; | 2570 | continue; |
2571 | if (fl->fl_type & LOCK_READ) | 2571 | if (fl->fl_type & LOCK_READ) |
2572 | continue; | 2572 | continue; |
2573 | } else | 2573 | } else |
2574 | continue; | 2574 | continue; |
2575 | result = 0; | 2575 | result = 0; |
2576 | break; | 2576 | break; |
2577 | } | 2577 | } |
2578 | spin_unlock(&inode->i_lock); | 2578 | spin_unlock(&inode->i_lock); |
2579 | return result; | 2579 | return result; |
2580 | } | 2580 | } |
2581 | 2581 | ||
2582 | EXPORT_SYMBOL(lock_may_read); | 2582 | EXPORT_SYMBOL(lock_may_read); |
2583 | 2583 | ||
2584 | /** | 2584 | /** |
2585 | * lock_may_write - checks that the region is free of locks | 2585 | * lock_may_write - checks that the region is free of locks |
2586 | * @inode: the inode that is being written | 2586 | * @inode: the inode that is being written |
2587 | * @start: the first byte to write | 2587 | * @start: the first byte to write |
2588 | * @len: the number of bytes to write | 2588 | * @len: the number of bytes to write |
2589 | * | 2589 | * |
2590 | * Emulates Windows locking requirements. Whole-file | 2590 | * Emulates Windows locking requirements. Whole-file |
2591 | * mandatory locks (share modes) can prohibit a write and | 2591 | * mandatory locks (share modes) can prohibit a write and |
2592 | * byte-range POSIX locks can prohibit a write if they overlap. | 2592 | * byte-range POSIX locks can prohibit a write if they overlap. |
2593 | * | 2593 | * |
2594 | * N.B. this function is only ever called | 2594 | * N.B. this function is only ever called |
2595 | * from knfsd and ownership of locks is never checked. | 2595 | * from knfsd and ownership of locks is never checked. |
2596 | */ | 2596 | */ |
2597 | int lock_may_write(struct inode *inode, loff_t start, unsigned long len) | 2597 | int lock_may_write(struct inode *inode, loff_t start, unsigned long len) |
2598 | { | 2598 | { |
2599 | struct file_lock *fl; | 2599 | struct file_lock *fl; |
2600 | int result = 1; | 2600 | int result = 1; |
2601 | 2601 | ||
2602 | spin_lock(&inode->i_lock); | 2602 | spin_lock(&inode->i_lock); |
2603 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { | 2603 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { |
2604 | if (IS_POSIX(fl)) { | 2604 | if (IS_POSIX(fl)) { |
2605 | if ((fl->fl_end < start) || (fl->fl_start > (start + len))) | 2605 | if ((fl->fl_end < start) || (fl->fl_start > (start + len))) |
2606 | continue; | 2606 | continue; |
2607 | } else if (IS_FLOCK(fl)) { | 2607 | } else if (IS_FLOCK(fl)) { |
2608 | if (!(fl->fl_type & LOCK_MAND)) | 2608 | if (!(fl->fl_type & LOCK_MAND)) |
2609 | continue; | 2609 | continue; |
2610 | if (fl->fl_type & LOCK_WRITE) | 2610 | if (fl->fl_type & LOCK_WRITE) |
2611 | continue; | 2611 | continue; |
2612 | } else | 2612 | } else |
2613 | continue; | 2613 | continue; |
2614 | result = 0; | 2614 | result = 0; |
2615 | break; | 2615 | break; |
2616 | } | 2616 | } |
2617 | spin_unlock(&inode->i_lock); | 2617 | spin_unlock(&inode->i_lock); |
2618 | return result; | 2618 | return result; |
2619 | } | 2619 | } |
2620 | 2620 | ||
2621 | EXPORT_SYMBOL(lock_may_write); | 2621 | EXPORT_SYMBOL(lock_may_write); |
2622 | 2622 | ||
2623 | static int __init filelock_init(void) | 2623 | static int __init filelock_init(void) |
2624 | { | 2624 | { |
2625 | int i; | 2625 | int i; |
2626 | 2626 | ||
2627 | filelock_cache = kmem_cache_create("file_lock_cache", | 2627 | filelock_cache = kmem_cache_create("file_lock_cache", |
2628 | sizeof(struct file_lock), 0, SLAB_PANIC, NULL); | 2628 | sizeof(struct file_lock), 0, SLAB_PANIC, NULL); |
2629 | 2629 | ||
2630 | lg_lock_init(&file_lock_lglock, "file_lock_lglock"); | 2630 | lg_lock_init(&file_lock_lglock, "file_lock_lglock"); |
2631 | 2631 | ||
2632 | for_each_possible_cpu(i) | 2632 | for_each_possible_cpu(i) |
2633 | INIT_HLIST_HEAD(per_cpu_ptr(&file_lock_list, i)); | 2633 | INIT_HLIST_HEAD(per_cpu_ptr(&file_lock_list, i)); |
2634 | 2634 | ||
2635 | return 0; | 2635 | return 0; |
2636 | } | 2636 | } |
2637 | 2637 | ||
2638 | core_initcall(filelock_init); | 2638 | core_initcall(filelock_init); |
2639 | 2639 |
include/linux/fs.h
1 | #ifndef _LINUX_FS_H | 1 | #ifndef _LINUX_FS_H |
2 | #define _LINUX_FS_H | 2 | #define _LINUX_FS_H |
3 | 3 | ||
4 | 4 | ||
5 | #include <linux/linkage.h> | 5 | #include <linux/linkage.h> |
6 | #include <linux/wait.h> | 6 | #include <linux/wait.h> |
7 | #include <linux/kdev_t.h> | 7 | #include <linux/kdev_t.h> |
8 | #include <linux/dcache.h> | 8 | #include <linux/dcache.h> |
9 | #include <linux/path.h> | 9 | #include <linux/path.h> |
10 | #include <linux/stat.h> | 10 | #include <linux/stat.h> |
11 | #include <linux/cache.h> | 11 | #include <linux/cache.h> |
12 | #include <linux/list.h> | 12 | #include <linux/list.h> |
13 | #include <linux/list_lru.h> | 13 | #include <linux/list_lru.h> |
14 | #include <linux/llist.h> | 14 | #include <linux/llist.h> |
15 | #include <linux/radix-tree.h> | 15 | #include <linux/radix-tree.h> |
16 | #include <linux/rbtree.h> | 16 | #include <linux/rbtree.h> |
17 | #include <linux/init.h> | 17 | #include <linux/init.h> |
18 | #include <linux/pid.h> | 18 | #include <linux/pid.h> |
19 | #include <linux/bug.h> | 19 | #include <linux/bug.h> |
20 | #include <linux/mutex.h> | 20 | #include <linux/mutex.h> |
21 | #include <linux/capability.h> | 21 | #include <linux/capability.h> |
22 | #include <linux/semaphore.h> | 22 | #include <linux/semaphore.h> |
23 | #include <linux/fiemap.h> | 23 | #include <linux/fiemap.h> |
24 | #include <linux/rculist_bl.h> | 24 | #include <linux/rculist_bl.h> |
25 | #include <linux/atomic.h> | 25 | #include <linux/atomic.h> |
26 | #include <linux/shrinker.h> | 26 | #include <linux/shrinker.h> |
27 | #include <linux/migrate_mode.h> | 27 | #include <linux/migrate_mode.h> |
28 | #include <linux/uidgid.h> | 28 | #include <linux/uidgid.h> |
29 | #include <linux/lockdep.h> | 29 | #include <linux/lockdep.h> |
30 | #include <linux/percpu-rwsem.h> | 30 | #include <linux/percpu-rwsem.h> |
31 | #include <linux/blk_types.h> | 31 | #include <linux/blk_types.h> |
32 | 32 | ||
33 | #include <asm/byteorder.h> | 33 | #include <asm/byteorder.h> |
34 | #include <uapi/linux/fs.h> | 34 | #include <uapi/linux/fs.h> |
35 | 35 | ||
36 | struct export_operations; | 36 | struct export_operations; |
37 | struct hd_geometry; | 37 | struct hd_geometry; |
38 | struct iovec; | 38 | struct iovec; |
39 | struct nameidata; | 39 | struct nameidata; |
40 | struct kiocb; | 40 | struct kiocb; |
41 | struct kobject; | 41 | struct kobject; |
42 | struct pipe_inode_info; | 42 | struct pipe_inode_info; |
43 | struct poll_table_struct; | 43 | struct poll_table_struct; |
44 | struct kstatfs; | 44 | struct kstatfs; |
45 | struct vm_area_struct; | 45 | struct vm_area_struct; |
46 | struct vfsmount; | 46 | struct vfsmount; |
47 | struct cred; | 47 | struct cred; |
48 | struct swap_info_struct; | 48 | struct swap_info_struct; |
49 | struct seq_file; | 49 | struct seq_file; |
50 | struct workqueue_struct; | 50 | struct workqueue_struct; |
51 | struct iov_iter; | 51 | struct iov_iter; |
52 | 52 | ||
53 | extern void __init inode_init(void); | 53 | extern void __init inode_init(void); |
54 | extern void __init inode_init_early(void); | 54 | extern void __init inode_init_early(void); |
55 | extern void __init files_init(unsigned long); | 55 | extern void __init files_init(unsigned long); |
56 | 56 | ||
57 | extern struct files_stat_struct files_stat; | 57 | extern struct files_stat_struct files_stat; |
58 | extern unsigned long get_max_files(void); | 58 | extern unsigned long get_max_files(void); |
59 | extern int sysctl_nr_open; | 59 | extern int sysctl_nr_open; |
60 | extern struct inodes_stat_t inodes_stat; | 60 | extern struct inodes_stat_t inodes_stat; |
61 | extern int leases_enable, lease_break_time; | 61 | extern int leases_enable, lease_break_time; |
62 | extern int sysctl_protected_symlinks; | 62 | extern int sysctl_protected_symlinks; |
63 | extern int sysctl_protected_hardlinks; | 63 | extern int sysctl_protected_hardlinks; |
64 | 64 | ||
65 | struct buffer_head; | 65 | struct buffer_head; |
66 | typedef int (get_block_t)(struct inode *inode, sector_t iblock, | 66 | typedef int (get_block_t)(struct inode *inode, sector_t iblock, |
67 | struct buffer_head *bh_result, int create); | 67 | struct buffer_head *bh_result, int create); |
68 | typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, | 68 | typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, |
69 | ssize_t bytes, void *private); | 69 | ssize_t bytes, void *private); |
70 | 70 | ||
71 | #define MAY_EXEC 0x00000001 | 71 | #define MAY_EXEC 0x00000001 |
72 | #define MAY_WRITE 0x00000002 | 72 | #define MAY_WRITE 0x00000002 |
73 | #define MAY_READ 0x00000004 | 73 | #define MAY_READ 0x00000004 |
74 | #define MAY_APPEND 0x00000008 | 74 | #define MAY_APPEND 0x00000008 |
75 | #define MAY_ACCESS 0x00000010 | 75 | #define MAY_ACCESS 0x00000010 |
76 | #define MAY_OPEN 0x00000020 | 76 | #define MAY_OPEN 0x00000020 |
77 | #define MAY_CHDIR 0x00000040 | 77 | #define MAY_CHDIR 0x00000040 |
78 | /* called from RCU mode, don't block */ | 78 | /* called from RCU mode, don't block */ |
79 | #define MAY_NOT_BLOCK 0x00000080 | 79 | #define MAY_NOT_BLOCK 0x00000080 |
80 | 80 | ||
81 | /* | 81 | /* |
82 | * flags in file.f_mode. Note that FMODE_READ and FMODE_WRITE must correspond | 82 | * flags in file.f_mode. Note that FMODE_READ and FMODE_WRITE must correspond |
83 | * to O_WRONLY and O_RDWR via the strange trick in __dentry_open() | 83 | * to O_WRONLY and O_RDWR via the strange trick in __dentry_open() |
84 | */ | 84 | */ |
85 | 85 | ||
86 | /* file is open for reading */ | 86 | /* file is open for reading */ |
87 | #define FMODE_READ ((__force fmode_t)0x1) | 87 | #define FMODE_READ ((__force fmode_t)0x1) |
88 | /* file is open for writing */ | 88 | /* file is open for writing */ |
89 | #define FMODE_WRITE ((__force fmode_t)0x2) | 89 | #define FMODE_WRITE ((__force fmode_t)0x2) |
90 | /* file is seekable */ | 90 | /* file is seekable */ |
91 | #define FMODE_LSEEK ((__force fmode_t)0x4) | 91 | #define FMODE_LSEEK ((__force fmode_t)0x4) |
92 | /* file can be accessed using pread */ | 92 | /* file can be accessed using pread */ |
93 | #define FMODE_PREAD ((__force fmode_t)0x8) | 93 | #define FMODE_PREAD ((__force fmode_t)0x8) |
94 | /* file can be accessed using pwrite */ | 94 | /* file can be accessed using pwrite */ |
95 | #define FMODE_PWRITE ((__force fmode_t)0x10) | 95 | #define FMODE_PWRITE ((__force fmode_t)0x10) |
96 | /* File is opened for execution with sys_execve / sys_uselib */ | 96 | /* File is opened for execution with sys_execve / sys_uselib */ |
97 | #define FMODE_EXEC ((__force fmode_t)0x20) | 97 | #define FMODE_EXEC ((__force fmode_t)0x20) |
98 | /* File is opened with O_NDELAY (only set for block devices) */ | 98 | /* File is opened with O_NDELAY (only set for block devices) */ |
99 | #define FMODE_NDELAY ((__force fmode_t)0x40) | 99 | #define FMODE_NDELAY ((__force fmode_t)0x40) |
100 | /* File is opened with O_EXCL (only set for block devices) */ | 100 | /* File is opened with O_EXCL (only set for block devices) */ |
101 | #define FMODE_EXCL ((__force fmode_t)0x80) | 101 | #define FMODE_EXCL ((__force fmode_t)0x80) |
102 | /* File is opened using open(.., 3, ..) and is writeable only for ioctls | 102 | /* File is opened using open(.., 3, ..) and is writeable only for ioctls |
103 | (special hack for floppy.c) */ | 103 | (special hack for floppy.c) */ |
104 | #define FMODE_WRITE_IOCTL ((__force fmode_t)0x100) | 104 | #define FMODE_WRITE_IOCTL ((__force fmode_t)0x100) |
105 | /* 32bit hashes as llseek() offset (for directories) */ | 105 | /* 32bit hashes as llseek() offset (for directories) */ |
106 | #define FMODE_32BITHASH ((__force fmode_t)0x200) | 106 | #define FMODE_32BITHASH ((__force fmode_t)0x200) |
107 | /* 64bit hashes as llseek() offset (for directories) */ | 107 | /* 64bit hashes as llseek() offset (for directories) */ |
108 | #define FMODE_64BITHASH ((__force fmode_t)0x400) | 108 | #define FMODE_64BITHASH ((__force fmode_t)0x400) |
109 | 109 | ||
110 | /* | 110 | /* |
111 | * Don't update ctime and mtime. | 111 | * Don't update ctime and mtime. |
112 | * | 112 | * |
113 | * Currently a special hack for the XFS open_by_handle ioctl, but we'll | 113 | * Currently a special hack for the XFS open_by_handle ioctl, but we'll |
114 | * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon. | 114 | * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon. |
115 | */ | 115 | */ |
116 | #define FMODE_NOCMTIME ((__force fmode_t)0x800) | 116 | #define FMODE_NOCMTIME ((__force fmode_t)0x800) |
117 | 117 | ||
118 | /* Expect random access pattern */ | 118 | /* Expect random access pattern */ |
119 | #define FMODE_RANDOM ((__force fmode_t)0x1000) | 119 | #define FMODE_RANDOM ((__force fmode_t)0x1000) |
120 | 120 | ||
121 | /* File is huge (eg. /dev/kmem): treat loff_t as unsigned */ | 121 | /* File is huge (eg. /dev/kmem): treat loff_t as unsigned */ |
122 | #define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000) | 122 | #define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000) |
123 | 123 | ||
124 | /* File is opened with O_PATH; almost nothing can be done with it */ | 124 | /* File is opened with O_PATH; almost nothing can be done with it */ |
125 | #define FMODE_PATH ((__force fmode_t)0x4000) | 125 | #define FMODE_PATH ((__force fmode_t)0x4000) |
126 | 126 | ||
127 | /* File needs atomic accesses to f_pos */ | 127 | /* File needs atomic accesses to f_pos */ |
128 | #define FMODE_ATOMIC_POS ((__force fmode_t)0x8000) | 128 | #define FMODE_ATOMIC_POS ((__force fmode_t)0x8000) |
129 | /* Write access to underlying fs */ | 129 | /* Write access to underlying fs */ |
130 | #define FMODE_WRITER ((__force fmode_t)0x10000) | 130 | #define FMODE_WRITER ((__force fmode_t)0x10000) |
131 | 131 | ||
132 | /* File was opened by fanotify and shouldn't generate fanotify events */ | 132 | /* File was opened by fanotify and shouldn't generate fanotify events */ |
133 | #define FMODE_NONOTIFY ((__force fmode_t)0x1000000) | 133 | #define FMODE_NONOTIFY ((__force fmode_t)0x1000000) |
134 | 134 | ||
135 | /* | 135 | /* |
136 | * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector | 136 | * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector |
137 | * that indicates that they should check the contents of the iovec are | 137 | * that indicates that they should check the contents of the iovec are |
138 | * valid, but not check the memory that the iovec elements | 138 | * valid, but not check the memory that the iovec elements |
139 | * point to. | 139 | * point to. |
140 | */ | 140 | */ |
141 | #define CHECK_IOVEC_ONLY -1 | 141 | #define CHECK_IOVEC_ONLY -1 |
142 | 142 | ||
143 | /* | 143 | /* |
144 | * The below are the various read and write types that we support. Some of | 144 | * The below are the various read and write types that we support. Some of |
145 | * them include behavioral modifiers that send information down to the | 145 | * them include behavioral modifiers that send information down to the |
146 | * block layer and IO scheduler. Terminology: | 146 | * block layer and IO scheduler. Terminology: |
147 | * | 147 | * |
148 | * The block layer uses device plugging to defer IO a little bit, in | 148 | * The block layer uses device plugging to defer IO a little bit, in |
149 | * the hope that we will see more IO very shortly. This increases | 149 | * the hope that we will see more IO very shortly. This increases |
150 | * coalescing of adjacent IO and thus reduces the number of IOs we | 150 | * coalescing of adjacent IO and thus reduces the number of IOs we |
151 | * have to send to the device. It also allows for better queuing, | 151 | * have to send to the device. It also allows for better queuing, |
152 | * if the IO isn't mergeable. If the caller is going to be waiting | 152 | * if the IO isn't mergeable. If the caller is going to be waiting |
153 | * for the IO, then he must ensure that the device is unplugged so | 153 | * for the IO, then he must ensure that the device is unplugged so |
154 | * that the IO is dispatched to the driver. | 154 | * that the IO is dispatched to the driver. |
155 | * | 155 | * |
156 | * All IO is handled async in Linux. This is fine for background | 156 | * All IO is handled async in Linux. This is fine for background |
157 | * writes, but for reads or writes that someone waits for completion | 157 | * writes, but for reads or writes that someone waits for completion |
158 | * on, we want to notify the block layer and IO scheduler so that they | 158 | * on, we want to notify the block layer and IO scheduler so that they |
159 | * know about it. That allows them to make better scheduling | 159 | * know about it. That allows them to make better scheduling |
160 | * decisions. So when the below references 'sync' and 'async', it | 160 | * decisions. So when the below references 'sync' and 'async', it |
161 | * is referencing this priority hint. | 161 | * is referencing this priority hint. |
162 | * | 162 | * |
163 | * With that in mind, the available types are: | 163 | * With that in mind, the available types are: |
164 | * | 164 | * |
165 | * READ A normal read operation. Device will be plugged. | 165 | * READ A normal read operation. Device will be plugged. |
166 | * READ_SYNC A synchronous read. Device is not plugged, caller can | 166 | * READ_SYNC A synchronous read. Device is not plugged, caller can |
167 | * immediately wait on this read without caring about | 167 | * immediately wait on this read without caring about |
168 | * unplugging. | 168 | * unplugging. |
169 | * READA Used for read-ahead operations. Lower priority, and the | 169 | * READA Used for read-ahead operations. Lower priority, and the |
170 | * block layer could (in theory) choose to ignore this | 170 | * block layer could (in theory) choose to ignore this |
171 | * request if it runs into resource problems. | 171 | * request if it runs into resource problems. |
172 | * WRITE A normal async write. Device will be plugged. | 172 | * WRITE A normal async write. Device will be plugged. |
173 | * WRITE_SYNC Synchronous write. Identical to WRITE, but passes down | 173 | * WRITE_SYNC Synchronous write. Identical to WRITE, but passes down |
174 | * the hint that someone will be waiting on this IO | 174 | * the hint that someone will be waiting on this IO |
175 | * shortly. The write equivalent of READ_SYNC. | 175 | * shortly. The write equivalent of READ_SYNC. |
176 | * WRITE_ODIRECT Special case write for O_DIRECT only. | 176 | * WRITE_ODIRECT Special case write for O_DIRECT only. |
177 | * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush. | 177 | * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush. |
178 | * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on | 178 | * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on |
179 | * non-volatile media on completion. | 179 | * non-volatile media on completion. |
180 | * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded | 180 | * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded |
181 | * by a cache flush and data is guaranteed to be on | 181 | * by a cache flush and data is guaranteed to be on |
182 | * non-volatile media on completion. | 182 | * non-volatile media on completion. |
183 | * | 183 | * |
184 | */ | 184 | */ |
185 | #define RW_MASK REQ_WRITE | 185 | #define RW_MASK REQ_WRITE |
186 | #define RWA_MASK REQ_RAHEAD | 186 | #define RWA_MASK REQ_RAHEAD |
187 | 187 | ||
188 | #define READ 0 | 188 | #define READ 0 |
189 | #define WRITE RW_MASK | 189 | #define WRITE RW_MASK |
190 | #define READA RWA_MASK | 190 | #define READA RWA_MASK |
191 | #define KERNEL_READ (READ|REQ_KERNEL) | 191 | #define KERNEL_READ (READ|REQ_KERNEL) |
192 | #define KERNEL_WRITE (WRITE|REQ_KERNEL) | 192 | #define KERNEL_WRITE (WRITE|REQ_KERNEL) |
193 | 193 | ||
194 | #define READ_SYNC (READ | REQ_SYNC) | 194 | #define READ_SYNC (READ | REQ_SYNC) |
195 | #define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE) | 195 | #define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE) |
196 | #define WRITE_ODIRECT (WRITE | REQ_SYNC) | 196 | #define WRITE_ODIRECT (WRITE | REQ_SYNC) |
197 | #define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH) | 197 | #define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH) |
198 | #define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FUA) | 198 | #define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FUA) |
199 | #define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) | 199 | #define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) |
200 | 200 | ||
201 | /* | 201 | /* |
202 | * Attribute flags. These should be or-ed together to figure out what | 202 | * Attribute flags. These should be or-ed together to figure out what |
203 | * has been changed! | 203 | * has been changed! |
204 | */ | 204 | */ |
205 | #define ATTR_MODE (1 << 0) | 205 | #define ATTR_MODE (1 << 0) |
206 | #define ATTR_UID (1 << 1) | 206 | #define ATTR_UID (1 << 1) |
207 | #define ATTR_GID (1 << 2) | 207 | #define ATTR_GID (1 << 2) |
208 | #define ATTR_SIZE (1 << 3) | 208 | #define ATTR_SIZE (1 << 3) |
209 | #define ATTR_ATIME (1 << 4) | 209 | #define ATTR_ATIME (1 << 4) |
210 | #define ATTR_MTIME (1 << 5) | 210 | #define ATTR_MTIME (1 << 5) |
211 | #define ATTR_CTIME (1 << 6) | 211 | #define ATTR_CTIME (1 << 6) |
212 | #define ATTR_ATIME_SET (1 << 7) | 212 | #define ATTR_ATIME_SET (1 << 7) |
213 | #define ATTR_MTIME_SET (1 << 8) | 213 | #define ATTR_MTIME_SET (1 << 8) |
214 | #define ATTR_FORCE (1 << 9) /* Not a change, but a change it */ | 214 | #define ATTR_FORCE (1 << 9) /* Not a change, but a change it */ |
215 | #define ATTR_ATTR_FLAG (1 << 10) | 215 | #define ATTR_ATTR_FLAG (1 << 10) |
216 | #define ATTR_KILL_SUID (1 << 11) | 216 | #define ATTR_KILL_SUID (1 << 11) |
217 | #define ATTR_KILL_SGID (1 << 12) | 217 | #define ATTR_KILL_SGID (1 << 12) |
218 | #define ATTR_FILE (1 << 13) | 218 | #define ATTR_FILE (1 << 13) |
219 | #define ATTR_KILL_PRIV (1 << 14) | 219 | #define ATTR_KILL_PRIV (1 << 14) |
220 | #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */ | 220 | #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */ |
221 | #define ATTR_TIMES_SET (1 << 16) | 221 | #define ATTR_TIMES_SET (1 << 16) |
222 | 222 | ||
223 | /* | 223 | /* |
224 | * This is the Inode Attributes structure, used for notify_change(). It | 224 | * This is the Inode Attributes structure, used for notify_change(). It |
225 | * uses the above definitions as flags, to know which values have changed. | 225 | * uses the above definitions as flags, to know which values have changed. |
226 | * Also, in this manner, a Filesystem can look at only the values it cares | 226 | * Also, in this manner, a Filesystem can look at only the values it cares |
227 | * about. Basically, these are the attributes that the VFS layer can | 227 | * about. Basically, these are the attributes that the VFS layer can |
228 | * request to change from the FS layer. | 228 | * request to change from the FS layer. |
229 | * | 229 | * |
230 | * Derek Atkins <warlord@MIT.EDU> 94-10-20 | 230 | * Derek Atkins <warlord@MIT.EDU> 94-10-20 |
231 | */ | 231 | */ |
232 | struct iattr { | 232 | struct iattr { |
233 | unsigned int ia_valid; | 233 | unsigned int ia_valid; |
234 | umode_t ia_mode; | 234 | umode_t ia_mode; |
235 | kuid_t ia_uid; | 235 | kuid_t ia_uid; |
236 | kgid_t ia_gid; | 236 | kgid_t ia_gid; |
237 | loff_t ia_size; | 237 | loff_t ia_size; |
238 | struct timespec ia_atime; | 238 | struct timespec ia_atime; |
239 | struct timespec ia_mtime; | 239 | struct timespec ia_mtime; |
240 | struct timespec ia_ctime; | 240 | struct timespec ia_ctime; |
241 | 241 | ||
242 | /* | 242 | /* |
243 | * Not an attribute, but auxiliary info for filesystems wanting to | 243 | * Not an attribute, but auxiliary info for filesystems wanting to |
244 | * implement an ftruncate()-like method. NOTE: filesystem should | 244 | * implement an ftruncate()-like method. NOTE: filesystem should |
245 | * check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL). | 245 | * check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL). |
246 | */ | 246 | */ |
247 | struct file *ia_file; | 247 | struct file *ia_file; |
248 | }; | 248 | }; |
249 | 249 | ||
250 | /* | 250 | /* |
251 | * Includes for diskquotas. | 251 | * Includes for diskquotas. |
252 | */ | 252 | */ |
253 | #include <linux/quota.h> | 253 | #include <linux/quota.h> |
254 | 254 | ||
255 | /** | 255 | /** |
256 | * enum positive_aop_returns - aop return codes with specific semantics | 256 | * enum positive_aop_returns - aop return codes with specific semantics |
257 | * | 257 | * |
258 | * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has | 258 | * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has |
259 | * completed, that the page is still locked, and | 259 | * completed, that the page is still locked, and |
260 | * should be considered active. The VM uses this hint | 260 | * should be considered active. The VM uses this hint |
261 | * to return the page to the active list -- it won't | 261 | * to return the page to the active list -- it won't |
262 | * be a candidate for writeback again in the near | 262 | * be a candidate for writeback again in the near |
263 | * future. Other callers must be careful to unlock | 263 | * future. Other callers must be careful to unlock |
264 | * the page if they get this return. Returned by | 264 | * the page if they get this return. Returned by |
265 | * writepage(); | 265 | * writepage(); |
266 | * | 266 | * |
267 | * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has | 267 | * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has |
268 | * unlocked it and the page might have been truncated. | 268 | * unlocked it and the page might have been truncated. |
269 | * The caller should back up to acquiring a new page and | 269 | * The caller should back up to acquiring a new page and |
270 | * trying again. The aop will be taking reasonable | 270 | * trying again. The aop will be taking reasonable |
271 | * precautions not to livelock. If the caller held a page | 271 | * precautions not to livelock. If the caller held a page |
272 | * reference, it should drop it before retrying. Returned | 272 | * reference, it should drop it before retrying. Returned |
273 | * by readpage(). | 273 | * by readpage(). |
274 | * | 274 | * |
275 | * address_space_operation functions return these large constants to indicate | 275 | * address_space_operation functions return these large constants to indicate |
276 | * special semantics to the caller. These are much larger than the bytes in a | 276 | * special semantics to the caller. These are much larger than the bytes in a |
277 | * page to allow for functions that return the number of bytes operated on in a | 277 | * page to allow for functions that return the number of bytes operated on in a |
278 | * given page. | 278 | * given page. |
279 | */ | 279 | */ |
280 | 280 | ||
281 | enum positive_aop_returns { | 281 | enum positive_aop_returns { |
282 | AOP_WRITEPAGE_ACTIVATE = 0x80000, | 282 | AOP_WRITEPAGE_ACTIVATE = 0x80000, |
283 | AOP_TRUNCATED_PAGE = 0x80001, | 283 | AOP_TRUNCATED_PAGE = 0x80001, |
284 | }; | 284 | }; |
285 | 285 | ||
286 | #define AOP_FLAG_UNINTERRUPTIBLE 0x0001 /* will not do a short write */ | 286 | #define AOP_FLAG_UNINTERRUPTIBLE 0x0001 /* will not do a short write */ |
287 | #define AOP_FLAG_CONT_EXPAND 0x0002 /* called from cont_expand */ | 287 | #define AOP_FLAG_CONT_EXPAND 0x0002 /* called from cont_expand */ |
288 | #define AOP_FLAG_NOFS 0x0004 /* used by filesystem to direct | 288 | #define AOP_FLAG_NOFS 0x0004 /* used by filesystem to direct |
289 | * helper code (eg buffer layer) | 289 | * helper code (eg buffer layer) |
290 | * to clear GFP_FS from alloc */ | 290 | * to clear GFP_FS from alloc */ |
291 | 291 | ||
292 | /* | 292 | /* |
293 | * oh the beauties of C type declarations. | 293 | * oh the beauties of C type declarations. |
294 | */ | 294 | */ |
295 | struct page; | 295 | struct page; |
296 | struct address_space; | 296 | struct address_space; |
297 | struct writeback_control; | 297 | struct writeback_control; |
298 | 298 | ||
299 | /* | 299 | /* |
300 | * "descriptor" for what we're up to with a read. | 300 | * "descriptor" for what we're up to with a read. |
301 | * This allows us to use the same read code yet | 301 | * This allows us to use the same read code yet |
302 | * have multiple different users of the data that | 302 | * have multiple different users of the data that |
303 | * we read from a file. | 303 | * we read from a file. |
304 | * | 304 | * |
305 | * The simplest case just copies the data to user | 305 | * The simplest case just copies the data to user |
306 | * mode. | 306 | * mode. |
307 | */ | 307 | */ |
308 | typedef struct { | 308 | typedef struct { |
309 | size_t written; | 309 | size_t written; |
310 | size_t count; | 310 | size_t count; |
311 | union { | 311 | union { |
312 | char __user *buf; | 312 | char __user *buf; |
313 | void *data; | 313 | void *data; |
314 | } arg; | 314 | } arg; |
315 | int error; | 315 | int error; |
316 | } read_descriptor_t; | 316 | } read_descriptor_t; |
317 | 317 | ||
318 | typedef int (*read_actor_t)(read_descriptor_t *, struct page *, | 318 | typedef int (*read_actor_t)(read_descriptor_t *, struct page *, |
319 | unsigned long, unsigned long); | 319 | unsigned long, unsigned long); |
320 | 320 | ||
321 | struct address_space_operations { | 321 | struct address_space_operations { |
322 | int (*writepage)(struct page *page, struct writeback_control *wbc); | 322 | int (*writepage)(struct page *page, struct writeback_control *wbc); |
323 | int (*readpage)(struct file *, struct page *); | 323 | int (*readpage)(struct file *, struct page *); |
324 | 324 | ||
325 | /* Write back some dirty pages from this mapping. */ | 325 | /* Write back some dirty pages from this mapping. */ |
326 | int (*writepages)(struct address_space *, struct writeback_control *); | 326 | int (*writepages)(struct address_space *, struct writeback_control *); |
327 | 327 | ||
328 | /* Set a page dirty. Return true if this dirtied it */ | 328 | /* Set a page dirty. Return true if this dirtied it */ |
329 | int (*set_page_dirty)(struct page *page); | 329 | int (*set_page_dirty)(struct page *page); |
330 | 330 | ||
331 | int (*readpages)(struct file *filp, struct address_space *mapping, | 331 | int (*readpages)(struct file *filp, struct address_space *mapping, |
332 | struct list_head *pages, unsigned nr_pages); | 332 | struct list_head *pages, unsigned nr_pages); |
333 | 333 | ||
334 | int (*write_begin)(struct file *, struct address_space *mapping, | 334 | int (*write_begin)(struct file *, struct address_space *mapping, |
335 | loff_t pos, unsigned len, unsigned flags, | 335 | loff_t pos, unsigned len, unsigned flags, |
336 | struct page **pagep, void **fsdata); | 336 | struct page **pagep, void **fsdata); |
337 | int (*write_end)(struct file *, struct address_space *mapping, | 337 | int (*write_end)(struct file *, struct address_space *mapping, |
338 | loff_t pos, unsigned len, unsigned copied, | 338 | loff_t pos, unsigned len, unsigned copied, |
339 | struct page *page, void *fsdata); | 339 | struct page *page, void *fsdata); |
340 | 340 | ||
341 | /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ | 341 | /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ |
342 | sector_t (*bmap)(struct address_space *, sector_t); | 342 | sector_t (*bmap)(struct address_space *, sector_t); |
343 | void (*invalidatepage) (struct page *, unsigned int, unsigned int); | 343 | void (*invalidatepage) (struct page *, unsigned int, unsigned int); |
344 | int (*releasepage) (struct page *, gfp_t); | 344 | int (*releasepage) (struct page *, gfp_t); |
345 | void (*freepage)(struct page *); | 345 | void (*freepage)(struct page *); |
346 | ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, | 346 | ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, |
347 | loff_t offset, unsigned long nr_segs); | 347 | loff_t offset, unsigned long nr_segs); |
348 | int (*get_xip_mem)(struct address_space *, pgoff_t, int, | 348 | int (*get_xip_mem)(struct address_space *, pgoff_t, int, |
349 | void **, unsigned long *); | 349 | void **, unsigned long *); |
350 | /* | 350 | /* |
351 | * migrate the contents of a page to the specified target. If | 351 | * migrate the contents of a page to the specified target. If |
352 | * migrate_mode is MIGRATE_ASYNC, it must not block. | 352 | * migrate_mode is MIGRATE_ASYNC, it must not block. |
353 | */ | 353 | */ |
354 | int (*migratepage) (struct address_space *, | 354 | int (*migratepage) (struct address_space *, |
355 | struct page *, struct page *, enum migrate_mode); | 355 | struct page *, struct page *, enum migrate_mode); |
356 | int (*launder_page) (struct page *); | 356 | int (*launder_page) (struct page *); |
357 | int (*is_partially_uptodate) (struct page *, unsigned long, | 357 | int (*is_partially_uptodate) (struct page *, unsigned long, |
358 | unsigned long); | 358 | unsigned long); |
359 | void (*is_dirty_writeback) (struct page *, bool *, bool *); | 359 | void (*is_dirty_writeback) (struct page *, bool *, bool *); |
360 | int (*error_remove_page)(struct address_space *, struct page *); | 360 | int (*error_remove_page)(struct address_space *, struct page *); |
361 | 361 | ||
362 | /* swapfile support */ | 362 | /* swapfile support */ |
363 | int (*swap_activate)(struct swap_info_struct *sis, struct file *file, | 363 | int (*swap_activate)(struct swap_info_struct *sis, struct file *file, |
364 | sector_t *span); | 364 | sector_t *span); |
365 | void (*swap_deactivate)(struct file *file); | 365 | void (*swap_deactivate)(struct file *file); |
366 | }; | 366 | }; |
367 | 367 | ||
368 | extern const struct address_space_operations empty_aops; | 368 | extern const struct address_space_operations empty_aops; |
369 | 369 | ||
370 | /* | 370 | /* |
371 | * pagecache_write_begin/pagecache_write_end must be used by general code | 371 | * pagecache_write_begin/pagecache_write_end must be used by general code |
372 | * to write into the pagecache. | 372 | * to write into the pagecache. |
373 | */ | 373 | */ |
374 | int pagecache_write_begin(struct file *, struct address_space *mapping, | 374 | int pagecache_write_begin(struct file *, struct address_space *mapping, |
375 | loff_t pos, unsigned len, unsigned flags, | 375 | loff_t pos, unsigned len, unsigned flags, |
376 | struct page **pagep, void **fsdata); | 376 | struct page **pagep, void **fsdata); |
377 | 377 | ||
378 | int pagecache_write_end(struct file *, struct address_space *mapping, | 378 | int pagecache_write_end(struct file *, struct address_space *mapping, |
379 | loff_t pos, unsigned len, unsigned copied, | 379 | loff_t pos, unsigned len, unsigned copied, |
380 | struct page *page, void *fsdata); | 380 | struct page *page, void *fsdata); |
381 | 381 | ||
382 | struct backing_dev_info; | 382 | struct backing_dev_info; |
383 | struct address_space { | 383 | struct address_space { |
384 | struct inode *host; /* owner: inode, block_device */ | 384 | struct inode *host; /* owner: inode, block_device */ |
385 | struct radix_tree_root page_tree; /* radix tree of all pages */ | 385 | struct radix_tree_root page_tree; /* radix tree of all pages */ |
386 | spinlock_t tree_lock; /* and lock protecting it */ | 386 | spinlock_t tree_lock; /* and lock protecting it */ |
387 | unsigned int i_mmap_writable;/* count VM_SHARED mappings */ | 387 | unsigned int i_mmap_writable;/* count VM_SHARED mappings */ |
388 | struct rb_root i_mmap; /* tree of private and shared mappings */ | 388 | struct rb_root i_mmap; /* tree of private and shared mappings */ |
389 | struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ | 389 | struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ |
390 | struct mutex i_mmap_mutex; /* protect tree, count, list */ | 390 | struct mutex i_mmap_mutex; /* protect tree, count, list */ |
391 | /* Protected by tree_lock together with the radix tree */ | 391 | /* Protected by tree_lock together with the radix tree */ |
392 | unsigned long nrpages; /* number of total pages */ | 392 | unsigned long nrpages; /* number of total pages */ |
393 | unsigned long nrshadows; /* number of shadow entries */ | 393 | unsigned long nrshadows; /* number of shadow entries */ |
394 | pgoff_t writeback_index;/* writeback starts here */ | 394 | pgoff_t writeback_index;/* writeback starts here */ |
395 | const struct address_space_operations *a_ops; /* methods */ | 395 | const struct address_space_operations *a_ops; /* methods */ |
396 | unsigned long flags; /* error bits/gfp mask */ | 396 | unsigned long flags; /* error bits/gfp mask */ |
397 | struct backing_dev_info *backing_dev_info; /* device readahead, etc */ | 397 | struct backing_dev_info *backing_dev_info; /* device readahead, etc */ |
398 | spinlock_t private_lock; /* for use by the address_space */ | 398 | spinlock_t private_lock; /* for use by the address_space */ |
399 | struct list_head private_list; /* ditto */ | 399 | struct list_head private_list; /* ditto */ |
400 | void *private_data; /* ditto */ | 400 | void *private_data; /* ditto */ |
401 | } __attribute__((aligned(sizeof(long)))); | 401 | } __attribute__((aligned(sizeof(long)))); |
402 | /* | 402 | /* |
403 | * On most architectures that alignment is already the case; but | 403 | * On most architectures that alignment is already the case; but |
404 | * must be enforced here for CRIS, to let the least significant bit | 404 | * must be enforced here for CRIS, to let the least significant bit |
405 | * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON. | 405 | * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON. |
406 | */ | 406 | */ |
407 | struct request_queue; | 407 | struct request_queue; |
408 | 408 | ||
409 | struct block_device { | 409 | struct block_device { |
410 | dev_t bd_dev; /* not a kdev_t - it's a search key */ | 410 | dev_t bd_dev; /* not a kdev_t - it's a search key */ |
411 | int bd_openers; | 411 | int bd_openers; |
412 | struct inode * bd_inode; /* will die */ | 412 | struct inode * bd_inode; /* will die */ |
413 | struct super_block * bd_super; | 413 | struct super_block * bd_super; |
414 | struct mutex bd_mutex; /* open/close mutex */ | 414 | struct mutex bd_mutex; /* open/close mutex */ |
415 | struct list_head bd_inodes; | 415 | struct list_head bd_inodes; |
416 | void * bd_claiming; | 416 | void * bd_claiming; |
417 | void * bd_holder; | 417 | void * bd_holder; |
418 | int bd_holders; | 418 | int bd_holders; |
419 | bool bd_write_holder; | 419 | bool bd_write_holder; |
420 | #ifdef CONFIG_SYSFS | 420 | #ifdef CONFIG_SYSFS |
421 | struct list_head bd_holder_disks; | 421 | struct list_head bd_holder_disks; |
422 | #endif | 422 | #endif |
423 | struct block_device * bd_contains; | 423 | struct block_device * bd_contains; |
424 | unsigned bd_block_size; | 424 | unsigned bd_block_size; |
425 | struct hd_struct * bd_part; | 425 | struct hd_struct * bd_part; |
426 | /* number of times partitions within this device have been opened. */ | 426 | /* number of times partitions within this device have been opened. */ |
427 | unsigned bd_part_count; | 427 | unsigned bd_part_count; |
428 | int bd_invalidated; | 428 | int bd_invalidated; |
429 | struct gendisk * bd_disk; | 429 | struct gendisk * bd_disk; |
430 | struct request_queue * bd_queue; | 430 | struct request_queue * bd_queue; |
431 | struct list_head bd_list; | 431 | struct list_head bd_list; |
432 | /* | 432 | /* |
433 | * Private data. You must have bd_claim'ed the block_device | 433 | * Private data. You must have bd_claim'ed the block_device |
434 | * to use this. NOTE: bd_claim allows an owner to claim | 434 | * to use this. NOTE: bd_claim allows an owner to claim |
435 | * the same device multiple times, the owner must take special | 435 | * the same device multiple times, the owner must take special |
436 | * care to not mess up bd_private for that case. | 436 | * care to not mess up bd_private for that case. |
437 | */ | 437 | */ |
438 | unsigned long bd_private; | 438 | unsigned long bd_private; |
439 | 439 | ||
440 | /* The counter of freeze processes */ | 440 | /* The counter of freeze processes */ |
441 | int bd_fsfreeze_count; | 441 | int bd_fsfreeze_count; |
442 | /* Mutex for freeze */ | 442 | /* Mutex for freeze */ |
443 | struct mutex bd_fsfreeze_mutex; | 443 | struct mutex bd_fsfreeze_mutex; |
444 | }; | 444 | }; |
445 | 445 | ||
446 | /* | 446 | /* |
447 | * Radix-tree tags, for tagging dirty and writeback pages within the pagecache | 447 | * Radix-tree tags, for tagging dirty and writeback pages within the pagecache |
448 | * radix trees | 448 | * radix trees |
449 | */ | 449 | */ |
450 | #define PAGECACHE_TAG_DIRTY 0 | 450 | #define PAGECACHE_TAG_DIRTY 0 |
451 | #define PAGECACHE_TAG_WRITEBACK 1 | 451 | #define PAGECACHE_TAG_WRITEBACK 1 |
452 | #define PAGECACHE_TAG_TOWRITE 2 | 452 | #define PAGECACHE_TAG_TOWRITE 2 |
453 | 453 | ||
454 | int mapping_tagged(struct address_space *mapping, int tag); | 454 | int mapping_tagged(struct address_space *mapping, int tag); |
455 | 455 | ||
456 | /* | 456 | /* |
457 | * Might pages of this file be mapped into userspace? | 457 | * Might pages of this file be mapped into userspace? |
458 | */ | 458 | */ |
459 | static inline int mapping_mapped(struct address_space *mapping) | 459 | static inline int mapping_mapped(struct address_space *mapping) |
460 | { | 460 | { |
461 | return !RB_EMPTY_ROOT(&mapping->i_mmap) || | 461 | return !RB_EMPTY_ROOT(&mapping->i_mmap) || |
462 | !list_empty(&mapping->i_mmap_nonlinear); | 462 | !list_empty(&mapping->i_mmap_nonlinear); |
463 | } | 463 | } |
464 | 464 | ||
465 | /* | 465 | /* |
466 | * Might pages of this file have been modified in userspace? | 466 | * Might pages of this file have been modified in userspace? |
467 | * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff | 467 | * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff |
468 | * marks vma as VM_SHARED if it is shared, and the file was opened for | 468 | * marks vma as VM_SHARED if it is shared, and the file was opened for |
469 | * writing i.e. vma may be mprotected writable even if now readonly. | 469 | * writing i.e. vma may be mprotected writable even if now readonly. |
470 | */ | 470 | */ |
471 | static inline int mapping_writably_mapped(struct address_space *mapping) | 471 | static inline int mapping_writably_mapped(struct address_space *mapping) |
472 | { | 472 | { |
473 | return mapping->i_mmap_writable != 0; | 473 | return mapping->i_mmap_writable != 0; |
474 | } | 474 | } |
475 | 475 | ||
476 | /* | 476 | /* |
477 | * Use sequence counter to get consistent i_size on 32-bit processors. | 477 | * Use sequence counter to get consistent i_size on 32-bit processors. |
478 | */ | 478 | */ |
479 | #if BITS_PER_LONG==32 && defined(CONFIG_SMP) | 479 | #if BITS_PER_LONG==32 && defined(CONFIG_SMP) |
480 | #include <linux/seqlock.h> | 480 | #include <linux/seqlock.h> |
481 | #define __NEED_I_SIZE_ORDERED | 481 | #define __NEED_I_SIZE_ORDERED |
482 | #define i_size_ordered_init(inode) seqcount_init(&inode->i_size_seqcount) | 482 | #define i_size_ordered_init(inode) seqcount_init(&inode->i_size_seqcount) |
483 | #else | 483 | #else |
484 | #define i_size_ordered_init(inode) do { } while (0) | 484 | #define i_size_ordered_init(inode) do { } while (0) |
485 | #endif | 485 | #endif |
486 | 486 | ||
487 | struct posix_acl; | 487 | struct posix_acl; |
488 | #define ACL_NOT_CACHED ((void *)(-1)) | 488 | #define ACL_NOT_CACHED ((void *)(-1)) |
489 | 489 | ||
490 | #define IOP_FASTPERM 0x0001 | 490 | #define IOP_FASTPERM 0x0001 |
491 | #define IOP_LOOKUP 0x0002 | 491 | #define IOP_LOOKUP 0x0002 |
492 | #define IOP_NOFOLLOW 0x0004 | 492 | #define IOP_NOFOLLOW 0x0004 |
493 | 493 | ||
494 | /* | 494 | /* |
495 | * Keep mostly read-only and often accessed (especially for | 495 | * Keep mostly read-only and often accessed (especially for |
496 | * the RCU path lookup and 'stat' data) fields at the beginning | 496 | * the RCU path lookup and 'stat' data) fields at the beginning |
497 | * of the 'struct inode' | 497 | * of the 'struct inode' |
498 | */ | 498 | */ |
499 | struct inode { | 499 | struct inode { |
500 | umode_t i_mode; | 500 | umode_t i_mode; |
501 | unsigned short i_opflags; | 501 | unsigned short i_opflags; |
502 | kuid_t i_uid; | 502 | kuid_t i_uid; |
503 | kgid_t i_gid; | 503 | kgid_t i_gid; |
504 | unsigned int i_flags; | 504 | unsigned int i_flags; |
505 | 505 | ||
506 | #ifdef CONFIG_FS_POSIX_ACL | 506 | #ifdef CONFIG_FS_POSIX_ACL |
507 | struct posix_acl *i_acl; | 507 | struct posix_acl *i_acl; |
508 | struct posix_acl *i_default_acl; | 508 | struct posix_acl *i_default_acl; |
509 | #endif | 509 | #endif |
510 | 510 | ||
511 | const struct inode_operations *i_op; | 511 | const struct inode_operations *i_op; |
512 | struct super_block *i_sb; | 512 | struct super_block *i_sb; |
513 | struct address_space *i_mapping; | 513 | struct address_space *i_mapping; |
514 | 514 | ||
515 | #ifdef CONFIG_SECURITY | 515 | #ifdef CONFIG_SECURITY |
516 | void *i_security; | 516 | void *i_security; |
517 | #endif | 517 | #endif |
518 | 518 | ||
519 | /* Stat data, not accessed from path walking */ | 519 | /* Stat data, not accessed from path walking */ |
520 | unsigned long i_ino; | 520 | unsigned long i_ino; |
521 | /* | 521 | /* |
522 | * Filesystems may only read i_nlink directly. They shall use the | 522 | * Filesystems may only read i_nlink directly. They shall use the |
523 | * following functions for modification: | 523 | * following functions for modification: |
524 | * | 524 | * |
525 | * (set|clear|inc|drop)_nlink | 525 | * (set|clear|inc|drop)_nlink |
526 | * inode_(inc|dec)_link_count | 526 | * inode_(inc|dec)_link_count |
527 | */ | 527 | */ |
528 | union { | 528 | union { |
529 | const unsigned int i_nlink; | 529 | const unsigned int i_nlink; |
530 | unsigned int __i_nlink; | 530 | unsigned int __i_nlink; |
531 | }; | 531 | }; |
532 | dev_t i_rdev; | 532 | dev_t i_rdev; |
533 | loff_t i_size; | 533 | loff_t i_size; |
534 | struct timespec i_atime; | 534 | struct timespec i_atime; |
535 | struct timespec i_mtime; | 535 | struct timespec i_mtime; |
536 | struct timespec i_ctime; | 536 | struct timespec i_ctime; |
537 | spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ | 537 | spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ |
538 | unsigned short i_bytes; | 538 | unsigned short i_bytes; |
539 | unsigned int i_blkbits; | 539 | unsigned int i_blkbits; |
540 | blkcnt_t i_blocks; | 540 | blkcnt_t i_blocks; |
541 | 541 | ||
542 | #ifdef __NEED_I_SIZE_ORDERED | 542 | #ifdef __NEED_I_SIZE_ORDERED |
543 | seqcount_t i_size_seqcount; | 543 | seqcount_t i_size_seqcount; |
544 | #endif | 544 | #endif |
545 | 545 | ||
546 | /* Misc */ | 546 | /* Misc */ |
547 | unsigned long i_state; | 547 | unsigned long i_state; |
548 | struct mutex i_mutex; | 548 | struct mutex i_mutex; |
549 | 549 | ||
550 | unsigned long dirtied_when; /* jiffies of first dirtying */ | 550 | unsigned long dirtied_when; /* jiffies of first dirtying */ |
551 | 551 | ||
552 | struct hlist_node i_hash; | 552 | struct hlist_node i_hash; |
553 | struct list_head i_wb_list; /* backing dev IO list */ | 553 | struct list_head i_wb_list; /* backing dev IO list */ |
554 | struct list_head i_lru; /* inode LRU list */ | 554 | struct list_head i_lru; /* inode LRU list */ |
555 | struct list_head i_sb_list; | 555 | struct list_head i_sb_list; |
556 | union { | 556 | union { |
557 | struct hlist_head i_dentry; | 557 | struct hlist_head i_dentry; |
558 | struct rcu_head i_rcu; | 558 | struct rcu_head i_rcu; |
559 | }; | 559 | }; |
560 | u64 i_version; | 560 | u64 i_version; |
561 | atomic_t i_count; | 561 | atomic_t i_count; |
562 | atomic_t i_dio_count; | 562 | atomic_t i_dio_count; |
563 | atomic_t i_writecount; | 563 | atomic_t i_writecount; |
564 | #ifdef CONFIG_IMA | 564 | #ifdef CONFIG_IMA |
565 | atomic_t i_readcount; /* struct files open RO */ | 565 | atomic_t i_readcount; /* struct files open RO */ |
566 | #endif | 566 | #endif |
567 | const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ | 567 | const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ |
568 | struct file_lock *i_flock; | 568 | struct file_lock *i_flock; |
569 | struct address_space i_data; | 569 | struct address_space i_data; |
570 | #ifdef CONFIG_QUOTA | 570 | #ifdef CONFIG_QUOTA |
571 | struct dquot *i_dquot[MAXQUOTAS]; | 571 | struct dquot *i_dquot[MAXQUOTAS]; |
572 | #endif | 572 | #endif |
573 | struct list_head i_devices; | 573 | struct list_head i_devices; |
574 | union { | 574 | union { |
575 | struct pipe_inode_info *i_pipe; | 575 | struct pipe_inode_info *i_pipe; |
576 | struct block_device *i_bdev; | 576 | struct block_device *i_bdev; |
577 | struct cdev *i_cdev; | 577 | struct cdev *i_cdev; |
578 | }; | 578 | }; |
579 | 579 | ||
580 | __u32 i_generation; | 580 | __u32 i_generation; |
581 | 581 | ||
582 | #ifdef CONFIG_FSNOTIFY | 582 | #ifdef CONFIG_FSNOTIFY |
583 | __u32 i_fsnotify_mask; /* all events this inode cares about */ | 583 | __u32 i_fsnotify_mask; /* all events this inode cares about */ |
584 | struct hlist_head i_fsnotify_marks; | 584 | struct hlist_head i_fsnotify_marks; |
585 | #endif | 585 | #endif |
586 | 586 | ||
587 | void *i_private; /* fs or device private pointer */ | 587 | void *i_private; /* fs or device private pointer */ |
588 | }; | 588 | }; |
589 | 589 | ||
590 | static inline int inode_unhashed(struct inode *inode) | 590 | static inline int inode_unhashed(struct inode *inode) |
591 | { | 591 | { |
592 | return hlist_unhashed(&inode->i_hash); | 592 | return hlist_unhashed(&inode->i_hash); |
593 | } | 593 | } |
594 | 594 | ||
595 | /* | 595 | /* |
596 | * inode->i_mutex nesting subclasses for the lock validator: | 596 | * inode->i_mutex nesting subclasses for the lock validator: |
597 | * | 597 | * |
598 | * 0: the object of the current VFS operation | 598 | * 0: the object of the current VFS operation |
599 | * 1: parent | 599 | * 1: parent |
600 | * 2: child/target | 600 | * 2: child/target |
601 | * 3: xattr | 601 | * 3: xattr |
602 | * 4: second non-directory | 602 | * 4: second non-directory |
603 | * The last is for certain operations (such as rename) which lock two | 603 | * The last is for certain operations (such as rename) which lock two |
604 | * non-directories at once. | 604 | * non-directories at once. |
605 | * | 605 | * |
606 | * The locking order between these classes is | 606 | * The locking order between these classes is |
607 | * parent -> child -> normal -> xattr -> second non-directory | 607 | * parent -> child -> normal -> xattr -> second non-directory |
608 | */ | 608 | */ |
609 | enum inode_i_mutex_lock_class | 609 | enum inode_i_mutex_lock_class |
610 | { | 610 | { |
611 | I_MUTEX_NORMAL, | 611 | I_MUTEX_NORMAL, |
612 | I_MUTEX_PARENT, | 612 | I_MUTEX_PARENT, |
613 | I_MUTEX_CHILD, | 613 | I_MUTEX_CHILD, |
614 | I_MUTEX_XATTR, | 614 | I_MUTEX_XATTR, |
615 | I_MUTEX_NONDIR2 | 615 | I_MUTEX_NONDIR2 |
616 | }; | 616 | }; |
617 | 617 | ||
618 | void lock_two_nondirectories(struct inode *, struct inode*); | 618 | void lock_two_nondirectories(struct inode *, struct inode*); |
619 | void unlock_two_nondirectories(struct inode *, struct inode*); | 619 | void unlock_two_nondirectories(struct inode *, struct inode*); |
620 | 620 | ||
621 | /* | 621 | /* |
622 | * NOTE: in a 32bit arch with a preemptable kernel and | 622 | * NOTE: in a 32bit arch with a preemptable kernel and |
623 | * an UP compile the i_size_read/write must be atomic | 623 | * an UP compile the i_size_read/write must be atomic |
624 | * with respect to the local cpu (unlike with preempt disabled), | 624 | * with respect to the local cpu (unlike with preempt disabled), |
625 | * but they don't need to be atomic with respect to other cpus like in | 625 | * but they don't need to be atomic with respect to other cpus like in |
626 | * true SMP (so they need to either locally disable irq around | 626 | * true SMP (so they need to either locally disable irq around |
627 | * the read or for example on x86 they can be still implemented as a | 627 | * the read or for example on x86 they can be still implemented as a |
628 | * cmpxchg8b without the need of the lock prefix). For SMP compiles | 628 | * cmpxchg8b without the need of the lock prefix). For SMP compiles |
629 | * and 64bit archs it makes no difference if preempt is enabled or not. | 629 | * and 64bit archs it makes no difference if preempt is enabled or not. |
630 | */ | 630 | */ |
631 | static inline loff_t i_size_read(const struct inode *inode) | 631 | static inline loff_t i_size_read(const struct inode *inode) |
632 | { | 632 | { |
633 | #if BITS_PER_LONG==32 && defined(CONFIG_SMP) | 633 | #if BITS_PER_LONG==32 && defined(CONFIG_SMP) |
634 | loff_t i_size; | 634 | loff_t i_size; |
635 | unsigned int seq; | 635 | unsigned int seq; |
636 | 636 | ||
637 | do { | 637 | do { |
638 | seq = read_seqcount_begin(&inode->i_size_seqcount); | 638 | seq = read_seqcount_begin(&inode->i_size_seqcount); |
639 | i_size = inode->i_size; | 639 | i_size = inode->i_size; |
640 | } while (read_seqcount_retry(&inode->i_size_seqcount, seq)); | 640 | } while (read_seqcount_retry(&inode->i_size_seqcount, seq)); |
641 | return i_size; | 641 | return i_size; |
642 | #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) | 642 | #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) |
643 | loff_t i_size; | 643 | loff_t i_size; |
644 | 644 | ||
645 | preempt_disable(); | 645 | preempt_disable(); |
646 | i_size = inode->i_size; | 646 | i_size = inode->i_size; |
647 | preempt_enable(); | 647 | preempt_enable(); |
648 | return i_size; | 648 | return i_size; |
649 | #else | 649 | #else |
650 | return inode->i_size; | 650 | return inode->i_size; |
651 | #endif | 651 | #endif |
652 | } | 652 | } |
653 | 653 | ||
654 | /* | 654 | /* |
655 | * NOTE: unlike i_size_read(), i_size_write() does need locking around it | 655 | * NOTE: unlike i_size_read(), i_size_write() does need locking around it |
656 | * (normally i_mutex), otherwise on 32bit/SMP an update of i_size_seqcount | 656 | * (normally i_mutex), otherwise on 32bit/SMP an update of i_size_seqcount |
657 | * can be lost, resulting in subsequent i_size_read() calls spinning forever. | 657 | * can be lost, resulting in subsequent i_size_read() calls spinning forever. |
658 | */ | 658 | */ |
659 | static inline void i_size_write(struct inode *inode, loff_t i_size) | 659 | static inline void i_size_write(struct inode *inode, loff_t i_size) |
660 | { | 660 | { |
661 | #if BITS_PER_LONG==32 && defined(CONFIG_SMP) | 661 | #if BITS_PER_LONG==32 && defined(CONFIG_SMP) |
662 | preempt_disable(); | 662 | preempt_disable(); |
663 | write_seqcount_begin(&inode->i_size_seqcount); | 663 | write_seqcount_begin(&inode->i_size_seqcount); |
664 | inode->i_size = i_size; | 664 | inode->i_size = i_size; |
665 | write_seqcount_end(&inode->i_size_seqcount); | 665 | write_seqcount_end(&inode->i_size_seqcount); |
666 | preempt_enable(); | 666 | preempt_enable(); |
667 | #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) | 667 | #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) |
668 | preempt_disable(); | 668 | preempt_disable(); |
669 | inode->i_size = i_size; | 669 | inode->i_size = i_size; |
670 | preempt_enable(); | 670 | preempt_enable(); |
671 | #else | 671 | #else |
672 | inode->i_size = i_size; | 672 | inode->i_size = i_size; |
673 | #endif | 673 | #endif |
674 | } | 674 | } |
675 | 675 | ||
676 | /* Helper functions so that in most cases filesystems will | 676 | /* Helper functions so that in most cases filesystems will |
677 | * not need to deal directly with kuid_t and kgid_t and can | 677 | * not need to deal directly with kuid_t and kgid_t and can |
678 | * instead deal with the raw numeric values that are stored | 678 | * instead deal with the raw numeric values that are stored |
679 | * in the filesystem. | 679 | * in the filesystem. |
680 | */ | 680 | */ |
681 | static inline uid_t i_uid_read(const struct inode *inode) | 681 | static inline uid_t i_uid_read(const struct inode *inode) |
682 | { | 682 | { |
683 | return from_kuid(&init_user_ns, inode->i_uid); | 683 | return from_kuid(&init_user_ns, inode->i_uid); |
684 | } | 684 | } |
685 | 685 | ||
686 | static inline gid_t i_gid_read(const struct inode *inode) | 686 | static inline gid_t i_gid_read(const struct inode *inode) |
687 | { | 687 | { |
688 | return from_kgid(&init_user_ns, inode->i_gid); | 688 | return from_kgid(&init_user_ns, inode->i_gid); |
689 | } | 689 | } |
690 | 690 | ||
691 | static inline void i_uid_write(struct inode *inode, uid_t uid) | 691 | static inline void i_uid_write(struct inode *inode, uid_t uid) |
692 | { | 692 | { |
693 | inode->i_uid = make_kuid(&init_user_ns, uid); | 693 | inode->i_uid = make_kuid(&init_user_ns, uid); |
694 | } | 694 | } |
695 | 695 | ||
696 | static inline void i_gid_write(struct inode *inode, gid_t gid) | 696 | static inline void i_gid_write(struct inode *inode, gid_t gid) |
697 | { | 697 | { |
698 | inode->i_gid = make_kgid(&init_user_ns, gid); | 698 | inode->i_gid = make_kgid(&init_user_ns, gid); |
699 | } | 699 | } |
700 | 700 | ||
701 | static inline unsigned iminor(const struct inode *inode) | 701 | static inline unsigned iminor(const struct inode *inode) |
702 | { | 702 | { |
703 | return MINOR(inode->i_rdev); | 703 | return MINOR(inode->i_rdev); |
704 | } | 704 | } |
705 | 705 | ||
706 | static inline unsigned imajor(const struct inode *inode) | 706 | static inline unsigned imajor(const struct inode *inode) |
707 | { | 707 | { |
708 | return MAJOR(inode->i_rdev); | 708 | return MAJOR(inode->i_rdev); |
709 | } | 709 | } |
710 | 710 | ||
711 | extern struct block_device *I_BDEV(struct inode *inode); | 711 | extern struct block_device *I_BDEV(struct inode *inode); |
712 | 712 | ||
713 | struct fown_struct { | 713 | struct fown_struct { |
714 | rwlock_t lock; /* protects pid, uid, euid fields */ | 714 | rwlock_t lock; /* protects pid, uid, euid fields */ |
715 | struct pid *pid; /* pid or -pgrp where SIGIO should be sent */ | 715 | struct pid *pid; /* pid or -pgrp where SIGIO should be sent */ |
716 | enum pid_type pid_type; /* Kind of process group SIGIO should be sent to */ | 716 | enum pid_type pid_type; /* Kind of process group SIGIO should be sent to */ |
717 | kuid_t uid, euid; /* uid/euid of process setting the owner */ | 717 | kuid_t uid, euid; /* uid/euid of process setting the owner */ |
718 | int signum; /* posix.1b rt signal to be delivered on IO */ | 718 | int signum; /* posix.1b rt signal to be delivered on IO */ |
719 | }; | 719 | }; |
720 | 720 | ||
721 | /* | 721 | /* |
722 | * Track a single file's readahead state | 722 | * Track a single file's readahead state |
723 | */ | 723 | */ |
724 | struct file_ra_state { | 724 | struct file_ra_state { |
725 | pgoff_t start; /* where readahead started */ | 725 | pgoff_t start; /* where readahead started */ |
726 | unsigned int size; /* # of readahead pages */ | 726 | unsigned int size; /* # of readahead pages */ |
727 | unsigned int async_size; /* do asynchronous readahead when | 727 | unsigned int async_size; /* do asynchronous readahead when |
728 | there are only # of pages ahead */ | 728 | there are only # of pages ahead */ |
729 | 729 | ||
730 | unsigned int ra_pages; /* Maximum readahead window */ | 730 | unsigned int ra_pages; /* Maximum readahead window */ |
731 | unsigned int mmap_miss; /* Cache miss stat for mmap accesses */ | 731 | unsigned int mmap_miss; /* Cache miss stat for mmap accesses */ |
732 | loff_t prev_pos; /* Cache last read() position */ | 732 | loff_t prev_pos; /* Cache last read() position */ |
733 | }; | 733 | }; |
734 | 734 | ||
735 | /* | 735 | /* |
736 | * Check if @index falls in the readahead window. | 736 | * Check if @index falls in the readahead window. |
737 | */ | 737 | */ |
738 | static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) | 738 | static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) |
739 | { | 739 | { |
740 | return (index >= ra->start && | 740 | return (index >= ra->start && |
741 | index < ra->start + ra->size); | 741 | index < ra->start + ra->size); |
742 | } | 742 | } |
743 | 743 | ||
744 | struct file { | 744 | struct file { |
745 | union { | 745 | union { |
746 | struct llist_node fu_llist; | 746 | struct llist_node fu_llist; |
747 | struct rcu_head fu_rcuhead; | 747 | struct rcu_head fu_rcuhead; |
748 | } f_u; | 748 | } f_u; |
749 | struct path f_path; | 749 | struct path f_path; |
750 | #define f_dentry f_path.dentry | 750 | #define f_dentry f_path.dentry |
751 | struct inode *f_inode; /* cached value */ | 751 | struct inode *f_inode; /* cached value */ |
752 | const struct file_operations *f_op; | 752 | const struct file_operations *f_op; |
753 | 753 | ||
754 | /* | 754 | /* |
755 | * Protects f_ep_links, f_flags. | 755 | * Protects f_ep_links, f_flags. |
756 | * Must not be taken from IRQ context. | 756 | * Must not be taken from IRQ context. |
757 | */ | 757 | */ |
758 | spinlock_t f_lock; | 758 | spinlock_t f_lock; |
759 | atomic_long_t f_count; | 759 | atomic_long_t f_count; |
760 | unsigned int f_flags; | 760 | unsigned int f_flags; |
761 | fmode_t f_mode; | 761 | fmode_t f_mode; |
762 | struct mutex f_pos_lock; | 762 | struct mutex f_pos_lock; |
763 | loff_t f_pos; | 763 | loff_t f_pos; |
764 | struct fown_struct f_owner; | 764 | struct fown_struct f_owner; |
765 | const struct cred *f_cred; | 765 | const struct cred *f_cred; |
766 | struct file_ra_state f_ra; | 766 | struct file_ra_state f_ra; |
767 | 767 | ||
768 | u64 f_version; | 768 | u64 f_version; |
769 | #ifdef CONFIG_SECURITY | 769 | #ifdef CONFIG_SECURITY |
770 | void *f_security; | 770 | void *f_security; |
771 | #endif | 771 | #endif |
772 | /* needed for tty driver, and maybe others */ | 772 | /* needed for tty driver, and maybe others */ |
773 | void *private_data; | 773 | void *private_data; |
774 | 774 | ||
775 | #ifdef CONFIG_EPOLL | 775 | #ifdef CONFIG_EPOLL |
776 | /* Used by fs/eventpoll.c to link all the hooks to this file */ | 776 | /* Used by fs/eventpoll.c to link all the hooks to this file */ |
777 | struct list_head f_ep_links; | 777 | struct list_head f_ep_links; |
778 | struct list_head f_tfile_llink; | 778 | struct list_head f_tfile_llink; |
779 | #endif /* #ifdef CONFIG_EPOLL */ | 779 | #endif /* #ifdef CONFIG_EPOLL */ |
780 | struct address_space *f_mapping; | 780 | struct address_space *f_mapping; |
781 | } __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ | 781 | } __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ |
782 | 782 | ||
783 | struct file_handle { | 783 | struct file_handle { |
784 | __u32 handle_bytes; | 784 | __u32 handle_bytes; |
785 | int handle_type; | 785 | int handle_type; |
786 | /* file identifier */ | 786 | /* file identifier */ |
787 | unsigned char f_handle[0]; | 787 | unsigned char f_handle[0]; |
788 | }; | 788 | }; |
789 | 789 | ||
790 | static inline struct file *get_file(struct file *f) | 790 | static inline struct file *get_file(struct file *f) |
791 | { | 791 | { |
792 | atomic_long_inc(&f->f_count); | 792 | atomic_long_inc(&f->f_count); |
793 | return f; | 793 | return f; |
794 | } | 794 | } |
795 | #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) | 795 | #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) |
796 | #define file_count(x) atomic_long_read(&(x)->f_count) | 796 | #define file_count(x) atomic_long_read(&(x)->f_count) |
797 | 797 | ||
798 | #define MAX_NON_LFS ((1UL<<31) - 1) | 798 | #define MAX_NON_LFS ((1UL<<31) - 1) |
799 | 799 | ||
800 | /* Page cache limit. The filesystems should put that into their s_maxbytes | 800 | /* Page cache limit. The filesystems should put that into their s_maxbytes |
801 | limits, otherwise bad things can happen in VM. */ | 801 | limits, otherwise bad things can happen in VM. */ |
802 | #if BITS_PER_LONG==32 | 802 | #if BITS_PER_LONG==32 |
803 | #define MAX_LFS_FILESIZE (((loff_t)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) | 803 | #define MAX_LFS_FILESIZE (((loff_t)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) |
804 | #elif BITS_PER_LONG==64 | 804 | #elif BITS_PER_LONG==64 |
805 | #define MAX_LFS_FILESIZE ((loff_t)0x7fffffffffffffffLL) | 805 | #define MAX_LFS_FILESIZE ((loff_t)0x7fffffffffffffffLL) |
806 | #endif | 806 | #endif |
807 | 807 | ||
808 | #define FL_POSIX 1 | 808 | #define FL_POSIX 1 |
809 | #define FL_FLOCK 2 | 809 | #define FL_FLOCK 2 |
810 | #define FL_DELEG 4 /* NFSv4 delegation */ | 810 | #define FL_DELEG 4 /* NFSv4 delegation */ |
811 | #define FL_ACCESS 8 /* not trying to lock, just looking */ | 811 | #define FL_ACCESS 8 /* not trying to lock, just looking */ |
812 | #define FL_EXISTS 16 /* when unlocking, test for existence */ | 812 | #define FL_EXISTS 16 /* when unlocking, test for existence */ |
813 | #define FL_LEASE 32 /* lease held on this file */ | 813 | #define FL_LEASE 32 /* lease held on this file */ |
814 | #define FL_CLOSE 64 /* unlock on close */ | 814 | #define FL_CLOSE 64 /* unlock on close */ |
815 | #define FL_SLEEP 128 /* A blocking lock */ | 815 | #define FL_SLEEP 128 /* A blocking lock */ |
816 | #define FL_DOWNGRADE_PENDING 256 /* Lease is being downgraded */ | 816 | #define FL_DOWNGRADE_PENDING 256 /* Lease is being downgraded */ |
817 | #define FL_UNLOCK_PENDING 512 /* Lease is being broken */ | 817 | #define FL_UNLOCK_PENDING 512 /* Lease is being broken */ |
818 | #define FL_FILE_PVT 1024 /* lock is private to the file */ | 818 | #define FL_OFDLCK 1024 /* lock is "owned" by struct file */ |
819 | 819 | ||
820 | /* | 820 | /* |
821 | * Special return value from posix_lock_file() and vfs_lock_file() for | 821 | * Special return value from posix_lock_file() and vfs_lock_file() for |
822 | * asynchronous locking. | 822 | * asynchronous locking. |
823 | */ | 823 | */ |
824 | #define FILE_LOCK_DEFERRED 1 | 824 | #define FILE_LOCK_DEFERRED 1 |
825 | 825 | ||
826 | /* | 826 | /* |
827 | * The POSIX file lock owner is determined by | 827 | * The POSIX file lock owner is determined by |
828 | * the "struct files_struct" in the thread group | 828 | * the "struct files_struct" in the thread group |
829 | * (or NULL for no owner - BSD locks). | 829 | * (or NULL for no owner - BSD locks). |
830 | * | 830 | * |
831 | * Lockd stuffs a "host" pointer into this. | 831 | * Lockd stuffs a "host" pointer into this. |
832 | */ | 832 | */ |
833 | typedef struct files_struct *fl_owner_t; | 833 | typedef struct files_struct *fl_owner_t; |
834 | 834 | ||
835 | struct file_lock_operations { | 835 | struct file_lock_operations { |
836 | void (*fl_copy_lock)(struct file_lock *, struct file_lock *); | 836 | void (*fl_copy_lock)(struct file_lock *, struct file_lock *); |
837 | void (*fl_release_private)(struct file_lock *); | 837 | void (*fl_release_private)(struct file_lock *); |
838 | }; | 838 | }; |
839 | 839 | ||
840 | struct lock_manager_operations { | 840 | struct lock_manager_operations { |
841 | int (*lm_compare_owner)(struct file_lock *, struct file_lock *); | 841 | int (*lm_compare_owner)(struct file_lock *, struct file_lock *); |
842 | unsigned long (*lm_owner_key)(struct file_lock *); | 842 | unsigned long (*lm_owner_key)(struct file_lock *); |
843 | void (*lm_notify)(struct file_lock *); /* unblock callback */ | 843 | void (*lm_notify)(struct file_lock *); /* unblock callback */ |
844 | int (*lm_grant)(struct file_lock *, struct file_lock *, int); | 844 | int (*lm_grant)(struct file_lock *, struct file_lock *, int); |
845 | void (*lm_break)(struct file_lock *); | 845 | void (*lm_break)(struct file_lock *); |
846 | int (*lm_change)(struct file_lock **, int); | 846 | int (*lm_change)(struct file_lock **, int); |
847 | }; | 847 | }; |
848 | 848 | ||
849 | struct lock_manager { | 849 | struct lock_manager { |
850 | struct list_head list; | 850 | struct list_head list; |
851 | }; | 851 | }; |
852 | 852 | ||
853 | struct net; | 853 | struct net; |
854 | void locks_start_grace(struct net *, struct lock_manager *); | 854 | void locks_start_grace(struct net *, struct lock_manager *); |
855 | void locks_end_grace(struct lock_manager *); | 855 | void locks_end_grace(struct lock_manager *); |
856 | int locks_in_grace(struct net *); | 856 | int locks_in_grace(struct net *); |
857 | 857 | ||
858 | /* that will die - we need it for nfs_lock_info */ | 858 | /* that will die - we need it for nfs_lock_info */ |
859 | #include <linux/nfs_fs_i.h> | 859 | #include <linux/nfs_fs_i.h> |
860 | 860 | ||
861 | /* | 861 | /* |
862 | * struct file_lock represents a generic "file lock". It's used to represent | 862 | * struct file_lock represents a generic "file lock". It's used to represent |
863 | * POSIX byte range locks, BSD (flock) locks, and leases. It's important to | 863 | * POSIX byte range locks, BSD (flock) locks, and leases. It's important to |
864 | * note that the same struct is used to represent both a request for a lock and | 864 | * note that the same struct is used to represent both a request for a lock and |
865 | * the lock itself, but the same object is never used for both. | 865 | * the lock itself, but the same object is never used for both. |
866 | * | 866 | * |
867 | * FIXME: should we create a separate "struct lock_request" to help distinguish | 867 | * FIXME: should we create a separate "struct lock_request" to help distinguish |
868 | * these two uses? | 868 | * these two uses? |
869 | * | 869 | * |
870 | * The i_flock list is ordered by: | 870 | * The i_flock list is ordered by: |
871 | * | 871 | * |
872 | * 1) lock type -- FL_LEASEs first, then FL_FLOCK, and finally FL_POSIX | 872 | * 1) lock type -- FL_LEASEs first, then FL_FLOCK, and finally FL_POSIX |
873 | * 2) lock owner | 873 | * 2) lock owner |
874 | * 3) lock range start | 874 | * 3) lock range start |
875 | * 4) lock range end | 875 | * 4) lock range end |
876 | * | 876 | * |
877 | * Obviously, the last two criteria only matter for POSIX locks. | 877 | * Obviously, the last two criteria only matter for POSIX locks. |
878 | */ | 878 | */ |
879 | struct file_lock { | 879 | struct file_lock { |
880 | struct file_lock *fl_next; /* singly linked list for this inode */ | 880 | struct file_lock *fl_next; /* singly linked list for this inode */ |
881 | struct hlist_node fl_link; /* node in global lists */ | 881 | struct hlist_node fl_link; /* node in global lists */ |
882 | struct list_head fl_block; /* circular list of blocked processes */ | 882 | struct list_head fl_block; /* circular list of blocked processes */ |
883 | fl_owner_t fl_owner; | 883 | fl_owner_t fl_owner; |
884 | unsigned int fl_flags; | 884 | unsigned int fl_flags; |
885 | unsigned char fl_type; | 885 | unsigned char fl_type; |
886 | unsigned int fl_pid; | 886 | unsigned int fl_pid; |
887 | int fl_link_cpu; /* what cpu's list is this on? */ | 887 | int fl_link_cpu; /* what cpu's list is this on? */ |
888 | struct pid *fl_nspid; | 888 | struct pid *fl_nspid; |
889 | wait_queue_head_t fl_wait; | 889 | wait_queue_head_t fl_wait; |
890 | struct file *fl_file; | 890 | struct file *fl_file; |
891 | loff_t fl_start; | 891 | loff_t fl_start; |
892 | loff_t fl_end; | 892 | loff_t fl_end; |
893 | 893 | ||
894 | struct fasync_struct * fl_fasync; /* for lease break notifications */ | 894 | struct fasync_struct * fl_fasync; /* for lease break notifications */ |
895 | /* for lease breaks: */ | 895 | /* for lease breaks: */ |
896 | unsigned long fl_break_time; | 896 | unsigned long fl_break_time; |
897 | unsigned long fl_downgrade_time; | 897 | unsigned long fl_downgrade_time; |
898 | 898 | ||
899 | const struct file_lock_operations *fl_ops; /* Callbacks for filesystems */ | 899 | const struct file_lock_operations *fl_ops; /* Callbacks for filesystems */ |
900 | const struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */ | 900 | const struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */ |
901 | union { | 901 | union { |
902 | struct nfs_lock_info nfs_fl; | 902 | struct nfs_lock_info nfs_fl; |
903 | struct nfs4_lock_info nfs4_fl; | 903 | struct nfs4_lock_info nfs4_fl; |
904 | struct { | 904 | struct { |
905 | struct list_head link; /* link in AFS vnode's pending_locks list */ | 905 | struct list_head link; /* link in AFS vnode's pending_locks list */ |
906 | int state; /* state of grant or error if -ve */ | 906 | int state; /* state of grant or error if -ve */ |
907 | } afs; | 907 | } afs; |
908 | } fl_u; | 908 | } fl_u; |
909 | }; | 909 | }; |
910 | 910 | ||
911 | /* The following constant reflects the upper bound of the file/locking space */ | 911 | /* The following constant reflects the upper bound of the file/locking space */ |
912 | #ifndef OFFSET_MAX | 912 | #ifndef OFFSET_MAX |
913 | #define INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1))) | 913 | #define INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1))) |
914 | #define OFFSET_MAX INT_LIMIT(loff_t) | 914 | #define OFFSET_MAX INT_LIMIT(loff_t) |
915 | #define OFFT_OFFSET_MAX INT_LIMIT(off_t) | 915 | #define OFFT_OFFSET_MAX INT_LIMIT(off_t) |
916 | #endif | 916 | #endif |
917 | 917 | ||
918 | #include <linux/fcntl.h> | 918 | #include <linux/fcntl.h> |
919 | 919 | ||
920 | extern void send_sigio(struct fown_struct *fown, int fd, int band); | 920 | extern void send_sigio(struct fown_struct *fown, int fd, int band); |
921 | 921 | ||
922 | #ifdef CONFIG_FILE_LOCKING | 922 | #ifdef CONFIG_FILE_LOCKING |
923 | extern int fcntl_getlk(struct file *, unsigned int, struct flock __user *); | 923 | extern int fcntl_getlk(struct file *, unsigned int, struct flock __user *); |
924 | extern int fcntl_setlk(unsigned int, struct file *, unsigned int, | 924 | extern int fcntl_setlk(unsigned int, struct file *, unsigned int, |
925 | struct flock __user *); | 925 | struct flock __user *); |
926 | 926 | ||
927 | #if BITS_PER_LONG == 32 | 927 | #if BITS_PER_LONG == 32 |
928 | extern int fcntl_getlk64(struct file *, unsigned int, struct flock64 __user *); | 928 | extern int fcntl_getlk64(struct file *, unsigned int, struct flock64 __user *); |
929 | extern int fcntl_setlk64(unsigned int, struct file *, unsigned int, | 929 | extern int fcntl_setlk64(unsigned int, struct file *, unsigned int, |
930 | struct flock64 __user *); | 930 | struct flock64 __user *); |
931 | #endif | 931 | #endif |
932 | 932 | ||
933 | extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg); | 933 | extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg); |
934 | extern int fcntl_getlease(struct file *filp); | 934 | extern int fcntl_getlease(struct file *filp); |
935 | 935 | ||
936 | /* fs/locks.c */ | 936 | /* fs/locks.c */ |
937 | void locks_free_lock(struct file_lock *fl); | 937 | void locks_free_lock(struct file_lock *fl); |
938 | extern void locks_init_lock(struct file_lock *); | 938 | extern void locks_init_lock(struct file_lock *); |
939 | extern struct file_lock * locks_alloc_lock(void); | 939 | extern struct file_lock * locks_alloc_lock(void); |
940 | extern void locks_copy_lock(struct file_lock *, struct file_lock *); | 940 | extern void locks_copy_lock(struct file_lock *, struct file_lock *); |
941 | extern void __locks_copy_lock(struct file_lock *, const struct file_lock *); | 941 | extern void __locks_copy_lock(struct file_lock *, const struct file_lock *); |
942 | extern void locks_remove_posix(struct file *, fl_owner_t); | 942 | extern void locks_remove_posix(struct file *, fl_owner_t); |
943 | extern void locks_remove_file(struct file *); | 943 | extern void locks_remove_file(struct file *); |
944 | extern void locks_release_private(struct file_lock *); | 944 | extern void locks_release_private(struct file_lock *); |
945 | extern void posix_test_lock(struct file *, struct file_lock *); | 945 | extern void posix_test_lock(struct file *, struct file_lock *); |
946 | extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); | 946 | extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); |
947 | extern int posix_lock_file_wait(struct file *, struct file_lock *); | 947 | extern int posix_lock_file_wait(struct file *, struct file_lock *); |
948 | extern int posix_unblock_lock(struct file_lock *); | 948 | extern int posix_unblock_lock(struct file_lock *); |
949 | extern int vfs_test_lock(struct file *, struct file_lock *); | 949 | extern int vfs_test_lock(struct file *, struct file_lock *); |
950 | extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); | 950 | extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); |
951 | extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); | 951 | extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); |
952 | extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); | 952 | extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); |
953 | extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); | 953 | extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); |
954 | extern void lease_get_mtime(struct inode *, struct timespec *time); | 954 | extern void lease_get_mtime(struct inode *, struct timespec *time); |
955 | extern int generic_setlease(struct file *, long, struct file_lock **); | 955 | extern int generic_setlease(struct file *, long, struct file_lock **); |
956 | extern int vfs_setlease(struct file *, long, struct file_lock **); | 956 | extern int vfs_setlease(struct file *, long, struct file_lock **); |
957 | extern int lease_modify(struct file_lock **, int); | 957 | extern int lease_modify(struct file_lock **, int); |
958 | extern int lock_may_read(struct inode *, loff_t start, unsigned long count); | 958 | extern int lock_may_read(struct inode *, loff_t start, unsigned long count); |
959 | extern int lock_may_write(struct inode *, loff_t start, unsigned long count); | 959 | extern int lock_may_write(struct inode *, loff_t start, unsigned long count); |
960 | #else /* !CONFIG_FILE_LOCKING */ | 960 | #else /* !CONFIG_FILE_LOCKING */ |
961 | static inline int fcntl_getlk(struct file *file, unsigned int cmd, | 961 | static inline int fcntl_getlk(struct file *file, unsigned int cmd, |
962 | struct flock __user *user) | 962 | struct flock __user *user) |
963 | { | 963 | { |
964 | return -EINVAL; | 964 | return -EINVAL; |
965 | } | 965 | } |
966 | 966 | ||
967 | static inline int fcntl_setlk(unsigned int fd, struct file *file, | 967 | static inline int fcntl_setlk(unsigned int fd, struct file *file, |
968 | unsigned int cmd, struct flock __user *user) | 968 | unsigned int cmd, struct flock __user *user) |
969 | { | 969 | { |
970 | return -EACCES; | 970 | return -EACCES; |
971 | } | 971 | } |
972 | 972 | ||
973 | #if BITS_PER_LONG == 32 | 973 | #if BITS_PER_LONG == 32 |
974 | static inline int fcntl_getlk64(struct file *file, unsigned int cmd, | 974 | static inline int fcntl_getlk64(struct file *file, unsigned int cmd, |
975 | struct flock64 __user *user) | 975 | struct flock64 __user *user) |
976 | { | 976 | { |
977 | return -EINVAL; | 977 | return -EINVAL; |
978 | } | 978 | } |
979 | 979 | ||
980 | static inline int fcntl_setlk64(unsigned int fd, struct file *file, | 980 | static inline int fcntl_setlk64(unsigned int fd, struct file *file, |
981 | unsigned int cmd, struct flock64 __user *user) | 981 | unsigned int cmd, struct flock64 __user *user) |
982 | { | 982 | { |
983 | return -EACCES; | 983 | return -EACCES; |
984 | } | 984 | } |
985 | #endif | 985 | #endif |
986 | static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg) | 986 | static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg) |
987 | { | 987 | { |
988 | return 0; | 988 | return 0; |
989 | } | 989 | } |
990 | 990 | ||
991 | static inline int fcntl_getlease(struct file *filp) | 991 | static inline int fcntl_getlease(struct file *filp) |
992 | { | 992 | { |
993 | return 0; | 993 | return 0; |
994 | } | 994 | } |
995 | 995 | ||
996 | static inline void locks_init_lock(struct file_lock *fl) | 996 | static inline void locks_init_lock(struct file_lock *fl) |
997 | { | 997 | { |
998 | return; | 998 | return; |
999 | } | 999 | } |
1000 | 1000 | ||
1001 | static inline void __locks_copy_lock(struct file_lock *new, struct file_lock *fl) | 1001 | static inline void __locks_copy_lock(struct file_lock *new, struct file_lock *fl) |
1002 | { | 1002 | { |
1003 | return; | 1003 | return; |
1004 | } | 1004 | } |
1005 | 1005 | ||
1006 | static inline void locks_copy_lock(struct file_lock *new, struct file_lock *fl) | 1006 | static inline void locks_copy_lock(struct file_lock *new, struct file_lock *fl) |
1007 | { | 1007 | { |
1008 | return; | 1008 | return; |
1009 | } | 1009 | } |
1010 | 1010 | ||
1011 | static inline void locks_remove_posix(struct file *filp, fl_owner_t owner) | 1011 | static inline void locks_remove_posix(struct file *filp, fl_owner_t owner) |
1012 | { | 1012 | { |
1013 | return; | 1013 | return; |
1014 | } | 1014 | } |
1015 | 1015 | ||
1016 | static inline void locks_remove_file(struct file *filp) | 1016 | static inline void locks_remove_file(struct file *filp) |
1017 | { | 1017 | { |
1018 | return; | 1018 | return; |
1019 | } | 1019 | } |
1020 | 1020 | ||
1021 | static inline void posix_test_lock(struct file *filp, struct file_lock *fl) | 1021 | static inline void posix_test_lock(struct file *filp, struct file_lock *fl) |
1022 | { | 1022 | { |
1023 | return; | 1023 | return; |
1024 | } | 1024 | } |
1025 | 1025 | ||
1026 | static inline int posix_lock_file(struct file *filp, struct file_lock *fl, | 1026 | static inline int posix_lock_file(struct file *filp, struct file_lock *fl, |
1027 | struct file_lock *conflock) | 1027 | struct file_lock *conflock) |
1028 | { | 1028 | { |
1029 | return -ENOLCK; | 1029 | return -ENOLCK; |
1030 | } | 1030 | } |
1031 | 1031 | ||
1032 | static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) | 1032 | static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) |
1033 | { | 1033 | { |
1034 | return -ENOLCK; | 1034 | return -ENOLCK; |
1035 | } | 1035 | } |
1036 | 1036 | ||
1037 | static inline int posix_unblock_lock(struct file_lock *waiter) | 1037 | static inline int posix_unblock_lock(struct file_lock *waiter) |
1038 | { | 1038 | { |
1039 | return -ENOENT; | 1039 | return -ENOENT; |
1040 | } | 1040 | } |
1041 | 1041 | ||
1042 | static inline int vfs_test_lock(struct file *filp, struct file_lock *fl) | 1042 | static inline int vfs_test_lock(struct file *filp, struct file_lock *fl) |
1043 | { | 1043 | { |
1044 | return 0; | 1044 | return 0; |
1045 | } | 1045 | } |
1046 | 1046 | ||
1047 | static inline int vfs_lock_file(struct file *filp, unsigned int cmd, | 1047 | static inline int vfs_lock_file(struct file *filp, unsigned int cmd, |
1048 | struct file_lock *fl, struct file_lock *conf) | 1048 | struct file_lock *fl, struct file_lock *conf) |
1049 | { | 1049 | { |
1050 | return -ENOLCK; | 1050 | return -ENOLCK; |
1051 | } | 1051 | } |
1052 | 1052 | ||
1053 | static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl) | 1053 | static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl) |
1054 | { | 1054 | { |
1055 | return 0; | 1055 | return 0; |
1056 | } | 1056 | } |
1057 | 1057 | ||
1058 | static inline int flock_lock_file_wait(struct file *filp, | 1058 | static inline int flock_lock_file_wait(struct file *filp, |
1059 | struct file_lock *request) | 1059 | struct file_lock *request) |
1060 | { | 1060 | { |
1061 | return -ENOLCK; | 1061 | return -ENOLCK; |
1062 | } | 1062 | } |
1063 | 1063 | ||
1064 | static inline int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) | 1064 | static inline int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) |
1065 | { | 1065 | { |
1066 | return 0; | 1066 | return 0; |
1067 | } | 1067 | } |
1068 | 1068 | ||
1069 | static inline void lease_get_mtime(struct inode *inode, struct timespec *time) | 1069 | static inline void lease_get_mtime(struct inode *inode, struct timespec *time) |
1070 | { | 1070 | { |
1071 | return; | 1071 | return; |
1072 | } | 1072 | } |
1073 | 1073 | ||
1074 | static inline int generic_setlease(struct file *filp, long arg, | 1074 | static inline int generic_setlease(struct file *filp, long arg, |
1075 | struct file_lock **flp) | 1075 | struct file_lock **flp) |
1076 | { | 1076 | { |
1077 | return -EINVAL; | 1077 | return -EINVAL; |
1078 | } | 1078 | } |
1079 | 1079 | ||
1080 | static inline int vfs_setlease(struct file *filp, long arg, | 1080 | static inline int vfs_setlease(struct file *filp, long arg, |
1081 | struct file_lock **lease) | 1081 | struct file_lock **lease) |
1082 | { | 1082 | { |
1083 | return -EINVAL; | 1083 | return -EINVAL; |
1084 | } | 1084 | } |
1085 | 1085 | ||
1086 | static inline int lease_modify(struct file_lock **before, int arg) | 1086 | static inline int lease_modify(struct file_lock **before, int arg) |
1087 | { | 1087 | { |
1088 | return -EINVAL; | 1088 | return -EINVAL; |
1089 | } | 1089 | } |
1090 | 1090 | ||
1091 | static inline int lock_may_read(struct inode *inode, loff_t start, | 1091 | static inline int lock_may_read(struct inode *inode, loff_t start, |
1092 | unsigned long len) | 1092 | unsigned long len) |
1093 | { | 1093 | { |
1094 | return 1; | 1094 | return 1; |
1095 | } | 1095 | } |
1096 | 1096 | ||
1097 | static inline int lock_may_write(struct inode *inode, loff_t start, | 1097 | static inline int lock_may_write(struct inode *inode, loff_t start, |
1098 | unsigned long len) | 1098 | unsigned long len) |
1099 | { | 1099 | { |
1100 | return 1; | 1100 | return 1; |
1101 | } | 1101 | } |
1102 | #endif /* !CONFIG_FILE_LOCKING */ | 1102 | #endif /* !CONFIG_FILE_LOCKING */ |
1103 | 1103 | ||
1104 | 1104 | ||
1105 | struct fasync_struct { | 1105 | struct fasync_struct { |
1106 | spinlock_t fa_lock; | 1106 | spinlock_t fa_lock; |
1107 | int magic; | 1107 | int magic; |
1108 | int fa_fd; | 1108 | int fa_fd; |
1109 | struct fasync_struct *fa_next; /* singly linked list */ | 1109 | struct fasync_struct *fa_next; /* singly linked list */ |
1110 | struct file *fa_file; | 1110 | struct file *fa_file; |
1111 | struct rcu_head fa_rcu; | 1111 | struct rcu_head fa_rcu; |
1112 | }; | 1112 | }; |
1113 | 1113 | ||
1114 | #define FASYNC_MAGIC 0x4601 | 1114 | #define FASYNC_MAGIC 0x4601 |
1115 | 1115 | ||
1116 | /* SMP safe fasync helpers: */ | 1116 | /* SMP safe fasync helpers: */ |
1117 | extern int fasync_helper(int, struct file *, int, struct fasync_struct **); | 1117 | extern int fasync_helper(int, struct file *, int, struct fasync_struct **); |
1118 | extern struct fasync_struct *fasync_insert_entry(int, struct file *, struct fasync_struct **, struct fasync_struct *); | 1118 | extern struct fasync_struct *fasync_insert_entry(int, struct file *, struct fasync_struct **, struct fasync_struct *); |
1119 | extern int fasync_remove_entry(struct file *, struct fasync_struct **); | 1119 | extern int fasync_remove_entry(struct file *, struct fasync_struct **); |
1120 | extern struct fasync_struct *fasync_alloc(void); | 1120 | extern struct fasync_struct *fasync_alloc(void); |
1121 | extern void fasync_free(struct fasync_struct *); | 1121 | extern void fasync_free(struct fasync_struct *); |
1122 | 1122 | ||
1123 | /* can be called from interrupts */ | 1123 | /* can be called from interrupts */ |
1124 | extern void kill_fasync(struct fasync_struct **, int, int); | 1124 | extern void kill_fasync(struct fasync_struct **, int, int); |
1125 | 1125 | ||
1126 | extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force); | 1126 | extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force); |
1127 | extern int f_setown(struct file *filp, unsigned long arg, int force); | 1127 | extern int f_setown(struct file *filp, unsigned long arg, int force); |
1128 | extern void f_delown(struct file *filp); | 1128 | extern void f_delown(struct file *filp); |
1129 | extern pid_t f_getown(struct file *filp); | 1129 | extern pid_t f_getown(struct file *filp); |
1130 | extern int send_sigurg(struct fown_struct *fown); | 1130 | extern int send_sigurg(struct fown_struct *fown); |
1131 | 1131 | ||
1132 | struct mm_struct; | 1132 | struct mm_struct; |
1133 | 1133 | ||
1134 | /* | 1134 | /* |
1135 | * Umount options | 1135 | * Umount options |
1136 | */ | 1136 | */ |
1137 | 1137 | ||
1138 | #define MNT_FORCE 0x00000001 /* Attempt to forcibly umount */ | 1138 | #define MNT_FORCE 0x00000001 /* Attempt to forcibly umount */ |
1139 | #define MNT_DETACH 0x00000002 /* Just detach from the tree */ | 1139 | #define MNT_DETACH 0x00000002 /* Just detach from the tree */ |
1140 | #define MNT_EXPIRE 0x00000004 /* Mark for expiry */ | 1140 | #define MNT_EXPIRE 0x00000004 /* Mark for expiry */ |
1141 | #define UMOUNT_NOFOLLOW 0x00000008 /* Don't follow symlink on umount */ | 1141 | #define UMOUNT_NOFOLLOW 0x00000008 /* Don't follow symlink on umount */ |
1142 | #define UMOUNT_UNUSED 0x80000000 /* Flag guaranteed to be unused */ | 1142 | #define UMOUNT_UNUSED 0x80000000 /* Flag guaranteed to be unused */ |
1143 | 1143 | ||
1144 | extern struct list_head super_blocks; | 1144 | extern struct list_head super_blocks; |
1145 | extern spinlock_t sb_lock; | 1145 | extern spinlock_t sb_lock; |
1146 | 1146 | ||
1147 | /* Possible states of 'frozen' field */ | 1147 | /* Possible states of 'frozen' field */ |
1148 | enum { | 1148 | enum { |
1149 | SB_UNFROZEN = 0, /* FS is unfrozen */ | 1149 | SB_UNFROZEN = 0, /* FS is unfrozen */ |
1150 | SB_FREEZE_WRITE = 1, /* Writes, dir ops, ioctls frozen */ | 1150 | SB_FREEZE_WRITE = 1, /* Writes, dir ops, ioctls frozen */ |
1151 | SB_FREEZE_PAGEFAULT = 2, /* Page faults stopped as well */ | 1151 | SB_FREEZE_PAGEFAULT = 2, /* Page faults stopped as well */ |
1152 | SB_FREEZE_FS = 3, /* For internal FS use (e.g. to stop | 1152 | SB_FREEZE_FS = 3, /* For internal FS use (e.g. to stop |
1153 | * internal threads if needed) */ | 1153 | * internal threads if needed) */ |
1154 | SB_FREEZE_COMPLETE = 4, /* ->freeze_fs finished successfully */ | 1154 | SB_FREEZE_COMPLETE = 4, /* ->freeze_fs finished successfully */ |
1155 | }; | 1155 | }; |
1156 | 1156 | ||
1157 | #define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1) | 1157 | #define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1) |
1158 | 1158 | ||
1159 | struct sb_writers { | 1159 | struct sb_writers { |
1160 | /* Counters for counting writers at each level */ | 1160 | /* Counters for counting writers at each level */ |
1161 | struct percpu_counter counter[SB_FREEZE_LEVELS]; | 1161 | struct percpu_counter counter[SB_FREEZE_LEVELS]; |
1162 | wait_queue_head_t wait; /* queue for waiting for | 1162 | wait_queue_head_t wait; /* queue for waiting for |
1163 | writers / faults to finish */ | 1163 | writers / faults to finish */ |
1164 | int frozen; /* Is sb frozen? */ | 1164 | int frozen; /* Is sb frozen? */ |
1165 | wait_queue_head_t wait_unfrozen; /* queue for waiting for | 1165 | wait_queue_head_t wait_unfrozen; /* queue for waiting for |
1166 | sb to be thawed */ | 1166 | sb to be thawed */ |
1167 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 1167 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
1168 | struct lockdep_map lock_map[SB_FREEZE_LEVELS]; | 1168 | struct lockdep_map lock_map[SB_FREEZE_LEVELS]; |
1169 | #endif | 1169 | #endif |
1170 | }; | 1170 | }; |
1171 | 1171 | ||
1172 | struct super_block { | 1172 | struct super_block { |
1173 | struct list_head s_list; /* Keep this first */ | 1173 | struct list_head s_list; /* Keep this first */ |
1174 | dev_t s_dev; /* search index; _not_ kdev_t */ | 1174 | dev_t s_dev; /* search index; _not_ kdev_t */ |
1175 | unsigned char s_blocksize_bits; | 1175 | unsigned char s_blocksize_bits; |
1176 | unsigned long s_blocksize; | 1176 | unsigned long s_blocksize; |
1177 | loff_t s_maxbytes; /* Max file size */ | 1177 | loff_t s_maxbytes; /* Max file size */ |
1178 | struct file_system_type *s_type; | 1178 | struct file_system_type *s_type; |
1179 | const struct super_operations *s_op; | 1179 | const struct super_operations *s_op; |
1180 | const struct dquot_operations *dq_op; | 1180 | const struct dquot_operations *dq_op; |
1181 | const struct quotactl_ops *s_qcop; | 1181 | const struct quotactl_ops *s_qcop; |
1182 | const struct export_operations *s_export_op; | 1182 | const struct export_operations *s_export_op; |
1183 | unsigned long s_flags; | 1183 | unsigned long s_flags; |
1184 | unsigned long s_magic; | 1184 | unsigned long s_magic; |
1185 | struct dentry *s_root; | 1185 | struct dentry *s_root; |
1186 | struct rw_semaphore s_umount; | 1186 | struct rw_semaphore s_umount; |
1187 | int s_count; | 1187 | int s_count; |
1188 | atomic_t s_active; | 1188 | atomic_t s_active; |
1189 | #ifdef CONFIG_SECURITY | 1189 | #ifdef CONFIG_SECURITY |
1190 | void *s_security; | 1190 | void *s_security; |
1191 | #endif | 1191 | #endif |
1192 | const struct xattr_handler **s_xattr; | 1192 | const struct xattr_handler **s_xattr; |
1193 | 1193 | ||
1194 | struct list_head s_inodes; /* all inodes */ | 1194 | struct list_head s_inodes; /* all inodes */ |
1195 | struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */ | 1195 | struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */ |
1196 | struct list_head s_mounts; /* list of mounts; _not_ for fs use */ | 1196 | struct list_head s_mounts; /* list of mounts; _not_ for fs use */ |
1197 | struct block_device *s_bdev; | 1197 | struct block_device *s_bdev; |
1198 | struct backing_dev_info *s_bdi; | 1198 | struct backing_dev_info *s_bdi; |
1199 | struct mtd_info *s_mtd; | 1199 | struct mtd_info *s_mtd; |
1200 | struct hlist_node s_instances; | 1200 | struct hlist_node s_instances; |
1201 | struct quota_info s_dquot; /* Diskquota specific options */ | 1201 | struct quota_info s_dquot; /* Diskquota specific options */ |
1202 | 1202 | ||
1203 | struct sb_writers s_writers; | 1203 | struct sb_writers s_writers; |
1204 | 1204 | ||
1205 | char s_id[32]; /* Informational name */ | 1205 | char s_id[32]; /* Informational name */ |
1206 | u8 s_uuid[16]; /* UUID */ | 1206 | u8 s_uuid[16]; /* UUID */ |
1207 | 1207 | ||
1208 | void *s_fs_info; /* Filesystem private info */ | 1208 | void *s_fs_info; /* Filesystem private info */ |
1209 | unsigned int s_max_links; | 1209 | unsigned int s_max_links; |
1210 | fmode_t s_mode; | 1210 | fmode_t s_mode; |
1211 | 1211 | ||
1212 | /* Granularity of c/m/atime in ns. | 1212 | /* Granularity of c/m/atime in ns. |
1213 | Cannot be worse than a second */ | 1213 | Cannot be worse than a second */ |
1214 | u32 s_time_gran; | 1214 | u32 s_time_gran; |
1215 | 1215 | ||
1216 | /* | 1216 | /* |
1217 | * The next field is for VFS *only*. No filesystems have any business | 1217 | * The next field is for VFS *only*. No filesystems have any business |
1218 | * even looking at it. You have been warned. | 1218 | * even looking at it. You have been warned. |
1219 | */ | 1219 | */ |
1220 | struct mutex s_vfs_rename_mutex; /* Kludge */ | 1220 | struct mutex s_vfs_rename_mutex; /* Kludge */ |
1221 | 1221 | ||
1222 | /* | 1222 | /* |
1223 | * Filesystem subtype. If non-empty the filesystem type field | 1223 | * Filesystem subtype. If non-empty the filesystem type field |
1224 | * in /proc/mounts will be "type.subtype" | 1224 | * in /proc/mounts will be "type.subtype" |
1225 | */ | 1225 | */ |
1226 | char *s_subtype; | 1226 | char *s_subtype; |
1227 | 1227 | ||
1228 | /* | 1228 | /* |
1229 | * Saved mount options for lazy filesystems using | 1229 | * Saved mount options for lazy filesystems using |
1230 | * generic_show_options() | 1230 | * generic_show_options() |
1231 | */ | 1231 | */ |
1232 | char __rcu *s_options; | 1232 | char __rcu *s_options; |
1233 | const struct dentry_operations *s_d_op; /* default d_op for dentries */ | 1233 | const struct dentry_operations *s_d_op; /* default d_op for dentries */ |
1234 | 1234 | ||
1235 | /* | 1235 | /* |
1236 | * Saved pool identifier for cleancache (-1 means none) | 1236 | * Saved pool identifier for cleancache (-1 means none) |
1237 | */ | 1237 | */ |
1238 | int cleancache_poolid; | 1238 | int cleancache_poolid; |
1239 | 1239 | ||
1240 | struct shrinker s_shrink; /* per-sb shrinker handle */ | 1240 | struct shrinker s_shrink; /* per-sb shrinker handle */ |
1241 | 1241 | ||
1242 | /* Number of inodes with nlink == 0 but still referenced */ | 1242 | /* Number of inodes with nlink == 0 but still referenced */ |
1243 | atomic_long_t s_remove_count; | 1243 | atomic_long_t s_remove_count; |
1244 | 1244 | ||
1245 | /* Being remounted read-only */ | 1245 | /* Being remounted read-only */ |
1246 | int s_readonly_remount; | 1246 | int s_readonly_remount; |
1247 | 1247 | ||
1248 | /* AIO completions deferred from interrupt context */ | 1248 | /* AIO completions deferred from interrupt context */ |
1249 | struct workqueue_struct *s_dio_done_wq; | 1249 | struct workqueue_struct *s_dio_done_wq; |
1250 | 1250 | ||
1251 | /* | 1251 | /* |
1252 | * Keep the lru lists last in the structure so they always sit on their | 1252 | * Keep the lru lists last in the structure so they always sit on their |
1253 | * own individual cachelines. | 1253 | * own individual cachelines. |
1254 | */ | 1254 | */ |
1255 | struct list_lru s_dentry_lru ____cacheline_aligned_in_smp; | 1255 | struct list_lru s_dentry_lru ____cacheline_aligned_in_smp; |
1256 | struct list_lru s_inode_lru ____cacheline_aligned_in_smp; | 1256 | struct list_lru s_inode_lru ____cacheline_aligned_in_smp; |
1257 | struct rcu_head rcu; | 1257 | struct rcu_head rcu; |
1258 | }; | 1258 | }; |
1259 | 1259 | ||
1260 | extern struct timespec current_fs_time(struct super_block *sb); | 1260 | extern struct timespec current_fs_time(struct super_block *sb); |
1261 | 1261 | ||
1262 | /* | 1262 | /* |
1263 | * Snapshotting support. | 1263 | * Snapshotting support. |
1264 | */ | 1264 | */ |
1265 | 1265 | ||
1266 | void __sb_end_write(struct super_block *sb, int level); | 1266 | void __sb_end_write(struct super_block *sb, int level); |
1267 | int __sb_start_write(struct super_block *sb, int level, bool wait); | 1267 | int __sb_start_write(struct super_block *sb, int level, bool wait); |
1268 | 1268 | ||
1269 | /** | 1269 | /** |
1270 | * sb_end_write - drop write access to a superblock | 1270 | * sb_end_write - drop write access to a superblock |
1271 | * @sb: the super we wrote to | 1271 | * @sb: the super we wrote to |
1272 | * | 1272 | * |
1273 | * Decrement number of writers to the filesystem. Wake up possible waiters | 1273 | * Decrement number of writers to the filesystem. Wake up possible waiters |
1274 | * wanting to freeze the filesystem. | 1274 | * wanting to freeze the filesystem. |
1275 | */ | 1275 | */ |
1276 | static inline void sb_end_write(struct super_block *sb) | 1276 | static inline void sb_end_write(struct super_block *sb) |
1277 | { | 1277 | { |
1278 | __sb_end_write(sb, SB_FREEZE_WRITE); | 1278 | __sb_end_write(sb, SB_FREEZE_WRITE); |
1279 | } | 1279 | } |
1280 | 1280 | ||
1281 | /** | 1281 | /** |
1282 | * sb_end_pagefault - drop write access to a superblock from a page fault | 1282 | * sb_end_pagefault - drop write access to a superblock from a page fault |
1283 | * @sb: the super we wrote to | 1283 | * @sb: the super we wrote to |
1284 | * | 1284 | * |
1285 | * Decrement number of processes handling write page fault to the filesystem. | 1285 | * Decrement number of processes handling write page fault to the filesystem. |
1286 | * Wake up possible waiters wanting to freeze the filesystem. | 1286 | * Wake up possible waiters wanting to freeze the filesystem. |
1287 | */ | 1287 | */ |
1288 | static inline void sb_end_pagefault(struct super_block *sb) | 1288 | static inline void sb_end_pagefault(struct super_block *sb) |
1289 | { | 1289 | { |
1290 | __sb_end_write(sb, SB_FREEZE_PAGEFAULT); | 1290 | __sb_end_write(sb, SB_FREEZE_PAGEFAULT); |
1291 | } | 1291 | } |
1292 | 1292 | ||
1293 | /** | 1293 | /** |
1294 | * sb_end_intwrite - drop write access to a superblock for internal fs purposes | 1294 | * sb_end_intwrite - drop write access to a superblock for internal fs purposes |
1295 | * @sb: the super we wrote to | 1295 | * @sb: the super we wrote to |
1296 | * | 1296 | * |
1297 | * Decrement fs-internal number of writers to the filesystem. Wake up possible | 1297 | * Decrement fs-internal number of writers to the filesystem. Wake up possible |
1298 | * waiters wanting to freeze the filesystem. | 1298 | * waiters wanting to freeze the filesystem. |
1299 | */ | 1299 | */ |
1300 | static inline void sb_end_intwrite(struct super_block *sb) | 1300 | static inline void sb_end_intwrite(struct super_block *sb) |
1301 | { | 1301 | { |
1302 | __sb_end_write(sb, SB_FREEZE_FS); | 1302 | __sb_end_write(sb, SB_FREEZE_FS); |
1303 | } | 1303 | } |
1304 | 1304 | ||
1305 | /** | 1305 | /** |
1306 | * sb_start_write - get write access to a superblock | 1306 | * sb_start_write - get write access to a superblock |
1307 | * @sb: the super we write to | 1307 | * @sb: the super we write to |
1308 | * | 1308 | * |
1309 | * When a process wants to write data or metadata to a file system (i.e. dirty | 1309 | * When a process wants to write data or metadata to a file system (i.e. dirty |
1310 | * a page or an inode), it should embed the operation in a sb_start_write() - | 1310 | * a page or an inode), it should embed the operation in a sb_start_write() - |
1311 | * sb_end_write() pair to get exclusion against file system freezing. This | 1311 | * sb_end_write() pair to get exclusion against file system freezing. This |
1312 | * function increments number of writers preventing freezing. If the file | 1312 | * function increments number of writers preventing freezing. If the file |
1313 | * system is already frozen, the function waits until the file system is | 1313 | * system is already frozen, the function waits until the file system is |
1314 | * thawed. | 1314 | * thawed. |
1315 | * | 1315 | * |
1316 | * Since freeze protection behaves as a lock, users have to preserve | 1316 | * Since freeze protection behaves as a lock, users have to preserve |
1317 | * ordering of freeze protection and other filesystem locks. Generally, | 1317 | * ordering of freeze protection and other filesystem locks. Generally, |
1318 | * freeze protection should be the outermost lock. In particular, we have: | 1318 | * freeze protection should be the outermost lock. In particular, we have: |
1319 | * | 1319 | * |
1320 | * sb_start_write | 1320 | * sb_start_write |
1321 | * -> i_mutex (write path, truncate, directory ops, ...) | 1321 | * -> i_mutex (write path, truncate, directory ops, ...) |
1322 | * -> s_umount (freeze_super, thaw_super) | 1322 | * -> s_umount (freeze_super, thaw_super) |
1323 | */ | 1323 | */ |
1324 | static inline void sb_start_write(struct super_block *sb) | 1324 | static inline void sb_start_write(struct super_block *sb) |
1325 | { | 1325 | { |
1326 | __sb_start_write(sb, SB_FREEZE_WRITE, true); | 1326 | __sb_start_write(sb, SB_FREEZE_WRITE, true); |
1327 | } | 1327 | } |
1328 | 1328 | ||
1329 | static inline int sb_start_write_trylock(struct super_block *sb) | 1329 | static inline int sb_start_write_trylock(struct super_block *sb) |
1330 | { | 1330 | { |
1331 | return __sb_start_write(sb, SB_FREEZE_WRITE, false); | 1331 | return __sb_start_write(sb, SB_FREEZE_WRITE, false); |
1332 | } | 1332 | } |
1333 | 1333 | ||
/**
 * sb_start_pagefault - get write access to a superblock from a page fault
 * @sb: the super we write to
 *
 * When a process starts handling a write page fault, it should embed the
 * operation into an sb_start_pagefault() - sb_end_pagefault() pair to get
 * exclusion against file system freezing. This is needed since the page fault
 * is going to dirty a page. This function increments the number of running
 * page faults, preventing freezing. If the file system is already frozen, the
 * function waits until the file system is thawed.
 *
 * Since page fault freeze protection behaves as a lock, users have to preserve
 * the ordering of freeze protection and other filesystem locks. It is advised
 * to put sb_start_pagefault() close to mmap_sem in lock ordering. Page fault
 * handling code implies the lock dependency:
 *
 * mmap_sem
 *   -> sb_start_pagefault
 */
static inline void sb_start_pagefault(struct super_block *sb)
{
	/* Same helper as sb_start_write(), one level down: SB_FREEZE_PAGEFAULT. */
	__sb_start_write(sb, SB_FREEZE_PAGEFAULT, true);
}
1357 | 1357 | ||
/**
 * sb_start_intwrite - get write access to a superblock for internal fs purposes
 * @sb: the super we write to
 *
 * This is the third level of protection against filesystem freezing. It is
 * free for use by a filesystem. The only requirement is that it must rank
 * below sb_start_pagefault.
 *
 * For example, a filesystem can call sb_start_intwrite() when starting a
 * transaction, which somewhat eases handling of freezing for internal sources
 * of filesystem changes (internal fs threads, discarding preallocation on file
 * close, etc.).
 */
static inline void sb_start_intwrite(struct super_block *sb)
{
	/* Third freeze level: SB_FREEZE_FS, entered with the same 'true' flag. */
	__sb_start_write(sb, SB_FREEZE_FS, true);
}
1375 | 1375 | ||
1376 | 1376 | ||
/*
 * NOTE(review): going by the name, reports whether the caller owns @inode or
 * holds a capability that overrides ownership - confirm against the
 * definition, which is not in this header.
 */
extern bool inode_owner_or_capable(const struct inode *inode);

/*
 * VFS helper functions..
 *
 * Prototypes only; the parameters are unnamed here.  The argument lists
 * mirror the create/mkdir/mknod/symlink/link/rmdir/unlink/rename2 members of
 * struct inode_operations, with vfs_link(), vfs_unlink() and vfs_rename()
 * taking one additional "struct inode **" argument.
 * NOTE(review): that extra out-pointer presumably reports an inode the
 * caller must deal with before retrying - confirm against the definitions.
 */
extern int vfs_create(struct inode *, struct dentry *, umode_t, bool);
extern int vfs_mkdir(struct inode *, struct dentry *, umode_t);
extern int vfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
extern int vfs_symlink(struct inode *, struct dentry *, const char *);
extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct inode **);
extern int vfs_rmdir(struct inode *, struct dentry *);
extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int);

/*
 * VFS dentry helper functions.
 */
extern void dentry_unhash(struct dentry *dentry);

/*
 * VFS file helper functions.
 *
 * NOTE(review): inode_init_owner() takes the new @inode, its parent @dir and
 * the requested @mode; presumably it derives the new inode's ownership and
 * mode from them - confirm against the definition.
 */
extern void inode_init_owner(struct inode *inode, const struct inode *dir,
			umode_t mode);
/*
 * VFS FS_IOC_FIEMAP helper definitions.
 *
 * struct fiemap_extent_info is what the ->fiemap() member of
 * struct inode_operations receives; the two helpers declared after it take a
 * pointer to the same structure.
 */
struct fiemap_extent_info {
	unsigned int fi_flags;		/* Flags as passed from user */
	unsigned int fi_extents_mapped;	/* Number of mapped extents */
	unsigned int fi_extents_max;	/* Size of fiemap_extent array */
	struct fiemap_extent __user *fi_extents_start; /* Start of
							fiemap_extent array */
};
/*
 * NOTE(review): presumably records one extent (@logical, @phys, @len, @flags)
 * into @info - confirm return-value convention against the definition.
 */
int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical,
			    u64 phys, u64 len, u32 flags);
/*
 * NOTE(review): presumably validates @fieinfo->fi_flags against the set the
 * filesystem supports (@fs_flags) - confirm against the definition.
 */
int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags);
1414 | 1414 | ||
/*
 * File types
 *
 * NOTE! These match bits 12..15 of stat.st_mode
 * (ie "(i_mode >> 12) & 15").
 *
 * The per-value notes below name the S_IF* mode each constant corresponds to
 * under that shift; DT_UNKNOWN and DT_WHT have no S_IF* counterpart.
 */
#define DT_UNKNOWN	0	/* type not known */
#define DT_FIFO		1	/* S_IFIFO  >> 12: named pipe */
#define DT_CHR		2	/* S_IFCHR  >> 12: character device */
#define DT_DIR		4	/* S_IFDIR  >> 12: directory */
#define DT_BLK		6	/* S_IFBLK  >> 12: block device */
#define DT_REG		8	/* S_IFREG  >> 12: regular file */
#define DT_LNK		10	/* S_IFLNK  >> 12: symbolic link */
#define DT_SOCK		12	/* S_IFSOCK >> 12: socket */
#define DT_WHT		14	/* whiteout entry */
1430 | 1430 | ||
/*
 * This is the "filldir" function type, used by readdir() to let
 * the kernel specify what kind of dirent layout it wants to have.
 * This allows the kernel to read directories into kernel space or
 * to have different dirent layouts depending on the binary type.
 *
 * NOTE(review): the arguments are unnamed; presumably they are, in order, an
 * opaque caller buffer, the entry name, its length, the directory offset,
 * the inode number and a DT_* type - confirm against an implementation.
 */
typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned);

/*
 * Passed to the ->iterate() member of struct file_operations.  'actor' is
 * const, so it can only be set when the structure is initialised.
 */
struct dir_context {
	const filldir_t actor;	/* callback of the filldir type above */
	loff_t pos;		/* presumably the current directory position - confirm */
};
1442 | 1442 | ||
/* Forward declaration only; the structure is defined elsewhere. */
struct block_device_operations;

/* These macros are for out of kernel modules to test that
 * the kernel supports the unlocked_ioctl and compat_ioctl
 * fields in struct file_operations. */
#define HAVE_COMPAT_IOCTL 1
#define HAVE_UNLOCKED_IOCTL 1
1450 | 1450 | ||
/*
 * struct file_operations - table of callbacks for an open file.
 *
 * Apart from @owner, every member is a function pointer.  Most prototypes
 * leave their parameters unnamed, so the notes below only point out what the
 * declarations themselves show.  Member order determines the structure
 * layout; do not reorder.
 */
struct file_operations {
	struct module *owner;
	loff_t (*llseek) (struct file *, loff_t, int);
	/* read/write take a __user buffer, a byte count and a position pointer */
	ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
	/* kiocb-based variants taking an iovec array and its element count */
	ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
	ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
	/* receives a struct dir_context (actor callback + pos) */
	int (*iterate) (struct file *, struct dir_context *);
	unsigned int (*poll) (struct file *, struct poll_table_struct *);
	/* presence of these two is advertised by HAVE_UNLOCKED_IOCTL / HAVE_COMPAT_IOCTL */
	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
	int (*mmap) (struct file *, struct vm_area_struct *);
	int (*open) (struct inode *, struct file *);
	int (*flush) (struct file *, fl_owner_t id);
	int (*release) (struct inode *, struct file *);
	int (*fsync) (struct file *, loff_t, loff_t, int datasync);
	int (*aio_fsync) (struct kiocb *, int datasync);
	int (*fasync) (int, struct file *, int);
	/* lock, flock and setlease all operate on struct file_lock */
	int (*lock) (struct file *, int, struct file_lock *);
	ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
	int (*check_flags)(int);
	int (*flock) (struct file *, int, struct file_lock *);
	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
	int (*setlease)(struct file *, long, struct file_lock **);
	long (*fallocate)(struct file *file, int mode, loff_t offset,
			  loff_t len);
	int (*show_fdinfo)(struct seq_file *m, struct file *f);
};
1481 | 1481 | ||
/*
 * struct inode_operations - table of callbacks that act on an inode or on a
 * dentry within a directory inode.
 *
 * Every member is a function pointer.  The create/link/unlink/symlink/mkdir/
 * rmdir/mknod/rename2 argument lists correspond to the vfs_*() helper
 * prototypes declared earlier in this header.  Member order determines the
 * structure layout; do not reorder.  The type is declared with the
 * ____cacheline_aligned attribute macro.
 */
struct inode_operations {
	struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
	void * (*follow_link) (struct dentry *, struct nameidata *);
	int (*permission) (struct inode *, int);
	struct posix_acl * (*get_acl)(struct inode *, int);

	int (*readlink) (struct dentry *, char __user *,int);
	/* takes a void *, the same type follow_link returns */
	void (*put_link) (struct dentry *, struct nameidata *, void *);

	int (*create) (struct inode *,struct dentry *, umode_t, bool);
	int (*link) (struct dentry *,struct inode *,struct dentry *);
	int (*unlink) (struct inode *,struct dentry *);
	int (*symlink) (struct inode *,struct dentry *,const char *);
	int (*mkdir) (struct inode *,struct dentry *,umode_t);
	int (*rmdir) (struct inode *,struct dentry *);
	int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
	int (*rename) (struct inode *, struct dentry *,
			struct inode *, struct dentry *);
	/* same as rename plus a trailing unsigned int (presumably flags - confirm) */
	int (*rename2) (struct inode *, struct dentry *,
			struct inode *, struct dentry *, unsigned int);
	int (*setattr) (struct dentry *, struct iattr *);
	int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
	int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
	ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
	ssize_t (*listxattr) (struct dentry *, char *, size_t);
	int (*removexattr) (struct dentry *, const char *);
	/* receives the struct fiemap_extent_info declared earlier in this header */
	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
		      u64 len);
	int (*update_time)(struct inode *, struct timespec *, int);
	int (*atomic_open)(struct inode *, struct dentry *,
			   struct file *, unsigned open_flag,
			   umode_t create_mode, int *opened);
	int (*tmpfile) (struct inode *, struct dentry *, umode_t);
	int (*set_acl)(struct inode *, struct posix_acl *, int);
} ____cacheline_aligned;
1517 | 1517 | ||
/*
 * NOTE(review): going by the parameter names, copies and validates a
 * user-space iovec array of @nr_segs entries, using the caller's
 * @fast_pointer storage when at most @fast_segs entries are needed and
 * reporting the array actually used through @ret_pointer - confirm against
 * the definition.
 */
ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
			      unsigned long nr_segs, unsigned long fast_segs,
			      struct iovec *fast_pointer,
			      struct iovec **ret_pointer);

/*
 * Prototypes only.  vfs_read()/vfs_write() have the same argument list as
 * the ->read/->write members of struct file_operations; the *v variants take
 * a __user iovec array and its element count instead of a single buffer.
 */
extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
		unsigned long, loff_t *);
extern ssize_t vfs_writev(struct file *, const struct iovec __user *,
		unsigned long, loff_t *);
1529 | 1529 | ||
/*
 * struct super_operations - table of callbacks that act on a superblock or on
 * the inodes belonging to it.
 *
 * Every member is a function pointer.  Member order determines the structure
 * layout; do not reorder.  quota_read/quota_write exist only when
 * CONFIG_QUOTA is defined, so the offsets of the members after them depend
 * on that option.
 */
struct super_operations {
	/* inode lifetime */
	struct inode *(*alloc_inode)(struct super_block *sb);
	void (*destroy_inode)(struct inode *);

	void (*dirty_inode) (struct inode *, int flags);
	int (*write_inode) (struct inode *, struct writeback_control *wbc);
	int (*drop_inode) (struct inode *);
	void (*evict_inode) (struct inode *);
	void (*put_super) (struct super_block *);
	int (*sync_fs)(struct super_block *sb, int wait);
	/* NOTE(review): presumably invoked from freeze_super()/thaw_super() - confirm */
	int (*freeze_fs) (struct super_block *);
	int (*unfreeze_fs) (struct super_block *);
	int (*statfs) (struct dentry *, struct kstatfs *);
	int (*remount_fs) (struct super_block *, int *, char *);
	void (*umount_begin) (struct super_block *);

	/* the show_* callbacks all write to a struct seq_file */
	int (*show_options)(struct seq_file *, struct dentry *);
	int (*show_devname)(struct seq_file *, struct dentry *);
	int (*show_path)(struct seq_file *, struct dentry *);
	int (*show_stats)(struct seq_file *, struct dentry *);
#ifdef CONFIG_QUOTA
	ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
	ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
#endif
	int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
	long (*nr_cached_objects)(struct super_block *, int);
	long (*free_cached_objects)(struct super_block *, long, int);
};
1558 | 1558 | ||
/*
 * Per-inode flag bits, stored in inode->i_flags.  They are independent of
 * the superblock (mount) flags.  Each flag occupies exactly one bit, written
 * here as a shift so the bit position is visible at a glance.
 */
#define S_SYNC		(1 << 0)	/* Writes are synced at once */
#define S_NOATIME	(1 << 1)	/* Do not update access times */
#define S_APPEND	(1 << 2)	/* Append-only file */
#define S_IMMUTABLE	(1 << 3)	/* Immutable file */
#define S_DEAD		(1 << 4)	/* removed, but still open directory */
#define S_NOQUOTA	(1 << 5)	/* Inode is not counted to quota */
#define S_DIRSYNC	(1 << 6)	/* Directory modifications are synchronous */
#define S_NOCMTIME	(1 << 7)	/* Do not update file c/mtime */
#define S_SWAPFILE	(1 << 8)	/* Do not truncate: swapon got its bmaps */
#define S_PRIVATE	(1 << 9)	/* Inode is fs-internal */
#define S_IMA		(1 << 10)	/* Inode has an associated IMA struct */
#define S_AUTOMOUNT	(1 << 11)	/* Automount/referral quasi-directory */
#define S_NOSEC		(1 << 12)	/* no suid or xattr security attributes */
1575 | 1575 | ||
/*
 * Options such as nosuid are per-inode: setting one on the file system only
 * means every inode inherits it by default, and in principle an (as yet
 * unimplemented) ioctl() could override it for selected inodes.
 *
 * Exception: MS_RDONLY always applies to the whole file system.
 *
 * A mounted filesystem's flags can be changed while files are in use, in
 * which case existing inodes do not get their i_flags updated.  For that
 * reason i_flags no longer inherit the superblock mount flags, and the
 * predicates below test the superblock (via __IS_FLG) and/or inode->i_flags
 * explicitly.  -- rmk@arm.uk.linux.org
 */

/* Test one or more MS_* bits in the flags of the inode's superblock. */
#define __IS_FLG(inode, flg)	((inode)->i_sb->s_flags & (flg))

/* Predicates decided by the superblock alone, or by superblock or inode. */
#define IS_RDONLY(inode)	__IS_FLG(inode, MS_RDONLY)
#define IS_SYNC(inode) \
	(__IS_FLG(inode, MS_SYNCHRONOUS) || ((inode)->i_flags & S_SYNC))
#define IS_DIRSYNC(inode) \
	(__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \
	 ((inode)->i_flags & (S_SYNC|S_DIRSYNC)))
#define IS_MANDLOCK(inode)	__IS_FLG(inode, MS_MANDLOCK)
#define IS_NOATIME(inode)	__IS_FLG(inode, MS_RDONLY|MS_NOATIME)
#define IS_I_VERSION(inode)	__IS_FLG(inode, MS_I_VERSION)

#define IS_NOQUOTA(inode)	((inode)->i_flags & S_NOQUOTA)
#define IS_APPEND(inode)	((inode)->i_flags & S_APPEND)
#define IS_IMMUTABLE(inode)	((inode)->i_flags & S_IMMUTABLE)
#define IS_POSIXACL(inode)	__IS_FLG(inode, MS_POSIXACL)

/* Predicates decided by inode->i_flags alone. */
#define IS_DEADDIR(inode)	((inode)->i_flags & S_DEAD)
#define IS_NOCMTIME(inode)	((inode)->i_flags & S_NOCMTIME)
#define IS_SWAPFILE(inode)	((inode)->i_flags & S_SWAPFILE)
#define IS_PRIVATE(inode)	((inode)->i_flags & S_PRIVATE)
#define IS_IMA(inode)		((inode)->i_flags & S_IMA)
#define IS_AUTOMOUNT(inode)	((inode)->i_flags & S_AUTOMOUNT)
#define IS_NOSEC(inode)		((inode)->i_flags & S_NOSEC)
1612 | 1612 | ||
1613 | /* | 1613 | /* |
1614 | * Inode state bits. Protected by inode->i_lock | 1614 | * Inode state bits. Protected by inode->i_lock |
1615 | * | 1615 | * |
1616 | * Three bits determine the dirty state of the inode, I_DIRTY_SYNC, | 1616 | * Three bits determine the dirty state of the inode, I_DIRTY_SYNC, |
1617 | * I_DIRTY_DATASYNC and I_DIRTY_PAGES. | 1617 | * I_DIRTY_DATASYNC and I_DIRTY_PAGES. |
1618 | * | 1618 | * |
1619 | * Four bits define the lifetime of an inode. Initially, inodes are I_NEW, | 1619 | * Four bits define the lifetime of an inode. Initially, inodes are I_NEW, |
1620 | * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at | 1620 | * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at |
1621 | * various stages of removing an inode. | 1621 | * various stages of removing an inode. |
1622 | * | 1622 | * |
1623 | * Two bits are used for locking and completion notification, I_NEW and I_SYNC. | 1623 | * Two bits are used for locking and completion notification, I_NEW and I_SYNC. |
1624 | * | 1624 | * |
1625 | * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on | 1625 | * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on |
1626 | * fdatasync(). i_atime is the usual cause. | 1626 | * fdatasync(). i_atime is the usual cause. |
1627 | * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of | 1627 | * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of |
1628 | * these changes separately from I_DIRTY_SYNC so that we | 1628 | * these changes separately from I_DIRTY_SYNC so that we |
1629 | * don't have to write inode on fdatasync() when only | 1629 | * don't have to write inode on fdatasync() when only |
1630 | * mtime has changed in it. | 1630 | * mtime has changed in it. |
1631 | * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. | 1631 | * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. |
1632 | * I_NEW Serves as both a mutex and completion notification. | 1632 | * I_NEW Serves as both a mutex and completion notification. |
1633 | * New inodes set I_NEW. If two processes both create | 1633 | * New inodes set I_NEW. If two processes both create |
1634 | * the same inode, one of them will release its inode and | 1634 | * the same inode, one of them will release its inode and |
1635 | * wait for I_NEW to be released before returning. | 1635 | * wait for I_NEW to be released before returning. |
1636 | * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can | 1636 | * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can |
1637 | * also cause waiting on I_NEW, without I_NEW actually | 1637 | * also cause waiting on I_NEW, without I_NEW actually |
1638 | * being set. find_inode() uses this to prevent returning | 1638 | * being set. find_inode() uses this to prevent returning |
1639 | * nearly-dead inodes. | 1639 | * nearly-dead inodes. |
1640 | * I_WILL_FREE Must be set when calling write_inode_now() if i_count | 1640 | * I_WILL_FREE Must be set when calling write_inode_now() if i_count |
1641 | * is zero. I_FREEING must be set when I_WILL_FREE is | 1641 | * is zero. I_FREEING must be set when I_WILL_FREE is |
1642 | * cleared. | 1642 | * cleared. |
1643 | * I_FREEING Set when inode is about to be freed but still has dirty | 1643 | * I_FREEING Set when inode is about to be freed but still has dirty |
1644 | * pages or buffers attached or the inode itself is still | 1644 | * pages or buffers attached or the inode itself is still |
1645 | * dirty. | 1645 | * dirty. |
1646 | * I_CLEAR Added by clear_inode(). In this state the inode is | 1646 | * I_CLEAR Added by clear_inode(). In this state the inode is |
1647 | * clean and can be destroyed. Inode keeps I_FREEING. | 1647 | * clean and can be destroyed. Inode keeps I_FREEING. |
1648 | * | 1648 | * |
1649 | * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are | 1649 | * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are |
1650 | * prohibited for many purposes. iget() must wait for | 1650 | * prohibited for many purposes. iget() must wait for |
1651 | * the inode to be completely released, then create it | 1651 | * the inode to be completely released, then create it |
1652 | * anew. Other functions will just ignore such inodes, | 1652 | * anew. Other functions will just ignore such inodes, |
1653 | * if appropriate. I_NEW is used for waiting. | 1653 | * if appropriate. I_NEW is used for waiting. |
1654 | * | 1654 | * |
1655 | * I_SYNC Writeback of inode is running. The bit is set during | 1655 | * I_SYNC Writeback of inode is running. The bit is set during |
1656 | * data writeback, and cleared with a wakeup on the bit | 1656 | * data writeback, and cleared with a wakeup on the bit |
1657 | * address once it is done. The bit is also used to pin | 1657 | * address once it is done. The bit is also used to pin |
1658 | * the inode in memory for flusher thread. | 1658 | * the inode in memory for flusher thread. |
1659 | * | 1659 | * |
1660 | * I_REFERENCED Marks the inode as recently referenced on the LRU list. | 1660 | * I_REFERENCED Marks the inode as recently referenced on the LRU list. |
1661 | * | 1661 | * |
1662 | * I_DIO_WAKEUP Never set. Only used as a key for wait_on_bit(). | 1662 | * I_DIO_WAKEUP Never set. Only used as a key for wait_on_bit(). |
1663 | * | 1663 | * |
1664 | * Q: What is the difference between I_WILL_FREE and I_FREEING? | 1664 | * Q: What is the difference between I_WILL_FREE and I_FREEING? |
1665 | */ | 1665 | */ |
/*
 * Inode state bits.  Names with a leading double underscore (__I_NEW,
 * __I_SYNC, __I_DIO_WAKEUP) are bit *numbers*; the matching I_* name is
 * the mask built from that bit number.
 */
#define I_DIRTY_SYNC		(1 << 0)
#define I_DIRTY_DATASYNC	(1 << 1)
#define I_DIRTY_PAGES		(1 << 2)
#define __I_NEW			3
#define I_NEW			(1 << __I_NEW)
#define I_WILL_FREE		(1 << 4)
#define I_FREEING		(1 << 5)
#define I_CLEAR			(1 << 6)
#define __I_SYNC		7
#define I_SYNC			(1 << __I_SYNC)
#define I_REFERENCED		(1 << 8)
#define __I_DIO_WAKEUP		9
/* Must shift by the bit number, not by the mask being defined. */
#define I_DIO_WAKEUP		(1 << __I_DIO_WAKEUP)
#define I_LINKABLE		(1 << 10)

/* Union of all "inode is dirty" bits. */
#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
1682 | 1682 | ||
1683 | extern void __mark_inode_dirty(struct inode *, int); | 1683 | extern void __mark_inode_dirty(struct inode *, int); |
1684 | static inline void mark_inode_dirty(struct inode *inode) | 1684 | static inline void mark_inode_dirty(struct inode *inode) |
1685 | { | 1685 | { |
1686 | __mark_inode_dirty(inode, I_DIRTY); | 1686 | __mark_inode_dirty(inode, I_DIRTY); |
1687 | } | 1687 | } |
1688 | 1688 | ||
1689 | static inline void mark_inode_dirty_sync(struct inode *inode) | 1689 | static inline void mark_inode_dirty_sync(struct inode *inode) |
1690 | { | 1690 | { |
1691 | __mark_inode_dirty(inode, I_DIRTY_SYNC); | 1691 | __mark_inode_dirty(inode, I_DIRTY_SYNC); |
1692 | } | 1692 | } |
1693 | 1693 | ||
/* Link-count primitives, implemented outside this header. */
extern void inc_nlink(struct inode *inode);
extern void drop_nlink(struct inode *inode);
extern void clear_nlink(struct inode *inode);
extern void set_nlink(struct inode *inode, unsigned int nlink);

/* Call inc_nlink() on @inode, then mark the inode dirty. */
static inline void inode_inc_link_count(struct inode *inode)
{
	inc_nlink(inode);

	mark_inode_dirty(inode);
}
1704 | 1704 | ||
/* Call drop_nlink() on @inode, then mark the inode dirty. */
static inline void inode_dec_link_count(struct inode *inode)
{
	drop_nlink(inode);

	mark_inode_dirty(inode);
}
1710 | 1710 | ||
1711 | /** | 1711 | /** |
1712 | * inode_inc_iversion - increments i_version | 1712 | * inode_inc_iversion - increments i_version |
1713 | * @inode: inode that need to be updated | 1713 | * @inode: inode that need to be updated |
1714 | * | 1714 | * |
1715 | * Every time the inode is modified, the i_version field will be incremented. | 1715 | * Every time the inode is modified, the i_version field will be incremented. |
1716 | * The filesystem has to be mounted with i_version flag | 1716 | * The filesystem has to be mounted with i_version flag |
1717 | */ | 1717 | */ |
1718 | 1718 | ||
1719 | static inline void inode_inc_iversion(struct inode *inode) | 1719 | static inline void inode_inc_iversion(struct inode *inode) |
1720 | { | 1720 | { |
1721 | spin_lock(&inode->i_lock); | 1721 | spin_lock(&inode->i_lock); |
1722 | inode->i_version++; | 1722 | inode->i_version++; |
1723 | spin_unlock(&inode->i_lock); | 1723 | spin_unlock(&inode->i_lock); |
1724 | } | 1724 | } |
1725 | 1725 | ||
/* Single-bit flags, so values can be OR-ed together. */
enum file_time_flags {
	S_ATIME   = 1 << 0,
	S_MTIME   = 1 << 1,
	S_CTIME   = 1 << 2,
	S_VERSION = 1 << 3,
};
1732 | 1732 | ||
1733 | extern void touch_atime(const struct path *); | 1733 | extern void touch_atime(const struct path *); |
1734 | static inline void file_accessed(struct file *file) | 1734 | static inline void file_accessed(struct file *file) |
1735 | { | 1735 | { |
1736 | if (!(file->f_flags & O_NOATIME)) | 1736 | if (!(file->f_flags & O_NOATIME)) |
1737 | touch_atime(&file->f_path); | 1737 | touch_atime(&file->f_path); |
1738 | } | 1738 | } |
1739 | 1739 | ||
/* Inode sync entry points, implemented outside this header. */
extern int sync_inode(struct inode *inode, struct writeback_control *wbc);
extern int sync_inode_metadata(struct inode *inode, int wait);
1742 | 1742 | ||
1743 | struct file_system_type { | 1743 | struct file_system_type { |
1744 | const char *name; | 1744 | const char *name; |
1745 | int fs_flags; | 1745 | int fs_flags; |
1746 | #define FS_REQUIRES_DEV 1 | 1746 | #define FS_REQUIRES_DEV 1 |
1747 | #define FS_BINARY_MOUNTDATA 2 | 1747 | #define FS_BINARY_MOUNTDATA 2 |
1748 | #define FS_HAS_SUBTYPE 4 | 1748 | #define FS_HAS_SUBTYPE 4 |
1749 | #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ | 1749 | #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ |
1750 | #define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */ | 1750 | #define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */ |
1751 | #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ | 1751 | #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ |
1752 | struct dentry *(*mount) (struct file_system_type *, int, | 1752 | struct dentry *(*mount) (struct file_system_type *, int, |
1753 | const char *, void *); | 1753 | const char *, void *); |
1754 | void (*kill_sb) (struct super_block *); | 1754 | void (*kill_sb) (struct super_block *); |
1755 | struct module *owner; | 1755 | struct module *owner; |
1756 | struct file_system_type * next; | 1756 | struct file_system_type * next; |
1757 | struct hlist_head fs_supers; | 1757 | struct hlist_head fs_supers; |
1758 | 1758 | ||
1759 | struct lock_class_key s_lock_key; | 1759 | struct lock_class_key s_lock_key; |
1760 | struct lock_class_key s_umount_key; | 1760 | struct lock_class_key s_umount_key; |
1761 | struct lock_class_key s_vfs_rename_key; | 1761 | struct lock_class_key s_vfs_rename_key; |
1762 | struct lock_class_key s_writers_key[SB_FREEZE_LEVELS]; | 1762 | struct lock_class_key s_writers_key[SB_FREEZE_LEVELS]; |
1763 | 1763 | ||
1764 | struct lock_class_key i_lock_key; | 1764 | struct lock_class_key i_lock_key; |
1765 | struct lock_class_key i_mutex_key; | 1765 | struct lock_class_key i_mutex_key; |
1766 | struct lock_class_key i_mutex_dir_key; | 1766 | struct lock_class_key i_mutex_dir_key; |
1767 | }; | 1767 | }; |
1768 | 1768 | ||
/* Emit a module alias of the form "fs-<NAME>"; NAME must be a string literal. */
#define MODULE_ALIAS_FS(NAME) \
	MODULE_ALIAS("fs-" NAME)
1770 | 1770 | ||
/*
 * Mount helpers.  Those taking a fill_super callback pass it a
 * struct super_block *, a void * and an int.
 */
extern struct dentry *mount_ns(struct file_system_type *fs_type,
			       int flags, void *data,
			       int (*fill_super)(struct super_block *, void *, int));

extern struct dentry *mount_bdev(struct file_system_type *fs_type,
				 int flags, const char *dev_name, void *data,
				 int (*fill_super)(struct super_block *, void *, int));

extern struct dentry *mount_single(struct file_system_type *fs_type,
				   int flags, void *data,
				   int (*fill_super)(struct super_block *, void *, int));

extern struct dentry *mount_nodev(struct file_system_type *fs_type,
				  int flags, void *data,
				  int (*fill_super)(struct super_block *, void *, int));

extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path);
1783 | void generic_shutdown_super(struct super_block *sb); | 1783 | void generic_shutdown_super(struct super_block *sb); |
1784 | void kill_block_super(struct super_block *sb); | 1784 | void kill_block_super(struct super_block *sb); |
1785 | void kill_anon_super(struct super_block *sb); | 1785 | void kill_anon_super(struct super_block *sb); |
1786 | void kill_litter_super(struct super_block *sb); | 1786 | void kill_litter_super(struct super_block *sb); |
1787 | void deactivate_super(struct super_block *sb); | 1787 | void deactivate_super(struct super_block *sb); |
1788 | void deactivate_locked_super(struct super_block *sb); | 1788 | void deactivate_locked_super(struct super_block *sb); |
1789 | int set_anon_super(struct super_block *s, void *data); | 1789 | int set_anon_super(struct super_block *s, void *data); |
1790 | int get_anon_bdev(dev_t *); | 1790 | int get_anon_bdev(dev_t *); |
1791 | void free_anon_bdev(dev_t); | 1791 | void free_anon_bdev(dev_t); |
1792 | struct super_block *sget(struct file_system_type *type, | 1792 | struct super_block *sget(struct file_system_type *type, |
1793 | int (*test)(struct super_block *,void *), | 1793 | int (*test)(struct super_block *,void *), |
1794 | int (*set)(struct super_block *,void *), | 1794 | int (*set)(struct super_block *,void *), |
1795 | int flags, void *data); | 1795 | int flags, void *data); |
1796 | extern struct dentry *mount_pseudo(struct file_system_type *, char *, | 1796 | extern struct dentry *mount_pseudo(struct file_system_type *, char *, |
1797 | const struct super_operations *ops, | 1797 | const struct super_operations *ops, |
1798 | const struct dentry_operations *dops, | 1798 | const struct dentry_operations *dops, |
1799 | unsigned long); | 1799 | unsigned long); |
1800 | 1800 | ||
/* Alas, no aliases. Too much hassle with bringing module.h everywhere */

/*
 * fops_get(): evaluates to @fops if it is non-NULL and try_module_get()
 * on its owner succeeds, otherwise to NULL.
 */
#define fops_get(fops) \
	(((fops) && try_module_get((fops)->owner)) ? (fops) : NULL)

/* fops_put(): module_put() the owner of @fops; a NULL @fops is ignored. */
#define fops_put(fops) \
	do { \
		if (fops) \
			module_put((fops)->owner); \
	} while (0)
/*
 * This one is to be used *ONLY* from ->open() instances.
 * fops must be non-NULL, pinned down *and* module dependencies
 * should be sufficient to pin the caller down as well.
 *
 * Drops the reference on the file's current f_op, installs @fops and
 * BUG()s if the installed pointer is NULL.
 */
#define replace_fops(f, fops) \
	do { \
		struct file *__file = (f); \
		fops_put(__file->f_op); \
		__file->f_op = (fops); \
		BUG_ON(!__file->f_op); \
	} while (0)
1817 | 1817 | ||
1818 | extern int register_filesystem(struct file_system_type *); | 1818 | extern int register_filesystem(struct file_system_type *); |
1819 | extern int unregister_filesystem(struct file_system_type *); | 1819 | extern int unregister_filesystem(struct file_system_type *); |
1820 | extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data); | 1820 | extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data); |
1821 | #define kern_mount(type) kern_mount_data(type, NULL) | 1821 | #define kern_mount(type) kern_mount_data(type, NULL) |
1822 | extern void kern_unmount(struct vfsmount *mnt); | 1822 | extern void kern_unmount(struct vfsmount *mnt); |
1823 | extern int may_umount_tree(struct vfsmount *); | 1823 | extern int may_umount_tree(struct vfsmount *); |
1824 | extern int may_umount(struct vfsmount *); | 1824 | extern int may_umount(struct vfsmount *); |
1825 | extern long do_mount(const char *, const char *, const char *, unsigned long, void *); | 1825 | extern long do_mount(const char *, const char *, const char *, unsigned long, void *); |
1826 | extern struct vfsmount *collect_mounts(struct path *); | 1826 | extern struct vfsmount *collect_mounts(struct path *); |
1827 | extern void drop_collected_mounts(struct vfsmount *); | 1827 | extern void drop_collected_mounts(struct vfsmount *); |
1828 | extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, | 1828 | extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, |
1829 | struct vfsmount *); | 1829 | struct vfsmount *); |
1830 | extern int vfs_statfs(struct path *, struct kstatfs *); | 1830 | extern int vfs_statfs(struct path *, struct kstatfs *); |
1831 | extern int user_statfs(const char __user *, struct kstatfs *); | 1831 | extern int user_statfs(const char __user *, struct kstatfs *); |
1832 | extern int fd_statfs(int, struct kstatfs *); | 1832 | extern int fd_statfs(int, struct kstatfs *); |
1833 | extern int vfs_ustat(dev_t, struct kstatfs *); | 1833 | extern int vfs_ustat(dev_t, struct kstatfs *); |
1834 | extern int freeze_super(struct super_block *super); | 1834 | extern int freeze_super(struct super_block *super); |
1835 | extern int thaw_super(struct super_block *super); | 1835 | extern int thaw_super(struct super_block *super); |
1836 | extern bool our_mnt(struct vfsmount *mnt); | 1836 | extern bool our_mnt(struct vfsmount *mnt); |
1837 | extern bool fs_fully_visible(struct file_system_type *); | 1837 | extern bool fs_fully_visible(struct file_system_type *); |
1838 | 1838 | ||
1839 | extern int current_umask(void); | 1839 | extern int current_umask(void); |
1840 | 1840 | ||
1841 | extern void ihold(struct inode * inode); | 1841 | extern void ihold(struct inode * inode); |
1842 | extern void iput(struct inode *); | 1842 | extern void iput(struct inode *); |
1843 | 1843 | ||
1844 | static inline struct inode *file_inode(struct file *f) | 1844 | static inline struct inode *file_inode(struct file *f) |
1845 | { | 1845 | { |
1846 | return f->f_inode; | 1846 | return f->f_inode; |
1847 | } | 1847 | } |
1848 | 1848 | ||
/* /sys/fs */
extern struct kobject *fs_kobj;

/* INT_MAX masked with PAGE_CACHE_MASK. */
#define MAX_RW_COUNT		(INT_MAX & PAGE_CACHE_MASK)

/* Values passed as the first argument of locks_mandatory_area(). */
#define FLOCK_VERIFY_READ	1
#define FLOCK_VERIFY_WRITE	2
1856 | 1856 | ||
1857 | #ifdef CONFIG_FILE_LOCKING | 1857 | #ifdef CONFIG_FILE_LOCKING |
1858 | extern int locks_mandatory_locked(struct file *); | 1858 | extern int locks_mandatory_locked(struct file *); |
1859 | extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t); | 1859 | extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t); |
1860 | 1860 | ||
1861 | /* | 1861 | /* |
1862 | * Candidates for mandatory locking have the setgid bit set | 1862 | * Candidates for mandatory locking have the setgid bit set |
1863 | * but no group execute bit - an otherwise meaningless combination. | 1863 | * but no group execute bit - an otherwise meaningless combination. |
1864 | */ | 1864 | */ |
1865 | 1865 | ||
1866 | static inline int __mandatory_lock(struct inode *ino) | 1866 | static inline int __mandatory_lock(struct inode *ino) |
1867 | { | 1867 | { |
1868 | return (ino->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID; | 1868 | return (ino->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID; |
1869 | } | 1869 | } |
1870 | 1870 | ||
/*
 * ... and these candidates should be on MS_MANDLOCK mounted fs,
 * otherwise these will be advisory locks
 *
 * Returns 1 only if IS_MANDLOCK() holds for the inode and it is a
 * mandatory-locking candidate; 0 otherwise.
 */
static inline int mandatory_lock(struct inode *ino)
{
	if (!IS_MANDLOCK(ino))
		return 0;

	return __mandatory_lock(ino) != 0;
}
1880 | 1880 | ||
/*
 * If mandatory locking applies to @file's inode, return the result of
 * locks_mandatory_locked(); otherwise return 0.
 */
static inline int locks_verify_locked(struct file *file)
{
	if (!mandatory_lock(file_inode(file)))
		return 0;

	return locks_mandatory_locked(file);
}
1887 | 1887 | ||
1888 | static inline int locks_verify_truncate(struct inode *inode, | 1888 | static inline int locks_verify_truncate(struct inode *inode, |
1889 | struct file *filp, | 1889 | struct file *filp, |
1890 | loff_t size) | 1890 | loff_t size) |
1891 | { | 1891 | { |
1892 | if (inode->i_flock && mandatory_lock(inode)) | 1892 | if (inode->i_flock && mandatory_lock(inode)) |
1893 | return locks_mandatory_area( | 1893 | return locks_mandatory_area( |
1894 | FLOCK_VERIFY_WRITE, inode, filp, | 1894 | FLOCK_VERIFY_WRITE, inode, filp, |
1895 | size < inode->i_size ? size : inode->i_size, | 1895 | size < inode->i_size ? size : inode->i_size, |
1896 | (size < inode->i_size ? inode->i_size - size | 1896 | (size < inode->i_size ? inode->i_size - size |
1897 | : size - inode->i_size) | 1897 | : size - inode->i_size) |
1898 | ); | 1898 | ); |
1899 | return 0; | 1899 | return 0; |
1900 | } | 1900 | } |
1901 | 1901 | ||
1902 | static inline int break_lease(struct inode *inode, unsigned int mode) | 1902 | static inline int break_lease(struct inode *inode, unsigned int mode) |
1903 | { | 1903 | { |
1904 | /* | 1904 | /* |
1905 | * Since this check is lockless, we must ensure that any refcounts | 1905 | * Since this check is lockless, we must ensure that any refcounts |
1906 | * taken are done before checking inode->i_flock. Otherwise, we could | 1906 | * taken are done before checking inode->i_flock. Otherwise, we could |
1907 | * end up racing with tasks trying to set a new lease on this file. | 1907 | * end up racing with tasks trying to set a new lease on this file. |
1908 | */ | 1908 | */ |
1909 | smp_mb(); | 1909 | smp_mb(); |
1910 | if (inode->i_flock) | 1910 | if (inode->i_flock) |
1911 | return __break_lease(inode, mode, FL_LEASE); | 1911 | return __break_lease(inode, mode, FL_LEASE); |
1912 | return 0; | 1912 | return 0; |
1913 | } | 1913 | } |
1914 | 1914 | ||
1915 | static inline int break_deleg(struct inode *inode, unsigned int mode) | 1915 | static inline int break_deleg(struct inode *inode, unsigned int mode) |
1916 | { | 1916 | { |
1917 | if (inode->i_flock) | 1917 | if (inode->i_flock) |
1918 | return __break_lease(inode, mode, FL_DELEG); | 1918 | return __break_lease(inode, mode, FL_DELEG); |
1919 | return 0; | 1919 | return 0; |
1920 | } | 1920 | } |
1921 | 1921 | ||
1922 | static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode) | 1922 | static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode) |
1923 | { | 1923 | { |
1924 | int ret; | 1924 | int ret; |
1925 | 1925 | ||
1926 | ret = break_deleg(inode, O_WRONLY|O_NONBLOCK); | 1926 | ret = break_deleg(inode, O_WRONLY|O_NONBLOCK); |
1927 | if (ret == -EWOULDBLOCK && delegated_inode) { | 1927 | if (ret == -EWOULDBLOCK && delegated_inode) { |
1928 | *delegated_inode = inode; | 1928 | *delegated_inode = inode; |
1929 | ihold(inode); | 1929 | ihold(inode); |
1930 | } | 1930 | } |
1931 | return ret; | 1931 | return ret; |
1932 | } | 1932 | } |
1933 | 1933 | ||
1934 | static inline int break_deleg_wait(struct inode **delegated_inode) | 1934 | static inline int break_deleg_wait(struct inode **delegated_inode) |
1935 | { | 1935 | { |
1936 | int ret; | 1936 | int ret; |
1937 | 1937 | ||
1938 | ret = break_deleg(*delegated_inode, O_WRONLY); | 1938 | ret = break_deleg(*delegated_inode, O_WRONLY); |
1939 | iput(*delegated_inode); | 1939 | iput(*delegated_inode); |
1940 | *delegated_inode = NULL; | 1940 | *delegated_inode = NULL; |
1941 | return ret; | 1941 | return ret; |
1942 | } | 1942 | } |
1943 | 1943 | ||
1944 | #else /* !CONFIG_FILE_LOCKING */ | 1944 | #else /* !CONFIG_FILE_LOCKING */ |
1945 | static inline int locks_mandatory_locked(struct file *file) | 1945 | static inline int locks_mandatory_locked(struct file *file) |
1946 | { | 1946 | { |
1947 | return 0; | 1947 | return 0; |
1948 | } | 1948 | } |
1949 | 1949 | ||
1950 | static inline int locks_mandatory_area(int rw, struct inode *inode, | 1950 | static inline int locks_mandatory_area(int rw, struct inode *inode, |
1951 | struct file *filp, loff_t offset, | 1951 | struct file *filp, loff_t offset, |
1952 | size_t count) | 1952 | size_t count) |
1953 | { | 1953 | { |
1954 | return 0; | 1954 | return 0; |
1955 | } | 1955 | } |
1956 | 1956 | ||
/* Stubs for builds without CONFIG_FILE_LOCKING: always return 0. */
static inline int __mandatory_lock(struct inode *inode) { return 0; }

static inline int mandatory_lock(struct inode *inode) { return 0; }

static inline int locks_verify_locked(struct file *file) { return 0; }
1971 | 1971 | ||
1972 | static inline int locks_verify_truncate(struct inode *inode, struct file *filp, | 1972 | static inline int locks_verify_truncate(struct inode *inode, struct file *filp, |
1973 | size_t size) | 1973 | size_t size) |
1974 | { | 1974 | { |
1975 | return 0; | 1975 | return 0; |
1976 | } | 1976 | } |
1977 | 1977 | ||
/* Stubs for builds without CONFIG_FILE_LOCKING: always return 0. */
static inline int break_lease(struct inode *inode, unsigned int mode) { return 0; }

static inline int break_deleg(struct inode *inode, unsigned int mode) { return 0; }

static inline int try_break_deleg(struct inode *inode,
				  struct inode **delegated_inode) { return 0; }
1992 | 1992 | ||
/*
 * Stub for builds without CONFIG_FILE_LOCKING.  Calls BUG() and then
 * returns 0.
 */
static inline int break_deleg_wait(struct inode **delegated_inode)
{
	int rc = 0;

	BUG();
	return rc;
}
1998 | 1998 | ||
1999 | #endif /* CONFIG_FILE_LOCKING */ | 1999 | #endif /* CONFIG_FILE_LOCKING */ |
2000 | 2000 | ||
2001 | /* fs/open.c */ | 2001 | /* fs/open.c */ |
2002 | struct audit_names; | 2002 | struct audit_names; |
2003 | struct filename { | 2003 | struct filename { |
2004 | const char *name; /* pointer to actual string */ | 2004 | const char *name; /* pointer to actual string */ |
2005 | const __user char *uptr; /* original userland pointer */ | 2005 | const __user char *uptr; /* original userland pointer */ |
2006 | struct audit_names *aname; | 2006 | struct audit_names *aname; |
2007 | bool separate; /* should "name" be freed? */ | 2007 | bool separate; /* should "name" be freed? */ |
2008 | }; | 2008 | }; |
2009 | 2009 | ||
2010 | extern long vfs_truncate(struct path *, loff_t); | 2010 | extern long vfs_truncate(struct path *, loff_t); |
2011 | extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, | 2011 | extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, |
2012 | struct file *filp); | 2012 | struct file *filp); |
2013 | extern int do_fallocate(struct file *file, int mode, loff_t offset, | 2013 | extern int do_fallocate(struct file *file, int mode, loff_t offset, |
2014 | loff_t len); | 2014 | loff_t len); |
2015 | extern long do_sys_open(int dfd, const char __user *filename, int flags, | 2015 | extern long do_sys_open(int dfd, const char __user *filename, int flags, |
2016 | umode_t mode); | 2016 | umode_t mode); |
2017 | extern struct file *file_open_name(struct filename *, int, umode_t); | 2017 | extern struct file *file_open_name(struct filename *, int, umode_t); |
2018 | extern struct file *filp_open(const char *, int, umode_t); | 2018 | extern struct file *filp_open(const char *, int, umode_t); |
2019 | extern struct file *file_open_root(struct dentry *, struct vfsmount *, | 2019 | extern struct file *file_open_root(struct dentry *, struct vfsmount *, |
2020 | const char *, int); | 2020 | const char *, int); |
2021 | extern struct file * dentry_open(const struct path *, int, const struct cred *); | 2021 | extern struct file * dentry_open(const struct path *, int, const struct cred *); |
2022 | extern int filp_close(struct file *, fl_owner_t id); | 2022 | extern int filp_close(struct file *, fl_owner_t id); |
2023 | 2023 | ||
2024 | extern struct filename *getname(const char __user *); | 2024 | extern struct filename *getname(const char __user *); |
2025 | extern struct filename *getname_kernel(const char *); | 2025 | extern struct filename *getname_kernel(const char *); |
2026 | 2026 | ||
/* Single-bit flags. */
enum {
	FILE_CREATED = 1 << 0,
	FILE_OPENED  = 1 << 1
};
2031 | extern int finish_open(struct file *file, struct dentry *dentry, | 2031 | extern int finish_open(struct file *file, struct dentry *dentry, |
2032 | int (*open)(struct inode *, struct file *), | 2032 | int (*open)(struct inode *, struct file *), |
2033 | int *opened); | 2033 | int *opened); |
2034 | extern int finish_no_open(struct file *file, struct dentry *dentry); | 2034 | extern int finish_no_open(struct file *file, struct dentry *dentry); |
2035 | 2035 | ||
2036 | /* fs/ioctl.c */ | 2036 | /* fs/ioctl.c */ |
2037 | 2037 | ||
2038 | extern int ioctl_preallocate(struct file *filp, void __user *argp); | 2038 | extern int ioctl_preallocate(struct file *filp, void __user *argp); |
2039 | 2039 | ||
2040 | /* fs/dcache.c */ | 2040 | /* fs/dcache.c */ |
2041 | extern void __init vfs_caches_init_early(void); | 2041 | extern void __init vfs_caches_init_early(void); |
2042 | extern void __init vfs_caches_init(unsigned long); | 2042 | extern void __init vfs_caches_init(unsigned long); |
2043 | 2043 | ||
extern struct kmem_cache *names_cachep;

extern void final_putname(struct filename *name);

/* Allocate from / free back to names_cachep. */
#define __getname()		kmem_cache_alloc(names_cachep, GFP_KERNEL)
#define __putname(name)		kmem_cache_free(names_cachep, (void *)(name))

/*
 * With CONFIG_AUDITSYSCALL, putname() is a real function defined
 * elsewhere; without it, putname() is simply final_putname().
 */
#ifdef CONFIG_AUDITSYSCALL
extern void putname(struct filename *name);
#else
#define putname(name)		final_putname(name)
#endif
2055 | 2055 | ||
#ifdef CONFIG_BLOCK
/* Block device API, available only with CONFIG_BLOCK. */
extern int register_blkdev(unsigned int, const char *);
extern void unregister_blkdev(unsigned int, const char *);
extern struct block_device *bdget(dev_t);
extern struct block_device *bdgrab(struct block_device *bdev);
extern void bd_set_size(struct block_device *, loff_t size);
extern void bd_forget(struct inode *inode);
extern void bdput(struct block_device *);
extern void invalidate_bdev(struct block_device *);
extern void iterate_bdevs(void (*)(struct block_device *, void *), void *);
extern int sync_blockdev(struct block_device *bdev);
extern void kill_bdev(struct block_device *);
extern struct super_block *freeze_bdev(struct block_device *);
extern void emergency_thaw_all(void);
extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
extern int fsync_bdev(struct block_device *);
extern int sb_is_blkdev_sb(struct super_block *sb);
#else /* !CONFIG_BLOCK */
/* No-op stubs: void ones do nothing, the rest return 0 or NULL. */
static inline void bd_forget(struct inode *inode)
{
}

static inline int sync_blockdev(struct block_device *bdev)
{
	return 0;
}

static inline void kill_bdev(struct block_device *bdev)
{
}

static inline void invalidate_bdev(struct block_device *bdev)
{
}

static inline struct super_block *freeze_bdev(struct block_device *bdev)
{
	return NULL;
}

static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb)
{
	return 0;
}

static inline void iterate_bdevs(void (*f)(struct block_device *, void *),
				 void *arg)
{
}

static inline int sb_is_blkdev_sb(struct super_block *sb)
{
	return 0;
}
#endif /* CONFIG_BLOCK */
2098 | extern int sync_filesystem(struct super_block *); | 2098 | extern int sync_filesystem(struct super_block *); |
2099 | extern const struct file_operations def_blk_fops; | 2099 | extern const struct file_operations def_blk_fops; |
2100 | extern const struct file_operations def_chr_fops; | 2100 | extern const struct file_operations def_chr_fops; |
2101 | extern const struct file_operations bad_sock_fops; | 2101 | extern const struct file_operations bad_sock_fops; |
2102 | #ifdef CONFIG_BLOCK | 2102 | #ifdef CONFIG_BLOCK |
2103 | extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); | 2103 | extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); |
2104 | extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); | 2104 | extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); |
2105 | extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); | 2105 | extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); |
2106 | extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder); | 2106 | extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder); |
2107 | extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, | 2107 | extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, |
2108 | void *holder); | 2108 | void *holder); |
2109 | extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, | 2109 | extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, |
2110 | void *holder); | 2110 | void *holder); |
2111 | extern void blkdev_put(struct block_device *bdev, fmode_t mode); | 2111 | extern void blkdev_put(struct block_device *bdev, fmode_t mode); |
2112 | #ifdef CONFIG_SYSFS | 2112 | #ifdef CONFIG_SYSFS |
2113 | extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); | 2113 | extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); |
2114 | extern void bd_unlink_disk_holder(struct block_device *bdev, | 2114 | extern void bd_unlink_disk_holder(struct block_device *bdev, |
2115 | struct gendisk *disk); | 2115 | struct gendisk *disk); |
2116 | #else | 2116 | #else |
2117 | static inline int bd_link_disk_holder(struct block_device *bdev, | 2117 | static inline int bd_link_disk_holder(struct block_device *bdev, |
2118 | struct gendisk *disk) | 2118 | struct gendisk *disk) |
2119 | { | 2119 | { |
2120 | return 0; | 2120 | return 0; |
2121 | } | 2121 | } |
2122 | static inline void bd_unlink_disk_holder(struct block_device *bdev, | 2122 | static inline void bd_unlink_disk_holder(struct block_device *bdev, |
2123 | struct gendisk *disk) | 2123 | struct gendisk *disk) |
2124 | { | 2124 | { |
2125 | } | 2125 | } |
2126 | #endif | 2126 | #endif |
2127 | #endif | 2127 | #endif |
2128 | 2128 | ||
2129 | /* fs/char_dev.c */ | 2129 | /* fs/char_dev.c */ |
2130 | #define CHRDEV_MAJOR_HASH_SIZE 255 | 2130 | #define CHRDEV_MAJOR_HASH_SIZE 255 |
2131 | extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *); | 2131 | extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *); |
2132 | extern int register_chrdev_region(dev_t, unsigned, const char *); | 2132 | extern int register_chrdev_region(dev_t, unsigned, const char *); |
2133 | extern int __register_chrdev(unsigned int major, unsigned int baseminor, | 2133 | extern int __register_chrdev(unsigned int major, unsigned int baseminor, |
2134 | unsigned int count, const char *name, | 2134 | unsigned int count, const char *name, |
2135 | const struct file_operations *fops); | 2135 | const struct file_operations *fops); |
2136 | extern void __unregister_chrdev(unsigned int major, unsigned int baseminor, | 2136 | extern void __unregister_chrdev(unsigned int major, unsigned int baseminor, |
2137 | unsigned int count, const char *name); | 2137 | unsigned int count, const char *name); |
2138 | extern void unregister_chrdev_region(dev_t, unsigned); | 2138 | extern void unregister_chrdev_region(dev_t, unsigned); |
2139 | extern void chrdev_show(struct seq_file *,off_t); | 2139 | extern void chrdev_show(struct seq_file *,off_t); |
2140 | 2140 | ||
2141 | static inline int register_chrdev(unsigned int major, const char *name, | 2141 | static inline int register_chrdev(unsigned int major, const char *name, |
2142 | const struct file_operations *fops) | 2142 | const struct file_operations *fops) |
2143 | { | 2143 | { |
2144 | return __register_chrdev(major, 0, 256, name, fops); | 2144 | return __register_chrdev(major, 0, 256, name, fops); |
2145 | } | 2145 | } |
2146 | 2146 | ||
2147 | static inline void unregister_chrdev(unsigned int major, const char *name) | 2147 | static inline void unregister_chrdev(unsigned int major, const char *name) |
2148 | { | 2148 | { |
2149 | __unregister_chrdev(major, 0, 256, name); | 2149 | __unregister_chrdev(major, 0, 256, name); |
2150 | } | 2150 | } |
2151 | 2151 | ||
2152 | /* fs/block_dev.c */ | 2152 | /* fs/block_dev.c */ |
2153 | #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ | 2153 | #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ |
2154 | #define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */ | 2154 | #define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */ |
2155 | 2155 | ||
2156 | #ifdef CONFIG_BLOCK | 2156 | #ifdef CONFIG_BLOCK |
2157 | #define BLKDEV_MAJOR_HASH_SIZE 255 | 2157 | #define BLKDEV_MAJOR_HASH_SIZE 255 |
2158 | extern const char *__bdevname(dev_t, char *buffer); | 2158 | extern const char *__bdevname(dev_t, char *buffer); |
2159 | extern const char *bdevname(struct block_device *bdev, char *buffer); | 2159 | extern const char *bdevname(struct block_device *bdev, char *buffer); |
2160 | extern struct block_device *lookup_bdev(const char *); | 2160 | extern struct block_device *lookup_bdev(const char *); |
2161 | extern void blkdev_show(struct seq_file *,off_t); | 2161 | extern void blkdev_show(struct seq_file *,off_t); |
2162 | 2162 | ||
2163 | #else | 2163 | #else |
2164 | #define BLKDEV_MAJOR_HASH_SIZE 0 | 2164 | #define BLKDEV_MAJOR_HASH_SIZE 0 |
2165 | #endif | 2165 | #endif |
2166 | 2166 | ||
2167 | extern void init_special_inode(struct inode *, umode_t, dev_t); | 2167 | extern void init_special_inode(struct inode *, umode_t, dev_t); |
2168 | 2168 | ||
2169 | /* Invalid inode operations -- fs/bad_inode.c */ | 2169 | /* Invalid inode operations -- fs/bad_inode.c */ |
2170 | extern void make_bad_inode(struct inode *); | 2170 | extern void make_bad_inode(struct inode *); |
2171 | extern int is_bad_inode(struct inode *); | 2171 | extern int is_bad_inode(struct inode *); |
2172 | 2172 | ||
2173 | #ifdef CONFIG_BLOCK | 2173 | #ifdef CONFIG_BLOCK |
2174 | /* | 2174 | /* |
2175 | * return READ, READA, or WRITE | 2175 | * return READ, READA, or WRITE |
2176 | */ | 2176 | */ |
2177 | #define bio_rw(bio) ((bio)->bi_rw & (RW_MASK | RWA_MASK)) | 2177 | #define bio_rw(bio) ((bio)->bi_rw & (RW_MASK | RWA_MASK)) |
2178 | 2178 | ||
2179 | /* | 2179 | /* |
2180 | * return data direction, READ or WRITE | 2180 | * return data direction, READ or WRITE |
2181 | */ | 2181 | */ |
2182 | #define bio_data_dir(bio) ((bio)->bi_rw & 1) | 2182 | #define bio_data_dir(bio) ((bio)->bi_rw & 1) |
2183 | 2183 | ||
2184 | extern void check_disk_size_change(struct gendisk *disk, | 2184 | extern void check_disk_size_change(struct gendisk *disk, |
2185 | struct block_device *bdev); | 2185 | struct block_device *bdev); |
2186 | extern int revalidate_disk(struct gendisk *); | 2186 | extern int revalidate_disk(struct gendisk *); |
2187 | extern int check_disk_change(struct block_device *); | 2187 | extern int check_disk_change(struct block_device *); |
2188 | extern int __invalidate_device(struct block_device *, bool); | 2188 | extern int __invalidate_device(struct block_device *, bool); |
2189 | extern int invalidate_partition(struct gendisk *, int); | 2189 | extern int invalidate_partition(struct gendisk *, int); |
2190 | #endif | 2190 | #endif |
2191 | unsigned long invalidate_mapping_pages(struct address_space *mapping, | 2191 | unsigned long invalidate_mapping_pages(struct address_space *mapping, |
2192 | pgoff_t start, pgoff_t end); | 2192 | pgoff_t start, pgoff_t end); |
2193 | 2193 | ||
2194 | static inline void invalidate_remote_inode(struct inode *inode) | 2194 | static inline void invalidate_remote_inode(struct inode *inode) |
2195 | { | 2195 | { |
2196 | if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 2196 | if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
2197 | S_ISLNK(inode->i_mode)) | 2197 | S_ISLNK(inode->i_mode)) |
2198 | invalidate_mapping_pages(inode->i_mapping, 0, -1); | 2198 | invalidate_mapping_pages(inode->i_mapping, 0, -1); |
2199 | } | 2199 | } |
2200 | extern int invalidate_inode_pages2(struct address_space *mapping); | 2200 | extern int invalidate_inode_pages2(struct address_space *mapping); |
2201 | extern int invalidate_inode_pages2_range(struct address_space *mapping, | 2201 | extern int invalidate_inode_pages2_range(struct address_space *mapping, |
2202 | pgoff_t start, pgoff_t end); | 2202 | pgoff_t start, pgoff_t end); |
2203 | extern int write_inode_now(struct inode *, int); | 2203 | extern int write_inode_now(struct inode *, int); |
2204 | extern int filemap_fdatawrite(struct address_space *); | 2204 | extern int filemap_fdatawrite(struct address_space *); |
2205 | extern int filemap_flush(struct address_space *); | 2205 | extern int filemap_flush(struct address_space *); |
2206 | extern int filemap_fdatawait(struct address_space *); | 2206 | extern int filemap_fdatawait(struct address_space *); |
2207 | extern int filemap_fdatawait_range(struct address_space *, loff_t lstart, | 2207 | extern int filemap_fdatawait_range(struct address_space *, loff_t lstart, |
2208 | loff_t lend); | 2208 | loff_t lend); |
2209 | extern int filemap_write_and_wait(struct address_space *mapping); | 2209 | extern int filemap_write_and_wait(struct address_space *mapping); |
2210 | extern int filemap_write_and_wait_range(struct address_space *mapping, | 2210 | extern int filemap_write_and_wait_range(struct address_space *mapping, |
2211 | loff_t lstart, loff_t lend); | 2211 | loff_t lstart, loff_t lend); |
2212 | extern int __filemap_fdatawrite_range(struct address_space *mapping, | 2212 | extern int __filemap_fdatawrite_range(struct address_space *mapping, |
2213 | loff_t start, loff_t end, int sync_mode); | 2213 | loff_t start, loff_t end, int sync_mode); |
2214 | extern int filemap_fdatawrite_range(struct address_space *mapping, | 2214 | extern int filemap_fdatawrite_range(struct address_space *mapping, |
2215 | loff_t start, loff_t end); | 2215 | loff_t start, loff_t end); |
2216 | 2216 | ||
2217 | extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, | 2217 | extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, |
2218 | int datasync); | 2218 | int datasync); |
2219 | extern int vfs_fsync(struct file *file, int datasync); | 2219 | extern int vfs_fsync(struct file *file, int datasync); |
2220 | static inline int generic_write_sync(struct file *file, loff_t pos, loff_t count) | 2220 | static inline int generic_write_sync(struct file *file, loff_t pos, loff_t count) |
2221 | { | 2221 | { |
2222 | if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host)) | 2222 | if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host)) |
2223 | return 0; | 2223 | return 0; |
2224 | return vfs_fsync_range(file, pos, pos + count - 1, | 2224 | return vfs_fsync_range(file, pos, pos + count - 1, |
2225 | (file->f_flags & __O_SYNC) ? 0 : 1); | 2225 | (file->f_flags & __O_SYNC) ? 0 : 1); |
2226 | } | 2226 | } |
2227 | extern void emergency_sync(void); | 2227 | extern void emergency_sync(void); |
2228 | extern void emergency_remount(void); | 2228 | extern void emergency_remount(void); |
2229 | #ifdef CONFIG_BLOCK | 2229 | #ifdef CONFIG_BLOCK |
2230 | extern sector_t bmap(struct inode *, sector_t); | 2230 | extern sector_t bmap(struct inode *, sector_t); |
2231 | #endif | 2231 | #endif |
2232 | extern int notify_change(struct dentry *, struct iattr *, struct inode **); | 2232 | extern int notify_change(struct dentry *, struct iattr *, struct inode **); |
2233 | extern int inode_permission(struct inode *, int); | 2233 | extern int inode_permission(struct inode *, int); |
2234 | extern int generic_permission(struct inode *, int); | 2234 | extern int generic_permission(struct inode *, int); |
2235 | 2235 | ||
2236 | static inline bool execute_ok(struct inode *inode) | 2236 | static inline bool execute_ok(struct inode *inode) |
2237 | { | 2237 | { |
2238 | return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode); | 2238 | return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode); |
2239 | } | 2239 | } |
2240 | 2240 | ||
2241 | static inline void file_start_write(struct file *file) | 2241 | static inline void file_start_write(struct file *file) |
2242 | { | 2242 | { |
2243 | if (!S_ISREG(file_inode(file)->i_mode)) | 2243 | if (!S_ISREG(file_inode(file)->i_mode)) |
2244 | return; | 2244 | return; |
2245 | __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true); | 2245 | __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true); |
2246 | } | 2246 | } |
2247 | 2247 | ||
2248 | static inline bool file_start_write_trylock(struct file *file) | 2248 | static inline bool file_start_write_trylock(struct file *file) |
2249 | { | 2249 | { |
2250 | if (!S_ISREG(file_inode(file)->i_mode)) | 2250 | if (!S_ISREG(file_inode(file)->i_mode)) |
2251 | return true; | 2251 | return true; |
2252 | return __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, false); | 2252 | return __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, false); |
2253 | } | 2253 | } |
2254 | 2254 | ||
2255 | static inline void file_end_write(struct file *file) | 2255 | static inline void file_end_write(struct file *file) |
2256 | { | 2256 | { |
2257 | if (!S_ISREG(file_inode(file)->i_mode)) | 2257 | if (!S_ISREG(file_inode(file)->i_mode)) |
2258 | return; | 2258 | return; |
2259 | __sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE); | 2259 | __sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE); |
2260 | } | 2260 | } |
2261 | 2261 | ||
2262 | /* | 2262 | /* |
2263 | * get_write_access() gets write permission for a file. | 2263 | * get_write_access() gets write permission for a file. |
2264 | * put_write_access() releases this write permission. | 2264 | * put_write_access() releases this write permission. |
2265 | * This is used for regular files. | 2265 | * This is used for regular files. |
2266 | * We cannot support write (and maybe mmap read-write shared) accesses and | 2266 | * We cannot support write (and maybe mmap read-write shared) accesses and |
2267 | * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode | 2267 | * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode |
2268 | * can have the following values: | 2268 | * can have the following values: |
2269 | * 0: no writers, no VM_DENYWRITE mappings | 2269 | * 0: no writers, no VM_DENYWRITE mappings |
2270 | * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist | 2270 | * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist |
2271 | * > 0: (i_writecount) users are writing to the file. | 2271 | * > 0: (i_writecount) users are writing to the file. |
2272 | * | 2272 | * |
2273 | * Normally we operate on that counter with atomic_{inc,dec} and it's safe | 2273 | * Normally we operate on that counter with atomic_{inc,dec} and it's safe |
2274 | * except for the cases where we don't hold i_writecount yet. Then we need to | 2274 | * except for the cases where we don't hold i_writecount yet. Then we need to |
2275 | * use {get,deny}_write_access() - these functions check the sign and refuse | 2275 | * use {get,deny}_write_access() - these functions check the sign and refuse |
2276 | * to do the change if sign is wrong. | 2276 | * to do the change if sign is wrong. |
2277 | */ | 2277 | */ |
2278 | static inline int get_write_access(struct inode *inode) | 2278 | static inline int get_write_access(struct inode *inode) |
2279 | { | 2279 | { |
2280 | return atomic_inc_unless_negative(&inode->i_writecount) ? 0 : -ETXTBSY; | 2280 | return atomic_inc_unless_negative(&inode->i_writecount) ? 0 : -ETXTBSY; |
2281 | } | 2281 | } |
2282 | static inline int deny_write_access(struct file *file) | 2282 | static inline int deny_write_access(struct file *file) |
2283 | { | 2283 | { |
2284 | struct inode *inode = file_inode(file); | 2284 | struct inode *inode = file_inode(file); |
2285 | return atomic_dec_unless_positive(&inode->i_writecount) ? 0 : -ETXTBSY; | 2285 | return atomic_dec_unless_positive(&inode->i_writecount) ? 0 : -ETXTBSY; |
2286 | } | 2286 | } |
2287 | static inline void put_write_access(struct inode * inode) | 2287 | static inline void put_write_access(struct inode * inode) |
2288 | { | 2288 | { |
2289 | atomic_dec(&inode->i_writecount); | 2289 | atomic_dec(&inode->i_writecount); |
2290 | } | 2290 | } |
2291 | static inline void allow_write_access(struct file *file) | 2291 | static inline void allow_write_access(struct file *file) |
2292 | { | 2292 | { |
2293 | if (file) | 2293 | if (file) |
2294 | atomic_inc(&file_inode(file)->i_writecount); | 2294 | atomic_inc(&file_inode(file)->i_writecount); |
2295 | } | 2295 | } |
2296 | static inline bool inode_is_open_for_write(const struct inode *inode) | 2296 | static inline bool inode_is_open_for_write(const struct inode *inode) |
2297 | { | 2297 | { |
2298 | return atomic_read(&inode->i_writecount) > 0; | 2298 | return atomic_read(&inode->i_writecount) > 0; |
2299 | } | 2299 | } |
2300 | 2300 | ||
2301 | #ifdef CONFIG_IMA | 2301 | #ifdef CONFIG_IMA |
2302 | static inline void i_readcount_dec(struct inode *inode) | 2302 | static inline void i_readcount_dec(struct inode *inode) |
2303 | { | 2303 | { |
2304 | BUG_ON(!atomic_read(&inode->i_readcount)); | 2304 | BUG_ON(!atomic_read(&inode->i_readcount)); |
2305 | atomic_dec(&inode->i_readcount); | 2305 | atomic_dec(&inode->i_readcount); |
2306 | } | 2306 | } |
2307 | static inline void i_readcount_inc(struct inode *inode) | 2307 | static inline void i_readcount_inc(struct inode *inode) |
2308 | { | 2308 | { |
2309 | atomic_inc(&inode->i_readcount); | 2309 | atomic_inc(&inode->i_readcount); |
2310 | } | 2310 | } |
2311 | #else | 2311 | #else |
2312 | static inline void i_readcount_dec(struct inode *inode) | 2312 | static inline void i_readcount_dec(struct inode *inode) |
2313 | { | 2313 | { |
2314 | return; | 2314 | return; |
2315 | } | 2315 | } |
2316 | static inline void i_readcount_inc(struct inode *inode) | 2316 | static inline void i_readcount_inc(struct inode *inode) |
2317 | { | 2317 | { |
2318 | return; | 2318 | return; |
2319 | } | 2319 | } |
2320 | #endif | 2320 | #endif |
2321 | extern int do_pipe_flags(int *, int); | 2321 | extern int do_pipe_flags(int *, int); |
2322 | 2322 | ||
2323 | extern int kernel_read(struct file *, loff_t, char *, unsigned long); | 2323 | extern int kernel_read(struct file *, loff_t, char *, unsigned long); |
2324 | extern ssize_t kernel_write(struct file *, const char *, size_t, loff_t); | 2324 | extern ssize_t kernel_write(struct file *, const char *, size_t, loff_t); |
2325 | extern struct file * open_exec(const char *); | 2325 | extern struct file * open_exec(const char *); |
2326 | 2326 | ||
2327 | /* fs/dcache.c -- generic fs support functions */ | 2327 | /* fs/dcache.c -- generic fs support functions */ |
2328 | extern int is_subdir(struct dentry *, struct dentry *); | 2328 | extern int is_subdir(struct dentry *, struct dentry *); |
2329 | extern int path_is_under(struct path *, struct path *); | 2329 | extern int path_is_under(struct path *, struct path *); |
2330 | 2330 | ||
2331 | #include <linux/err.h> | 2331 | #include <linux/err.h> |
2332 | 2332 | ||
2333 | /* needed for stackable file system support */ | 2333 | /* needed for stackable file system support */ |
2334 | extern loff_t default_llseek(struct file *file, loff_t offset, int whence); | 2334 | extern loff_t default_llseek(struct file *file, loff_t offset, int whence); |
2335 | 2335 | ||
2336 | extern loff_t vfs_llseek(struct file *file, loff_t offset, int whence); | 2336 | extern loff_t vfs_llseek(struct file *file, loff_t offset, int whence); |
2337 | 2337 | ||
2338 | extern int inode_init_always(struct super_block *, struct inode *); | 2338 | extern int inode_init_always(struct super_block *, struct inode *); |
2339 | extern void inode_init_once(struct inode *); | 2339 | extern void inode_init_once(struct inode *); |
2340 | extern void address_space_init_once(struct address_space *mapping); | 2340 | extern void address_space_init_once(struct address_space *mapping); |
2341 | extern struct inode * igrab(struct inode *); | 2341 | extern struct inode * igrab(struct inode *); |
2342 | extern ino_t iunique(struct super_block *, ino_t); | 2342 | extern ino_t iunique(struct super_block *, ino_t); |
2343 | extern int inode_needs_sync(struct inode *inode); | 2343 | extern int inode_needs_sync(struct inode *inode); |
2344 | extern int generic_delete_inode(struct inode *inode); | 2344 | extern int generic_delete_inode(struct inode *inode); |
2345 | static inline int generic_drop_inode(struct inode *inode) | 2345 | static inline int generic_drop_inode(struct inode *inode) |
2346 | { | 2346 | { |
2347 | return !inode->i_nlink || inode_unhashed(inode); | 2347 | return !inode->i_nlink || inode_unhashed(inode); |
2348 | } | 2348 | } |
2349 | 2349 | ||
2350 | extern struct inode *ilookup5_nowait(struct super_block *sb, | 2350 | extern struct inode *ilookup5_nowait(struct super_block *sb, |
2351 | unsigned long hashval, int (*test)(struct inode *, void *), | 2351 | unsigned long hashval, int (*test)(struct inode *, void *), |
2352 | void *data); | 2352 | void *data); |
2353 | extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval, | 2353 | extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval, |
2354 | int (*test)(struct inode *, void *), void *data); | 2354 | int (*test)(struct inode *, void *), void *data); |
2355 | extern struct inode *ilookup(struct super_block *sb, unsigned long ino); | 2355 | extern struct inode *ilookup(struct super_block *sb, unsigned long ino); |
2356 | 2356 | ||
2357 | extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *); | 2357 | extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *); |
2358 | extern struct inode * iget_locked(struct super_block *, unsigned long); | 2358 | extern struct inode * iget_locked(struct super_block *, unsigned long); |
2359 | extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); | 2359 | extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); |
2360 | extern int insert_inode_locked(struct inode *); | 2360 | extern int insert_inode_locked(struct inode *); |
2361 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 2361 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
2362 | extern void lockdep_annotate_inode_mutex_key(struct inode *inode); | 2362 | extern void lockdep_annotate_inode_mutex_key(struct inode *inode); |
2363 | #else | 2363 | #else |
2364 | static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { }; | 2364 | static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { }; |
2365 | #endif | 2365 | #endif |
2366 | extern void unlock_new_inode(struct inode *); | 2366 | extern void unlock_new_inode(struct inode *); |
2367 | extern unsigned int get_next_ino(void); | 2367 | extern unsigned int get_next_ino(void); |
2368 | 2368 | ||
2369 | extern void __iget(struct inode * inode); | 2369 | extern void __iget(struct inode * inode); |
2370 | extern void iget_failed(struct inode *); | 2370 | extern void iget_failed(struct inode *); |
2371 | extern void clear_inode(struct inode *); | 2371 | extern void clear_inode(struct inode *); |
2372 | extern void __destroy_inode(struct inode *); | 2372 | extern void __destroy_inode(struct inode *); |
2373 | extern struct inode *new_inode_pseudo(struct super_block *sb); | 2373 | extern struct inode *new_inode_pseudo(struct super_block *sb); |
2374 | extern struct inode *new_inode(struct super_block *sb); | 2374 | extern struct inode *new_inode(struct super_block *sb); |
2375 | extern void free_inode_nonrcu(struct inode *inode); | 2375 | extern void free_inode_nonrcu(struct inode *inode); |
2376 | extern int should_remove_suid(struct dentry *); | 2376 | extern int should_remove_suid(struct dentry *); |
2377 | extern int file_remove_suid(struct file *); | 2377 | extern int file_remove_suid(struct file *); |
2378 | 2378 | ||
2379 | extern void __insert_inode_hash(struct inode *, unsigned long hashval); | 2379 | extern void __insert_inode_hash(struct inode *, unsigned long hashval); |
2380 | static inline void insert_inode_hash(struct inode *inode) | 2380 | static inline void insert_inode_hash(struct inode *inode) |
2381 | { | 2381 | { |
2382 | __insert_inode_hash(inode, inode->i_ino); | 2382 | __insert_inode_hash(inode, inode->i_ino); |
2383 | } | 2383 | } |
2384 | 2384 | ||
2385 | extern void __remove_inode_hash(struct inode *); | 2385 | extern void __remove_inode_hash(struct inode *); |
2386 | static inline void remove_inode_hash(struct inode *inode) | 2386 | static inline void remove_inode_hash(struct inode *inode) |
2387 | { | 2387 | { |
2388 | if (!inode_unhashed(inode)) | 2388 | if (!inode_unhashed(inode)) |
2389 | __remove_inode_hash(inode); | 2389 | __remove_inode_hash(inode); |
2390 | } | 2390 | } |
2391 | 2391 | ||
2392 | extern void inode_sb_list_add(struct inode *inode); | 2392 | extern void inode_sb_list_add(struct inode *inode); |
2393 | 2393 | ||
2394 | #ifdef CONFIG_BLOCK | 2394 | #ifdef CONFIG_BLOCK |
2395 | extern void submit_bio(int, struct bio *); | 2395 | extern void submit_bio(int, struct bio *); |
2396 | extern int bdev_read_only(struct block_device *); | 2396 | extern int bdev_read_only(struct block_device *); |
2397 | #endif | 2397 | #endif |
2398 | extern int set_blocksize(struct block_device *, int); | 2398 | extern int set_blocksize(struct block_device *, int); |
2399 | extern int sb_set_blocksize(struct super_block *, int); | 2399 | extern int sb_set_blocksize(struct super_block *, int); |
2400 | extern int sb_min_blocksize(struct super_block *, int); | 2400 | extern int sb_min_blocksize(struct super_block *, int); |
2401 | 2401 | ||
2402 | extern int generic_file_mmap(struct file *, struct vm_area_struct *); | 2402 | extern int generic_file_mmap(struct file *, struct vm_area_struct *); |
2403 | extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); | 2403 | extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); |
2404 | extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr, | 2404 | extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr, |
2405 | unsigned long size, pgoff_t pgoff); | 2405 | unsigned long size, pgoff_t pgoff); |
2406 | int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); | 2406 | int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); |
2407 | extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); | 2407 | extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); |
2408 | extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long); | 2408 | extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long); |
2409 | extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); | 2409 | extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); |
2410 | extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, | 2410 | extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, |
2411 | unsigned long *, loff_t, size_t, size_t); | 2411 | unsigned long *, loff_t, size_t, size_t); |
2412 | extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t); | 2412 | extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t); |
2413 | extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); | 2413 | extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); |
2414 | extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); | 2414 | extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); |
2415 | extern int generic_segment_checks(const struct iovec *iov, | 2415 | extern int generic_segment_checks(const struct iovec *iov, |
2416 | unsigned long *nr_segs, size_t *count, int access_flags); | 2416 | unsigned long *nr_segs, size_t *count, int access_flags); |
2417 | 2417 | ||
2418 | /* fs/block_dev.c */ | 2418 | /* fs/block_dev.c */ |
2419 | extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, | 2419 | extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, |
2420 | unsigned long nr_segs, loff_t pos); | 2420 | unsigned long nr_segs, loff_t pos); |
2421 | extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end, | 2421 | extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end, |
2422 | int datasync); | 2422 | int datasync); |
2423 | extern void block_sync_page(struct page *page); | 2423 | extern void block_sync_page(struct page *page); |
2424 | 2424 | ||
2425 | /* fs/splice.c */ | 2425 | /* fs/splice.c */ |
2426 | extern ssize_t generic_file_splice_read(struct file *, loff_t *, | 2426 | extern ssize_t generic_file_splice_read(struct file *, loff_t *, |
2427 | struct pipe_inode_info *, size_t, unsigned int); | 2427 | struct pipe_inode_info *, size_t, unsigned int); |
2428 | extern ssize_t default_file_splice_read(struct file *, loff_t *, | 2428 | extern ssize_t default_file_splice_read(struct file *, loff_t *, |
2429 | struct pipe_inode_info *, size_t, unsigned int); | 2429 | struct pipe_inode_info *, size_t, unsigned int); |
2430 | extern ssize_t generic_file_splice_write(struct pipe_inode_info *, | 2430 | extern ssize_t generic_file_splice_write(struct pipe_inode_info *, |
2431 | struct file *, loff_t *, size_t, unsigned int); | 2431 | struct file *, loff_t *, size_t, unsigned int); |
2432 | extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, | 2432 | extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, |
2433 | struct file *out, loff_t *, size_t len, unsigned int flags); | 2433 | struct file *out, loff_t *, size_t len, unsigned int flags); |
2434 | 2434 | ||
2435 | extern void | 2435 | extern void |
2436 | file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); | 2436 | file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); |
2437 | extern loff_t noop_llseek(struct file *file, loff_t offset, int whence); | 2437 | extern loff_t noop_llseek(struct file *file, loff_t offset, int whence); |
2438 | extern loff_t no_llseek(struct file *file, loff_t offset, int whence); | 2438 | extern loff_t no_llseek(struct file *file, loff_t offset, int whence); |
2439 | extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize); | 2439 | extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize); |
2440 | extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence); | 2440 | extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence); |
2441 | extern loff_t generic_file_llseek_size(struct file *file, loff_t offset, | 2441 | extern loff_t generic_file_llseek_size(struct file *file, loff_t offset, |
2442 | int whence, loff_t maxsize, loff_t eof); | 2442 | int whence, loff_t maxsize, loff_t eof); |
2443 | extern loff_t fixed_size_llseek(struct file *file, loff_t offset, | 2443 | extern loff_t fixed_size_llseek(struct file *file, loff_t offset, |
2444 | int whence, loff_t size); | 2444 | int whence, loff_t size); |
2445 | extern int generic_file_open(struct inode * inode, struct file * filp); | 2445 | extern int generic_file_open(struct inode * inode, struct file * filp); |
2446 | extern int nonseekable_open(struct inode * inode, struct file * filp); | 2446 | extern int nonseekable_open(struct inode * inode, struct file * filp); |
2447 | 2447 | ||
2448 | #ifdef CONFIG_FS_XIP | 2448 | #ifdef CONFIG_FS_XIP |
2449 | extern ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len, | 2449 | extern ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len, |
2450 | loff_t *ppos); | 2450 | loff_t *ppos); |
2451 | extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma); | 2451 | extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma); |
2452 | extern ssize_t xip_file_write(struct file *filp, const char __user *buf, | 2452 | extern ssize_t xip_file_write(struct file *filp, const char __user *buf, |
2453 | size_t len, loff_t *ppos); | 2453 | size_t len, loff_t *ppos); |
2454 | extern int xip_truncate_page(struct address_space *mapping, loff_t from); | 2454 | extern int xip_truncate_page(struct address_space *mapping, loff_t from); |
2455 | #else | 2455 | #else |
2456 | static inline int xip_truncate_page(struct address_space *mapping, loff_t from) | 2456 | static inline int xip_truncate_page(struct address_space *mapping, loff_t from) |
2457 | { | 2457 | { |
2458 | return 0; | 2458 | return 0; |
2459 | } | 2459 | } |
2460 | #endif | 2460 | #endif |
2461 | 2461 | ||
2462 | #ifdef CONFIG_BLOCK | 2462 | #ifdef CONFIG_BLOCK |
2463 | typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode, | 2463 | typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode, |
2464 | loff_t file_offset); | 2464 | loff_t file_offset); |
2465 | 2465 | ||
2466 | enum { | 2466 | enum { |
2467 | /* need locking between buffered and direct access */ | 2467 | /* need locking between buffered and direct access */ |
2468 | DIO_LOCKING = 0x01, | 2468 | DIO_LOCKING = 0x01, |
2469 | 2469 | ||
2470 | /* filesystem does not support filling holes */ | 2470 | /* filesystem does not support filling holes */ |
2471 | DIO_SKIP_HOLES = 0x02, | 2471 | DIO_SKIP_HOLES = 0x02, |
2472 | 2472 | ||
2473 | /* filesystem can handle aio writes beyond i_size */ | 2473 | /* filesystem can handle aio writes beyond i_size */ |
2474 | DIO_ASYNC_EXTEND = 0x04, | 2474 | DIO_ASYNC_EXTEND = 0x04, |
2475 | }; | 2475 | }; |
2476 | 2476 | ||
2477 | void dio_end_io(struct bio *bio, int error); | 2477 | void dio_end_io(struct bio *bio, int error); |
2478 | 2478 | ||
2479 | ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | 2479 | ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, |
2480 | struct block_device *bdev, const struct iovec *iov, loff_t offset, | 2480 | struct block_device *bdev, const struct iovec *iov, loff_t offset, |
2481 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, | 2481 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, |
2482 | dio_submit_t submit_io, int flags); | 2482 | dio_submit_t submit_io, int flags); |
2483 | 2483 | ||
2484 | static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, | 2484 | static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, |
2485 | struct inode *inode, const struct iovec *iov, loff_t offset, | 2485 | struct inode *inode, const struct iovec *iov, loff_t offset, |
2486 | unsigned long nr_segs, get_block_t get_block) | 2486 | unsigned long nr_segs, get_block_t get_block) |
2487 | { | 2487 | { |
2488 | return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, | 2488 | return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, |
2489 | offset, nr_segs, get_block, NULL, NULL, | 2489 | offset, nr_segs, get_block, NULL, NULL, |
2490 | DIO_LOCKING | DIO_SKIP_HOLES); | 2490 | DIO_LOCKING | DIO_SKIP_HOLES); |
2491 | } | 2491 | } |
2492 | #endif | 2492 | #endif |
2493 | 2493 | ||
2494 | void inode_dio_wait(struct inode *inode); | 2494 | void inode_dio_wait(struct inode *inode); |
2495 | void inode_dio_done(struct inode *inode); | 2495 | void inode_dio_done(struct inode *inode); |
2496 | 2496 | ||
2497 | extern void inode_set_flags(struct inode *inode, unsigned int flags, | 2497 | extern void inode_set_flags(struct inode *inode, unsigned int flags, |
2498 | unsigned int mask); | 2498 | unsigned int mask); |
2499 | 2499 | ||
2500 | extern const struct file_operations generic_ro_fops; | 2500 | extern const struct file_operations generic_ro_fops; |
2501 | 2501 | ||
2502 | #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) | 2502 | #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) |
2503 | 2503 | ||
2504 | extern int readlink_copy(char __user *, int, const char *); | 2504 | extern int readlink_copy(char __user *, int, const char *); |
2505 | extern int page_readlink(struct dentry *, char __user *, int); | 2505 | extern int page_readlink(struct dentry *, char __user *, int); |
2506 | extern void *page_follow_link_light(struct dentry *, struct nameidata *); | 2506 | extern void *page_follow_link_light(struct dentry *, struct nameidata *); |
2507 | extern void page_put_link(struct dentry *, struct nameidata *, void *); | 2507 | extern void page_put_link(struct dentry *, struct nameidata *, void *); |
2508 | extern int __page_symlink(struct inode *inode, const char *symname, int len, | 2508 | extern int __page_symlink(struct inode *inode, const char *symname, int len, |
2509 | int nofs); | 2509 | int nofs); |
2510 | extern int page_symlink(struct inode *inode, const char *symname, int len); | 2510 | extern int page_symlink(struct inode *inode, const char *symname, int len); |
2511 | extern const struct inode_operations page_symlink_inode_operations; | 2511 | extern const struct inode_operations page_symlink_inode_operations; |
2512 | extern void kfree_put_link(struct dentry *, struct nameidata *, void *); | 2512 | extern void kfree_put_link(struct dentry *, struct nameidata *, void *); |
2513 | extern int generic_readlink(struct dentry *, char __user *, int); | 2513 | extern int generic_readlink(struct dentry *, char __user *, int); |
2514 | extern void generic_fillattr(struct inode *, struct kstat *); | 2514 | extern void generic_fillattr(struct inode *, struct kstat *); |
2515 | int vfs_getattr_nosec(struct path *path, struct kstat *stat); | 2515 | int vfs_getattr_nosec(struct path *path, struct kstat *stat); |
2516 | extern int vfs_getattr(struct path *, struct kstat *); | 2516 | extern int vfs_getattr(struct path *, struct kstat *); |
2517 | void __inode_add_bytes(struct inode *inode, loff_t bytes); | 2517 | void __inode_add_bytes(struct inode *inode, loff_t bytes); |
2518 | void inode_add_bytes(struct inode *inode, loff_t bytes); | 2518 | void inode_add_bytes(struct inode *inode, loff_t bytes); |
2519 | void __inode_sub_bytes(struct inode *inode, loff_t bytes); | 2519 | void __inode_sub_bytes(struct inode *inode, loff_t bytes); |
2520 | void inode_sub_bytes(struct inode *inode, loff_t bytes); | 2520 | void inode_sub_bytes(struct inode *inode, loff_t bytes); |
2521 | loff_t inode_get_bytes(struct inode *inode); | 2521 | loff_t inode_get_bytes(struct inode *inode); |
2522 | void inode_set_bytes(struct inode *inode, loff_t bytes); | 2522 | void inode_set_bytes(struct inode *inode, loff_t bytes); |
2523 | 2523 | ||
2524 | extern int vfs_readdir(struct file *, filldir_t, void *); | 2524 | extern int vfs_readdir(struct file *, filldir_t, void *); |
2525 | extern int iterate_dir(struct file *, struct dir_context *); | 2525 | extern int iterate_dir(struct file *, struct dir_context *); |
2526 | 2526 | ||
2527 | extern int vfs_stat(const char __user *, struct kstat *); | 2527 | extern int vfs_stat(const char __user *, struct kstat *); |
2528 | extern int vfs_lstat(const char __user *, struct kstat *); | 2528 | extern int vfs_lstat(const char __user *, struct kstat *); |
2529 | extern int vfs_fstat(unsigned int, struct kstat *); | 2529 | extern int vfs_fstat(unsigned int, struct kstat *); |
2530 | extern int vfs_fstatat(int , const char __user *, struct kstat *, int); | 2530 | extern int vfs_fstatat(int , const char __user *, struct kstat *, int); |
2531 | 2531 | ||
2532 | extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, | 2532 | extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, |
2533 | unsigned long arg); | 2533 | unsigned long arg); |
2534 | extern int __generic_block_fiemap(struct inode *inode, | 2534 | extern int __generic_block_fiemap(struct inode *inode, |
2535 | struct fiemap_extent_info *fieinfo, | 2535 | struct fiemap_extent_info *fieinfo, |
2536 | loff_t start, loff_t len, | 2536 | loff_t start, loff_t len, |
2537 | get_block_t *get_block); | 2537 | get_block_t *get_block); |
2538 | extern int generic_block_fiemap(struct inode *inode, | 2538 | extern int generic_block_fiemap(struct inode *inode, |
2539 | struct fiemap_extent_info *fieinfo, u64 start, | 2539 | struct fiemap_extent_info *fieinfo, u64 start, |
2540 | u64 len, get_block_t *get_block); | 2540 | u64 len, get_block_t *get_block); |
2541 | 2541 | ||
2542 | extern void get_filesystem(struct file_system_type *fs); | 2542 | extern void get_filesystem(struct file_system_type *fs); |
2543 | extern void put_filesystem(struct file_system_type *fs); | 2543 | extern void put_filesystem(struct file_system_type *fs); |
2544 | extern struct file_system_type *get_fs_type(const char *name); | 2544 | extern struct file_system_type *get_fs_type(const char *name); |
2545 | extern struct super_block *get_super(struct block_device *); | 2545 | extern struct super_block *get_super(struct block_device *); |
2546 | extern struct super_block *get_super_thawed(struct block_device *); | 2546 | extern struct super_block *get_super_thawed(struct block_device *); |
2547 | extern struct super_block *get_active_super(struct block_device *bdev); | 2547 | extern struct super_block *get_active_super(struct block_device *bdev); |
2548 | extern void drop_super(struct super_block *sb); | 2548 | extern void drop_super(struct super_block *sb); |
2549 | extern void iterate_supers(void (*)(struct super_block *, void *), void *); | 2549 | extern void iterate_supers(void (*)(struct super_block *, void *), void *); |
2550 | extern void iterate_supers_type(struct file_system_type *, | 2550 | extern void iterate_supers_type(struct file_system_type *, |
2551 | void (*)(struct super_block *, void *), void *); | 2551 | void (*)(struct super_block *, void *), void *); |
2552 | 2552 | ||
2553 | extern int dcache_dir_open(struct inode *, struct file *); | 2553 | extern int dcache_dir_open(struct inode *, struct file *); |
2554 | extern int dcache_dir_close(struct inode *, struct file *); | 2554 | extern int dcache_dir_close(struct inode *, struct file *); |
2555 | extern loff_t dcache_dir_lseek(struct file *, loff_t, int); | 2555 | extern loff_t dcache_dir_lseek(struct file *, loff_t, int); |
2556 | extern int dcache_readdir(struct file *, struct dir_context *); | 2556 | extern int dcache_readdir(struct file *, struct dir_context *); |
2557 | extern int simple_setattr(struct dentry *, struct iattr *); | 2557 | extern int simple_setattr(struct dentry *, struct iattr *); |
2558 | extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *); | 2558 | extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *); |
2559 | extern int simple_statfs(struct dentry *, struct kstatfs *); | 2559 | extern int simple_statfs(struct dentry *, struct kstatfs *); |
2560 | extern int simple_open(struct inode *inode, struct file *file); | 2560 | extern int simple_open(struct inode *inode, struct file *file); |
2561 | extern int simple_link(struct dentry *, struct inode *, struct dentry *); | 2561 | extern int simple_link(struct dentry *, struct inode *, struct dentry *); |
2562 | extern int simple_unlink(struct inode *, struct dentry *); | 2562 | extern int simple_unlink(struct inode *, struct dentry *); |
2563 | extern int simple_rmdir(struct inode *, struct dentry *); | 2563 | extern int simple_rmdir(struct inode *, struct dentry *); |
2564 | extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); | 2564 | extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); |
2565 | extern int noop_fsync(struct file *, loff_t, loff_t, int); | 2565 | extern int noop_fsync(struct file *, loff_t, loff_t, int); |
2566 | extern int simple_empty(struct dentry *); | 2566 | extern int simple_empty(struct dentry *); |
2567 | extern int simple_readpage(struct file *file, struct page *page); | 2567 | extern int simple_readpage(struct file *file, struct page *page); |
2568 | extern int simple_write_begin(struct file *file, struct address_space *mapping, | 2568 | extern int simple_write_begin(struct file *file, struct address_space *mapping, |
2569 | loff_t pos, unsigned len, unsigned flags, | 2569 | loff_t pos, unsigned len, unsigned flags, |
2570 | struct page **pagep, void **fsdata); | 2570 | struct page **pagep, void **fsdata); |
2571 | extern int simple_write_end(struct file *file, struct address_space *mapping, | 2571 | extern int simple_write_end(struct file *file, struct address_space *mapping, |
2572 | loff_t pos, unsigned len, unsigned copied, | 2572 | loff_t pos, unsigned len, unsigned copied, |
2573 | struct page *page, void *fsdata); | 2573 | struct page *page, void *fsdata); |
2574 | extern int always_delete_dentry(const struct dentry *); | 2574 | extern int always_delete_dentry(const struct dentry *); |
2575 | extern struct inode *alloc_anon_inode(struct super_block *); | 2575 | extern struct inode *alloc_anon_inode(struct super_block *); |
2576 | extern const struct dentry_operations simple_dentry_operations; | 2576 | extern const struct dentry_operations simple_dentry_operations; |
2577 | 2577 | ||
2578 | extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags); | 2578 | extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags); |
2579 | extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); | 2579 | extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); |
2580 | extern const struct file_operations simple_dir_operations; | 2580 | extern const struct file_operations simple_dir_operations; |
2581 | extern const struct inode_operations simple_dir_inode_operations; | 2581 | extern const struct inode_operations simple_dir_inode_operations; |
2582 | struct tree_descr { char *name; const struct file_operations *ops; int mode; }; | 2582 | struct tree_descr { char *name; const struct file_operations *ops; int mode; }; |
2583 | struct dentry *d_alloc_name(struct dentry *, const char *); | 2583 | struct dentry *d_alloc_name(struct dentry *, const char *); |
2584 | extern int simple_fill_super(struct super_block *, unsigned long, struct tree_descr *); | 2584 | extern int simple_fill_super(struct super_block *, unsigned long, struct tree_descr *); |
2585 | extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count); | 2585 | extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count); |
2586 | extern void simple_release_fs(struct vfsmount **mount, int *count); | 2586 | extern void simple_release_fs(struct vfsmount **mount, int *count); |
2587 | 2587 | ||
2588 | extern ssize_t simple_read_from_buffer(void __user *to, size_t count, | 2588 | extern ssize_t simple_read_from_buffer(void __user *to, size_t count, |
2589 | loff_t *ppos, const void *from, size_t available); | 2589 | loff_t *ppos, const void *from, size_t available); |
2590 | extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, | 2590 | extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, |
2591 | const void __user *from, size_t count); | 2591 | const void __user *from, size_t count); |
2592 | 2592 | ||
2593 | extern int generic_file_fsync(struct file *, loff_t, loff_t, int); | 2593 | extern int generic_file_fsync(struct file *, loff_t, loff_t, int); |
2594 | 2594 | ||
2595 | extern int generic_check_addressable(unsigned, u64); | 2595 | extern int generic_check_addressable(unsigned, u64); |
2596 | 2596 | ||
2597 | #ifdef CONFIG_MIGRATION | 2597 | #ifdef CONFIG_MIGRATION |
2598 | extern int buffer_migrate_page(struct address_space *, | 2598 | extern int buffer_migrate_page(struct address_space *, |
2599 | struct page *, struct page *, | 2599 | struct page *, struct page *, |
2600 | enum migrate_mode); | 2600 | enum migrate_mode); |
2601 | #else | 2601 | #else |
2602 | #define buffer_migrate_page NULL | 2602 | #define buffer_migrate_page NULL |
2603 | #endif | 2603 | #endif |
2604 | 2604 | ||
2605 | extern int inode_change_ok(const struct inode *, struct iattr *); | 2605 | extern int inode_change_ok(const struct inode *, struct iattr *); |
2606 | extern int inode_newsize_ok(const struct inode *, loff_t offset); | 2606 | extern int inode_newsize_ok(const struct inode *, loff_t offset); |
2607 | extern void setattr_copy(struct inode *inode, const struct iattr *attr); | 2607 | extern void setattr_copy(struct inode *inode, const struct iattr *attr); |
2608 | 2608 | ||
2609 | extern int file_update_time(struct file *file); | 2609 | extern int file_update_time(struct file *file); |
2610 | 2610 | ||
2611 | extern int generic_show_options(struct seq_file *m, struct dentry *root); | 2611 | extern int generic_show_options(struct seq_file *m, struct dentry *root); |
2612 | extern void save_mount_options(struct super_block *sb, char *options); | 2612 | extern void save_mount_options(struct super_block *sb, char *options); |
2613 | extern void replace_mount_options(struct super_block *sb, char *options); | 2613 | extern void replace_mount_options(struct super_block *sb, char *options); |
2614 | 2614 | ||
2615 | static inline ino_t parent_ino(struct dentry *dentry) | 2615 | static inline ino_t parent_ino(struct dentry *dentry) |
2616 | { | 2616 | { |
2617 | ino_t res; | 2617 | ino_t res; |
2618 | 2618 | ||
2619 | /* | 2619 | /* |
2620 | * Don't strictly need d_lock here? If the parent ino could change | 2620 | * Don't strictly need d_lock here? If the parent ino could change |
2621 | * then surely we'd have a deeper race in the caller? | 2621 | * then surely we'd have a deeper race in the caller? |
2622 | */ | 2622 | */ |
2623 | spin_lock(&dentry->d_lock); | 2623 | spin_lock(&dentry->d_lock); |
2624 | res = dentry->d_parent->d_inode->i_ino; | 2624 | res = dentry->d_parent->d_inode->i_ino; |
2625 | spin_unlock(&dentry->d_lock); | 2625 | spin_unlock(&dentry->d_lock); |
2626 | return res; | 2626 | return res; |
2627 | } | 2627 | } |
2628 | 2628 | ||
2629 | /* Transaction based IO helpers */ | 2629 | /* Transaction based IO helpers */ |
2630 | 2630 | ||
2631 | /* | 2631 | /* |
2632 | * An argresp is stored in an allocated page and holds the | 2632 | * An argresp is stored in an allocated page and holds the |
2633 | * size of the argument or response, along with its content | 2633 | * size of the argument or response, along with its content |
2634 | */ | 2634 | */ |
2635 | struct simple_transaction_argresp { | 2635 | struct simple_transaction_argresp { |
2636 | ssize_t size; | 2636 | ssize_t size; |
2637 | char data[0]; | 2637 | char data[0]; |
2638 | }; | 2638 | }; |
2639 | 2639 | ||
2640 | #define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp)) | 2640 | #define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp)) |
2641 | 2641 | ||
2642 | char *simple_transaction_get(struct file *file, const char __user *buf, | 2642 | char *simple_transaction_get(struct file *file, const char __user *buf, |
2643 | size_t size); | 2643 | size_t size); |
2644 | ssize_t simple_transaction_read(struct file *file, char __user *buf, | 2644 | ssize_t simple_transaction_read(struct file *file, char __user *buf, |
2645 | size_t size, loff_t *pos); | 2645 | size_t size, loff_t *pos); |
2646 | int simple_transaction_release(struct inode *inode, struct file *file); | 2646 | int simple_transaction_release(struct inode *inode, struct file *file); |
2647 | 2647 | ||
2648 | void simple_transaction_set(struct file *file, size_t n); | 2648 | void simple_transaction_set(struct file *file, size_t n); |
2649 | 2649 | ||
2650 | /* | 2650 | /* |
2651 | * simple attribute files | 2651 | * simple attribute files |
2652 | * | 2652 | * |
2653 | * These attributes behave similar to those in sysfs: | 2653 | * These attributes behave similar to those in sysfs: |
2654 | * | 2654 | * |
2655 | * Writing to an attribute immediately sets a value, an open file can be | 2655 | * Writing to an attribute immediately sets a value, an open file can be |
2656 | * written to multiple times. | 2656 | * written to multiple times. |
2657 | * | 2657 | * |
2658 | * Reading from an attribute creates a buffer from the value that might get | 2658 | * Reading from an attribute creates a buffer from the value that might get |
2659 | * read with multiple read calls. When the attribute has been read | 2659 | * read with multiple read calls. When the attribute has been read |
2660 | * completely, no further read calls are possible until the file is opened | 2660 | * completely, no further read calls are possible until the file is opened |
2661 | * again. | 2661 | * again. |
2662 | * | 2662 | * |
2663 | * All attributes contain a text representation of a numeric value | 2663 | * All attributes contain a text representation of a numeric value |
2664 | * that are accessed with the get() and set() functions. | 2664 | * that are accessed with the get() and set() functions. |
2665 | */ | 2665 | */ |
2666 | #define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \ | 2666 | #define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \ |
2667 | static int __fops ## _open(struct inode *inode, struct file *file) \ | 2667 | static int __fops ## _open(struct inode *inode, struct file *file) \ |
2668 | { \ | 2668 | { \ |
2669 | __simple_attr_check_format(__fmt, 0ull); \ | 2669 | __simple_attr_check_format(__fmt, 0ull); \ |
2670 | return simple_attr_open(inode, file, __get, __set, __fmt); \ | 2670 | return simple_attr_open(inode, file, __get, __set, __fmt); \ |
2671 | } \ | 2671 | } \ |
2672 | static const struct file_operations __fops = { \ | 2672 | static const struct file_operations __fops = { \ |
2673 | .owner = THIS_MODULE, \ | 2673 | .owner = THIS_MODULE, \ |
2674 | .open = __fops ## _open, \ | 2674 | .open = __fops ## _open, \ |
2675 | .release = simple_attr_release, \ | 2675 | .release = simple_attr_release, \ |
2676 | .read = simple_attr_read, \ | 2676 | .read = simple_attr_read, \ |
2677 | .write = simple_attr_write, \ | 2677 | .write = simple_attr_write, \ |
2678 | .llseek = generic_file_llseek, \ | 2678 | .llseek = generic_file_llseek, \ |
2679 | }; | 2679 | }; |
2680 | 2680 | ||
2681 | static inline __printf(1, 2) | 2681 | static inline __printf(1, 2) |
2682 | void __simple_attr_check_format(const char *fmt, ...) | 2682 | void __simple_attr_check_format(const char *fmt, ...) |
2683 | { | 2683 | { |
2684 | /* don't do anything, just let the compiler check the arguments; */ | 2684 | /* don't do anything, just let the compiler check the arguments; */ |
2685 | } | 2685 | } |
2686 | 2686 | ||
2687 | int simple_attr_open(struct inode *inode, struct file *file, | 2687 | int simple_attr_open(struct inode *inode, struct file *file, |
2688 | int (*get)(void *, u64 *), int (*set)(void *, u64), | 2688 | int (*get)(void *, u64 *), int (*set)(void *, u64), |
2689 | const char *fmt); | 2689 | const char *fmt); |
2690 | int simple_attr_release(struct inode *inode, struct file *file); | 2690 | int simple_attr_release(struct inode *inode, struct file *file); |
2691 | ssize_t simple_attr_read(struct file *file, char __user *buf, | 2691 | ssize_t simple_attr_read(struct file *file, char __user *buf, |
2692 | size_t len, loff_t *ppos); | 2692 | size_t len, loff_t *ppos); |
2693 | ssize_t simple_attr_write(struct file *file, const char __user *buf, | 2693 | ssize_t simple_attr_write(struct file *file, const char __user *buf, |
2694 | size_t len, loff_t *ppos); | 2694 | size_t len, loff_t *ppos); |
2695 | 2695 | ||
2696 | struct ctl_table; | 2696 | struct ctl_table; |
2697 | int proc_nr_files(struct ctl_table *table, int write, | 2697 | int proc_nr_files(struct ctl_table *table, int write, |
2698 | void __user *buffer, size_t *lenp, loff_t *ppos); | 2698 | void __user *buffer, size_t *lenp, loff_t *ppos); |
2699 | int proc_nr_dentry(struct ctl_table *table, int write, | 2699 | int proc_nr_dentry(struct ctl_table *table, int write, |
2700 | void __user *buffer, size_t *lenp, loff_t *ppos); | 2700 | void __user *buffer, size_t *lenp, loff_t *ppos); |
2701 | int proc_nr_inodes(struct ctl_table *table, int write, | 2701 | int proc_nr_inodes(struct ctl_table *table, int write, |
2702 | void __user *buffer, size_t *lenp, loff_t *ppos); | 2702 | void __user *buffer, size_t *lenp, loff_t *ppos); |
2703 | int __init get_filesystem_list(char *buf); | 2703 | int __init get_filesystem_list(char *buf); |
2704 | 2704 | ||
2705 | #define __FMODE_EXEC ((__force int) FMODE_EXEC) | 2705 | #define __FMODE_EXEC ((__force int) FMODE_EXEC) |
2706 | #define __FMODE_NONOTIFY ((__force int) FMODE_NONOTIFY) | 2706 | #define __FMODE_NONOTIFY ((__force int) FMODE_NONOTIFY) |
2707 | 2707 | ||
2708 | #define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE]) | 2708 | #define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE]) |
2709 | #define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \ | 2709 | #define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \ |
2710 | (flag & __FMODE_NONOTIFY))) | 2710 | (flag & __FMODE_NONOTIFY))) |
2711 | 2711 | ||
2712 | static inline int is_sxid(umode_t mode) | 2712 | static inline int is_sxid(umode_t mode) |
2713 | { | 2713 | { |
2714 | return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP)); | 2714 | return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP)); |
2715 | } | 2715 | } |
2716 | 2716 | ||
2717 | static inline void inode_has_no_xattr(struct inode *inode) | 2717 | static inline void inode_has_no_xattr(struct inode *inode) |
2718 | { | 2718 | { |
2719 | if (!is_sxid(inode->i_mode) && (inode->i_sb->s_flags & MS_NOSEC)) | 2719 | if (!is_sxid(inode->i_mode) && (inode->i_sb->s_flags & MS_NOSEC)) |
2720 | inode->i_flags |= S_NOSEC; | 2720 | inode->i_flags |= S_NOSEC; |
2721 | } | 2721 | } |
2722 | 2722 | ||
2723 | static inline bool dir_emit(struct dir_context *ctx, | 2723 | static inline bool dir_emit(struct dir_context *ctx, |
2724 | const char *name, int namelen, | 2724 | const char *name, int namelen, |
2725 | u64 ino, unsigned type) | 2725 | u64 ino, unsigned type) |
2726 | { | 2726 | { |
2727 | return ctx->actor(ctx, name, namelen, ctx->pos, ino, type) == 0; | 2727 | return ctx->actor(ctx, name, namelen, ctx->pos, ino, type) == 0; |
2728 | } | 2728 | } |
2729 | static inline bool dir_emit_dot(struct file *file, struct dir_context *ctx) | 2729 | static inline bool dir_emit_dot(struct file *file, struct dir_context *ctx) |
2730 | { | 2730 | { |
2731 | return ctx->actor(ctx, ".", 1, ctx->pos, | 2731 | return ctx->actor(ctx, ".", 1, ctx->pos, |
2732 | file->f_path.dentry->d_inode->i_ino, DT_DIR) == 0; | 2732 | file->f_path.dentry->d_inode->i_ino, DT_DIR) == 0; |
2733 | } | 2733 | } |
2734 | static inline bool dir_emit_dotdot(struct file *file, struct dir_context *ctx) | 2734 | static inline bool dir_emit_dotdot(struct file *file, struct dir_context *ctx) |
2735 | { | 2735 | { |
2736 | return ctx->actor(ctx, "..", 2, ctx->pos, | 2736 | return ctx->actor(ctx, "..", 2, ctx->pos, |
2737 | parent_ino(file->f_path.dentry), DT_DIR) == 0; | 2737 | parent_ino(file->f_path.dentry), DT_DIR) == 0; |
2738 | } | 2738 | } |
2739 | static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx) | 2739 | static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx) |
2740 | { | 2740 | { |
2741 | if (ctx->pos == 0) { | 2741 | if (ctx->pos == 0) { |
2742 | if (!dir_emit_dot(file, ctx)) | 2742 | if (!dir_emit_dot(file, ctx)) |
2743 | return false; | 2743 | return false; |
2744 | ctx->pos = 1; | 2744 | ctx->pos = 1; |
2745 | } | 2745 | } |
2746 | if (ctx->pos == 1) { | 2746 | if (ctx->pos == 1) { |
2747 | if (!dir_emit_dotdot(file, ctx)) | 2747 | if (!dir_emit_dotdot(file, ctx)) |
2748 | return false; | 2748 | return false; |
2749 | ctx->pos = 2; | 2749 | ctx->pos = 2; |
2750 | } | 2750 | } |
2751 | return true; | 2751 | return true; |
2752 | } | 2752 | } |
2753 | static inline bool dir_relax(struct inode *inode) | 2753 | static inline bool dir_relax(struct inode *inode) |
2754 | { | 2754 | { |
2755 | mutex_unlock(&inode->i_mutex); | 2755 | mutex_unlock(&inode->i_mutex); |
2756 | mutex_lock(&inode->i_mutex); | 2756 | mutex_lock(&inode->i_mutex); |
2757 | return !IS_DEADDIR(inode); | 2757 | return !IS_DEADDIR(inode); |
2758 | } | 2758 | } |
2759 | 2759 | ||
2760 | #endif /* _LINUX_FS_H */ | 2760 | #endif /* _LINUX_FS_H */ |
2761 | 2761 |
include/uapi/asm-generic/fcntl.h
1 | #ifndef _ASM_GENERIC_FCNTL_H | 1 | #ifndef _ASM_GENERIC_FCNTL_H |
2 | #define _ASM_GENERIC_FCNTL_H | 2 | #define _ASM_GENERIC_FCNTL_H |
3 | 3 | ||
4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
5 | 5 | ||
6 | /* | 6 | /* |
7 | * FMODE_EXEC is 0x20 | 7 | * FMODE_EXEC is 0x20 |
8 | * FMODE_NONOTIFY is 0x1000000 | 8 | * FMODE_NONOTIFY is 0x1000000 |
9 | * These cannot be used by userspace O_* until internal and external open | 9 | * These cannot be used by userspace O_* until internal and external open |
10 | * flags are split. | 10 | * flags are split. |
11 | * -Eric Paris | 11 | * -Eric Paris |
12 | */ | 12 | */ |
13 | 13 | ||
14 | /* | 14 | /* |
15 | * When introducing new O_* bits, please check its uniqueness in fcntl_init(). | 15 | * When introducing new O_* bits, please check its uniqueness in fcntl_init(). |
16 | */ | 16 | */ |
17 | 17 | ||
18 | #define O_ACCMODE 00000003 | 18 | #define O_ACCMODE 00000003 |
19 | #define O_RDONLY 00000000 | 19 | #define O_RDONLY 00000000 |
20 | #define O_WRONLY 00000001 | 20 | #define O_WRONLY 00000001 |
21 | #define O_RDWR 00000002 | 21 | #define O_RDWR 00000002 |
22 | #ifndef O_CREAT | 22 | #ifndef O_CREAT |
23 | #define O_CREAT 00000100 /* not fcntl */ | 23 | #define O_CREAT 00000100 /* not fcntl */ |
24 | #endif | 24 | #endif |
25 | #ifndef O_EXCL | 25 | #ifndef O_EXCL |
26 | #define O_EXCL 00000200 /* not fcntl */ | 26 | #define O_EXCL 00000200 /* not fcntl */ |
27 | #endif | 27 | #endif |
28 | #ifndef O_NOCTTY | 28 | #ifndef O_NOCTTY |
29 | #define O_NOCTTY 00000400 /* not fcntl */ | 29 | #define O_NOCTTY 00000400 /* not fcntl */ |
30 | #endif | 30 | #endif |
31 | #ifndef O_TRUNC | 31 | #ifndef O_TRUNC |
32 | #define O_TRUNC 00001000 /* not fcntl */ | 32 | #define O_TRUNC 00001000 /* not fcntl */ |
33 | #endif | 33 | #endif |
34 | #ifndef O_APPEND | 34 | #ifndef O_APPEND |
35 | #define O_APPEND 00002000 | 35 | #define O_APPEND 00002000 |
36 | #endif | 36 | #endif |
37 | #ifndef O_NONBLOCK | 37 | #ifndef O_NONBLOCK |
38 | #define O_NONBLOCK 00004000 | 38 | #define O_NONBLOCK 00004000 |
39 | #endif | 39 | #endif |
40 | #ifndef O_DSYNC | 40 | #ifndef O_DSYNC |
41 | #define O_DSYNC 00010000 /* used to be O_SYNC, see below */ | 41 | #define O_DSYNC 00010000 /* used to be O_SYNC, see below */ |
42 | #endif | 42 | #endif |
43 | #ifndef FASYNC | 43 | #ifndef FASYNC |
44 | #define FASYNC 00020000 /* fcntl, for BSD compatibility */ | 44 | #define FASYNC 00020000 /* fcntl, for BSD compatibility */ |
45 | #endif | 45 | #endif |
46 | #ifndef O_DIRECT | 46 | #ifndef O_DIRECT |
47 | #define O_DIRECT 00040000 /* direct disk access hint */ | 47 | #define O_DIRECT 00040000 /* direct disk access hint */ |
48 | #endif | 48 | #endif |
49 | #ifndef O_LARGEFILE | 49 | #ifndef O_LARGEFILE |
50 | #define O_LARGEFILE 00100000 | 50 | #define O_LARGEFILE 00100000 |
51 | #endif | 51 | #endif |
52 | #ifndef O_DIRECTORY | 52 | #ifndef O_DIRECTORY |
53 | #define O_DIRECTORY 00200000 /* must be a directory */ | 53 | #define O_DIRECTORY 00200000 /* must be a directory */ |
54 | #endif | 54 | #endif |
55 | #ifndef O_NOFOLLOW | 55 | #ifndef O_NOFOLLOW |
56 | #define O_NOFOLLOW 00400000 /* don't follow links */ | 56 | #define O_NOFOLLOW 00400000 /* don't follow links */ |
57 | #endif | 57 | #endif |
58 | #ifndef O_NOATIME | 58 | #ifndef O_NOATIME |
59 | #define O_NOATIME 01000000 | 59 | #define O_NOATIME 01000000 |
60 | #endif | 60 | #endif |
61 | #ifndef O_CLOEXEC | 61 | #ifndef O_CLOEXEC |
62 | #define O_CLOEXEC 02000000 /* set close_on_exec */ | 62 | #define O_CLOEXEC 02000000 /* set close_on_exec */ |
63 | #endif | 63 | #endif |
64 | 64 | ||
65 | /* | 65 | /* |
66 | * Before Linux 2.6.33 only O_DSYNC semantics were implemented, but using | 66 | * Before Linux 2.6.33 only O_DSYNC semantics were implemented, but using |
67 | * the O_SYNC flag. We continue to use the existing numerical value | 67 | * the O_SYNC flag. We continue to use the existing numerical value |
68 | * for O_DSYNC semantics now, but using the correct symbolic name for it. | 68 | * for O_DSYNC semantics now, but using the correct symbolic name for it. |
69 | * This new value is used to request true Posix O_SYNC semantics. It is | 69 | * This new value is used to request true Posix O_SYNC semantics. It is |
70 | * defined in this strange way to make sure applications compiled against | 70 | * defined in this strange way to make sure applications compiled against |
71 | * new headers get at least O_DSYNC semantics on older kernels. | 71 | * new headers get at least O_DSYNC semantics on older kernels. |
72 | * | 72 | * |
73 | * This has the nice side-effect that we can simply test for O_DSYNC | 73 | * This has the nice side-effect that we can simply test for O_DSYNC |
74 | * wherever we do not care if O_DSYNC or O_SYNC is used. | 74 | * wherever we do not care if O_DSYNC or O_SYNC is used. |
75 | * | 75 | * |
76 | * Note: __O_SYNC must never be used directly. | 76 | * Note: __O_SYNC must never be used directly. |
77 | */ | 77 | */ |
78 | #ifndef O_SYNC | 78 | #ifndef O_SYNC |
79 | #define __O_SYNC 04000000 | 79 | #define __O_SYNC 04000000 |
80 | #define O_SYNC (__O_SYNC|O_DSYNC) | 80 | #define O_SYNC (__O_SYNC|O_DSYNC) |
81 | #endif | 81 | #endif |
82 | 82 | ||
83 | #ifndef O_PATH | 83 | #ifndef O_PATH |
84 | #define O_PATH 010000000 | 84 | #define O_PATH 010000000 |
85 | #endif | 85 | #endif |
86 | 86 | ||
87 | #ifndef __O_TMPFILE | 87 | #ifndef __O_TMPFILE |
88 | #define __O_TMPFILE 020000000 | 88 | #define __O_TMPFILE 020000000 |
89 | #endif | 89 | #endif |
90 | 90 | ||
91 | /* a horrid kludge trying to make sure that this will fail on old kernels */ | 91 | /* a horrid kludge trying to make sure that this will fail on old kernels */ |
92 | #define O_TMPFILE (__O_TMPFILE | O_DIRECTORY) | 92 | #define O_TMPFILE (__O_TMPFILE | O_DIRECTORY) |
93 | #define O_TMPFILE_MASK (__O_TMPFILE | O_DIRECTORY | O_CREAT) | 93 | #define O_TMPFILE_MASK (__O_TMPFILE | O_DIRECTORY | O_CREAT) |
94 | 94 | ||
95 | #ifndef O_NDELAY | 95 | #ifndef O_NDELAY |
96 | #define O_NDELAY O_NONBLOCK | 96 | #define O_NDELAY O_NONBLOCK |
97 | #endif | 97 | #endif |
98 | 98 | ||
99 | #define F_DUPFD 0 /* dup */ | 99 | #define F_DUPFD 0 /* dup */ |
100 | #define F_GETFD 1 /* get close_on_exec */ | 100 | #define F_GETFD 1 /* get close_on_exec */ |
101 | #define F_SETFD 2 /* set/clear close_on_exec */ | 101 | #define F_SETFD 2 /* set/clear close_on_exec */ |
102 | #define F_GETFL 3 /* get file->f_flags */ | 102 | #define F_GETFL 3 /* get file->f_flags */ |
103 | #define F_SETFL 4 /* set file->f_flags */ | 103 | #define F_SETFL 4 /* set file->f_flags */ |
104 | #ifndef F_GETLK | 104 | #ifndef F_GETLK |
105 | #define F_GETLK 5 | 105 | #define F_GETLK 5 |
106 | #define F_SETLK 6 | 106 | #define F_SETLK 6 |
107 | #define F_SETLKW 7 | 107 | #define F_SETLKW 7 |
108 | #endif | 108 | #endif |
109 | #ifndef F_SETOWN | 109 | #ifndef F_SETOWN |
110 | #define F_SETOWN 8 /* for sockets. */ | 110 | #define F_SETOWN 8 /* for sockets. */ |
111 | #define F_GETOWN 9 /* for sockets. */ | 111 | #define F_GETOWN 9 /* for sockets. */ |
112 | #endif | 112 | #endif |
113 | #ifndef F_SETSIG | 113 | #ifndef F_SETSIG |
114 | #define F_SETSIG 10 /* for sockets. */ | 114 | #define F_SETSIG 10 /* for sockets. */ |
115 | #define F_GETSIG 11 /* for sockets. */ | 115 | #define F_GETSIG 11 /* for sockets. */ |
116 | #endif | 116 | #endif |
117 | 117 | ||
118 | #ifndef CONFIG_64BIT | 118 | #ifndef CONFIG_64BIT |
119 | #ifndef F_GETLK64 | 119 | #ifndef F_GETLK64 |
120 | #define F_GETLK64 12 /* using 'struct flock64' */ | 120 | #define F_GETLK64 12 /* using 'struct flock64' */ |
121 | #define F_SETLK64 13 | 121 | #define F_SETLK64 13 |
122 | #define F_SETLKW64 14 | 122 | #define F_SETLKW64 14 |
123 | #endif | 123 | #endif |
124 | #endif | 124 | #endif |
125 | 125 | ||
126 | #ifndef F_SETOWN_EX | 126 | #ifndef F_SETOWN_EX |
127 | #define F_SETOWN_EX 15 | 127 | #define F_SETOWN_EX 15 |
128 | #define F_GETOWN_EX 16 | 128 | #define F_GETOWN_EX 16 |
129 | #endif | 129 | #endif |
130 | 130 | ||
131 | #ifndef F_GETOWNER_UIDS | 131 | #ifndef F_GETOWNER_UIDS |
132 | #define F_GETOWNER_UIDS 17 | 132 | #define F_GETOWNER_UIDS 17 |
133 | #endif | 133 | #endif |
134 | 134 | ||
135 | /* | 135 | /* |
136 | * fd "private" POSIX locks. | 136 | * Open File Description Locks |
137 | * | 137 | * |
138 | * Usually POSIX locks held by a process are released on *any* close and are | 138 | * Usually record locks held by a process are released on *any* close and are |
139 | * not inherited across a fork(). | 139 | * not inherited across a fork(). |
140 | * | 140 | * |
141 | * These cmd values will set locks that conflict with normal POSIX locks, but | 141 | * These cmd values will set locks that conflict with process-associated |
142 | * are "owned" by the opened file, not the process. This means that they are | 142 | * record locks, but are "owned" by the open file description, not the |
143 | * inherited across fork() like BSD (flock) locks, and they are only released | 143 | * process. This means that they are inherited across fork() like BSD (flock) |
144 | * automatically when the last reference to the open file against which | 144 | * locks, and they are only released automatically when the last reference to |
145 | * they were acquired is put. | 145 | * the open file against which they were acquired is put. |
146 | */ | 146 | */ |
147 | #define F_GETLKP 36 | 147 | #define F_OFD_GETLK 36 |
148 | #define F_SETLKP 37 | 148 | #define F_OFD_SETLK 37 |
149 | #define F_SETLKPW 38 | 149 | #define F_OFD_SETLKW 38 |
150 | 150 | ||
151 | #define F_OWNER_TID 0 | 151 | #define F_OWNER_TID 0 |
152 | #define F_OWNER_PID 1 | 152 | #define F_OWNER_PID 1 |
153 | #define F_OWNER_PGRP 2 | 153 | #define F_OWNER_PGRP 2 |
154 | 154 | ||
155 | struct f_owner_ex { | 155 | struct f_owner_ex { |
156 | int type; | 156 | int type; |
157 | __kernel_pid_t pid; | 157 | __kernel_pid_t pid; |
158 | }; | 158 | }; |
159 | 159 | ||
160 | /* for F_[GET|SET]FD */ | 160 | /* for F_[GET|SET]FD */ |
161 | #define FD_CLOEXEC 1 /* actually anything with low bit set goes */ | 161 | #define FD_CLOEXEC 1 /* actually anything with low bit set goes */ |
162 | 162 | ||
163 | /* for posix fcntl() and lockf() */ | 163 | /* for posix fcntl() and lockf() */ |
164 | #ifndef F_RDLCK | 164 | #ifndef F_RDLCK |
165 | #define F_RDLCK 0 | 165 | #define F_RDLCK 0 |
166 | #define F_WRLCK 1 | 166 | #define F_WRLCK 1 |
167 | #define F_UNLCK 2 | 167 | #define F_UNLCK 2 |
168 | #endif | 168 | #endif |
169 | 169 | ||
170 | /* for old implementation of bsd flock () */ | 170 | /* for old implementation of bsd flock () */ |
171 | #ifndef F_EXLCK | 171 | #ifndef F_EXLCK |
172 | #define F_EXLCK 4 /* or 3 */ | 172 | #define F_EXLCK 4 /* or 3 */ |
173 | #define F_SHLCK 8 /* or 4 */ | 173 | #define F_SHLCK 8 /* or 4 */ |
174 | #endif | 174 | #endif |
175 | 175 | ||
176 | /* operations for bsd flock(), also used by the kernel implementation */ | 176 | /* operations for bsd flock(), also used by the kernel implementation */ |
177 | #define LOCK_SH 1 /* shared lock */ | 177 | #define LOCK_SH 1 /* shared lock */ |
178 | #define LOCK_EX 2 /* exclusive lock */ | 178 | #define LOCK_EX 2 /* exclusive lock */ |
179 | #define LOCK_NB 4 /* or'd with one of the above to prevent | 179 | #define LOCK_NB 4 /* or'd with one of the above to prevent |
180 | blocking */ | 180 | blocking */ |
181 | #define LOCK_UN 8 /* remove lock */ | 181 | #define LOCK_UN 8 /* remove lock */ |
182 | 182 | ||
183 | #define LOCK_MAND 32 /* This is a mandatory flock ... */ | 183 | #define LOCK_MAND 32 /* This is a mandatory flock ... */ |
184 | #define LOCK_READ 64 /* which allows concurrent read operations */ | 184 | #define LOCK_READ 64 /* which allows concurrent read operations */ |
185 | #define LOCK_WRITE 128 /* which allows concurrent write operations */ | 185 | #define LOCK_WRITE 128 /* which allows concurrent write operations */ |
186 | #define LOCK_RW 192 /* which allows concurrent read & write ops */ | 186 | #define LOCK_RW 192 /* which allows concurrent read & write ops */ |
187 | 187 | ||
188 | #define F_LINUX_SPECIFIC_BASE 1024 | 188 | #define F_LINUX_SPECIFIC_BASE 1024 |
189 | 189 | ||
190 | #ifndef HAVE_ARCH_STRUCT_FLOCK | 190 | #ifndef HAVE_ARCH_STRUCT_FLOCK |
191 | #ifndef __ARCH_FLOCK_PAD | 191 | #ifndef __ARCH_FLOCK_PAD |
192 | #define __ARCH_FLOCK_PAD | 192 | #define __ARCH_FLOCK_PAD |
193 | #endif | 193 | #endif |
194 | 194 | ||
195 | struct flock { | 195 | struct flock { |
196 | short l_type; | 196 | short l_type; |
197 | short l_whence; | 197 | short l_whence; |
198 | __kernel_off_t l_start; | 198 | __kernel_off_t l_start; |
199 | __kernel_off_t l_len; | 199 | __kernel_off_t l_len; |
200 | __kernel_pid_t l_pid; | 200 | __kernel_pid_t l_pid; |
201 | __ARCH_FLOCK_PAD | 201 | __ARCH_FLOCK_PAD |
202 | }; | 202 | }; |
203 | #endif | 203 | #endif |
204 | 204 | ||
205 | #ifndef HAVE_ARCH_STRUCT_FLOCK64 | 205 | #ifndef HAVE_ARCH_STRUCT_FLOCK64 |
206 | #ifndef __ARCH_FLOCK64_PAD | 206 | #ifndef __ARCH_FLOCK64_PAD |
207 | #define __ARCH_FLOCK64_PAD | 207 | #define __ARCH_FLOCK64_PAD |
208 | #endif | 208 | #endif |
209 | 209 | ||
210 | struct flock64 { | 210 | struct flock64 { |
211 | short l_type; | 211 | short l_type; |
212 | short l_whence; | 212 | short l_whence; |
213 | __kernel_loff_t l_start; | 213 | __kernel_loff_t l_start; |
214 | __kernel_loff_t l_len; | 214 | __kernel_loff_t l_len; |
215 | __kernel_pid_t l_pid; | 215 | __kernel_pid_t l_pid; |
216 | __ARCH_FLOCK64_PAD | 216 | __ARCH_FLOCK64_PAD |
217 | }; | 217 | }; |
218 | #endif | 218 | #endif |
219 | 219 | ||
220 | #endif /* _ASM_GENERIC_FCNTL_H */ | 220 | #endif /* _ASM_GENERIC_FCNTL_H */ |
221 | 221 |
security/selinux/hooks.c
1 | /* | 1 | /* |
2 | * NSA Security-Enhanced Linux (SELinux) security module | 2 | * NSA Security-Enhanced Linux (SELinux) security module |
3 | * | 3 | * |
4 | * This file contains the SELinux hook function implementations. | 4 | * This file contains the SELinux hook function implementations. |
5 | * | 5 | * |
6 | * Authors: Stephen Smalley, <sds@epoch.ncsc.mil> | 6 | * Authors: Stephen Smalley, <sds@epoch.ncsc.mil> |
7 | * Chris Vance, <cvance@nai.com> | 7 | * Chris Vance, <cvance@nai.com> |
8 | * Wayne Salamon, <wsalamon@nai.com> | 8 | * Wayne Salamon, <wsalamon@nai.com> |
9 | * James Morris <jmorris@redhat.com> | 9 | * James Morris <jmorris@redhat.com> |
10 | * | 10 | * |
11 | * Copyright (C) 2001,2002 Networks Associates Technology, Inc. | 11 | * Copyright (C) 2001,2002 Networks Associates Technology, Inc. |
12 | * Copyright (C) 2003-2008 Red Hat, Inc., James Morris <jmorris@redhat.com> | 12 | * Copyright (C) 2003-2008 Red Hat, Inc., James Morris <jmorris@redhat.com> |
13 | * Eric Paris <eparis@redhat.com> | 13 | * Eric Paris <eparis@redhat.com> |
14 | * Copyright (C) 2004-2005 Trusted Computer Solutions, Inc. | 14 | * Copyright (C) 2004-2005 Trusted Computer Solutions, Inc. |
15 | * <dgoeddel@trustedcs.com> | 15 | * <dgoeddel@trustedcs.com> |
16 | * Copyright (C) 2006, 2007, 2009 Hewlett-Packard Development Company, L.P. | 16 | * Copyright (C) 2006, 2007, 2009 Hewlett-Packard Development Company, L.P. |
17 | * Paul Moore <paul@paul-moore.com> | 17 | * Paul Moore <paul@paul-moore.com> |
18 | * Copyright (C) 2007 Hitachi Software Engineering Co., Ltd. | 18 | * Copyright (C) 2007 Hitachi Software Engineering Co., Ltd. |
19 | * Yuichi Nakamura <ynakam@hitachisoft.jp> | 19 | * Yuichi Nakamura <ynakam@hitachisoft.jp> |
20 | * | 20 | * |
21 | * This program is free software; you can redistribute it and/or modify | 21 | * This program is free software; you can redistribute it and/or modify |
22 | * it under the terms of the GNU General Public License version 2, | 22 | * it under the terms of the GNU General Public License version 2, |
23 | * as published by the Free Software Foundation. | 23 | * as published by the Free Software Foundation. |
24 | */ | 24 | */ |
25 | 25 | ||
26 | #include <linux/init.h> | 26 | #include <linux/init.h> |
27 | #include <linux/kd.h> | 27 | #include <linux/kd.h> |
28 | #include <linux/kernel.h> | 28 | #include <linux/kernel.h> |
29 | #include <linux/tracehook.h> | 29 | #include <linux/tracehook.h> |
30 | #include <linux/errno.h> | 30 | #include <linux/errno.h> |
31 | #include <linux/sched.h> | 31 | #include <linux/sched.h> |
32 | #include <linux/security.h> | 32 | #include <linux/security.h> |
33 | #include <linux/xattr.h> | 33 | #include <linux/xattr.h> |
34 | #include <linux/capability.h> | 34 | #include <linux/capability.h> |
35 | #include <linux/unistd.h> | 35 | #include <linux/unistd.h> |
36 | #include <linux/mm.h> | 36 | #include <linux/mm.h> |
37 | #include <linux/mman.h> | 37 | #include <linux/mman.h> |
38 | #include <linux/slab.h> | 38 | #include <linux/slab.h> |
39 | #include <linux/pagemap.h> | 39 | #include <linux/pagemap.h> |
40 | #include <linux/proc_fs.h> | 40 | #include <linux/proc_fs.h> |
41 | #include <linux/swap.h> | 41 | #include <linux/swap.h> |
42 | #include <linux/spinlock.h> | 42 | #include <linux/spinlock.h> |
43 | #include <linux/syscalls.h> | 43 | #include <linux/syscalls.h> |
44 | #include <linux/dcache.h> | 44 | #include <linux/dcache.h> |
45 | #include <linux/file.h> | 45 | #include <linux/file.h> |
46 | #include <linux/fdtable.h> | 46 | #include <linux/fdtable.h> |
47 | #include <linux/namei.h> | 47 | #include <linux/namei.h> |
48 | #include <linux/mount.h> | 48 | #include <linux/mount.h> |
49 | #include <linux/netfilter_ipv4.h> | 49 | #include <linux/netfilter_ipv4.h> |
50 | #include <linux/netfilter_ipv6.h> | 50 | #include <linux/netfilter_ipv6.h> |
51 | #include <linux/tty.h> | 51 | #include <linux/tty.h> |
52 | #include <net/icmp.h> | 52 | #include <net/icmp.h> |
53 | #include <net/ip.h> /* for local_port_range[] */ | 53 | #include <net/ip.h> /* for local_port_range[] */ |
54 | #include <net/sock.h> | 54 | #include <net/sock.h> |
55 | #include <net/tcp.h> /* struct or_callable used in sock_rcv_skb */ | 55 | #include <net/tcp.h> /* struct or_callable used in sock_rcv_skb */ |
56 | #include <net/inet_connection_sock.h> | 56 | #include <net/inet_connection_sock.h> |
57 | #include <net/net_namespace.h> | 57 | #include <net/net_namespace.h> |
58 | #include <net/netlabel.h> | 58 | #include <net/netlabel.h> |
59 | #include <linux/uaccess.h> | 59 | #include <linux/uaccess.h> |
60 | #include <asm/ioctls.h> | 60 | #include <asm/ioctls.h> |
61 | #include <linux/atomic.h> | 61 | #include <linux/atomic.h> |
62 | #include <linux/bitops.h> | 62 | #include <linux/bitops.h> |
63 | #include <linux/interrupt.h> | 63 | #include <linux/interrupt.h> |
64 | #include <linux/netdevice.h> /* for network interface checks */ | 64 | #include <linux/netdevice.h> /* for network interface checks */ |
65 | #include <net/netlink.h> | 65 | #include <net/netlink.h> |
66 | #include <linux/tcp.h> | 66 | #include <linux/tcp.h> |
67 | #include <linux/udp.h> | 67 | #include <linux/udp.h> |
68 | #include <linux/dccp.h> | 68 | #include <linux/dccp.h> |
69 | #include <linux/quota.h> | 69 | #include <linux/quota.h> |
70 | #include <linux/un.h> /* for Unix socket types */ | 70 | #include <linux/un.h> /* for Unix socket types */ |
71 | #include <net/af_unix.h> /* for Unix socket types */ | 71 | #include <net/af_unix.h> /* for Unix socket types */ |
72 | #include <linux/parser.h> | 72 | #include <linux/parser.h> |
73 | #include <linux/nfs_mount.h> | 73 | #include <linux/nfs_mount.h> |
74 | #include <net/ipv6.h> | 74 | #include <net/ipv6.h> |
75 | #include <linux/hugetlb.h> | 75 | #include <linux/hugetlb.h> |
76 | #include <linux/personality.h> | 76 | #include <linux/personality.h> |
77 | #include <linux/audit.h> | 77 | #include <linux/audit.h> |
78 | #include <linux/string.h> | 78 | #include <linux/string.h> |
79 | #include <linux/selinux.h> | 79 | #include <linux/selinux.h> |
80 | #include <linux/mutex.h> | 80 | #include <linux/mutex.h> |
81 | #include <linux/posix-timers.h> | 81 | #include <linux/posix-timers.h> |
82 | #include <linux/syslog.h> | 82 | #include <linux/syslog.h> |
83 | #include <linux/user_namespace.h> | 83 | #include <linux/user_namespace.h> |
84 | #include <linux/export.h> | 84 | #include <linux/export.h> |
85 | #include <linux/msg.h> | 85 | #include <linux/msg.h> |
86 | #include <linux/shm.h> | 86 | #include <linux/shm.h> |
87 | 87 | ||
88 | #include "avc.h" | 88 | #include "avc.h" |
89 | #include "objsec.h" | 89 | #include "objsec.h" |
90 | #include "netif.h" | 90 | #include "netif.h" |
91 | #include "netnode.h" | 91 | #include "netnode.h" |
92 | #include "netport.h" | 92 | #include "netport.h" |
93 | #include "xfrm.h" | 93 | #include "xfrm.h" |
94 | #include "netlabel.h" | 94 | #include "netlabel.h" |
95 | #include "audit.h" | 95 | #include "audit.h" |
96 | #include "avc_ss.h" | 96 | #include "avc_ss.h" |
97 | 97 | ||
98 | extern struct security_operations *security_ops; | 98 | extern struct security_operations *security_ops; |
99 | 99 | ||
100 | /* SECMARK reference count */ | 100 | /* SECMARK reference count */ |
101 | static atomic_t selinux_secmark_refcount = ATOMIC_INIT(0); | 101 | static atomic_t selinux_secmark_refcount = ATOMIC_INIT(0); |
102 | 102 | ||
103 | #ifdef CONFIG_SECURITY_SELINUX_DEVELOP | 103 | #ifdef CONFIG_SECURITY_SELINUX_DEVELOP |
104 | int selinux_enforcing; | 104 | int selinux_enforcing; |
105 | 105 | ||
106 | static int __init enforcing_setup(char *str) | 106 | static int __init enforcing_setup(char *str) |
107 | { | 107 | { |
108 | unsigned long enforcing; | 108 | unsigned long enforcing; |
109 | if (!kstrtoul(str, 0, &enforcing)) | 109 | if (!kstrtoul(str, 0, &enforcing)) |
110 | selinux_enforcing = enforcing ? 1 : 0; | 110 | selinux_enforcing = enforcing ? 1 : 0; |
111 | return 1; | 111 | return 1; |
112 | } | 112 | } |
113 | __setup("enforcing=", enforcing_setup); | 113 | __setup("enforcing=", enforcing_setup); |
114 | #endif | 114 | #endif |
115 | 115 | ||
116 | #ifdef CONFIG_SECURITY_SELINUX_BOOTPARAM | 116 | #ifdef CONFIG_SECURITY_SELINUX_BOOTPARAM |
117 | int selinux_enabled = CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE; | 117 | int selinux_enabled = CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE; |
118 | 118 | ||
119 | static int __init selinux_enabled_setup(char *str) | 119 | static int __init selinux_enabled_setup(char *str) |
120 | { | 120 | { |
121 | unsigned long enabled; | 121 | unsigned long enabled; |
122 | if (!kstrtoul(str, 0, &enabled)) | 122 | if (!kstrtoul(str, 0, &enabled)) |
123 | selinux_enabled = enabled ? 1 : 0; | 123 | selinux_enabled = enabled ? 1 : 0; |
124 | return 1; | 124 | return 1; |
125 | } | 125 | } |
126 | __setup("selinux=", selinux_enabled_setup); | 126 | __setup("selinux=", selinux_enabled_setup); |
127 | #else | 127 | #else |
128 | int selinux_enabled = 1; | 128 | int selinux_enabled = 1; |
129 | #endif | 129 | #endif |
130 | 130 | ||
131 | static struct kmem_cache *sel_inode_cache; | 131 | static struct kmem_cache *sel_inode_cache; |
132 | 132 | ||
133 | /** | 133 | /** |
134 | * selinux_secmark_enabled - Check to see if SECMARK is currently enabled | 134 | * selinux_secmark_enabled - Check to see if SECMARK is currently enabled |
135 | * | 135 | * |
136 | * Description: | 136 | * Description: |
137 | * This function checks the SECMARK reference counter to see if any SECMARK | 137 | * This function checks the SECMARK reference counter to see if any SECMARK |
138 | * targets are currently configured, if the reference counter is greater than | 138 | * targets are currently configured, if the reference counter is greater than |
139 | * zero SECMARK is considered to be enabled. Returns true (1) if SECMARK is | 139 | * zero SECMARK is considered to be enabled. Returns true (1) if SECMARK is |
140 | * enabled, false (0) if SECMARK is disabled. If the always_check_network | 140 | * enabled, false (0) if SECMARK is disabled. If the always_check_network |
141 | * policy capability is enabled, SECMARK is always considered enabled. | 141 | * policy capability is enabled, SECMARK is always considered enabled. |
142 | * | 142 | * |
143 | */ | 143 | */ |
144 | static int selinux_secmark_enabled(void) | 144 | static int selinux_secmark_enabled(void) |
145 | { | 145 | { |
146 | return (selinux_policycap_alwaysnetwork || atomic_read(&selinux_secmark_refcount)); | 146 | return (selinux_policycap_alwaysnetwork || atomic_read(&selinux_secmark_refcount)); |
147 | } | 147 | } |
148 | 148 | ||
149 | /** | 149 | /** |
150 | * selinux_peerlbl_enabled - Check to see if peer labeling is currently enabled | 150 | * selinux_peerlbl_enabled - Check to see if peer labeling is currently enabled |
151 | * | 151 | * |
152 | * Description: | 152 | * Description: |
153 | * This function checks if NetLabel or labeled IPSEC is enabled. Returns true | 153 | * This function checks if NetLabel or labeled IPSEC is enabled. Returns true |
154 | * (1) if any are enabled or false (0) if neither are enabled. If the | 154 | * (1) if any are enabled or false (0) if neither are enabled. If the |
155 | * always_check_network policy capability is enabled, peer labeling | 155 | * always_check_network policy capability is enabled, peer labeling |
156 | * is always considered enabled. | 156 | * is always considered enabled. |
157 | * | 157 | * |
158 | */ | 158 | */ |
159 | static int selinux_peerlbl_enabled(void) | 159 | static int selinux_peerlbl_enabled(void) |
160 | { | 160 | { |
161 | return (selinux_policycap_alwaysnetwork || netlbl_enabled() || selinux_xfrm_enabled()); | 161 | return (selinux_policycap_alwaysnetwork || netlbl_enabled() || selinux_xfrm_enabled()); |
162 | } | 162 | } |
163 | 163 | ||
164 | /* | 164 | /* |
165 | * initialise the security for the init task | 165 | * initialise the security for the init task |
166 | */ | 166 | */ |
167 | static void cred_init_security(void) | 167 | static void cred_init_security(void) |
168 | { | 168 | { |
169 | struct cred *cred = (struct cred *) current->real_cred; | 169 | struct cred *cred = (struct cred *) current->real_cred; |
170 | struct task_security_struct *tsec; | 170 | struct task_security_struct *tsec; |
171 | 171 | ||
172 | tsec = kzalloc(sizeof(struct task_security_struct), GFP_KERNEL); | 172 | tsec = kzalloc(sizeof(struct task_security_struct), GFP_KERNEL); |
173 | if (!tsec) | 173 | if (!tsec) |
174 | panic("SELinux: Failed to initialize initial task.\n"); | 174 | panic("SELinux: Failed to initialize initial task.\n"); |
175 | 175 | ||
176 | tsec->osid = tsec->sid = SECINITSID_KERNEL; | 176 | tsec->osid = tsec->sid = SECINITSID_KERNEL; |
177 | cred->security = tsec; | 177 | cred->security = tsec; |
178 | } | 178 | } |
179 | 179 | ||
180 | /* | 180 | /* |
181 | * get the security ID of a set of credentials | 181 | * get the security ID of a set of credentials |
182 | */ | 182 | */ |
183 | static inline u32 cred_sid(const struct cred *cred) | 183 | static inline u32 cred_sid(const struct cred *cred) |
184 | { | 184 | { |
185 | const struct task_security_struct *tsec; | 185 | const struct task_security_struct *tsec; |
186 | 186 | ||
187 | tsec = cred->security; | 187 | tsec = cred->security; |
188 | return tsec->sid; | 188 | return tsec->sid; |
189 | } | 189 | } |
190 | 190 | ||
191 | /* | 191 | /* |
192 | * get the objective security ID of a task | 192 | * get the objective security ID of a task |
193 | */ | 193 | */ |
194 | static inline u32 task_sid(const struct task_struct *task) | 194 | static inline u32 task_sid(const struct task_struct *task) |
195 | { | 195 | { |
196 | u32 sid; | 196 | u32 sid; |
197 | 197 | ||
198 | rcu_read_lock(); | 198 | rcu_read_lock(); |
199 | sid = cred_sid(__task_cred(task)); | 199 | sid = cred_sid(__task_cred(task)); |
200 | rcu_read_unlock(); | 200 | rcu_read_unlock(); |
201 | return sid; | 201 | return sid; |
202 | } | 202 | } |
203 | 203 | ||
204 | /* | 204 | /* |
205 | * get the subjective security ID of the current task | 205 | * get the subjective security ID of the current task |
206 | */ | 206 | */ |
207 | static inline u32 current_sid(void) | 207 | static inline u32 current_sid(void) |
208 | { | 208 | { |
209 | const struct task_security_struct *tsec = current_security(); | 209 | const struct task_security_struct *tsec = current_security(); |
210 | 210 | ||
211 | return tsec->sid; | 211 | return tsec->sid; |
212 | } | 212 | } |
213 | 213 | ||
214 | /* Allocate and free functions for each kind of security blob. */ | 214 | /* Allocate and free functions for each kind of security blob. */ |
215 | 215 | ||
216 | static int inode_alloc_security(struct inode *inode) | 216 | static int inode_alloc_security(struct inode *inode) |
217 | { | 217 | { |
218 | struct inode_security_struct *isec; | 218 | struct inode_security_struct *isec; |
219 | u32 sid = current_sid(); | 219 | u32 sid = current_sid(); |
220 | 220 | ||
221 | isec = kmem_cache_zalloc(sel_inode_cache, GFP_NOFS); | 221 | isec = kmem_cache_zalloc(sel_inode_cache, GFP_NOFS); |
222 | if (!isec) | 222 | if (!isec) |
223 | return -ENOMEM; | 223 | return -ENOMEM; |
224 | 224 | ||
225 | mutex_init(&isec->lock); | 225 | mutex_init(&isec->lock); |
226 | INIT_LIST_HEAD(&isec->list); | 226 | INIT_LIST_HEAD(&isec->list); |
227 | isec->inode = inode; | 227 | isec->inode = inode; |
228 | isec->sid = SECINITSID_UNLABELED; | 228 | isec->sid = SECINITSID_UNLABELED; |
229 | isec->sclass = SECCLASS_FILE; | 229 | isec->sclass = SECCLASS_FILE; |
230 | isec->task_sid = sid; | 230 | isec->task_sid = sid; |
231 | inode->i_security = isec; | 231 | inode->i_security = isec; |
232 | 232 | ||
233 | return 0; | 233 | return 0; |
234 | } | 234 | } |
235 | 235 | ||
236 | static void inode_free_rcu(struct rcu_head *head) | 236 | static void inode_free_rcu(struct rcu_head *head) |
237 | { | 237 | { |
238 | struct inode_security_struct *isec; | 238 | struct inode_security_struct *isec; |
239 | 239 | ||
240 | isec = container_of(head, struct inode_security_struct, rcu); | 240 | isec = container_of(head, struct inode_security_struct, rcu); |
241 | kmem_cache_free(sel_inode_cache, isec); | 241 | kmem_cache_free(sel_inode_cache, isec); |
242 | } | 242 | } |
243 | 243 | ||
244 | static void inode_free_security(struct inode *inode) | 244 | static void inode_free_security(struct inode *inode) |
245 | { | 245 | { |
246 | struct inode_security_struct *isec = inode->i_security; | 246 | struct inode_security_struct *isec = inode->i_security; |
247 | struct superblock_security_struct *sbsec = inode->i_sb->s_security; | 247 | struct superblock_security_struct *sbsec = inode->i_sb->s_security; |
248 | 248 | ||
249 | spin_lock(&sbsec->isec_lock); | 249 | spin_lock(&sbsec->isec_lock); |
250 | if (!list_empty(&isec->list)) | 250 | if (!list_empty(&isec->list)) |
251 | list_del_init(&isec->list); | 251 | list_del_init(&isec->list); |
252 | spin_unlock(&sbsec->isec_lock); | 252 | spin_unlock(&sbsec->isec_lock); |
253 | 253 | ||
254 | /* | 254 | /* |
255 | * The inode may still be referenced in a path walk and | 255 | * The inode may still be referenced in a path walk and |
256 | * a call to selinux_inode_permission() can be made | 256 | * a call to selinux_inode_permission() can be made |
257 | * after inode_free_security() is called. Ideally, the VFS | 257 | * after inode_free_security() is called. Ideally, the VFS |
258 | * wouldn't do this, but fixing that is a much harder | 258 | * wouldn't do this, but fixing that is a much harder |
259 | * job. For now, simply free the i_security via RCU, and | 259 | * job. For now, simply free the i_security via RCU, and |
260 | * leave the current inode->i_security pointer intact. | 260 | * leave the current inode->i_security pointer intact. |
261 | * The inode will be freed after the RCU grace period too. | 261 | * The inode will be freed after the RCU grace period too. |
262 | */ | 262 | */ |
263 | call_rcu(&isec->rcu, inode_free_rcu); | 263 | call_rcu(&isec->rcu, inode_free_rcu); |
264 | } | 264 | } |
265 | 265 | ||
266 | static int file_alloc_security(struct file *file) | 266 | static int file_alloc_security(struct file *file) |
267 | { | 267 | { |
268 | struct file_security_struct *fsec; | 268 | struct file_security_struct *fsec; |
269 | u32 sid = current_sid(); | 269 | u32 sid = current_sid(); |
270 | 270 | ||
271 | fsec = kzalloc(sizeof(struct file_security_struct), GFP_KERNEL); | 271 | fsec = kzalloc(sizeof(struct file_security_struct), GFP_KERNEL); |
272 | if (!fsec) | 272 | if (!fsec) |
273 | return -ENOMEM; | 273 | return -ENOMEM; |
274 | 274 | ||
275 | fsec->sid = sid; | 275 | fsec->sid = sid; |
276 | fsec->fown_sid = sid; | 276 | fsec->fown_sid = sid; |
277 | file->f_security = fsec; | 277 | file->f_security = fsec; |
278 | 278 | ||
279 | return 0; | 279 | return 0; |
280 | } | 280 | } |
281 | 281 | ||
282 | static void file_free_security(struct file *file) | 282 | static void file_free_security(struct file *file) |
283 | { | 283 | { |
284 | struct file_security_struct *fsec = file->f_security; | 284 | struct file_security_struct *fsec = file->f_security; |
285 | file->f_security = NULL; | 285 | file->f_security = NULL; |
286 | kfree(fsec); | 286 | kfree(fsec); |
287 | } | 287 | } |
288 | 288 | ||
289 | static int superblock_alloc_security(struct super_block *sb) | 289 | static int superblock_alloc_security(struct super_block *sb) |
290 | { | 290 | { |
291 | struct superblock_security_struct *sbsec; | 291 | struct superblock_security_struct *sbsec; |
292 | 292 | ||
293 | sbsec = kzalloc(sizeof(struct superblock_security_struct), GFP_KERNEL); | 293 | sbsec = kzalloc(sizeof(struct superblock_security_struct), GFP_KERNEL); |
294 | if (!sbsec) | 294 | if (!sbsec) |
295 | return -ENOMEM; | 295 | return -ENOMEM; |
296 | 296 | ||
297 | mutex_init(&sbsec->lock); | 297 | mutex_init(&sbsec->lock); |
298 | INIT_LIST_HEAD(&sbsec->isec_head); | 298 | INIT_LIST_HEAD(&sbsec->isec_head); |
299 | spin_lock_init(&sbsec->isec_lock); | 299 | spin_lock_init(&sbsec->isec_lock); |
300 | sbsec->sb = sb; | 300 | sbsec->sb = sb; |
301 | sbsec->sid = SECINITSID_UNLABELED; | 301 | sbsec->sid = SECINITSID_UNLABELED; |
302 | sbsec->def_sid = SECINITSID_FILE; | 302 | sbsec->def_sid = SECINITSID_FILE; |
303 | sbsec->mntpoint_sid = SECINITSID_UNLABELED; | 303 | sbsec->mntpoint_sid = SECINITSID_UNLABELED; |
304 | sb->s_security = sbsec; | 304 | sb->s_security = sbsec; |
305 | 305 | ||
306 | return 0; | 306 | return 0; |
307 | } | 307 | } |
308 | 308 | ||
309 | static void superblock_free_security(struct super_block *sb) | 309 | static void superblock_free_security(struct super_block *sb) |
310 | { | 310 | { |
311 | struct superblock_security_struct *sbsec = sb->s_security; | 311 | struct superblock_security_struct *sbsec = sb->s_security; |
312 | sb->s_security = NULL; | 312 | sb->s_security = NULL; |
313 | kfree(sbsec); | 313 | kfree(sbsec); |
314 | } | 314 | } |
315 | 315 | ||
316 | /* The file system's label must be initialized prior to use. */ | 316 | /* The file system's label must be initialized prior to use. */ |
317 | 317 | ||
318 | static const char *labeling_behaviors[7] = { | 318 | static const char *labeling_behaviors[7] = { |
319 | "uses xattr", | 319 | "uses xattr", |
320 | "uses transition SIDs", | 320 | "uses transition SIDs", |
321 | "uses task SIDs", | 321 | "uses task SIDs", |
322 | "uses genfs_contexts", | 322 | "uses genfs_contexts", |
323 | "not configured for labeling", | 323 | "not configured for labeling", |
324 | "uses mountpoint labeling", | 324 | "uses mountpoint labeling", |
325 | "uses native labeling", | 325 | "uses native labeling", |
326 | }; | 326 | }; |
327 | 327 | ||
328 | static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dentry); | 328 | static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dentry); |
329 | 329 | ||
330 | static inline int inode_doinit(struct inode *inode) | 330 | static inline int inode_doinit(struct inode *inode) |
331 | { | 331 | { |
332 | return inode_doinit_with_dentry(inode, NULL); | 332 | return inode_doinit_with_dentry(inode, NULL); |
333 | } | 333 | } |
334 | 334 | ||
335 | enum { | 335 | enum { |
336 | Opt_error = -1, | 336 | Opt_error = -1, |
337 | Opt_context = 1, | 337 | Opt_context = 1, |
338 | Opt_fscontext = 2, | 338 | Opt_fscontext = 2, |
339 | Opt_defcontext = 3, | 339 | Opt_defcontext = 3, |
340 | Opt_rootcontext = 4, | 340 | Opt_rootcontext = 4, |
341 | Opt_labelsupport = 5, | 341 | Opt_labelsupport = 5, |
342 | Opt_nextmntopt = 6, | 342 | Opt_nextmntopt = 6, |
343 | }; | 343 | }; |
344 | 344 | ||
345 | #define NUM_SEL_MNT_OPTS (Opt_nextmntopt - 1) | 345 | #define NUM_SEL_MNT_OPTS (Opt_nextmntopt - 1) |
346 | 346 | ||
347 | static const match_table_t tokens = { | 347 | static const match_table_t tokens = { |
348 | {Opt_context, CONTEXT_STR "%s"}, | 348 | {Opt_context, CONTEXT_STR "%s"}, |
349 | {Opt_fscontext, FSCONTEXT_STR "%s"}, | 349 | {Opt_fscontext, FSCONTEXT_STR "%s"}, |
350 | {Opt_defcontext, DEFCONTEXT_STR "%s"}, | 350 | {Opt_defcontext, DEFCONTEXT_STR "%s"}, |
351 | {Opt_rootcontext, ROOTCONTEXT_STR "%s"}, | 351 | {Opt_rootcontext, ROOTCONTEXT_STR "%s"}, |
352 | {Opt_labelsupport, LABELSUPP_STR}, | 352 | {Opt_labelsupport, LABELSUPP_STR}, |
353 | {Opt_error, NULL}, | 353 | {Opt_error, NULL}, |
354 | }; | 354 | }; |
355 | 355 | ||
356 | #define SEL_MOUNT_FAIL_MSG "SELinux: duplicate or incompatible mount options\n" | 356 | #define SEL_MOUNT_FAIL_MSG "SELinux: duplicate or incompatible mount options\n" |
357 | 357 | ||
358 | static int may_context_mount_sb_relabel(u32 sid, | 358 | static int may_context_mount_sb_relabel(u32 sid, |
359 | struct superblock_security_struct *sbsec, | 359 | struct superblock_security_struct *sbsec, |
360 | const struct cred *cred) | 360 | const struct cred *cred) |
361 | { | 361 | { |
362 | const struct task_security_struct *tsec = cred->security; | 362 | const struct task_security_struct *tsec = cred->security; |
363 | int rc; | 363 | int rc; |
364 | 364 | ||
365 | rc = avc_has_perm(tsec->sid, sbsec->sid, SECCLASS_FILESYSTEM, | 365 | rc = avc_has_perm(tsec->sid, sbsec->sid, SECCLASS_FILESYSTEM, |
366 | FILESYSTEM__RELABELFROM, NULL); | 366 | FILESYSTEM__RELABELFROM, NULL); |
367 | if (rc) | 367 | if (rc) |
368 | return rc; | 368 | return rc; |
369 | 369 | ||
370 | rc = avc_has_perm(tsec->sid, sid, SECCLASS_FILESYSTEM, | 370 | rc = avc_has_perm(tsec->sid, sid, SECCLASS_FILESYSTEM, |
371 | FILESYSTEM__RELABELTO, NULL); | 371 | FILESYSTEM__RELABELTO, NULL); |
372 | return rc; | 372 | return rc; |
373 | } | 373 | } |
374 | 374 | ||
375 | static int may_context_mount_inode_relabel(u32 sid, | 375 | static int may_context_mount_inode_relabel(u32 sid, |
376 | struct superblock_security_struct *sbsec, | 376 | struct superblock_security_struct *sbsec, |
377 | const struct cred *cred) | 377 | const struct cred *cred) |
378 | { | 378 | { |
379 | const struct task_security_struct *tsec = cred->security; | 379 | const struct task_security_struct *tsec = cred->security; |
380 | int rc; | 380 | int rc; |
381 | rc = avc_has_perm(tsec->sid, sbsec->sid, SECCLASS_FILESYSTEM, | 381 | rc = avc_has_perm(tsec->sid, sbsec->sid, SECCLASS_FILESYSTEM, |
382 | FILESYSTEM__RELABELFROM, NULL); | 382 | FILESYSTEM__RELABELFROM, NULL); |
383 | if (rc) | 383 | if (rc) |
384 | return rc; | 384 | return rc; |
385 | 385 | ||
386 | rc = avc_has_perm(sid, sbsec->sid, SECCLASS_FILESYSTEM, | 386 | rc = avc_has_perm(sid, sbsec->sid, SECCLASS_FILESYSTEM, |
387 | FILESYSTEM__ASSOCIATE, NULL); | 387 | FILESYSTEM__ASSOCIATE, NULL); |
388 | return rc; | 388 | return rc; |
389 | } | 389 | } |
390 | 390 | ||
391 | static int selinux_is_sblabel_mnt(struct super_block *sb) | 391 | static int selinux_is_sblabel_mnt(struct super_block *sb) |
392 | { | 392 | { |
393 | struct superblock_security_struct *sbsec = sb->s_security; | 393 | struct superblock_security_struct *sbsec = sb->s_security; |
394 | 394 | ||
395 | if (sbsec->behavior == SECURITY_FS_USE_XATTR || | 395 | if (sbsec->behavior == SECURITY_FS_USE_XATTR || |
396 | sbsec->behavior == SECURITY_FS_USE_TRANS || | 396 | sbsec->behavior == SECURITY_FS_USE_TRANS || |
397 | sbsec->behavior == SECURITY_FS_USE_TASK) | 397 | sbsec->behavior == SECURITY_FS_USE_TASK) |
398 | return 1; | 398 | return 1; |
399 | 399 | ||
400 | /* Special handling for sysfs. Is genfs but also has setxattr handler*/ | 400 | /* Special handling for sysfs. Is genfs but also has setxattr handler*/ |
401 | if (strncmp(sb->s_type->name, "sysfs", sizeof("sysfs")) == 0) | 401 | if (strncmp(sb->s_type->name, "sysfs", sizeof("sysfs")) == 0) |
402 | return 1; | 402 | return 1; |
403 | 403 | ||
404 | /* | 404 | /* |
405 | * Special handling for rootfs. Is genfs but supports | 405 | * Special handling for rootfs. Is genfs but supports |
406 | * setting SELinux context on in-core inodes. | 406 | * setting SELinux context on in-core inodes. |
407 | */ | 407 | */ |
408 | if (strncmp(sb->s_type->name, "rootfs", sizeof("rootfs")) == 0) | 408 | if (strncmp(sb->s_type->name, "rootfs", sizeof("rootfs")) == 0) |
409 | return 1; | 409 | return 1; |
410 | 410 | ||
411 | return 0; | 411 | return 0; |
412 | } | 412 | } |
413 | 413 | ||
414 | static int sb_finish_set_opts(struct super_block *sb) | 414 | static int sb_finish_set_opts(struct super_block *sb) |
415 | { | 415 | { |
416 | struct superblock_security_struct *sbsec = sb->s_security; | 416 | struct superblock_security_struct *sbsec = sb->s_security; |
417 | struct dentry *root = sb->s_root; | 417 | struct dentry *root = sb->s_root; |
418 | struct inode *root_inode = root->d_inode; | 418 | struct inode *root_inode = root->d_inode; |
419 | int rc = 0; | 419 | int rc = 0; |
420 | 420 | ||
421 | if (sbsec->behavior == SECURITY_FS_USE_XATTR) { | 421 | if (sbsec->behavior == SECURITY_FS_USE_XATTR) { |
422 | /* Make sure that the xattr handler exists and that no | 422 | /* Make sure that the xattr handler exists and that no |
423 | error other than -ENODATA is returned by getxattr on | 423 | error other than -ENODATA is returned by getxattr on |
424 | the root directory. -ENODATA is ok, as this may be | 424 | the root directory. -ENODATA is ok, as this may be |
425 | the first boot of the SELinux kernel before we have | 425 | the first boot of the SELinux kernel before we have |
426 | assigned xattr values to the filesystem. */ | 426 | assigned xattr values to the filesystem. */ |
427 | if (!root_inode->i_op->getxattr) { | 427 | if (!root_inode->i_op->getxattr) { |
428 | printk(KERN_WARNING "SELinux: (dev %s, type %s) has no " | 428 | printk(KERN_WARNING "SELinux: (dev %s, type %s) has no " |
429 | "xattr support\n", sb->s_id, sb->s_type->name); | 429 | "xattr support\n", sb->s_id, sb->s_type->name); |
430 | rc = -EOPNOTSUPP; | 430 | rc = -EOPNOTSUPP; |
431 | goto out; | 431 | goto out; |
432 | } | 432 | } |
433 | rc = root_inode->i_op->getxattr(root, XATTR_NAME_SELINUX, NULL, 0); | 433 | rc = root_inode->i_op->getxattr(root, XATTR_NAME_SELINUX, NULL, 0); |
434 | if (rc < 0 && rc != -ENODATA) { | 434 | if (rc < 0 && rc != -ENODATA) { |
435 | if (rc == -EOPNOTSUPP) | 435 | if (rc == -EOPNOTSUPP) |
436 | printk(KERN_WARNING "SELinux: (dev %s, type " | 436 | printk(KERN_WARNING "SELinux: (dev %s, type " |
437 | "%s) has no security xattr handler\n", | 437 | "%s) has no security xattr handler\n", |
438 | sb->s_id, sb->s_type->name); | 438 | sb->s_id, sb->s_type->name); |
439 | else | 439 | else |
440 | printk(KERN_WARNING "SELinux: (dev %s, type " | 440 | printk(KERN_WARNING "SELinux: (dev %s, type " |
441 | "%s) getxattr errno %d\n", sb->s_id, | 441 | "%s) getxattr errno %d\n", sb->s_id, |
442 | sb->s_type->name, -rc); | 442 | sb->s_type->name, -rc); |
443 | goto out; | 443 | goto out; |
444 | } | 444 | } |
445 | } | 445 | } |
446 | 446 | ||
447 | if (sbsec->behavior > ARRAY_SIZE(labeling_behaviors)) | 447 | if (sbsec->behavior > ARRAY_SIZE(labeling_behaviors)) |
448 | printk(KERN_ERR "SELinux: initialized (dev %s, type %s), unknown behavior\n", | 448 | printk(KERN_ERR "SELinux: initialized (dev %s, type %s), unknown behavior\n", |
449 | sb->s_id, sb->s_type->name); | 449 | sb->s_id, sb->s_type->name); |
450 | else | 450 | else |
451 | printk(KERN_DEBUG "SELinux: initialized (dev %s, type %s), %s\n", | 451 | printk(KERN_DEBUG "SELinux: initialized (dev %s, type %s), %s\n", |
452 | sb->s_id, sb->s_type->name, | 452 | sb->s_id, sb->s_type->name, |
453 | labeling_behaviors[sbsec->behavior-1]); | 453 | labeling_behaviors[sbsec->behavior-1]); |
454 | 454 | ||
455 | sbsec->flags |= SE_SBINITIALIZED; | 455 | sbsec->flags |= SE_SBINITIALIZED; |
456 | if (selinux_is_sblabel_mnt(sb)) | 456 | if (selinux_is_sblabel_mnt(sb)) |
457 | sbsec->flags |= SBLABEL_MNT; | 457 | sbsec->flags |= SBLABEL_MNT; |
458 | 458 | ||
459 | /* Initialize the root inode. */ | 459 | /* Initialize the root inode. */ |
460 | rc = inode_doinit_with_dentry(root_inode, root); | 460 | rc = inode_doinit_with_dentry(root_inode, root); |
461 | 461 | ||
462 | /* Initialize any other inodes associated with the superblock, e.g. | 462 | /* Initialize any other inodes associated with the superblock, e.g. |
463 | inodes created prior to initial policy load or inodes created | 463 | inodes created prior to initial policy load or inodes created |
464 | during get_sb by a pseudo filesystem that directly | 464 | during get_sb by a pseudo filesystem that directly |
465 | populates itself. */ | 465 | populates itself. */ |
466 | spin_lock(&sbsec->isec_lock); | 466 | spin_lock(&sbsec->isec_lock); |
467 | next_inode: | 467 | next_inode: |
468 | if (!list_empty(&sbsec->isec_head)) { | 468 | if (!list_empty(&sbsec->isec_head)) { |
469 | struct inode_security_struct *isec = | 469 | struct inode_security_struct *isec = |
470 | list_entry(sbsec->isec_head.next, | 470 | list_entry(sbsec->isec_head.next, |
471 | struct inode_security_struct, list); | 471 | struct inode_security_struct, list); |
472 | struct inode *inode = isec->inode; | 472 | struct inode *inode = isec->inode; |
473 | spin_unlock(&sbsec->isec_lock); | 473 | spin_unlock(&sbsec->isec_lock); |
474 | inode = igrab(inode); | 474 | inode = igrab(inode); |
475 | if (inode) { | 475 | if (inode) { |
476 | if (!IS_PRIVATE(inode)) | 476 | if (!IS_PRIVATE(inode)) |
477 | inode_doinit(inode); | 477 | inode_doinit(inode); |
478 | iput(inode); | 478 | iput(inode); |
479 | } | 479 | } |
480 | spin_lock(&sbsec->isec_lock); | 480 | spin_lock(&sbsec->isec_lock); |
481 | list_del_init(&isec->list); | 481 | list_del_init(&isec->list); |
482 | goto next_inode; | 482 | goto next_inode; |
483 | } | 483 | } |
484 | spin_unlock(&sbsec->isec_lock); | 484 | spin_unlock(&sbsec->isec_lock); |
485 | out: | 485 | out: |
486 | return rc; | 486 | return rc; |
487 | } | 487 | } |
488 | 488 | ||
489 | /* | 489 | /* |
490 | * This function should allow an FS to ask what its mount security | 490 | * This function should allow an FS to ask what its mount security |
491 | * options were so it can use those later for submounts, displaying | 491 | * options were so it can use those later for submounts, displaying |
492 | * mount options, or whatever. | 492 | * mount options, or whatever. |
493 | */ | 493 | */ |
494 | static int selinux_get_mnt_opts(const struct super_block *sb, | 494 | static int selinux_get_mnt_opts(const struct super_block *sb, |
495 | struct security_mnt_opts *opts) | 495 | struct security_mnt_opts *opts) |
496 | { | 496 | { |
497 | int rc = 0, i; | 497 | int rc = 0, i; |
498 | struct superblock_security_struct *sbsec = sb->s_security; | 498 | struct superblock_security_struct *sbsec = sb->s_security; |
499 | char *context = NULL; | 499 | char *context = NULL; |
500 | u32 len; | 500 | u32 len; |
501 | char tmp; | 501 | char tmp; |
502 | 502 | ||
503 | security_init_mnt_opts(opts); | 503 | security_init_mnt_opts(opts); |
504 | 504 | ||
505 | if (!(sbsec->flags & SE_SBINITIALIZED)) | 505 | if (!(sbsec->flags & SE_SBINITIALIZED)) |
506 | return -EINVAL; | 506 | return -EINVAL; |
507 | 507 | ||
508 | if (!ss_initialized) | 508 | if (!ss_initialized) |
509 | return -EINVAL; | 509 | return -EINVAL; |
510 | 510 | ||
511 | /* make sure we always check enough bits to cover the mask */ | 511 | /* make sure we always check enough bits to cover the mask */ |
512 | BUILD_BUG_ON(SE_MNTMASK >= (1 << NUM_SEL_MNT_OPTS)); | 512 | BUILD_BUG_ON(SE_MNTMASK >= (1 << NUM_SEL_MNT_OPTS)); |
513 | 513 | ||
514 | tmp = sbsec->flags & SE_MNTMASK; | 514 | tmp = sbsec->flags & SE_MNTMASK; |
515 | /* count the number of mount options for this sb */ | 515 | /* count the number of mount options for this sb */ |
516 | for (i = 0; i < NUM_SEL_MNT_OPTS; i++) { | 516 | for (i = 0; i < NUM_SEL_MNT_OPTS; i++) { |
517 | if (tmp & 0x01) | 517 | if (tmp & 0x01) |
518 | opts->num_mnt_opts++; | 518 | opts->num_mnt_opts++; |
519 | tmp >>= 1; | 519 | tmp >>= 1; |
520 | } | 520 | } |
521 | /* Check if the Label support flag is set */ | 521 | /* Check if the Label support flag is set */ |
522 | if (sbsec->flags & SBLABEL_MNT) | 522 | if (sbsec->flags & SBLABEL_MNT) |
523 | opts->num_mnt_opts++; | 523 | opts->num_mnt_opts++; |
524 | 524 | ||
525 | opts->mnt_opts = kcalloc(opts->num_mnt_opts, sizeof(char *), GFP_ATOMIC); | 525 | opts->mnt_opts = kcalloc(opts->num_mnt_opts, sizeof(char *), GFP_ATOMIC); |
526 | if (!opts->mnt_opts) { | 526 | if (!opts->mnt_opts) { |
527 | rc = -ENOMEM; | 527 | rc = -ENOMEM; |
528 | goto out_free; | 528 | goto out_free; |
529 | } | 529 | } |
530 | 530 | ||
531 | opts->mnt_opts_flags = kcalloc(opts->num_mnt_opts, sizeof(int), GFP_ATOMIC); | 531 | opts->mnt_opts_flags = kcalloc(opts->num_mnt_opts, sizeof(int), GFP_ATOMIC); |
532 | if (!opts->mnt_opts_flags) { | 532 | if (!opts->mnt_opts_flags) { |
533 | rc = -ENOMEM; | 533 | rc = -ENOMEM; |
534 | goto out_free; | 534 | goto out_free; |
535 | } | 535 | } |
536 | 536 | ||
537 | i = 0; | 537 | i = 0; |
538 | if (sbsec->flags & FSCONTEXT_MNT) { | 538 | if (sbsec->flags & FSCONTEXT_MNT) { |
539 | rc = security_sid_to_context(sbsec->sid, &context, &len); | 539 | rc = security_sid_to_context(sbsec->sid, &context, &len); |
540 | if (rc) | 540 | if (rc) |
541 | goto out_free; | 541 | goto out_free; |
542 | opts->mnt_opts[i] = context; | 542 | opts->mnt_opts[i] = context; |
543 | opts->mnt_opts_flags[i++] = FSCONTEXT_MNT; | 543 | opts->mnt_opts_flags[i++] = FSCONTEXT_MNT; |
544 | } | 544 | } |
545 | if (sbsec->flags & CONTEXT_MNT) { | 545 | if (sbsec->flags & CONTEXT_MNT) { |
546 | rc = security_sid_to_context(sbsec->mntpoint_sid, &context, &len); | 546 | rc = security_sid_to_context(sbsec->mntpoint_sid, &context, &len); |
547 | if (rc) | 547 | if (rc) |
548 | goto out_free; | 548 | goto out_free; |
549 | opts->mnt_opts[i] = context; | 549 | opts->mnt_opts[i] = context; |
550 | opts->mnt_opts_flags[i++] = CONTEXT_MNT; | 550 | opts->mnt_opts_flags[i++] = CONTEXT_MNT; |
551 | } | 551 | } |
552 | if (sbsec->flags & DEFCONTEXT_MNT) { | 552 | if (sbsec->flags & DEFCONTEXT_MNT) { |
553 | rc = security_sid_to_context(sbsec->def_sid, &context, &len); | 553 | rc = security_sid_to_context(sbsec->def_sid, &context, &len); |
554 | if (rc) | 554 | if (rc) |
555 | goto out_free; | 555 | goto out_free; |
556 | opts->mnt_opts[i] = context; | 556 | opts->mnt_opts[i] = context; |
557 | opts->mnt_opts_flags[i++] = DEFCONTEXT_MNT; | 557 | opts->mnt_opts_flags[i++] = DEFCONTEXT_MNT; |
558 | } | 558 | } |
559 | if (sbsec->flags & ROOTCONTEXT_MNT) { | 559 | if (sbsec->flags & ROOTCONTEXT_MNT) { |
560 | struct inode *root = sbsec->sb->s_root->d_inode; | 560 | struct inode *root = sbsec->sb->s_root->d_inode; |
561 | struct inode_security_struct *isec = root->i_security; | 561 | struct inode_security_struct *isec = root->i_security; |
562 | 562 | ||
563 | rc = security_sid_to_context(isec->sid, &context, &len); | 563 | rc = security_sid_to_context(isec->sid, &context, &len); |
564 | if (rc) | 564 | if (rc) |
565 | goto out_free; | 565 | goto out_free; |
566 | opts->mnt_opts[i] = context; | 566 | opts->mnt_opts[i] = context; |
567 | opts->mnt_opts_flags[i++] = ROOTCONTEXT_MNT; | 567 | opts->mnt_opts_flags[i++] = ROOTCONTEXT_MNT; |
568 | } | 568 | } |
569 | if (sbsec->flags & SBLABEL_MNT) { | 569 | if (sbsec->flags & SBLABEL_MNT) { |
570 | opts->mnt_opts[i] = NULL; | 570 | opts->mnt_opts[i] = NULL; |
571 | opts->mnt_opts_flags[i++] = SBLABEL_MNT; | 571 | opts->mnt_opts_flags[i++] = SBLABEL_MNT; |
572 | } | 572 | } |
573 | 573 | ||
574 | BUG_ON(i != opts->num_mnt_opts); | 574 | BUG_ON(i != opts->num_mnt_opts); |
575 | 575 | ||
576 | return 0; | 576 | return 0; |
577 | 577 | ||
578 | out_free: | 578 | out_free: |
579 | security_free_mnt_opts(opts); | 579 | security_free_mnt_opts(opts); |
580 | return rc; | 580 | return rc; |
581 | } | 581 | } |
582 | 582 | ||
583 | static int bad_option(struct superblock_security_struct *sbsec, char flag, | 583 | static int bad_option(struct superblock_security_struct *sbsec, char flag, |
584 | u32 old_sid, u32 new_sid) | 584 | u32 old_sid, u32 new_sid) |
585 | { | 585 | { |
586 | char mnt_flags = sbsec->flags & SE_MNTMASK; | 586 | char mnt_flags = sbsec->flags & SE_MNTMASK; |
587 | 587 | ||
588 | /* check if the old mount command had the same options */ | 588 | /* check if the old mount command had the same options */ |
589 | if (sbsec->flags & SE_SBINITIALIZED) | 589 | if (sbsec->flags & SE_SBINITIALIZED) |
590 | if (!(sbsec->flags & flag) || | 590 | if (!(sbsec->flags & flag) || |
591 | (old_sid != new_sid)) | 591 | (old_sid != new_sid)) |
592 | return 1; | 592 | return 1; |
593 | 593 | ||
594 | /* check if we were passed the same options twice, | 594 | /* check if we were passed the same options twice, |
595 | * aka someone passed context=a,context=b | 595 | * aka someone passed context=a,context=b |
596 | */ | 596 | */ |
597 | if (!(sbsec->flags & SE_SBINITIALIZED)) | 597 | if (!(sbsec->flags & SE_SBINITIALIZED)) |
598 | if (mnt_flags & flag) | 598 | if (mnt_flags & flag) |
599 | return 1; | 599 | return 1; |
600 | return 0; | 600 | return 0; |
601 | } | 601 | } |
602 | 602 | ||
603 | /* | 603 | /* |
604 | * Allow filesystems with binary mount data to explicitly set mount point | 604 | * Allow filesystems with binary mount data to explicitly set mount point |
605 | * labeling information. | 605 | * labeling information. |
606 | */ | 606 | */ |
607 | static int selinux_set_mnt_opts(struct super_block *sb, | 607 | static int selinux_set_mnt_opts(struct super_block *sb, |
608 | struct security_mnt_opts *opts, | 608 | struct security_mnt_opts *opts, |
609 | unsigned long kern_flags, | 609 | unsigned long kern_flags, |
610 | unsigned long *set_kern_flags) | 610 | unsigned long *set_kern_flags) |
611 | { | 611 | { |
612 | const struct cred *cred = current_cred(); | 612 | const struct cred *cred = current_cred(); |
613 | int rc = 0, i; | 613 | int rc = 0, i; |
614 | struct superblock_security_struct *sbsec = sb->s_security; | 614 | struct superblock_security_struct *sbsec = sb->s_security; |
615 | const char *name = sb->s_type->name; | 615 | const char *name = sb->s_type->name; |
616 | struct inode *inode = sbsec->sb->s_root->d_inode; | 616 | struct inode *inode = sbsec->sb->s_root->d_inode; |
617 | struct inode_security_struct *root_isec = inode->i_security; | 617 | struct inode_security_struct *root_isec = inode->i_security; |
618 | u32 fscontext_sid = 0, context_sid = 0, rootcontext_sid = 0; | 618 | u32 fscontext_sid = 0, context_sid = 0, rootcontext_sid = 0; |
619 | u32 defcontext_sid = 0; | 619 | u32 defcontext_sid = 0; |
620 | char **mount_options = opts->mnt_opts; | 620 | char **mount_options = opts->mnt_opts; |
621 | int *flags = opts->mnt_opts_flags; | 621 | int *flags = opts->mnt_opts_flags; |
622 | int num_opts = opts->num_mnt_opts; | 622 | int num_opts = opts->num_mnt_opts; |
623 | 623 | ||
624 | mutex_lock(&sbsec->lock); | 624 | mutex_lock(&sbsec->lock); |
625 | 625 | ||
626 | if (!ss_initialized) { | 626 | if (!ss_initialized) { |
627 | if (!num_opts) { | 627 | if (!num_opts) { |
628 | /* Defer initialization until selinux_complete_init, | 628 | /* Defer initialization until selinux_complete_init, |
629 | after the initial policy is loaded and the security | 629 | after the initial policy is loaded and the security |
630 | server is ready to handle calls. */ | 630 | server is ready to handle calls. */ |
631 | goto out; | 631 | goto out; |
632 | } | 632 | } |
633 | rc = -EINVAL; | 633 | rc = -EINVAL; |
634 | printk(KERN_WARNING "SELinux: Unable to set superblock options " | 634 | printk(KERN_WARNING "SELinux: Unable to set superblock options " |
635 | "before the security server is initialized\n"); | 635 | "before the security server is initialized\n"); |
636 | goto out; | 636 | goto out; |
637 | } | 637 | } |
638 | if (kern_flags && !set_kern_flags) { | 638 | if (kern_flags && !set_kern_flags) { |
639 | /* Specifying internal flags without providing a place to | 639 | /* Specifying internal flags without providing a place to |
640 | * place the results is not allowed */ | 640 | * place the results is not allowed */ |
641 | rc = -EINVAL; | 641 | rc = -EINVAL; |
642 | goto out; | 642 | goto out; |
643 | } | 643 | } |
644 | 644 | ||
645 | /* | 645 | /* |
646 | * Binary mount data FS will come through this function twice. Once | 646 | * Binary mount data FS will come through this function twice. Once |
647 | * from an explicit call and once from the generic calls from the vfs. | 647 | * from an explicit call and once from the generic calls from the vfs. |
648 | * Since the generic VFS calls will not contain any security mount data | 648 | * Since the generic VFS calls will not contain any security mount data |
649 | * we need to skip the double mount verification. | 649 | * we need to skip the double mount verification. |
650 | * | 650 | * |
651 | * This does open a hole in which we will not notice if the first | 651 | * This does open a hole in which we will not notice if the first |
652 | * mount using this sb set explicit options and a second mount using | 652 | * mount using this sb set explicit options and a second mount using |
653 | * this sb does not set any security options. (The first options | 653 | * this sb does not set any security options. (The first options |
654 | * will be used for both mounts) | 654 | * will be used for both mounts) |
655 | */ | 655 | */ |
656 | if ((sbsec->flags & SE_SBINITIALIZED) && (sb->s_type->fs_flags & FS_BINARY_MOUNTDATA) | 656 | if ((sbsec->flags & SE_SBINITIALIZED) && (sb->s_type->fs_flags & FS_BINARY_MOUNTDATA) |
657 | && (num_opts == 0)) | 657 | && (num_opts == 0)) |
658 | goto out; | 658 | goto out; |
659 | 659 | ||
660 | /* | 660 | /* |
661 | * parse the mount options, check if they are valid sids. | 661 | * parse the mount options, check if they are valid sids. |
662 | * also check if someone is trying to mount the same sb more | 662 | * also check if someone is trying to mount the same sb more |
663 | * than once with different security options. | 663 | * than once with different security options. |
664 | */ | 664 | */ |
665 | for (i = 0; i < num_opts; i++) { | 665 | for (i = 0; i < num_opts; i++) { |
666 | u32 sid; | 666 | u32 sid; |
667 | 667 | ||
668 | if (flags[i] == SBLABEL_MNT) | 668 | if (flags[i] == SBLABEL_MNT) |
669 | continue; | 669 | continue; |
670 | rc = security_context_to_sid(mount_options[i], | 670 | rc = security_context_to_sid(mount_options[i], |
671 | strlen(mount_options[i]), &sid, GFP_KERNEL); | 671 | strlen(mount_options[i]), &sid, GFP_KERNEL); |
672 | if (rc) { | 672 | if (rc) { |
673 | printk(KERN_WARNING "SELinux: security_context_to_sid" | 673 | printk(KERN_WARNING "SELinux: security_context_to_sid" |
674 | "(%s) failed for (dev %s, type %s) errno=%d\n", | 674 | "(%s) failed for (dev %s, type %s) errno=%d\n", |
675 | mount_options[i], sb->s_id, name, rc); | 675 | mount_options[i], sb->s_id, name, rc); |
676 | goto out; | 676 | goto out; |
677 | } | 677 | } |
678 | switch (flags[i]) { | 678 | switch (flags[i]) { |
679 | case FSCONTEXT_MNT: | 679 | case FSCONTEXT_MNT: |
680 | fscontext_sid = sid; | 680 | fscontext_sid = sid; |
681 | 681 | ||
682 | if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, | 682 | if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, |
683 | fscontext_sid)) | 683 | fscontext_sid)) |
684 | goto out_double_mount; | 684 | goto out_double_mount; |
685 | 685 | ||
686 | sbsec->flags |= FSCONTEXT_MNT; | 686 | sbsec->flags |= FSCONTEXT_MNT; |
687 | break; | 687 | break; |
688 | case CONTEXT_MNT: | 688 | case CONTEXT_MNT: |
689 | context_sid = sid; | 689 | context_sid = sid; |
690 | 690 | ||
691 | if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, | 691 | if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, |
692 | context_sid)) | 692 | context_sid)) |
693 | goto out_double_mount; | 693 | goto out_double_mount; |
694 | 694 | ||
695 | sbsec->flags |= CONTEXT_MNT; | 695 | sbsec->flags |= CONTEXT_MNT; |
696 | break; | 696 | break; |
697 | case ROOTCONTEXT_MNT: | 697 | case ROOTCONTEXT_MNT: |
698 | rootcontext_sid = sid; | 698 | rootcontext_sid = sid; |
699 | 699 | ||
700 | if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, | 700 | if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, |
701 | rootcontext_sid)) | 701 | rootcontext_sid)) |
702 | goto out_double_mount; | 702 | goto out_double_mount; |
703 | 703 | ||
704 | sbsec->flags |= ROOTCONTEXT_MNT; | 704 | sbsec->flags |= ROOTCONTEXT_MNT; |
705 | 705 | ||
706 | break; | 706 | break; |
707 | case DEFCONTEXT_MNT: | 707 | case DEFCONTEXT_MNT: |
708 | defcontext_sid = sid; | 708 | defcontext_sid = sid; |
709 | 709 | ||
710 | if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, | 710 | if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, |
711 | defcontext_sid)) | 711 | defcontext_sid)) |
712 | goto out_double_mount; | 712 | goto out_double_mount; |
713 | 713 | ||
714 | sbsec->flags |= DEFCONTEXT_MNT; | 714 | sbsec->flags |= DEFCONTEXT_MNT; |
715 | 715 | ||
716 | break; | 716 | break; |
717 | default: | 717 | default: |
718 | rc = -EINVAL; | 718 | rc = -EINVAL; |
719 | goto out; | 719 | goto out; |
720 | } | 720 | } |
721 | } | 721 | } |
722 | 722 | ||
723 | if (sbsec->flags & SE_SBINITIALIZED) { | 723 | if (sbsec->flags & SE_SBINITIALIZED) { |
724 | /* previously mounted with options, but not on this attempt? */ | 724 | /* previously mounted with options, but not on this attempt? */ |
725 | if ((sbsec->flags & SE_MNTMASK) && !num_opts) | 725 | if ((sbsec->flags & SE_MNTMASK) && !num_opts) |
726 | goto out_double_mount; | 726 | goto out_double_mount; |
727 | rc = 0; | 727 | rc = 0; |
728 | goto out; | 728 | goto out; |
729 | } | 729 | } |
730 | 730 | ||
731 | if (strcmp(sb->s_type->name, "proc") == 0) | 731 | if (strcmp(sb->s_type->name, "proc") == 0) |
732 | sbsec->flags |= SE_SBPROC; | 732 | sbsec->flags |= SE_SBPROC; |
733 | 733 | ||
734 | if (!sbsec->behavior) { | 734 | if (!sbsec->behavior) { |
735 | /* | 735 | /* |
736 | * Determine the labeling behavior to use for this | 736 | * Determine the labeling behavior to use for this |
737 | * filesystem type. | 737 | * filesystem type. |
738 | */ | 738 | */ |
739 | rc = security_fs_use(sb); | 739 | rc = security_fs_use(sb); |
740 | if (rc) { | 740 | if (rc) { |
741 | printk(KERN_WARNING | 741 | printk(KERN_WARNING |
742 | "%s: security_fs_use(%s) returned %d\n", | 742 | "%s: security_fs_use(%s) returned %d\n", |
743 | __func__, sb->s_type->name, rc); | 743 | __func__, sb->s_type->name, rc); |
744 | goto out; | 744 | goto out; |
745 | } | 745 | } |
746 | } | 746 | } |
747 | /* sets the context of the superblock for the fs being mounted. */ | 747 | /* sets the context of the superblock for the fs being mounted. */ |
748 | if (fscontext_sid) { | 748 | if (fscontext_sid) { |
749 | rc = may_context_mount_sb_relabel(fscontext_sid, sbsec, cred); | 749 | rc = may_context_mount_sb_relabel(fscontext_sid, sbsec, cred); |
750 | if (rc) | 750 | if (rc) |
751 | goto out; | 751 | goto out; |
752 | 752 | ||
753 | sbsec->sid = fscontext_sid; | 753 | sbsec->sid = fscontext_sid; |
754 | } | 754 | } |
755 | 755 | ||
756 | /* | 756 | /* |
757 | * Switch to using mount point labeling behavior. | 757 | * Switch to using mount point labeling behavior. |
758 | * sets the label used on all files below the mountpoint, and will set | 758 | * sets the label used on all files below the mountpoint, and will set |
759 | * the superblock context if not already set. | 759 | * the superblock context if not already set. |
760 | */ | 760 | */ |
761 | if (kern_flags & SECURITY_LSM_NATIVE_LABELS && !context_sid) { | 761 | if (kern_flags & SECURITY_LSM_NATIVE_LABELS && !context_sid) { |
762 | sbsec->behavior = SECURITY_FS_USE_NATIVE; | 762 | sbsec->behavior = SECURITY_FS_USE_NATIVE; |
763 | *set_kern_flags |= SECURITY_LSM_NATIVE_LABELS; | 763 | *set_kern_flags |= SECURITY_LSM_NATIVE_LABELS; |
764 | } | 764 | } |
765 | 765 | ||
766 | if (context_sid) { | 766 | if (context_sid) { |
767 | if (!fscontext_sid) { | 767 | if (!fscontext_sid) { |
768 | rc = may_context_mount_sb_relabel(context_sid, sbsec, | 768 | rc = may_context_mount_sb_relabel(context_sid, sbsec, |
769 | cred); | 769 | cred); |
770 | if (rc) | 770 | if (rc) |
771 | goto out; | 771 | goto out; |
772 | sbsec->sid = context_sid; | 772 | sbsec->sid = context_sid; |
773 | } else { | 773 | } else { |
774 | rc = may_context_mount_inode_relabel(context_sid, sbsec, | 774 | rc = may_context_mount_inode_relabel(context_sid, sbsec, |
775 | cred); | 775 | cred); |
776 | if (rc) | 776 | if (rc) |
777 | goto out; | 777 | goto out; |
778 | } | 778 | } |
779 | if (!rootcontext_sid) | 779 | if (!rootcontext_sid) |
780 | rootcontext_sid = context_sid; | 780 | rootcontext_sid = context_sid; |
781 | 781 | ||
782 | sbsec->mntpoint_sid = context_sid; | 782 | sbsec->mntpoint_sid = context_sid; |
783 | sbsec->behavior = SECURITY_FS_USE_MNTPOINT; | 783 | sbsec->behavior = SECURITY_FS_USE_MNTPOINT; |
784 | } | 784 | } |
785 | 785 | ||
786 | if (rootcontext_sid) { | 786 | if (rootcontext_sid) { |
787 | rc = may_context_mount_inode_relabel(rootcontext_sid, sbsec, | 787 | rc = may_context_mount_inode_relabel(rootcontext_sid, sbsec, |
788 | cred); | 788 | cred); |
789 | if (rc) | 789 | if (rc) |
790 | goto out; | 790 | goto out; |
791 | 791 | ||
792 | root_isec->sid = rootcontext_sid; | 792 | root_isec->sid = rootcontext_sid; |
793 | root_isec->initialized = 1; | 793 | root_isec->initialized = 1; |
794 | } | 794 | } |
795 | 795 | ||
796 | if (defcontext_sid) { | 796 | if (defcontext_sid) { |
797 | if (sbsec->behavior != SECURITY_FS_USE_XATTR && | 797 | if (sbsec->behavior != SECURITY_FS_USE_XATTR && |
798 | sbsec->behavior != SECURITY_FS_USE_NATIVE) { | 798 | sbsec->behavior != SECURITY_FS_USE_NATIVE) { |
799 | rc = -EINVAL; | 799 | rc = -EINVAL; |
800 | printk(KERN_WARNING "SELinux: defcontext option is " | 800 | printk(KERN_WARNING "SELinux: defcontext option is " |
801 | "invalid for this filesystem type\n"); | 801 | "invalid for this filesystem type\n"); |
802 | goto out; | 802 | goto out; |
803 | } | 803 | } |
804 | 804 | ||
805 | if (defcontext_sid != sbsec->def_sid) { | 805 | if (defcontext_sid != sbsec->def_sid) { |
806 | rc = may_context_mount_inode_relabel(defcontext_sid, | 806 | rc = may_context_mount_inode_relabel(defcontext_sid, |
807 | sbsec, cred); | 807 | sbsec, cred); |
808 | if (rc) | 808 | if (rc) |
809 | goto out; | 809 | goto out; |
810 | } | 810 | } |
811 | 811 | ||
812 | sbsec->def_sid = defcontext_sid; | 812 | sbsec->def_sid = defcontext_sid; |
813 | } | 813 | } |
814 | 814 | ||
815 | rc = sb_finish_set_opts(sb); | 815 | rc = sb_finish_set_opts(sb); |
816 | out: | 816 | out: |
817 | mutex_unlock(&sbsec->lock); | 817 | mutex_unlock(&sbsec->lock); |
818 | return rc; | 818 | return rc; |
819 | out_double_mount: | 819 | out_double_mount: |
820 | rc = -EINVAL; | 820 | rc = -EINVAL; |
821 | printk(KERN_WARNING "SELinux: mount invalid. Same superblock, different " | 821 | printk(KERN_WARNING "SELinux: mount invalid. Same superblock, different " |
822 | "security settings for (dev %s, type %s)\n", sb->s_id, name); | 822 | "security settings for (dev %s, type %s)\n", sb->s_id, name); |
823 | goto out; | 823 | goto out; |
824 | } | 824 | } |
825 | 825 | ||
826 | static int selinux_cmp_sb_context(const struct super_block *oldsb, | 826 | static int selinux_cmp_sb_context(const struct super_block *oldsb, |
827 | const struct super_block *newsb) | 827 | const struct super_block *newsb) |
828 | { | 828 | { |
829 | struct superblock_security_struct *old = oldsb->s_security; | 829 | struct superblock_security_struct *old = oldsb->s_security; |
830 | struct superblock_security_struct *new = newsb->s_security; | 830 | struct superblock_security_struct *new = newsb->s_security; |
831 | char oldflags = old->flags & SE_MNTMASK; | 831 | char oldflags = old->flags & SE_MNTMASK; |
832 | char newflags = new->flags & SE_MNTMASK; | 832 | char newflags = new->flags & SE_MNTMASK; |
833 | 833 | ||
834 | if (oldflags != newflags) | 834 | if (oldflags != newflags) |
835 | goto mismatch; | 835 | goto mismatch; |
836 | if ((oldflags & FSCONTEXT_MNT) && old->sid != new->sid) | 836 | if ((oldflags & FSCONTEXT_MNT) && old->sid != new->sid) |
837 | goto mismatch; | 837 | goto mismatch; |
838 | if ((oldflags & CONTEXT_MNT) && old->mntpoint_sid != new->mntpoint_sid) | 838 | if ((oldflags & CONTEXT_MNT) && old->mntpoint_sid != new->mntpoint_sid) |
839 | goto mismatch; | 839 | goto mismatch; |
840 | if ((oldflags & DEFCONTEXT_MNT) && old->def_sid != new->def_sid) | 840 | if ((oldflags & DEFCONTEXT_MNT) && old->def_sid != new->def_sid) |
841 | goto mismatch; | 841 | goto mismatch; |
842 | if (oldflags & ROOTCONTEXT_MNT) { | 842 | if (oldflags & ROOTCONTEXT_MNT) { |
843 | struct inode_security_struct *oldroot = oldsb->s_root->d_inode->i_security; | 843 | struct inode_security_struct *oldroot = oldsb->s_root->d_inode->i_security; |
844 | struct inode_security_struct *newroot = newsb->s_root->d_inode->i_security; | 844 | struct inode_security_struct *newroot = newsb->s_root->d_inode->i_security; |
845 | if (oldroot->sid != newroot->sid) | 845 | if (oldroot->sid != newroot->sid) |
846 | goto mismatch; | 846 | goto mismatch; |
847 | } | 847 | } |
848 | return 0; | 848 | return 0; |
849 | mismatch: | 849 | mismatch: |
850 | printk(KERN_WARNING "SELinux: mount invalid. Same superblock, " | 850 | printk(KERN_WARNING "SELinux: mount invalid. Same superblock, " |
851 | "different security settings for (dev %s, " | 851 | "different security settings for (dev %s, " |
852 | "type %s)\n", newsb->s_id, newsb->s_type->name); | 852 | "type %s)\n", newsb->s_id, newsb->s_type->name); |
853 | return -EBUSY; | 853 | return -EBUSY; |
854 | } | 854 | } |
855 | 855 | ||
856 | static int selinux_sb_clone_mnt_opts(const struct super_block *oldsb, | 856 | static int selinux_sb_clone_mnt_opts(const struct super_block *oldsb, |
857 | struct super_block *newsb) | 857 | struct super_block *newsb) |
858 | { | 858 | { |
859 | const struct superblock_security_struct *oldsbsec = oldsb->s_security; | 859 | const struct superblock_security_struct *oldsbsec = oldsb->s_security; |
860 | struct superblock_security_struct *newsbsec = newsb->s_security; | 860 | struct superblock_security_struct *newsbsec = newsb->s_security; |
861 | 861 | ||
862 | int set_fscontext = (oldsbsec->flags & FSCONTEXT_MNT); | 862 | int set_fscontext = (oldsbsec->flags & FSCONTEXT_MNT); |
863 | int set_context = (oldsbsec->flags & CONTEXT_MNT); | 863 | int set_context = (oldsbsec->flags & CONTEXT_MNT); |
864 | int set_rootcontext = (oldsbsec->flags & ROOTCONTEXT_MNT); | 864 | int set_rootcontext = (oldsbsec->flags & ROOTCONTEXT_MNT); |
865 | 865 | ||
866 | /* | 866 | /* |
867 | * if the parent was able to be mounted it clearly had no special lsm | 867 | * if the parent was able to be mounted it clearly had no special lsm |
868 | * mount options. thus we can safely deal with this superblock later | 868 | * mount options. thus we can safely deal with this superblock later |
869 | */ | 869 | */ |
870 | if (!ss_initialized) | 870 | if (!ss_initialized) |
871 | return 0; | 871 | return 0; |
872 | 872 | ||
873 | /* how can we clone if the old one wasn't set up?? */ | 873 | /* how can we clone if the old one wasn't set up?? */ |
874 | BUG_ON(!(oldsbsec->flags & SE_SBINITIALIZED)); | 874 | BUG_ON(!(oldsbsec->flags & SE_SBINITIALIZED)); |
875 | 875 | ||
876 | /* if fs is reusing a sb, make sure that the contexts match */ | 876 | /* if fs is reusing a sb, make sure that the contexts match */ |
877 | if (newsbsec->flags & SE_SBINITIALIZED) | 877 | if (newsbsec->flags & SE_SBINITIALIZED) |
878 | return selinux_cmp_sb_context(oldsb, newsb); | 878 | return selinux_cmp_sb_context(oldsb, newsb); |
879 | 879 | ||
880 | mutex_lock(&newsbsec->lock); | 880 | mutex_lock(&newsbsec->lock); |
881 | 881 | ||
882 | newsbsec->flags = oldsbsec->flags; | 882 | newsbsec->flags = oldsbsec->flags; |
883 | 883 | ||
884 | newsbsec->sid = oldsbsec->sid; | 884 | newsbsec->sid = oldsbsec->sid; |
885 | newsbsec->def_sid = oldsbsec->def_sid; | 885 | newsbsec->def_sid = oldsbsec->def_sid; |
886 | newsbsec->behavior = oldsbsec->behavior; | 886 | newsbsec->behavior = oldsbsec->behavior; |
887 | 887 | ||
888 | if (set_context) { | 888 | if (set_context) { |
889 | u32 sid = oldsbsec->mntpoint_sid; | 889 | u32 sid = oldsbsec->mntpoint_sid; |
890 | 890 | ||
891 | if (!set_fscontext) | 891 | if (!set_fscontext) |
892 | newsbsec->sid = sid; | 892 | newsbsec->sid = sid; |
893 | if (!set_rootcontext) { | 893 | if (!set_rootcontext) { |
894 | struct inode *newinode = newsb->s_root->d_inode; | 894 | struct inode *newinode = newsb->s_root->d_inode; |
895 | struct inode_security_struct *newisec = newinode->i_security; | 895 | struct inode_security_struct *newisec = newinode->i_security; |
896 | newisec->sid = sid; | 896 | newisec->sid = sid; |
897 | } | 897 | } |
898 | newsbsec->mntpoint_sid = sid; | 898 | newsbsec->mntpoint_sid = sid; |
899 | } | 899 | } |
900 | if (set_rootcontext) { | 900 | if (set_rootcontext) { |
901 | const struct inode *oldinode = oldsb->s_root->d_inode; | 901 | const struct inode *oldinode = oldsb->s_root->d_inode; |
902 | const struct inode_security_struct *oldisec = oldinode->i_security; | 902 | const struct inode_security_struct *oldisec = oldinode->i_security; |
903 | struct inode *newinode = newsb->s_root->d_inode; | 903 | struct inode *newinode = newsb->s_root->d_inode; |
904 | struct inode_security_struct *newisec = newinode->i_security; | 904 | struct inode_security_struct *newisec = newinode->i_security; |
905 | 905 | ||
906 | newisec->sid = oldisec->sid; | 906 | newisec->sid = oldisec->sid; |
907 | } | 907 | } |
908 | 908 | ||
909 | sb_finish_set_opts(newsb); | 909 | sb_finish_set_opts(newsb); |
910 | mutex_unlock(&newsbsec->lock); | 910 | mutex_unlock(&newsbsec->lock); |
911 | return 0; | 911 | return 0; |
912 | } | 912 | } |
913 | 913 | ||
914 | static int selinux_parse_opts_str(char *options, | 914 | static int selinux_parse_opts_str(char *options, |
915 | struct security_mnt_opts *opts) | 915 | struct security_mnt_opts *opts) |
916 | { | 916 | { |
917 | char *p; | 917 | char *p; |
918 | char *context = NULL, *defcontext = NULL; | 918 | char *context = NULL, *defcontext = NULL; |
919 | char *fscontext = NULL, *rootcontext = NULL; | 919 | char *fscontext = NULL, *rootcontext = NULL; |
920 | int rc, num_mnt_opts = 0; | 920 | int rc, num_mnt_opts = 0; |
921 | 921 | ||
922 | opts->num_mnt_opts = 0; | 922 | opts->num_mnt_opts = 0; |
923 | 923 | ||
924 | /* Standard string-based options. */ | 924 | /* Standard string-based options. */ |
925 | while ((p = strsep(&options, "|")) != NULL) { | 925 | while ((p = strsep(&options, "|")) != NULL) { |
926 | int token; | 926 | int token; |
927 | substring_t args[MAX_OPT_ARGS]; | 927 | substring_t args[MAX_OPT_ARGS]; |
928 | 928 | ||
929 | if (!*p) | 929 | if (!*p) |
930 | continue; | 930 | continue; |
931 | 931 | ||
932 | token = match_token(p, tokens, args); | 932 | token = match_token(p, tokens, args); |
933 | 933 | ||
934 | switch (token) { | 934 | switch (token) { |
935 | case Opt_context: | 935 | case Opt_context: |
936 | if (context || defcontext) { | 936 | if (context || defcontext) { |
937 | rc = -EINVAL; | 937 | rc = -EINVAL; |
938 | printk(KERN_WARNING SEL_MOUNT_FAIL_MSG); | 938 | printk(KERN_WARNING SEL_MOUNT_FAIL_MSG); |
939 | goto out_err; | 939 | goto out_err; |
940 | } | 940 | } |
941 | context = match_strdup(&args[0]); | 941 | context = match_strdup(&args[0]); |
942 | if (!context) { | 942 | if (!context) { |
943 | rc = -ENOMEM; | 943 | rc = -ENOMEM; |
944 | goto out_err; | 944 | goto out_err; |
945 | } | 945 | } |
946 | break; | 946 | break; |
947 | 947 | ||
948 | case Opt_fscontext: | 948 | case Opt_fscontext: |
949 | if (fscontext) { | 949 | if (fscontext) { |
950 | rc = -EINVAL; | 950 | rc = -EINVAL; |
951 | printk(KERN_WARNING SEL_MOUNT_FAIL_MSG); | 951 | printk(KERN_WARNING SEL_MOUNT_FAIL_MSG); |
952 | goto out_err; | 952 | goto out_err; |
953 | } | 953 | } |
954 | fscontext = match_strdup(&args[0]); | 954 | fscontext = match_strdup(&args[0]); |
955 | if (!fscontext) { | 955 | if (!fscontext) { |
956 | rc = -ENOMEM; | 956 | rc = -ENOMEM; |
957 | goto out_err; | 957 | goto out_err; |
958 | } | 958 | } |
959 | break; | 959 | break; |
960 | 960 | ||
961 | case Opt_rootcontext: | 961 | case Opt_rootcontext: |
962 | if (rootcontext) { | 962 | if (rootcontext) { |
963 | rc = -EINVAL; | 963 | rc = -EINVAL; |
964 | printk(KERN_WARNING SEL_MOUNT_FAIL_MSG); | 964 | printk(KERN_WARNING SEL_MOUNT_FAIL_MSG); |
965 | goto out_err; | 965 | goto out_err; |
966 | } | 966 | } |
967 | rootcontext = match_strdup(&args[0]); | 967 | rootcontext = match_strdup(&args[0]); |
968 | if (!rootcontext) { | 968 | if (!rootcontext) { |
969 | rc = -ENOMEM; | 969 | rc = -ENOMEM; |
970 | goto out_err; | 970 | goto out_err; |
971 | } | 971 | } |
972 | break; | 972 | break; |
973 | 973 | ||
974 | case Opt_defcontext: | 974 | case Opt_defcontext: |
975 | if (context || defcontext) { | 975 | if (context || defcontext) { |
976 | rc = -EINVAL; | 976 | rc = -EINVAL; |
977 | printk(KERN_WARNING SEL_MOUNT_FAIL_MSG); | 977 | printk(KERN_WARNING SEL_MOUNT_FAIL_MSG); |
978 | goto out_err; | 978 | goto out_err; |
979 | } | 979 | } |
980 | defcontext = match_strdup(&args[0]); | 980 | defcontext = match_strdup(&args[0]); |
981 | if (!defcontext) { | 981 | if (!defcontext) { |
982 | rc = -ENOMEM; | 982 | rc = -ENOMEM; |
983 | goto out_err; | 983 | goto out_err; |
984 | } | 984 | } |
985 | break; | 985 | break; |
986 | case Opt_labelsupport: | 986 | case Opt_labelsupport: |
987 | break; | 987 | break; |
988 | default: | 988 | default: |
989 | rc = -EINVAL; | 989 | rc = -EINVAL; |
990 | printk(KERN_WARNING "SELinux: unknown mount option\n"); | 990 | printk(KERN_WARNING "SELinux: unknown mount option\n"); |
991 | goto out_err; | 991 | goto out_err; |
992 | 992 | ||
993 | } | 993 | } |
994 | } | 994 | } |
995 | 995 | ||
996 | rc = -ENOMEM; | 996 | rc = -ENOMEM; |
997 | opts->mnt_opts = kcalloc(NUM_SEL_MNT_OPTS, sizeof(char *), GFP_ATOMIC); | 997 | opts->mnt_opts = kcalloc(NUM_SEL_MNT_OPTS, sizeof(char *), GFP_ATOMIC); |
998 | if (!opts->mnt_opts) | 998 | if (!opts->mnt_opts) |
999 | goto out_err; | 999 | goto out_err; |
1000 | 1000 | ||
1001 | opts->mnt_opts_flags = kcalloc(NUM_SEL_MNT_OPTS, sizeof(int), GFP_ATOMIC); | 1001 | opts->mnt_opts_flags = kcalloc(NUM_SEL_MNT_OPTS, sizeof(int), GFP_ATOMIC); |
1002 | if (!opts->mnt_opts_flags) { | 1002 | if (!opts->mnt_opts_flags) { |
1003 | kfree(opts->mnt_opts); | 1003 | kfree(opts->mnt_opts); |
1004 | goto out_err; | 1004 | goto out_err; |
1005 | } | 1005 | } |
1006 | 1006 | ||
1007 | if (fscontext) { | 1007 | if (fscontext) { |
1008 | opts->mnt_opts[num_mnt_opts] = fscontext; | 1008 | opts->mnt_opts[num_mnt_opts] = fscontext; |
1009 | opts->mnt_opts_flags[num_mnt_opts++] = FSCONTEXT_MNT; | 1009 | opts->mnt_opts_flags[num_mnt_opts++] = FSCONTEXT_MNT; |
1010 | } | 1010 | } |
1011 | if (context) { | 1011 | if (context) { |
1012 | opts->mnt_opts[num_mnt_opts] = context; | 1012 | opts->mnt_opts[num_mnt_opts] = context; |
1013 | opts->mnt_opts_flags[num_mnt_opts++] = CONTEXT_MNT; | 1013 | opts->mnt_opts_flags[num_mnt_opts++] = CONTEXT_MNT; |
1014 | } | 1014 | } |
1015 | if (rootcontext) { | 1015 | if (rootcontext) { |
1016 | opts->mnt_opts[num_mnt_opts] = rootcontext; | 1016 | opts->mnt_opts[num_mnt_opts] = rootcontext; |
1017 | opts->mnt_opts_flags[num_mnt_opts++] = ROOTCONTEXT_MNT; | 1017 | opts->mnt_opts_flags[num_mnt_opts++] = ROOTCONTEXT_MNT; |
1018 | } | 1018 | } |
1019 | if (defcontext) { | 1019 | if (defcontext) { |
1020 | opts->mnt_opts[num_mnt_opts] = defcontext; | 1020 | opts->mnt_opts[num_mnt_opts] = defcontext; |
1021 | opts->mnt_opts_flags[num_mnt_opts++] = DEFCONTEXT_MNT; | 1021 | opts->mnt_opts_flags[num_mnt_opts++] = DEFCONTEXT_MNT; |
1022 | } | 1022 | } |
1023 | 1023 | ||
1024 | opts->num_mnt_opts = num_mnt_opts; | 1024 | opts->num_mnt_opts = num_mnt_opts; |
1025 | return 0; | 1025 | return 0; |
1026 | 1026 | ||
1027 | out_err: | 1027 | out_err: |
1028 | kfree(context); | 1028 | kfree(context); |
1029 | kfree(defcontext); | 1029 | kfree(defcontext); |
1030 | kfree(fscontext); | 1030 | kfree(fscontext); |
1031 | kfree(rootcontext); | 1031 | kfree(rootcontext); |
1032 | return rc; | 1032 | return rc; |
1033 | } | 1033 | } |
1034 | /* | 1034 | /* |
1035 | * string mount options parsing and call set the sbsec | 1035 | * string mount options parsing and call set the sbsec |
1036 | */ | 1036 | */ |
1037 | static int superblock_doinit(struct super_block *sb, void *data) | 1037 | static int superblock_doinit(struct super_block *sb, void *data) |
1038 | { | 1038 | { |
1039 | int rc = 0; | 1039 | int rc = 0; |
1040 | char *options = data; | 1040 | char *options = data; |
1041 | struct security_mnt_opts opts; | 1041 | struct security_mnt_opts opts; |
1042 | 1042 | ||
1043 | security_init_mnt_opts(&opts); | 1043 | security_init_mnt_opts(&opts); |
1044 | 1044 | ||
1045 | if (!data) | 1045 | if (!data) |
1046 | goto out; | 1046 | goto out; |
1047 | 1047 | ||
1048 | BUG_ON(sb->s_type->fs_flags & FS_BINARY_MOUNTDATA); | 1048 | BUG_ON(sb->s_type->fs_flags & FS_BINARY_MOUNTDATA); |
1049 | 1049 | ||
1050 | rc = selinux_parse_opts_str(options, &opts); | 1050 | rc = selinux_parse_opts_str(options, &opts); |
1051 | if (rc) | 1051 | if (rc) |
1052 | goto out_err; | 1052 | goto out_err; |
1053 | 1053 | ||
1054 | out: | 1054 | out: |
1055 | rc = selinux_set_mnt_opts(sb, &opts, 0, NULL); | 1055 | rc = selinux_set_mnt_opts(sb, &opts, 0, NULL); |
1056 | 1056 | ||
1057 | out_err: | 1057 | out_err: |
1058 | security_free_mnt_opts(&opts); | 1058 | security_free_mnt_opts(&opts); |
1059 | return rc; | 1059 | return rc; |
1060 | } | 1060 | } |
1061 | 1061 | ||
1062 | static void selinux_write_opts(struct seq_file *m, | 1062 | static void selinux_write_opts(struct seq_file *m, |
1063 | struct security_mnt_opts *opts) | 1063 | struct security_mnt_opts *opts) |
1064 | { | 1064 | { |
1065 | int i; | 1065 | int i; |
1066 | char *prefix; | 1066 | char *prefix; |
1067 | 1067 | ||
1068 | for (i = 0; i < opts->num_mnt_opts; i++) { | 1068 | for (i = 0; i < opts->num_mnt_opts; i++) { |
1069 | char *has_comma; | 1069 | char *has_comma; |
1070 | 1070 | ||
1071 | if (opts->mnt_opts[i]) | 1071 | if (opts->mnt_opts[i]) |
1072 | has_comma = strchr(opts->mnt_opts[i], ','); | 1072 | has_comma = strchr(opts->mnt_opts[i], ','); |
1073 | else | 1073 | else |
1074 | has_comma = NULL; | 1074 | has_comma = NULL; |
1075 | 1075 | ||
1076 | switch (opts->mnt_opts_flags[i]) { | 1076 | switch (opts->mnt_opts_flags[i]) { |
1077 | case CONTEXT_MNT: | 1077 | case CONTEXT_MNT: |
1078 | prefix = CONTEXT_STR; | 1078 | prefix = CONTEXT_STR; |
1079 | break; | 1079 | break; |
1080 | case FSCONTEXT_MNT: | 1080 | case FSCONTEXT_MNT: |
1081 | prefix = FSCONTEXT_STR; | 1081 | prefix = FSCONTEXT_STR; |
1082 | break; | 1082 | break; |
1083 | case ROOTCONTEXT_MNT: | 1083 | case ROOTCONTEXT_MNT: |
1084 | prefix = ROOTCONTEXT_STR; | 1084 | prefix = ROOTCONTEXT_STR; |
1085 | break; | 1085 | break; |
1086 | case DEFCONTEXT_MNT: | 1086 | case DEFCONTEXT_MNT: |
1087 | prefix = DEFCONTEXT_STR; | 1087 | prefix = DEFCONTEXT_STR; |
1088 | break; | 1088 | break; |
1089 | case SBLABEL_MNT: | 1089 | case SBLABEL_MNT: |
1090 | seq_putc(m, ','); | 1090 | seq_putc(m, ','); |
1091 | seq_puts(m, LABELSUPP_STR); | 1091 | seq_puts(m, LABELSUPP_STR); |
1092 | continue; | 1092 | continue; |
1093 | default: | 1093 | default: |
1094 | BUG(); | 1094 | BUG(); |
1095 | return; | 1095 | return; |
1096 | }; | 1096 | }; |
1097 | /* we need a comma before each option */ | 1097 | /* we need a comma before each option */ |
1098 | seq_putc(m, ','); | 1098 | seq_putc(m, ','); |
1099 | seq_puts(m, prefix); | 1099 | seq_puts(m, prefix); |
1100 | if (has_comma) | 1100 | if (has_comma) |
1101 | seq_putc(m, '\"'); | 1101 | seq_putc(m, '\"'); |
1102 | seq_puts(m, opts->mnt_opts[i]); | 1102 | seq_puts(m, opts->mnt_opts[i]); |
1103 | if (has_comma) | 1103 | if (has_comma) |
1104 | seq_putc(m, '\"'); | 1104 | seq_putc(m, '\"'); |
1105 | } | 1105 | } |
1106 | } | 1106 | } |
1107 | 1107 | ||
1108 | static int selinux_sb_show_options(struct seq_file *m, struct super_block *sb) | 1108 | static int selinux_sb_show_options(struct seq_file *m, struct super_block *sb) |
1109 | { | 1109 | { |
1110 | struct security_mnt_opts opts; | 1110 | struct security_mnt_opts opts; |
1111 | int rc; | 1111 | int rc; |
1112 | 1112 | ||
1113 | rc = selinux_get_mnt_opts(sb, &opts); | 1113 | rc = selinux_get_mnt_opts(sb, &opts); |
1114 | if (rc) { | 1114 | if (rc) { |
1115 | /* before policy load we may get EINVAL, don't show anything */ | 1115 | /* before policy load we may get EINVAL, don't show anything */ |
1116 | if (rc == -EINVAL) | 1116 | if (rc == -EINVAL) |
1117 | rc = 0; | 1117 | rc = 0; |
1118 | return rc; | 1118 | return rc; |
1119 | } | 1119 | } |
1120 | 1120 | ||
1121 | selinux_write_opts(m, &opts); | 1121 | selinux_write_opts(m, &opts); |
1122 | 1122 | ||
1123 | security_free_mnt_opts(&opts); | 1123 | security_free_mnt_opts(&opts); |
1124 | 1124 | ||
1125 | return rc; | 1125 | return rc; |
1126 | } | 1126 | } |
1127 | 1127 | ||
1128 | static inline u16 inode_mode_to_security_class(umode_t mode) | 1128 | static inline u16 inode_mode_to_security_class(umode_t mode) |
1129 | { | 1129 | { |
1130 | switch (mode & S_IFMT) { | 1130 | switch (mode & S_IFMT) { |
1131 | case S_IFSOCK: | 1131 | case S_IFSOCK: |
1132 | return SECCLASS_SOCK_FILE; | 1132 | return SECCLASS_SOCK_FILE; |
1133 | case S_IFLNK: | 1133 | case S_IFLNK: |
1134 | return SECCLASS_LNK_FILE; | 1134 | return SECCLASS_LNK_FILE; |
1135 | case S_IFREG: | 1135 | case S_IFREG: |
1136 | return SECCLASS_FILE; | 1136 | return SECCLASS_FILE; |
1137 | case S_IFBLK: | 1137 | case S_IFBLK: |
1138 | return SECCLASS_BLK_FILE; | 1138 | return SECCLASS_BLK_FILE; |
1139 | case S_IFDIR: | 1139 | case S_IFDIR: |
1140 | return SECCLASS_DIR; | 1140 | return SECCLASS_DIR; |
1141 | case S_IFCHR: | 1141 | case S_IFCHR: |
1142 | return SECCLASS_CHR_FILE; | 1142 | return SECCLASS_CHR_FILE; |
1143 | case S_IFIFO: | 1143 | case S_IFIFO: |
1144 | return SECCLASS_FIFO_FILE; | 1144 | return SECCLASS_FIFO_FILE; |
1145 | 1145 | ||
1146 | } | 1146 | } |
1147 | 1147 | ||
1148 | return SECCLASS_FILE; | 1148 | return SECCLASS_FILE; |
1149 | } | 1149 | } |
1150 | 1150 | ||
/* Returns 1 if @protocol is IPPROTO_IP or IPPROTO_TCP, otherwise 0. */
static inline int default_protocol_stream(int protocol)
{
	switch (protocol) {
	case IPPROTO_IP:
	case IPPROTO_TCP:
		return 1;
	default:
		return 0;
	}
}
1155 | 1155 | ||
/* Returns 1 if @protocol is IPPROTO_IP or IPPROTO_UDP, otherwise 0. */
static inline int default_protocol_dgram(int protocol)
{
	switch (protocol) {
	case IPPROTO_IP:
	case IPPROTO_UDP:
		return 1;
	default:
		return 0;
	}
}
1160 | 1160 | ||
1161 | static inline u16 socket_type_to_security_class(int family, int type, int protocol) | 1161 | static inline u16 socket_type_to_security_class(int family, int type, int protocol) |
1162 | { | 1162 | { |
1163 | switch (family) { | 1163 | switch (family) { |
1164 | case PF_UNIX: | 1164 | case PF_UNIX: |
1165 | switch (type) { | 1165 | switch (type) { |
1166 | case SOCK_STREAM: | 1166 | case SOCK_STREAM: |
1167 | case SOCK_SEQPACKET: | 1167 | case SOCK_SEQPACKET: |
1168 | return SECCLASS_UNIX_STREAM_SOCKET; | 1168 | return SECCLASS_UNIX_STREAM_SOCKET; |
1169 | case SOCK_DGRAM: | 1169 | case SOCK_DGRAM: |
1170 | return SECCLASS_UNIX_DGRAM_SOCKET; | 1170 | return SECCLASS_UNIX_DGRAM_SOCKET; |
1171 | } | 1171 | } |
1172 | break; | 1172 | break; |
1173 | case PF_INET: | 1173 | case PF_INET: |
1174 | case PF_INET6: | 1174 | case PF_INET6: |
1175 | switch (type) { | 1175 | switch (type) { |
1176 | case SOCK_STREAM: | 1176 | case SOCK_STREAM: |
1177 | if (default_protocol_stream(protocol)) | 1177 | if (default_protocol_stream(protocol)) |
1178 | return SECCLASS_TCP_SOCKET; | 1178 | return SECCLASS_TCP_SOCKET; |
1179 | else | 1179 | else |
1180 | return SECCLASS_RAWIP_SOCKET; | 1180 | return SECCLASS_RAWIP_SOCKET; |
1181 | case SOCK_DGRAM: | 1181 | case SOCK_DGRAM: |
1182 | if (default_protocol_dgram(protocol)) | 1182 | if (default_protocol_dgram(protocol)) |
1183 | return SECCLASS_UDP_SOCKET; | 1183 | return SECCLASS_UDP_SOCKET; |
1184 | else | 1184 | else |
1185 | return SECCLASS_RAWIP_SOCKET; | 1185 | return SECCLASS_RAWIP_SOCKET; |
1186 | case SOCK_DCCP: | 1186 | case SOCK_DCCP: |
1187 | return SECCLASS_DCCP_SOCKET; | 1187 | return SECCLASS_DCCP_SOCKET; |
1188 | default: | 1188 | default: |
1189 | return SECCLASS_RAWIP_SOCKET; | 1189 | return SECCLASS_RAWIP_SOCKET; |
1190 | } | 1190 | } |
1191 | break; | 1191 | break; |
1192 | case PF_NETLINK: | 1192 | case PF_NETLINK: |
1193 | switch (protocol) { | 1193 | switch (protocol) { |
1194 | case NETLINK_ROUTE: | 1194 | case NETLINK_ROUTE: |
1195 | return SECCLASS_NETLINK_ROUTE_SOCKET; | 1195 | return SECCLASS_NETLINK_ROUTE_SOCKET; |
1196 | case NETLINK_FIREWALL: | 1196 | case NETLINK_FIREWALL: |
1197 | return SECCLASS_NETLINK_FIREWALL_SOCKET; | 1197 | return SECCLASS_NETLINK_FIREWALL_SOCKET; |
1198 | case NETLINK_SOCK_DIAG: | 1198 | case NETLINK_SOCK_DIAG: |
1199 | return SECCLASS_NETLINK_TCPDIAG_SOCKET; | 1199 | return SECCLASS_NETLINK_TCPDIAG_SOCKET; |
1200 | case NETLINK_NFLOG: | 1200 | case NETLINK_NFLOG: |
1201 | return SECCLASS_NETLINK_NFLOG_SOCKET; | 1201 | return SECCLASS_NETLINK_NFLOG_SOCKET; |
1202 | case NETLINK_XFRM: | 1202 | case NETLINK_XFRM: |
1203 | return SECCLASS_NETLINK_XFRM_SOCKET; | 1203 | return SECCLASS_NETLINK_XFRM_SOCKET; |
1204 | case NETLINK_SELINUX: | 1204 | case NETLINK_SELINUX: |
1205 | return SECCLASS_NETLINK_SELINUX_SOCKET; | 1205 | return SECCLASS_NETLINK_SELINUX_SOCKET; |
1206 | case NETLINK_AUDIT: | 1206 | case NETLINK_AUDIT: |
1207 | return SECCLASS_NETLINK_AUDIT_SOCKET; | 1207 | return SECCLASS_NETLINK_AUDIT_SOCKET; |
1208 | case NETLINK_IP6_FW: | 1208 | case NETLINK_IP6_FW: |
1209 | return SECCLASS_NETLINK_IP6FW_SOCKET; | 1209 | return SECCLASS_NETLINK_IP6FW_SOCKET; |
1210 | case NETLINK_DNRTMSG: | 1210 | case NETLINK_DNRTMSG: |
1211 | return SECCLASS_NETLINK_DNRT_SOCKET; | 1211 | return SECCLASS_NETLINK_DNRT_SOCKET; |
1212 | case NETLINK_KOBJECT_UEVENT: | 1212 | case NETLINK_KOBJECT_UEVENT: |
1213 | return SECCLASS_NETLINK_KOBJECT_UEVENT_SOCKET; | 1213 | return SECCLASS_NETLINK_KOBJECT_UEVENT_SOCKET; |
1214 | default: | 1214 | default: |
1215 | return SECCLASS_NETLINK_SOCKET; | 1215 | return SECCLASS_NETLINK_SOCKET; |
1216 | } | 1216 | } |
1217 | case PF_PACKET: | 1217 | case PF_PACKET: |
1218 | return SECCLASS_PACKET_SOCKET; | 1218 | return SECCLASS_PACKET_SOCKET; |
1219 | case PF_KEY: | 1219 | case PF_KEY: |
1220 | return SECCLASS_KEY_SOCKET; | 1220 | return SECCLASS_KEY_SOCKET; |
1221 | case PF_APPLETALK: | 1221 | case PF_APPLETALK: |
1222 | return SECCLASS_APPLETALK_SOCKET; | 1222 | return SECCLASS_APPLETALK_SOCKET; |
1223 | } | 1223 | } |
1224 | 1224 | ||
1225 | return SECCLASS_SOCKET; | 1225 | return SECCLASS_SOCKET; |
1226 | } | 1226 | } |
1227 | 1227 | ||
1228 | #ifdef CONFIG_PROC_FS | 1228 | #ifdef CONFIG_PROC_FS |
1229 | static int selinux_proc_get_sid(struct dentry *dentry, | 1229 | static int selinux_proc_get_sid(struct dentry *dentry, |
1230 | u16 tclass, | 1230 | u16 tclass, |
1231 | u32 *sid) | 1231 | u32 *sid) |
1232 | { | 1232 | { |
1233 | int rc; | 1233 | int rc; |
1234 | char *buffer, *path; | 1234 | char *buffer, *path; |
1235 | 1235 | ||
1236 | buffer = (char *)__get_free_page(GFP_KERNEL); | 1236 | buffer = (char *)__get_free_page(GFP_KERNEL); |
1237 | if (!buffer) | 1237 | if (!buffer) |
1238 | return -ENOMEM; | 1238 | return -ENOMEM; |
1239 | 1239 | ||
1240 | path = dentry_path_raw(dentry, buffer, PAGE_SIZE); | 1240 | path = dentry_path_raw(dentry, buffer, PAGE_SIZE); |
1241 | if (IS_ERR(path)) | 1241 | if (IS_ERR(path)) |
1242 | rc = PTR_ERR(path); | 1242 | rc = PTR_ERR(path); |
1243 | else { | 1243 | else { |
1244 | /* each process gets a /proc/PID/ entry. Strip off the | 1244 | /* each process gets a /proc/PID/ entry. Strip off the |
1245 | * PID part to get a valid selinux labeling. | 1245 | * PID part to get a valid selinux labeling. |
1246 | * e.g. /proc/1/net/rpc/nfs -> /net/rpc/nfs */ | 1246 | * e.g. /proc/1/net/rpc/nfs -> /net/rpc/nfs */ |
1247 | while (path[1] >= '0' && path[1] <= '9') { | 1247 | while (path[1] >= '0' && path[1] <= '9') { |
1248 | path[1] = '/'; | 1248 | path[1] = '/'; |
1249 | path++; | 1249 | path++; |
1250 | } | 1250 | } |
1251 | rc = security_genfs_sid("proc", path, tclass, sid); | 1251 | rc = security_genfs_sid("proc", path, tclass, sid); |
1252 | } | 1252 | } |
1253 | free_page((unsigned long)buffer); | 1253 | free_page((unsigned long)buffer); |
1254 | return rc; | 1254 | return rc; |
1255 | } | 1255 | } |
1256 | #else | 1256 | #else |
1257 | static int selinux_proc_get_sid(struct dentry *dentry, | 1257 | static int selinux_proc_get_sid(struct dentry *dentry, |
1258 | u16 tclass, | 1258 | u16 tclass, |
1259 | u32 *sid) | 1259 | u32 *sid) |
1260 | { | 1260 | { |
1261 | return -EINVAL; | 1261 | return -EINVAL; |
1262 | } | 1262 | } |
1263 | #endif | 1263 | #endif |
1264 | 1264 | ||
1265 | /* The inode's security attributes must be initialized before first use. */ | 1265 | /* The inode's security attributes must be initialized before first use. */ |
1266 | static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dentry) | 1266 | static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dentry) |
1267 | { | 1267 | { |
1268 | struct superblock_security_struct *sbsec = NULL; | 1268 | struct superblock_security_struct *sbsec = NULL; |
1269 | struct inode_security_struct *isec = inode->i_security; | 1269 | struct inode_security_struct *isec = inode->i_security; |
1270 | u32 sid; | 1270 | u32 sid; |
1271 | struct dentry *dentry; | 1271 | struct dentry *dentry; |
1272 | #define INITCONTEXTLEN 255 | 1272 | #define INITCONTEXTLEN 255 |
1273 | char *context = NULL; | 1273 | char *context = NULL; |
1274 | unsigned len = 0; | 1274 | unsigned len = 0; |
1275 | int rc = 0; | 1275 | int rc = 0; |
1276 | 1276 | ||
1277 | if (isec->initialized) | 1277 | if (isec->initialized) |
1278 | goto out; | 1278 | goto out; |
1279 | 1279 | ||
1280 | mutex_lock(&isec->lock); | 1280 | mutex_lock(&isec->lock); |
1281 | if (isec->initialized) | 1281 | if (isec->initialized) |
1282 | goto out_unlock; | 1282 | goto out_unlock; |
1283 | 1283 | ||
1284 | sbsec = inode->i_sb->s_security; | 1284 | sbsec = inode->i_sb->s_security; |
1285 | if (!(sbsec->flags & SE_SBINITIALIZED)) { | 1285 | if (!(sbsec->flags & SE_SBINITIALIZED)) { |
1286 | /* Defer initialization until selinux_complete_init, | 1286 | /* Defer initialization until selinux_complete_init, |
1287 | after the initial policy is loaded and the security | 1287 | after the initial policy is loaded and the security |
1288 | server is ready to handle calls. */ | 1288 | server is ready to handle calls. */ |
1289 | spin_lock(&sbsec->isec_lock); | 1289 | spin_lock(&sbsec->isec_lock); |
1290 | if (list_empty(&isec->list)) | 1290 | if (list_empty(&isec->list)) |
1291 | list_add(&isec->list, &sbsec->isec_head); | 1291 | list_add(&isec->list, &sbsec->isec_head); |
1292 | spin_unlock(&sbsec->isec_lock); | 1292 | spin_unlock(&sbsec->isec_lock); |
1293 | goto out_unlock; | 1293 | goto out_unlock; |
1294 | } | 1294 | } |
1295 | 1295 | ||
1296 | switch (sbsec->behavior) { | 1296 | switch (sbsec->behavior) { |
1297 | case SECURITY_FS_USE_NATIVE: | 1297 | case SECURITY_FS_USE_NATIVE: |
1298 | break; | 1298 | break; |
1299 | case SECURITY_FS_USE_XATTR: | 1299 | case SECURITY_FS_USE_XATTR: |
1300 | if (!inode->i_op->getxattr) { | 1300 | if (!inode->i_op->getxattr) { |
1301 | isec->sid = sbsec->def_sid; | 1301 | isec->sid = sbsec->def_sid; |
1302 | break; | 1302 | break; |
1303 | } | 1303 | } |
1304 | 1304 | ||
1305 | /* Need a dentry, since the xattr API requires one. | 1305 | /* Need a dentry, since the xattr API requires one. |
1306 | Life would be simpler if we could just pass the inode. */ | 1306 | Life would be simpler if we could just pass the inode. */ |
1307 | if (opt_dentry) { | 1307 | if (opt_dentry) { |
1308 | /* Called from d_instantiate or d_splice_alias. */ | 1308 | /* Called from d_instantiate or d_splice_alias. */ |
1309 | dentry = dget(opt_dentry); | 1309 | dentry = dget(opt_dentry); |
1310 | } else { | 1310 | } else { |
1311 | /* Called from selinux_complete_init, try to find a dentry. */ | 1311 | /* Called from selinux_complete_init, try to find a dentry. */ |
1312 | dentry = d_find_alias(inode); | 1312 | dentry = d_find_alias(inode); |
1313 | } | 1313 | } |
1314 | if (!dentry) { | 1314 | if (!dentry) { |
1315 | /* | 1315 | /* |
1316 | * this can be hit on boot when a file is accessed | 1316 | * this can be hit on boot when a file is accessed |
1317 | * before the policy is loaded. When we load policy we | 1317 | * before the policy is loaded. When we load policy we |
1318 | * may find inodes that have no dentry on the | 1318 | * may find inodes that have no dentry on the |
1319 | * sbsec->isec_head list. No reason to complain as these | 1319 | * sbsec->isec_head list. No reason to complain as these |
1320 | * will get fixed up the next time we go through | 1320 | * will get fixed up the next time we go through |
1321 | * inode_doinit with a dentry, before these inodes could | 1321 | * inode_doinit with a dentry, before these inodes could |
1322 | * be used again by userspace. | 1322 | * be used again by userspace. |
1323 | */ | 1323 | */ |
1324 | goto out_unlock; | 1324 | goto out_unlock; |
1325 | } | 1325 | } |
1326 | 1326 | ||
1327 | len = INITCONTEXTLEN; | 1327 | len = INITCONTEXTLEN; |
1328 | context = kmalloc(len+1, GFP_NOFS); | 1328 | context = kmalloc(len+1, GFP_NOFS); |
1329 | if (!context) { | 1329 | if (!context) { |
1330 | rc = -ENOMEM; | 1330 | rc = -ENOMEM; |
1331 | dput(dentry); | 1331 | dput(dentry); |
1332 | goto out_unlock; | 1332 | goto out_unlock; |
1333 | } | 1333 | } |
1334 | context[len] = '\0'; | 1334 | context[len] = '\0'; |
1335 | rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX, | 1335 | rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX, |
1336 | context, len); | 1336 | context, len); |
1337 | if (rc == -ERANGE) { | 1337 | if (rc == -ERANGE) { |
1338 | kfree(context); | 1338 | kfree(context); |
1339 | 1339 | ||
1340 | /* Need a larger buffer. Query for the right size. */ | 1340 | /* Need a larger buffer. Query for the right size. */ |
1341 | rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX, | 1341 | rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX, |
1342 | NULL, 0); | 1342 | NULL, 0); |
1343 | if (rc < 0) { | 1343 | if (rc < 0) { |
1344 | dput(dentry); | 1344 | dput(dentry); |
1345 | goto out_unlock; | 1345 | goto out_unlock; |
1346 | } | 1346 | } |
1347 | len = rc; | 1347 | len = rc; |
1348 | context = kmalloc(len+1, GFP_NOFS); | 1348 | context = kmalloc(len+1, GFP_NOFS); |
1349 | if (!context) { | 1349 | if (!context) { |
1350 | rc = -ENOMEM; | 1350 | rc = -ENOMEM; |
1351 | dput(dentry); | 1351 | dput(dentry); |
1352 | goto out_unlock; | 1352 | goto out_unlock; |
1353 | } | 1353 | } |
1354 | context[len] = '\0'; | 1354 | context[len] = '\0'; |
1355 | rc = inode->i_op->getxattr(dentry, | 1355 | rc = inode->i_op->getxattr(dentry, |
1356 | XATTR_NAME_SELINUX, | 1356 | XATTR_NAME_SELINUX, |
1357 | context, len); | 1357 | context, len); |
1358 | } | 1358 | } |
1359 | dput(dentry); | 1359 | dput(dentry); |
1360 | if (rc < 0) { | 1360 | if (rc < 0) { |
1361 | if (rc != -ENODATA) { | 1361 | if (rc != -ENODATA) { |
1362 | printk(KERN_WARNING "SELinux: %s: getxattr returned " | 1362 | printk(KERN_WARNING "SELinux: %s: getxattr returned " |
1363 | "%d for dev=%s ino=%ld\n", __func__, | 1363 | "%d for dev=%s ino=%ld\n", __func__, |
1364 | -rc, inode->i_sb->s_id, inode->i_ino); | 1364 | -rc, inode->i_sb->s_id, inode->i_ino); |
1365 | kfree(context); | 1365 | kfree(context); |
1366 | goto out_unlock; | 1366 | goto out_unlock; |
1367 | } | 1367 | } |
1368 | /* Map ENODATA to the default file SID */ | 1368 | /* Map ENODATA to the default file SID */ |
1369 | sid = sbsec->def_sid; | 1369 | sid = sbsec->def_sid; |
1370 | rc = 0; | 1370 | rc = 0; |
1371 | } else { | 1371 | } else { |
1372 | rc = security_context_to_sid_default(context, rc, &sid, | 1372 | rc = security_context_to_sid_default(context, rc, &sid, |
1373 | sbsec->def_sid, | 1373 | sbsec->def_sid, |
1374 | GFP_NOFS); | 1374 | GFP_NOFS); |
1375 | if (rc) { | 1375 | if (rc) { |
1376 | char *dev = inode->i_sb->s_id; | 1376 | char *dev = inode->i_sb->s_id; |
1377 | unsigned long ino = inode->i_ino; | 1377 | unsigned long ino = inode->i_ino; |
1378 | 1378 | ||
1379 | if (rc == -EINVAL) { | 1379 | if (rc == -EINVAL) { |
1380 | if (printk_ratelimit()) | 1380 | if (printk_ratelimit()) |
1381 | printk(KERN_NOTICE "SELinux: inode=%lu on dev=%s was found to have an invalid " | 1381 | printk(KERN_NOTICE "SELinux: inode=%lu on dev=%s was found to have an invalid " |
1382 | "context=%s. This indicates you may need to relabel the inode or the " | 1382 | "context=%s. This indicates you may need to relabel the inode or the " |
1383 | "filesystem in question.\n", ino, dev, context); | 1383 | "filesystem in question.\n", ino, dev, context); |
1384 | } else { | 1384 | } else { |
1385 | printk(KERN_WARNING "SELinux: %s: context_to_sid(%s) " | 1385 | printk(KERN_WARNING "SELinux: %s: context_to_sid(%s) " |
1386 | "returned %d for dev=%s ino=%ld\n", | 1386 | "returned %d for dev=%s ino=%ld\n", |
1387 | __func__, context, -rc, dev, ino); | 1387 | __func__, context, -rc, dev, ino); |
1388 | } | 1388 | } |
1389 | kfree(context); | 1389 | kfree(context); |
1390 | /* Leave with the unlabeled SID */ | 1390 | /* Leave with the unlabeled SID */ |
1391 | rc = 0; | 1391 | rc = 0; |
1392 | break; | 1392 | break; |
1393 | } | 1393 | } |
1394 | } | 1394 | } |
1395 | kfree(context); | 1395 | kfree(context); |
1396 | isec->sid = sid; | 1396 | isec->sid = sid; |
1397 | break; | 1397 | break; |
1398 | case SECURITY_FS_USE_TASK: | 1398 | case SECURITY_FS_USE_TASK: |
1399 | isec->sid = isec->task_sid; | 1399 | isec->sid = isec->task_sid; |
1400 | break; | 1400 | break; |
1401 | case SECURITY_FS_USE_TRANS: | 1401 | case SECURITY_FS_USE_TRANS: |
1402 | /* Default to the fs SID. */ | 1402 | /* Default to the fs SID. */ |
1403 | isec->sid = sbsec->sid; | 1403 | isec->sid = sbsec->sid; |
1404 | 1404 | ||
1405 | /* Try to obtain a transition SID. */ | 1405 | /* Try to obtain a transition SID. */ |
1406 | isec->sclass = inode_mode_to_security_class(inode->i_mode); | 1406 | isec->sclass = inode_mode_to_security_class(inode->i_mode); |
1407 | rc = security_transition_sid(isec->task_sid, sbsec->sid, | 1407 | rc = security_transition_sid(isec->task_sid, sbsec->sid, |
1408 | isec->sclass, NULL, &sid); | 1408 | isec->sclass, NULL, &sid); |
1409 | if (rc) | 1409 | if (rc) |
1410 | goto out_unlock; | 1410 | goto out_unlock; |
1411 | isec->sid = sid; | 1411 | isec->sid = sid; |
1412 | break; | 1412 | break; |
1413 | case SECURITY_FS_USE_MNTPOINT: | 1413 | case SECURITY_FS_USE_MNTPOINT: |
1414 | isec->sid = sbsec->mntpoint_sid; | 1414 | isec->sid = sbsec->mntpoint_sid; |
1415 | break; | 1415 | break; |
1416 | default: | 1416 | default: |
1417 | /* Default to the fs superblock SID. */ | 1417 | /* Default to the fs superblock SID. */ |
1418 | isec->sid = sbsec->sid; | 1418 | isec->sid = sbsec->sid; |
1419 | 1419 | ||
1420 | if ((sbsec->flags & SE_SBPROC) && !S_ISLNK(inode->i_mode)) { | 1420 | if ((sbsec->flags & SE_SBPROC) && !S_ISLNK(inode->i_mode)) { |
1421 | /* We must have a dentry to determine the label on | 1421 | /* We must have a dentry to determine the label on |
1422 | * procfs inodes */ | 1422 | * procfs inodes */ |
1423 | if (opt_dentry) | 1423 | if (opt_dentry) |
1424 | /* Called from d_instantiate or | 1424 | /* Called from d_instantiate or |
1425 | * d_splice_alias. */ | 1425 | * d_splice_alias. */ |
1426 | dentry = dget(opt_dentry); | 1426 | dentry = dget(opt_dentry); |
1427 | else | 1427 | else |
1428 | /* Called from selinux_complete_init, try to | 1428 | /* Called from selinux_complete_init, try to |
1429 | * find a dentry. */ | 1429 | * find a dentry. */ |
1430 | dentry = d_find_alias(inode); | 1430 | dentry = d_find_alias(inode); |
1431 | /* | 1431 | /* |
1432 | * This can be hit on boot when a file is accessed | 1432 | * This can be hit on boot when a file is accessed |
1433 | * before the policy is loaded. When we load policy we | 1433 | * before the policy is loaded. When we load policy we |
1434 | * may find inodes that have no dentry on the | 1434 | * may find inodes that have no dentry on the |
1435 | * sbsec->isec_head list. No reason to complain as | 1435 | * sbsec->isec_head list. No reason to complain as |
1436 | * these will get fixed up the next time we go through | 1436 | * these will get fixed up the next time we go through |
1437 | * inode_doinit() with a dentry, before these inodes | 1437 | * inode_doinit() with a dentry, before these inodes |
1438 | * could be used again by userspace. | 1438 | * could be used again by userspace. |
1439 | */ | 1439 | */ |
1440 | if (!dentry) | 1440 | if (!dentry) |
1441 | goto out_unlock; | 1441 | goto out_unlock; |
1442 | isec->sclass = inode_mode_to_security_class(inode->i_mode); | 1442 | isec->sclass = inode_mode_to_security_class(inode->i_mode); |
1443 | rc = selinux_proc_get_sid(dentry, isec->sclass, &sid); | 1443 | rc = selinux_proc_get_sid(dentry, isec->sclass, &sid); |
1444 | dput(dentry); | 1444 | dput(dentry); |
1445 | if (rc) | 1445 | if (rc) |
1446 | goto out_unlock; | 1446 | goto out_unlock; |
1447 | isec->sid = sid; | 1447 | isec->sid = sid; |
1448 | } | 1448 | } |
1449 | break; | 1449 | break; |
1450 | } | 1450 | } |
1451 | 1451 | ||
1452 | isec->initialized = 1; | 1452 | isec->initialized = 1; |
1453 | 1453 | ||
1454 | out_unlock: | 1454 | out_unlock: |
1455 | mutex_unlock(&isec->lock); | 1455 | mutex_unlock(&isec->lock); |
1456 | out: | 1456 | out: |
1457 | if (isec->sclass == SECCLASS_FILE) | 1457 | if (isec->sclass == SECCLASS_FILE) |
1458 | isec->sclass = inode_mode_to_security_class(inode->i_mode); | 1458 | isec->sclass = inode_mode_to_security_class(inode->i_mode); |
1459 | return rc; | 1459 | return rc; |
1460 | } | 1460 | } |
1461 | 1461 | ||
1462 | /* Convert a Linux signal to an access vector. */ | 1462 | /* Convert a Linux signal to an access vector. */ |
1463 | static inline u32 signal_to_av(int sig) | 1463 | static inline u32 signal_to_av(int sig) |
1464 | { | 1464 | { |
1465 | u32 perm = 0; | 1465 | u32 perm = 0; |
1466 | 1466 | ||
1467 | switch (sig) { | 1467 | switch (sig) { |
1468 | case SIGCHLD: | 1468 | case SIGCHLD: |
1469 | /* Commonly granted from child to parent. */ | 1469 | /* Commonly granted from child to parent. */ |
1470 | perm = PROCESS__SIGCHLD; | 1470 | perm = PROCESS__SIGCHLD; |
1471 | break; | 1471 | break; |
1472 | case SIGKILL: | 1472 | case SIGKILL: |
1473 | /* Cannot be caught or ignored */ | 1473 | /* Cannot be caught or ignored */ |
1474 | perm = PROCESS__SIGKILL; | 1474 | perm = PROCESS__SIGKILL; |
1475 | break; | 1475 | break; |
1476 | case SIGSTOP: | 1476 | case SIGSTOP: |
1477 | /* Cannot be caught or ignored */ | 1477 | /* Cannot be caught or ignored */ |
1478 | perm = PROCESS__SIGSTOP; | 1478 | perm = PROCESS__SIGSTOP; |
1479 | break; | 1479 | break; |
1480 | default: | 1480 | default: |
1481 | /* All other signals. */ | 1481 | /* All other signals. */ |
1482 | perm = PROCESS__SIGNAL; | 1482 | perm = PROCESS__SIGNAL; |
1483 | break; | 1483 | break; |
1484 | } | 1484 | } |
1485 | 1485 | ||
1486 | return perm; | 1486 | return perm; |
1487 | } | 1487 | } |
1488 | 1488 | ||
1489 | /* | 1489 | /* |
1490 | * Check permission between a pair of credentials, e.g. | 1490 | * Check permission between a pair of credentials, e.g. |
1491 | * fork check, ptrace check, etc. | 1491 | * fork check, ptrace check, etc. |
1492 | */ | 1492 | */ |
1493 | static int cred_has_perm(const struct cred *actor, | 1493 | static int cred_has_perm(const struct cred *actor, |
1494 | const struct cred *target, | 1494 | const struct cred *target, |
1495 | u32 perms) | 1495 | u32 perms) |
1496 | { | 1496 | { |
1497 | u32 asid = cred_sid(actor), tsid = cred_sid(target); | 1497 | u32 asid = cred_sid(actor), tsid = cred_sid(target); |
1498 | 1498 | ||
1499 | return avc_has_perm(asid, tsid, SECCLASS_PROCESS, perms, NULL); | 1499 | return avc_has_perm(asid, tsid, SECCLASS_PROCESS, perms, NULL); |
1500 | } | 1500 | } |
1501 | 1501 | ||
1502 | /* | 1502 | /* |
1503 | * Check permission between a pair of tasks, e.g. signal checks, | 1503 | * Check permission between a pair of tasks, e.g. signal checks, |
1504 | * fork check, ptrace check, etc. | 1504 | * fork check, ptrace check, etc. |
1505 | * tsk1 is the actor and tsk2 is the target | 1505 | * tsk1 is the actor and tsk2 is the target |
1506 | * - this uses the default subjective creds of tsk1 | 1506 | * - this uses the default subjective creds of tsk1 |
1507 | */ | 1507 | */ |
1508 | static int task_has_perm(const struct task_struct *tsk1, | 1508 | static int task_has_perm(const struct task_struct *tsk1, |
1509 | const struct task_struct *tsk2, | 1509 | const struct task_struct *tsk2, |
1510 | u32 perms) | 1510 | u32 perms) |
1511 | { | 1511 | { |
1512 | const struct task_security_struct *__tsec1, *__tsec2; | 1512 | const struct task_security_struct *__tsec1, *__tsec2; |
1513 | u32 sid1, sid2; | 1513 | u32 sid1, sid2; |
1514 | 1514 | ||
1515 | rcu_read_lock(); | 1515 | rcu_read_lock(); |
1516 | __tsec1 = __task_cred(tsk1)->security; sid1 = __tsec1->sid; | 1516 | __tsec1 = __task_cred(tsk1)->security; sid1 = __tsec1->sid; |
1517 | __tsec2 = __task_cred(tsk2)->security; sid2 = __tsec2->sid; | 1517 | __tsec2 = __task_cred(tsk2)->security; sid2 = __tsec2->sid; |
1518 | rcu_read_unlock(); | 1518 | rcu_read_unlock(); |
1519 | return avc_has_perm(sid1, sid2, SECCLASS_PROCESS, perms, NULL); | 1519 | return avc_has_perm(sid1, sid2, SECCLASS_PROCESS, perms, NULL); |
1520 | } | 1520 | } |
1521 | 1521 | ||
1522 | /* | 1522 | /* |
1523 | * Check permission between current and another task, e.g. signal checks, | 1523 | * Check permission between current and another task, e.g. signal checks, |
1524 | * fork check, ptrace check, etc. | 1524 | * fork check, ptrace check, etc. |
1525 | * current is the actor and tsk is the target | 1525 | * current is the actor and tsk is the target |
1526 | * - this uses current's subjective creds | 1526 | * - this uses current's subjective creds |
1527 | */ | 1527 | */ |
1528 | static int current_has_perm(const struct task_struct *tsk, | 1528 | static int current_has_perm(const struct task_struct *tsk, |
1529 | u32 perms) | 1529 | u32 perms) |
1530 | { | 1530 | { |
1531 | u32 sid, tsid; | 1531 | u32 sid, tsid; |
1532 | 1532 | ||
1533 | sid = current_sid(); | 1533 | sid = current_sid(); |
1534 | tsid = task_sid(tsk); | 1534 | tsid = task_sid(tsk); |
1535 | return avc_has_perm(sid, tsid, SECCLASS_PROCESS, perms, NULL); | 1535 | return avc_has_perm(sid, tsid, SECCLASS_PROCESS, perms, NULL); |
1536 | } | 1536 | } |
1537 | 1537 | ||
1538 | #if CAP_LAST_CAP > 63 | 1538 | #if CAP_LAST_CAP > 63 |
1539 | #error Fix SELinux to handle capabilities > 63. | 1539 | #error Fix SELinux to handle capabilities > 63. |
1540 | #endif | 1540 | #endif |
1541 | 1541 | ||
1542 | /* Check whether a task is allowed to use a capability. */ | 1542 | /* Check whether a task is allowed to use a capability. */ |
1543 | static int cred_has_capability(const struct cred *cred, | 1543 | static int cred_has_capability(const struct cred *cred, |
1544 | int cap, int audit) | 1544 | int cap, int audit) |
1545 | { | 1545 | { |
1546 | struct common_audit_data ad; | 1546 | struct common_audit_data ad; |
1547 | struct av_decision avd; | 1547 | struct av_decision avd; |
1548 | u16 sclass; | 1548 | u16 sclass; |
1549 | u32 sid = cred_sid(cred); | 1549 | u32 sid = cred_sid(cred); |
1550 | u32 av = CAP_TO_MASK(cap); | 1550 | u32 av = CAP_TO_MASK(cap); |
1551 | int rc; | 1551 | int rc; |
1552 | 1552 | ||
1553 | ad.type = LSM_AUDIT_DATA_CAP; | 1553 | ad.type = LSM_AUDIT_DATA_CAP; |
1554 | ad.u.cap = cap; | 1554 | ad.u.cap = cap; |
1555 | 1555 | ||
1556 | switch (CAP_TO_INDEX(cap)) { | 1556 | switch (CAP_TO_INDEX(cap)) { |
1557 | case 0: | 1557 | case 0: |
1558 | sclass = SECCLASS_CAPABILITY; | 1558 | sclass = SECCLASS_CAPABILITY; |
1559 | break; | 1559 | break; |
1560 | case 1: | 1560 | case 1: |
1561 | sclass = SECCLASS_CAPABILITY2; | 1561 | sclass = SECCLASS_CAPABILITY2; |
1562 | break; | 1562 | break; |
1563 | default: | 1563 | default: |
1564 | printk(KERN_ERR | 1564 | printk(KERN_ERR |
1565 | "SELinux: out of range capability %d\n", cap); | 1565 | "SELinux: out of range capability %d\n", cap); |
1566 | BUG(); | 1566 | BUG(); |
1567 | return -EINVAL; | 1567 | return -EINVAL; |
1568 | } | 1568 | } |
1569 | 1569 | ||
1570 | rc = avc_has_perm_noaudit(sid, sid, sclass, av, 0, &avd); | 1570 | rc = avc_has_perm_noaudit(sid, sid, sclass, av, 0, &avd); |
1571 | if (audit == SECURITY_CAP_AUDIT) { | 1571 | if (audit == SECURITY_CAP_AUDIT) { |
1572 | int rc2 = avc_audit(sid, sid, sclass, av, &avd, rc, &ad); | 1572 | int rc2 = avc_audit(sid, sid, sclass, av, &avd, rc, &ad); |
1573 | if (rc2) | 1573 | if (rc2) |
1574 | return rc2; | 1574 | return rc2; |
1575 | } | 1575 | } |
1576 | return rc; | 1576 | return rc; |
1577 | } | 1577 | } |
1578 | 1578 | ||
1579 | /* Check whether a task is allowed to use a system operation. */ | 1579 | /* Check whether a task is allowed to use a system operation. */ |
1580 | static int task_has_system(struct task_struct *tsk, | 1580 | static int task_has_system(struct task_struct *tsk, |
1581 | u32 perms) | 1581 | u32 perms) |
1582 | { | 1582 | { |
1583 | u32 sid = task_sid(tsk); | 1583 | u32 sid = task_sid(tsk); |
1584 | 1584 | ||
1585 | return avc_has_perm(sid, SECINITSID_KERNEL, | 1585 | return avc_has_perm(sid, SECINITSID_KERNEL, |
1586 | SECCLASS_SYSTEM, perms, NULL); | 1586 | SECCLASS_SYSTEM, perms, NULL); |
1587 | } | 1587 | } |
1588 | 1588 | ||
1589 | /* Check whether a task has a particular permission to an inode. | 1589 | /* Check whether a task has a particular permission to an inode. |
1590 | The 'adp' parameter is optional and allows other audit | 1590 | The 'adp' parameter is optional and allows other audit |
1591 | data to be passed (e.g. the dentry). */ | 1591 | data to be passed (e.g. the dentry). */ |
1592 | static int inode_has_perm(const struct cred *cred, | 1592 | static int inode_has_perm(const struct cred *cred, |
1593 | struct inode *inode, | 1593 | struct inode *inode, |
1594 | u32 perms, | 1594 | u32 perms, |
1595 | struct common_audit_data *adp) | 1595 | struct common_audit_data *adp) |
1596 | { | 1596 | { |
1597 | struct inode_security_struct *isec; | 1597 | struct inode_security_struct *isec; |
1598 | u32 sid; | 1598 | u32 sid; |
1599 | 1599 | ||
1600 | validate_creds(cred); | 1600 | validate_creds(cred); |
1601 | 1601 | ||
1602 | if (unlikely(IS_PRIVATE(inode))) | 1602 | if (unlikely(IS_PRIVATE(inode))) |
1603 | return 0; | 1603 | return 0; |
1604 | 1604 | ||
1605 | sid = cred_sid(cred); | 1605 | sid = cred_sid(cred); |
1606 | isec = inode->i_security; | 1606 | isec = inode->i_security; |
1607 | 1607 | ||
1608 | return avc_has_perm(sid, isec->sid, isec->sclass, perms, adp); | 1608 | return avc_has_perm(sid, isec->sid, isec->sclass, perms, adp); |
1609 | } | 1609 | } |
1610 | 1610 | ||
1611 | /* Same as inode_has_perm, but pass explicit audit data containing | 1611 | /* Same as inode_has_perm, but pass explicit audit data containing |
1612 | the dentry to help the auditing code to more easily generate the | 1612 | the dentry to help the auditing code to more easily generate the |
1613 | pathname if needed. */ | 1613 | pathname if needed. */ |
1614 | static inline int dentry_has_perm(const struct cred *cred, | 1614 | static inline int dentry_has_perm(const struct cred *cred, |
1615 | struct dentry *dentry, | 1615 | struct dentry *dentry, |
1616 | u32 av) | 1616 | u32 av) |
1617 | { | 1617 | { |
1618 | struct inode *inode = dentry->d_inode; | 1618 | struct inode *inode = dentry->d_inode; |
1619 | struct common_audit_data ad; | 1619 | struct common_audit_data ad; |
1620 | 1620 | ||
1621 | ad.type = LSM_AUDIT_DATA_DENTRY; | 1621 | ad.type = LSM_AUDIT_DATA_DENTRY; |
1622 | ad.u.dentry = dentry; | 1622 | ad.u.dentry = dentry; |
1623 | return inode_has_perm(cred, inode, av, &ad); | 1623 | return inode_has_perm(cred, inode, av, &ad); |
1624 | } | 1624 | } |
1625 | 1625 | ||
1626 | /* Same as inode_has_perm, but pass explicit audit data containing | 1626 | /* Same as inode_has_perm, but pass explicit audit data containing |
1627 | the path to help the auditing code to more easily generate the | 1627 | the path to help the auditing code to more easily generate the |
1628 | pathname if needed. */ | 1628 | pathname if needed. */ |
1629 | static inline int path_has_perm(const struct cred *cred, | 1629 | static inline int path_has_perm(const struct cred *cred, |
1630 | struct path *path, | 1630 | struct path *path, |
1631 | u32 av) | 1631 | u32 av) |
1632 | { | 1632 | { |
1633 | struct inode *inode = path->dentry->d_inode; | 1633 | struct inode *inode = path->dentry->d_inode; |
1634 | struct common_audit_data ad; | 1634 | struct common_audit_data ad; |
1635 | 1635 | ||
1636 | ad.type = LSM_AUDIT_DATA_PATH; | 1636 | ad.type = LSM_AUDIT_DATA_PATH; |
1637 | ad.u.path = *path; | 1637 | ad.u.path = *path; |
1638 | return inode_has_perm(cred, inode, av, &ad); | 1638 | return inode_has_perm(cred, inode, av, &ad); |
1639 | } | 1639 | } |
1640 | 1640 | ||
1641 | /* Same as path_has_perm, but uses the inode from the file struct. */ | 1641 | /* Same as path_has_perm, but uses the inode from the file struct. */ |
1642 | static inline int file_path_has_perm(const struct cred *cred, | 1642 | static inline int file_path_has_perm(const struct cred *cred, |
1643 | struct file *file, | 1643 | struct file *file, |
1644 | u32 av) | 1644 | u32 av) |
1645 | { | 1645 | { |
1646 | struct common_audit_data ad; | 1646 | struct common_audit_data ad; |
1647 | 1647 | ||
1648 | ad.type = LSM_AUDIT_DATA_PATH; | 1648 | ad.type = LSM_AUDIT_DATA_PATH; |
1649 | ad.u.path = file->f_path; | 1649 | ad.u.path = file->f_path; |
1650 | return inode_has_perm(cred, file_inode(file), av, &ad); | 1650 | return inode_has_perm(cred, file_inode(file), av, &ad); |
1651 | } | 1651 | } |
1652 | 1652 | ||
1653 | /* Check whether a task can use an open file descriptor to | 1653 | /* Check whether a task can use an open file descriptor to |
1654 | access an inode in a given way. Check access to the | 1654 | access an inode in a given way. Check access to the |
1655 | descriptor itself, and then use inode_has_perm to | 1655 | descriptor itself, and then use inode_has_perm to |
1656 | check a particular permission to the file. | 1656 | check a particular permission to the file. |
1657 | Access to the descriptor is implicitly granted if it | 1657 | Access to the descriptor is implicitly granted if it |
1658 | has the same SID as the process. If av is zero, then | 1658 | has the same SID as the process. If av is zero, then |
1659 | access to the file is not checked, e.g. for cases | 1659 | access to the file is not checked, e.g. for cases |
1660 | where only the descriptor is affected like seek. */ | 1660 | where only the descriptor is affected like seek. */ |
1661 | static int file_has_perm(const struct cred *cred, | 1661 | static int file_has_perm(const struct cred *cred, |
1662 | struct file *file, | 1662 | struct file *file, |
1663 | u32 av) | 1663 | u32 av) |
1664 | { | 1664 | { |
1665 | struct file_security_struct *fsec = file->f_security; | 1665 | struct file_security_struct *fsec = file->f_security; |
1666 | struct inode *inode = file_inode(file); | 1666 | struct inode *inode = file_inode(file); |
1667 | struct common_audit_data ad; | 1667 | struct common_audit_data ad; |
1668 | u32 sid = cred_sid(cred); | 1668 | u32 sid = cred_sid(cred); |
1669 | int rc; | 1669 | int rc; |
1670 | 1670 | ||
1671 | ad.type = LSM_AUDIT_DATA_PATH; | 1671 | ad.type = LSM_AUDIT_DATA_PATH; |
1672 | ad.u.path = file->f_path; | 1672 | ad.u.path = file->f_path; |
1673 | 1673 | ||
1674 | if (sid != fsec->sid) { | 1674 | if (sid != fsec->sid) { |
1675 | rc = avc_has_perm(sid, fsec->sid, | 1675 | rc = avc_has_perm(sid, fsec->sid, |
1676 | SECCLASS_FD, | 1676 | SECCLASS_FD, |
1677 | FD__USE, | 1677 | FD__USE, |
1678 | &ad); | 1678 | &ad); |
1679 | if (rc) | 1679 | if (rc) |
1680 | goto out; | 1680 | goto out; |
1681 | } | 1681 | } |
1682 | 1682 | ||
1683 | /* av is zero if only checking access to the descriptor. */ | 1683 | /* av is zero if only checking access to the descriptor. */ |
1684 | rc = 0; | 1684 | rc = 0; |
1685 | if (av) | 1685 | if (av) |
1686 | rc = inode_has_perm(cred, inode, av, &ad); | 1686 | rc = inode_has_perm(cred, inode, av, &ad); |
1687 | 1687 | ||
1688 | out: | 1688 | out: |
1689 | return rc; | 1689 | return rc; |
1690 | } | 1690 | } |
1691 | 1691 | ||
1692 | /* Check whether a task can create a file. */ | 1692 | /* Check whether a task can create a file. */ |
1693 | static int may_create(struct inode *dir, | 1693 | static int may_create(struct inode *dir, |
1694 | struct dentry *dentry, | 1694 | struct dentry *dentry, |
1695 | u16 tclass) | 1695 | u16 tclass) |
1696 | { | 1696 | { |
1697 | const struct task_security_struct *tsec = current_security(); | 1697 | const struct task_security_struct *tsec = current_security(); |
1698 | struct inode_security_struct *dsec; | 1698 | struct inode_security_struct *dsec; |
1699 | struct superblock_security_struct *sbsec; | 1699 | struct superblock_security_struct *sbsec; |
1700 | u32 sid, newsid; | 1700 | u32 sid, newsid; |
1701 | struct common_audit_data ad; | 1701 | struct common_audit_data ad; |
1702 | int rc; | 1702 | int rc; |
1703 | 1703 | ||
1704 | dsec = dir->i_security; | 1704 | dsec = dir->i_security; |
1705 | sbsec = dir->i_sb->s_security; | 1705 | sbsec = dir->i_sb->s_security; |
1706 | 1706 | ||
1707 | sid = tsec->sid; | 1707 | sid = tsec->sid; |
1708 | newsid = tsec->create_sid; | 1708 | newsid = tsec->create_sid; |
1709 | 1709 | ||
1710 | ad.type = LSM_AUDIT_DATA_DENTRY; | 1710 | ad.type = LSM_AUDIT_DATA_DENTRY; |
1711 | ad.u.dentry = dentry; | 1711 | ad.u.dentry = dentry; |
1712 | 1712 | ||
1713 | rc = avc_has_perm(sid, dsec->sid, SECCLASS_DIR, | 1713 | rc = avc_has_perm(sid, dsec->sid, SECCLASS_DIR, |
1714 | DIR__ADD_NAME | DIR__SEARCH, | 1714 | DIR__ADD_NAME | DIR__SEARCH, |
1715 | &ad); | 1715 | &ad); |
1716 | if (rc) | 1716 | if (rc) |
1717 | return rc; | 1717 | return rc; |
1718 | 1718 | ||
1719 | if (!newsid || !(sbsec->flags & SBLABEL_MNT)) { | 1719 | if (!newsid || !(sbsec->flags & SBLABEL_MNT)) { |
1720 | rc = security_transition_sid(sid, dsec->sid, tclass, | 1720 | rc = security_transition_sid(sid, dsec->sid, tclass, |
1721 | &dentry->d_name, &newsid); | 1721 | &dentry->d_name, &newsid); |
1722 | if (rc) | 1722 | if (rc) |
1723 | return rc; | 1723 | return rc; |
1724 | } | 1724 | } |
1725 | 1725 | ||
1726 | rc = avc_has_perm(sid, newsid, tclass, FILE__CREATE, &ad); | 1726 | rc = avc_has_perm(sid, newsid, tclass, FILE__CREATE, &ad); |
1727 | if (rc) | 1727 | if (rc) |
1728 | return rc; | 1728 | return rc; |
1729 | 1729 | ||
1730 | return avc_has_perm(newsid, sbsec->sid, | 1730 | return avc_has_perm(newsid, sbsec->sid, |
1731 | SECCLASS_FILESYSTEM, | 1731 | SECCLASS_FILESYSTEM, |
1732 | FILESYSTEM__ASSOCIATE, &ad); | 1732 | FILESYSTEM__ASSOCIATE, &ad); |
1733 | } | 1733 | } |
1734 | 1734 | ||
1735 | /* Check whether a task can create a key. */ | 1735 | /* Check whether a task can create a key. */ |
1736 | static int may_create_key(u32 ksid, | 1736 | static int may_create_key(u32 ksid, |
1737 | struct task_struct *ctx) | 1737 | struct task_struct *ctx) |
1738 | { | 1738 | { |
1739 | u32 sid = task_sid(ctx); | 1739 | u32 sid = task_sid(ctx); |
1740 | 1740 | ||
1741 | return avc_has_perm(sid, ksid, SECCLASS_KEY, KEY__CREATE, NULL); | 1741 | return avc_has_perm(sid, ksid, SECCLASS_KEY, KEY__CREATE, NULL); |
1742 | } | 1742 | } |
1743 | 1743 | ||
1744 | #define MAY_LINK 0 | 1744 | #define MAY_LINK 0 |
1745 | #define MAY_UNLINK 1 | 1745 | #define MAY_UNLINK 1 |
1746 | #define MAY_RMDIR 2 | 1746 | #define MAY_RMDIR 2 |
1747 | 1747 | ||
1748 | /* Check whether a task can link, unlink, or rmdir a file/directory. */ | 1748 | /* Check whether a task can link, unlink, or rmdir a file/directory. */ |
1749 | static int may_link(struct inode *dir, | 1749 | static int may_link(struct inode *dir, |
1750 | struct dentry *dentry, | 1750 | struct dentry *dentry, |
1751 | int kind) | 1751 | int kind) |
1752 | 1752 | ||
1753 | { | 1753 | { |
1754 | struct inode_security_struct *dsec, *isec; | 1754 | struct inode_security_struct *dsec, *isec; |
1755 | struct common_audit_data ad; | 1755 | struct common_audit_data ad; |
1756 | u32 sid = current_sid(); | 1756 | u32 sid = current_sid(); |
1757 | u32 av; | 1757 | u32 av; |
1758 | int rc; | 1758 | int rc; |
1759 | 1759 | ||
1760 | dsec = dir->i_security; | 1760 | dsec = dir->i_security; |
1761 | isec = dentry->d_inode->i_security; | 1761 | isec = dentry->d_inode->i_security; |
1762 | 1762 | ||
1763 | ad.type = LSM_AUDIT_DATA_DENTRY; | 1763 | ad.type = LSM_AUDIT_DATA_DENTRY; |
1764 | ad.u.dentry = dentry; | 1764 | ad.u.dentry = dentry; |
1765 | 1765 | ||
1766 | av = DIR__SEARCH; | 1766 | av = DIR__SEARCH; |
1767 | av |= (kind ? DIR__REMOVE_NAME : DIR__ADD_NAME); | 1767 | av |= (kind ? DIR__REMOVE_NAME : DIR__ADD_NAME); |
1768 | rc = avc_has_perm(sid, dsec->sid, SECCLASS_DIR, av, &ad); | 1768 | rc = avc_has_perm(sid, dsec->sid, SECCLASS_DIR, av, &ad); |
1769 | if (rc) | 1769 | if (rc) |
1770 | return rc; | 1770 | return rc; |
1771 | 1771 | ||
1772 | switch (kind) { | 1772 | switch (kind) { |
1773 | case MAY_LINK: | 1773 | case MAY_LINK: |
1774 | av = FILE__LINK; | 1774 | av = FILE__LINK; |
1775 | break; | 1775 | break; |
1776 | case MAY_UNLINK: | 1776 | case MAY_UNLINK: |
1777 | av = FILE__UNLINK; | 1777 | av = FILE__UNLINK; |
1778 | break; | 1778 | break; |
1779 | case MAY_RMDIR: | 1779 | case MAY_RMDIR: |
1780 | av = DIR__RMDIR; | 1780 | av = DIR__RMDIR; |
1781 | break; | 1781 | break; |
1782 | default: | 1782 | default: |
1783 | printk(KERN_WARNING "SELinux: %s: unrecognized kind %d\n", | 1783 | printk(KERN_WARNING "SELinux: %s: unrecognized kind %d\n", |
1784 | __func__, kind); | 1784 | __func__, kind); |
1785 | return 0; | 1785 | return 0; |
1786 | } | 1786 | } |
1787 | 1787 | ||
1788 | rc = avc_has_perm(sid, isec->sid, isec->sclass, av, &ad); | 1788 | rc = avc_has_perm(sid, isec->sid, isec->sclass, av, &ad); |
1789 | return rc; | 1789 | return rc; |
1790 | } | 1790 | } |
1791 | 1791 | ||
1792 | static inline int may_rename(struct inode *old_dir, | 1792 | static inline int may_rename(struct inode *old_dir, |
1793 | struct dentry *old_dentry, | 1793 | struct dentry *old_dentry, |
1794 | struct inode *new_dir, | 1794 | struct inode *new_dir, |
1795 | struct dentry *new_dentry) | 1795 | struct dentry *new_dentry) |
1796 | { | 1796 | { |
1797 | struct inode_security_struct *old_dsec, *new_dsec, *old_isec, *new_isec; | 1797 | struct inode_security_struct *old_dsec, *new_dsec, *old_isec, *new_isec; |
1798 | struct common_audit_data ad; | 1798 | struct common_audit_data ad; |
1799 | u32 sid = current_sid(); | 1799 | u32 sid = current_sid(); |
1800 | u32 av; | 1800 | u32 av; |
1801 | int old_is_dir, new_is_dir; | 1801 | int old_is_dir, new_is_dir; |
1802 | int rc; | 1802 | int rc; |
1803 | 1803 | ||
1804 | old_dsec = old_dir->i_security; | 1804 | old_dsec = old_dir->i_security; |
1805 | old_isec = old_dentry->d_inode->i_security; | 1805 | old_isec = old_dentry->d_inode->i_security; |
1806 | old_is_dir = S_ISDIR(old_dentry->d_inode->i_mode); | 1806 | old_is_dir = S_ISDIR(old_dentry->d_inode->i_mode); |
1807 | new_dsec = new_dir->i_security; | 1807 | new_dsec = new_dir->i_security; |
1808 | 1808 | ||
1809 | ad.type = LSM_AUDIT_DATA_DENTRY; | 1809 | ad.type = LSM_AUDIT_DATA_DENTRY; |
1810 | 1810 | ||
1811 | ad.u.dentry = old_dentry; | 1811 | ad.u.dentry = old_dentry; |
1812 | rc = avc_has_perm(sid, old_dsec->sid, SECCLASS_DIR, | 1812 | rc = avc_has_perm(sid, old_dsec->sid, SECCLASS_DIR, |
1813 | DIR__REMOVE_NAME | DIR__SEARCH, &ad); | 1813 | DIR__REMOVE_NAME | DIR__SEARCH, &ad); |
1814 | if (rc) | 1814 | if (rc) |
1815 | return rc; | 1815 | return rc; |
1816 | rc = avc_has_perm(sid, old_isec->sid, | 1816 | rc = avc_has_perm(sid, old_isec->sid, |
1817 | old_isec->sclass, FILE__RENAME, &ad); | 1817 | old_isec->sclass, FILE__RENAME, &ad); |
1818 | if (rc) | 1818 | if (rc) |
1819 | return rc; | 1819 | return rc; |
1820 | if (old_is_dir && new_dir != old_dir) { | 1820 | if (old_is_dir && new_dir != old_dir) { |
1821 | rc = avc_has_perm(sid, old_isec->sid, | 1821 | rc = avc_has_perm(sid, old_isec->sid, |
1822 | old_isec->sclass, DIR__REPARENT, &ad); | 1822 | old_isec->sclass, DIR__REPARENT, &ad); |
1823 | if (rc) | 1823 | if (rc) |
1824 | return rc; | 1824 | return rc; |
1825 | } | 1825 | } |
1826 | 1826 | ||
1827 | ad.u.dentry = new_dentry; | 1827 | ad.u.dentry = new_dentry; |
1828 | av = DIR__ADD_NAME | DIR__SEARCH; | 1828 | av = DIR__ADD_NAME | DIR__SEARCH; |
1829 | if (new_dentry->d_inode) | 1829 | if (new_dentry->d_inode) |
1830 | av |= DIR__REMOVE_NAME; | 1830 | av |= DIR__REMOVE_NAME; |
1831 | rc = avc_has_perm(sid, new_dsec->sid, SECCLASS_DIR, av, &ad); | 1831 | rc = avc_has_perm(sid, new_dsec->sid, SECCLASS_DIR, av, &ad); |
1832 | if (rc) | 1832 | if (rc) |
1833 | return rc; | 1833 | return rc; |
1834 | if (new_dentry->d_inode) { | 1834 | if (new_dentry->d_inode) { |
1835 | new_isec = new_dentry->d_inode->i_security; | 1835 | new_isec = new_dentry->d_inode->i_security; |
1836 | new_is_dir = S_ISDIR(new_dentry->d_inode->i_mode); | 1836 | new_is_dir = S_ISDIR(new_dentry->d_inode->i_mode); |
1837 | rc = avc_has_perm(sid, new_isec->sid, | 1837 | rc = avc_has_perm(sid, new_isec->sid, |
1838 | new_isec->sclass, | 1838 | new_isec->sclass, |
1839 | (new_is_dir ? DIR__RMDIR : FILE__UNLINK), &ad); | 1839 | (new_is_dir ? DIR__RMDIR : FILE__UNLINK), &ad); |
1840 | if (rc) | 1840 | if (rc) |
1841 | return rc; | 1841 | return rc; |
1842 | } | 1842 | } |
1843 | 1843 | ||
1844 | return 0; | 1844 | return 0; |
1845 | } | 1845 | } |
1846 | 1846 | ||
1847 | /* Check whether a task can perform a filesystem operation. */ | 1847 | /* Check whether a task can perform a filesystem operation. */ |
1848 | static int superblock_has_perm(const struct cred *cred, | 1848 | static int superblock_has_perm(const struct cred *cred, |
1849 | struct super_block *sb, | 1849 | struct super_block *sb, |
1850 | u32 perms, | 1850 | u32 perms, |
1851 | struct common_audit_data *ad) | 1851 | struct common_audit_data *ad) |
1852 | { | 1852 | { |
1853 | struct superblock_security_struct *sbsec; | 1853 | struct superblock_security_struct *sbsec; |
1854 | u32 sid = cred_sid(cred); | 1854 | u32 sid = cred_sid(cred); |
1855 | 1855 | ||
1856 | sbsec = sb->s_security; | 1856 | sbsec = sb->s_security; |
1857 | return avc_has_perm(sid, sbsec->sid, SECCLASS_FILESYSTEM, perms, ad); | 1857 | return avc_has_perm(sid, sbsec->sid, SECCLASS_FILESYSTEM, perms, ad); |
1858 | } | 1858 | } |
1859 | 1859 | ||
1860 | /* Convert a Linux mode and permission mask to an access vector. */ | 1860 | /* Convert a Linux mode and permission mask to an access vector. */ |
1861 | static inline u32 file_mask_to_av(int mode, int mask) | 1861 | static inline u32 file_mask_to_av(int mode, int mask) |
1862 | { | 1862 | { |
1863 | u32 av = 0; | 1863 | u32 av = 0; |
1864 | 1864 | ||
1865 | if (!S_ISDIR(mode)) { | 1865 | if (!S_ISDIR(mode)) { |
1866 | if (mask & MAY_EXEC) | 1866 | if (mask & MAY_EXEC) |
1867 | av |= FILE__EXECUTE; | 1867 | av |= FILE__EXECUTE; |
1868 | if (mask & MAY_READ) | 1868 | if (mask & MAY_READ) |
1869 | av |= FILE__READ; | 1869 | av |= FILE__READ; |
1870 | 1870 | ||
1871 | if (mask & MAY_APPEND) | 1871 | if (mask & MAY_APPEND) |
1872 | av |= FILE__APPEND; | 1872 | av |= FILE__APPEND; |
1873 | else if (mask & MAY_WRITE) | 1873 | else if (mask & MAY_WRITE) |
1874 | av |= FILE__WRITE; | 1874 | av |= FILE__WRITE; |
1875 | 1875 | ||
1876 | } else { | 1876 | } else { |
1877 | if (mask & MAY_EXEC) | 1877 | if (mask & MAY_EXEC) |
1878 | av |= DIR__SEARCH; | 1878 | av |= DIR__SEARCH; |
1879 | if (mask & MAY_WRITE) | 1879 | if (mask & MAY_WRITE) |
1880 | av |= DIR__WRITE; | 1880 | av |= DIR__WRITE; |
1881 | if (mask & MAY_READ) | 1881 | if (mask & MAY_READ) |
1882 | av |= DIR__READ; | 1882 | av |= DIR__READ; |
1883 | } | 1883 | } |
1884 | 1884 | ||
1885 | return av; | 1885 | return av; |
1886 | } | 1886 | } |
1887 | 1887 | ||
1888 | /* Convert a Linux file to an access vector. */ | 1888 | /* Convert a Linux file to an access vector. */ |
1889 | static inline u32 file_to_av(struct file *file) | 1889 | static inline u32 file_to_av(struct file *file) |
1890 | { | 1890 | { |
1891 | u32 av = 0; | 1891 | u32 av = 0; |
1892 | 1892 | ||
1893 | if (file->f_mode & FMODE_READ) | 1893 | if (file->f_mode & FMODE_READ) |
1894 | av |= FILE__READ; | 1894 | av |= FILE__READ; |
1895 | if (file->f_mode & FMODE_WRITE) { | 1895 | if (file->f_mode & FMODE_WRITE) { |
1896 | if (file->f_flags & O_APPEND) | 1896 | if (file->f_flags & O_APPEND) |
1897 | av |= FILE__APPEND; | 1897 | av |= FILE__APPEND; |
1898 | else | 1898 | else |
1899 | av |= FILE__WRITE; | 1899 | av |= FILE__WRITE; |
1900 | } | 1900 | } |
1901 | if (!av) { | 1901 | if (!av) { |
1902 | /* | 1902 | /* |
1903 | * Special file opened with flags 3 for ioctl-only use. | 1903 | * Special file opened with flags 3 for ioctl-only use. |
1904 | */ | 1904 | */ |
1905 | av = FILE__IOCTL; | 1905 | av = FILE__IOCTL; |
1906 | } | 1906 | } |
1907 | 1907 | ||
1908 | return av; | 1908 | return av; |
1909 | } | 1909 | } |
1910 | 1910 | ||
1911 | /* | 1911 | /* |
1912 | * Convert a file to an access vector and include the correct open | 1912 | * Convert a file to an access vector and include the correct open |
1913 | * open permission. | 1913 | * open permission. |
1914 | */ | 1914 | */ |
1915 | static inline u32 open_file_to_av(struct file *file) | 1915 | static inline u32 open_file_to_av(struct file *file) |
1916 | { | 1916 | { |
1917 | u32 av = file_to_av(file); | 1917 | u32 av = file_to_av(file); |
1918 | 1918 | ||
1919 | if (selinux_policycap_openperm) | 1919 | if (selinux_policycap_openperm) |
1920 | av |= FILE__OPEN; | 1920 | av |= FILE__OPEN; |
1921 | 1921 | ||
1922 | return av; | 1922 | return av; |
1923 | } | 1923 | } |
1924 | 1924 | ||
1925 | /* Hook functions begin here. */ | 1925 | /* Hook functions begin here. */ |
1926 | 1926 | ||
1927 | static int selinux_ptrace_access_check(struct task_struct *child, | 1927 | static int selinux_ptrace_access_check(struct task_struct *child, |
1928 | unsigned int mode) | 1928 | unsigned int mode) |
1929 | { | 1929 | { |
1930 | int rc; | 1930 | int rc; |
1931 | 1931 | ||
1932 | rc = cap_ptrace_access_check(child, mode); | 1932 | rc = cap_ptrace_access_check(child, mode); |
1933 | if (rc) | 1933 | if (rc) |
1934 | return rc; | 1934 | return rc; |
1935 | 1935 | ||
1936 | if (mode & PTRACE_MODE_READ) { | 1936 | if (mode & PTRACE_MODE_READ) { |
1937 | u32 sid = current_sid(); | 1937 | u32 sid = current_sid(); |
1938 | u32 csid = task_sid(child); | 1938 | u32 csid = task_sid(child); |
1939 | return avc_has_perm(sid, csid, SECCLASS_FILE, FILE__READ, NULL); | 1939 | return avc_has_perm(sid, csid, SECCLASS_FILE, FILE__READ, NULL); |
1940 | } | 1940 | } |
1941 | 1941 | ||
1942 | return current_has_perm(child, PROCESS__PTRACE); | 1942 | return current_has_perm(child, PROCESS__PTRACE); |
1943 | } | 1943 | } |
1944 | 1944 | ||
1945 | static int selinux_ptrace_traceme(struct task_struct *parent) | 1945 | static int selinux_ptrace_traceme(struct task_struct *parent) |
1946 | { | 1946 | { |
1947 | int rc; | 1947 | int rc; |
1948 | 1948 | ||
1949 | rc = cap_ptrace_traceme(parent); | 1949 | rc = cap_ptrace_traceme(parent); |
1950 | if (rc) | 1950 | if (rc) |
1951 | return rc; | 1951 | return rc; |
1952 | 1952 | ||
1953 | return task_has_perm(parent, current, PROCESS__PTRACE); | 1953 | return task_has_perm(parent, current, PROCESS__PTRACE); |
1954 | } | 1954 | } |
1955 | 1955 | ||
1956 | static int selinux_capget(struct task_struct *target, kernel_cap_t *effective, | 1956 | static int selinux_capget(struct task_struct *target, kernel_cap_t *effective, |
1957 | kernel_cap_t *inheritable, kernel_cap_t *permitted) | 1957 | kernel_cap_t *inheritable, kernel_cap_t *permitted) |
1958 | { | 1958 | { |
1959 | int error; | 1959 | int error; |
1960 | 1960 | ||
1961 | error = current_has_perm(target, PROCESS__GETCAP); | 1961 | error = current_has_perm(target, PROCESS__GETCAP); |
1962 | if (error) | 1962 | if (error) |
1963 | return error; | 1963 | return error; |
1964 | 1964 | ||
1965 | return cap_capget(target, effective, inheritable, permitted); | 1965 | return cap_capget(target, effective, inheritable, permitted); |
1966 | } | 1966 | } |
1967 | 1967 | ||
1968 | static int selinux_capset(struct cred *new, const struct cred *old, | 1968 | static int selinux_capset(struct cred *new, const struct cred *old, |
1969 | const kernel_cap_t *effective, | 1969 | const kernel_cap_t *effective, |
1970 | const kernel_cap_t *inheritable, | 1970 | const kernel_cap_t *inheritable, |
1971 | const kernel_cap_t *permitted) | 1971 | const kernel_cap_t *permitted) |
1972 | { | 1972 | { |
1973 | int error; | 1973 | int error; |
1974 | 1974 | ||
1975 | error = cap_capset(new, old, | 1975 | error = cap_capset(new, old, |
1976 | effective, inheritable, permitted); | 1976 | effective, inheritable, permitted); |
1977 | if (error) | 1977 | if (error) |
1978 | return error; | 1978 | return error; |
1979 | 1979 | ||
1980 | return cred_has_perm(old, new, PROCESS__SETCAP); | 1980 | return cred_has_perm(old, new, PROCESS__SETCAP); |
1981 | } | 1981 | } |
1982 | 1982 | ||
1983 | /* | 1983 | /* |
1984 | * (This comment used to live with the selinux_task_setuid hook, | 1984 | * (This comment used to live with the selinux_task_setuid hook, |
1985 | * which was removed). | 1985 | * which was removed). |
1986 | * | 1986 | * |
1987 | * Since setuid only affects the current process, and since the SELinux | 1987 | * Since setuid only affects the current process, and since the SELinux |
1988 | * controls are not based on the Linux identity attributes, SELinux does not | 1988 | * controls are not based on the Linux identity attributes, SELinux does not |
1989 | * need to control this operation. However, SELinux does control the use of | 1989 | * need to control this operation. However, SELinux does control the use of |
1990 | * the CAP_SETUID and CAP_SETGID capabilities using the capable hook. | 1990 | * the CAP_SETUID and CAP_SETGID capabilities using the capable hook. |
1991 | */ | 1991 | */ |
1992 | 1992 | ||
/*
 * Capability check: cap_capable() must allow the capability before the
 * SELinux check in cred_has_capability() is consulted.
 */
static int selinux_capable(const struct cred *cred, struct user_namespace *ns,
			   int cap, int audit)
{
	int err = cap_capable(cred, ns, cap, audit);

	if (err)
		return err;

	return cred_has_capability(cred, cap, audit);
}
2004 | 2004 | ||
2005 | static int selinux_quotactl(int cmds, int type, int id, struct super_block *sb) | 2005 | static int selinux_quotactl(int cmds, int type, int id, struct super_block *sb) |
2006 | { | 2006 | { |
2007 | const struct cred *cred = current_cred(); | 2007 | const struct cred *cred = current_cred(); |
2008 | int rc = 0; | 2008 | int rc = 0; |
2009 | 2009 | ||
2010 | if (!sb) | 2010 | if (!sb) |
2011 | return 0; | 2011 | return 0; |
2012 | 2012 | ||
2013 | switch (cmds) { | 2013 | switch (cmds) { |
2014 | case Q_SYNC: | 2014 | case Q_SYNC: |
2015 | case Q_QUOTAON: | 2015 | case Q_QUOTAON: |
2016 | case Q_QUOTAOFF: | 2016 | case Q_QUOTAOFF: |
2017 | case Q_SETINFO: | 2017 | case Q_SETINFO: |
2018 | case Q_SETQUOTA: | 2018 | case Q_SETQUOTA: |
2019 | rc = superblock_has_perm(cred, sb, FILESYSTEM__QUOTAMOD, NULL); | 2019 | rc = superblock_has_perm(cred, sb, FILESYSTEM__QUOTAMOD, NULL); |
2020 | break; | 2020 | break; |
2021 | case Q_GETFMT: | 2021 | case Q_GETFMT: |
2022 | case Q_GETINFO: | 2022 | case Q_GETINFO: |
2023 | case Q_GETQUOTA: | 2023 | case Q_GETQUOTA: |
2024 | rc = superblock_has_perm(cred, sb, FILESYSTEM__QUOTAGET, NULL); | 2024 | rc = superblock_has_perm(cred, sb, FILESYSTEM__QUOTAGET, NULL); |
2025 | break; | 2025 | break; |
2026 | default: | 2026 | default: |
2027 | rc = 0; /* let the kernel handle invalid cmds */ | 2027 | rc = 0; /* let the kernel handle invalid cmds */ |
2028 | break; | 2028 | break; |
2029 | } | 2029 | } |
2030 | return rc; | 2030 | return rc; |
2031 | } | 2031 | } |
2032 | 2032 | ||
2033 | static int selinux_quota_on(struct dentry *dentry) | 2033 | static int selinux_quota_on(struct dentry *dentry) |
2034 | { | 2034 | { |
2035 | const struct cred *cred = current_cred(); | 2035 | const struct cred *cred = current_cred(); |
2036 | 2036 | ||
2037 | return dentry_has_perm(cred, dentry, FILE__QUOTAON); | 2037 | return dentry_has_perm(cred, dentry, FILE__QUOTAON); |
2038 | } | 2038 | } |
2039 | 2039 | ||
2040 | static int selinux_syslog(int type) | 2040 | static int selinux_syslog(int type) |
2041 | { | 2041 | { |
2042 | int rc; | 2042 | int rc; |
2043 | 2043 | ||
2044 | switch (type) { | 2044 | switch (type) { |
2045 | case SYSLOG_ACTION_READ_ALL: /* Read last kernel messages */ | 2045 | case SYSLOG_ACTION_READ_ALL: /* Read last kernel messages */ |
2046 | case SYSLOG_ACTION_SIZE_BUFFER: /* Return size of the log buffer */ | 2046 | case SYSLOG_ACTION_SIZE_BUFFER: /* Return size of the log buffer */ |
2047 | rc = task_has_system(current, SYSTEM__SYSLOG_READ); | 2047 | rc = task_has_system(current, SYSTEM__SYSLOG_READ); |
2048 | break; | 2048 | break; |
2049 | case SYSLOG_ACTION_CONSOLE_OFF: /* Disable logging to console */ | 2049 | case SYSLOG_ACTION_CONSOLE_OFF: /* Disable logging to console */ |
2050 | case SYSLOG_ACTION_CONSOLE_ON: /* Enable logging to console */ | 2050 | case SYSLOG_ACTION_CONSOLE_ON: /* Enable logging to console */ |
2051 | /* Set level of messages printed to console */ | 2051 | /* Set level of messages printed to console */ |
2052 | case SYSLOG_ACTION_CONSOLE_LEVEL: | 2052 | case SYSLOG_ACTION_CONSOLE_LEVEL: |
2053 | rc = task_has_system(current, SYSTEM__SYSLOG_CONSOLE); | 2053 | rc = task_has_system(current, SYSTEM__SYSLOG_CONSOLE); |
2054 | break; | 2054 | break; |
2055 | case SYSLOG_ACTION_CLOSE: /* Close log */ | 2055 | case SYSLOG_ACTION_CLOSE: /* Close log */ |
2056 | case SYSLOG_ACTION_OPEN: /* Open log */ | 2056 | case SYSLOG_ACTION_OPEN: /* Open log */ |
2057 | case SYSLOG_ACTION_READ: /* Read from log */ | 2057 | case SYSLOG_ACTION_READ: /* Read from log */ |
2058 | case SYSLOG_ACTION_READ_CLEAR: /* Read/clear last kernel messages */ | 2058 | case SYSLOG_ACTION_READ_CLEAR: /* Read/clear last kernel messages */ |
2059 | case SYSLOG_ACTION_CLEAR: /* Clear ring buffer */ | 2059 | case SYSLOG_ACTION_CLEAR: /* Clear ring buffer */ |
2060 | default: | 2060 | default: |
2061 | rc = task_has_system(current, SYSTEM__SYSLOG_MOD); | 2061 | rc = task_has_system(current, SYSTEM__SYSLOG_MOD); |
2062 | break; | 2062 | break; |
2063 | } | 2063 | } |
2064 | return rc; | 2064 | return rc; |
2065 | } | 2065 | } |
2066 | 2066 | ||
2067 | /* | 2067 | /* |
2068 | * Check that a process has enough memory to allocate a new virtual | 2068 | * Check that a process has enough memory to allocate a new virtual |
2069 | * mapping. 0 means there is enough memory for the allocation to | 2069 | * mapping. 0 means there is enough memory for the allocation to |
2070 | * succeed and -ENOMEM implies there is not. | 2070 | * succeed and -ENOMEM implies there is not. |
2071 | * | 2071 | * |
2072 | * Do not audit the selinux permission check, as this is applied to all | 2072 | * Do not audit the selinux permission check, as this is applied to all |
2073 | * processes that allocate mappings. | 2073 | * processes that allocate mappings. |
2074 | */ | 2074 | */ |
2075 | static int selinux_vm_enough_memory(struct mm_struct *mm, long pages) | 2075 | static int selinux_vm_enough_memory(struct mm_struct *mm, long pages) |
2076 | { | 2076 | { |
2077 | int rc, cap_sys_admin = 0; | 2077 | int rc, cap_sys_admin = 0; |
2078 | 2078 | ||
2079 | rc = selinux_capable(current_cred(), &init_user_ns, CAP_SYS_ADMIN, | 2079 | rc = selinux_capable(current_cred(), &init_user_ns, CAP_SYS_ADMIN, |
2080 | SECURITY_CAP_NOAUDIT); | 2080 | SECURITY_CAP_NOAUDIT); |
2081 | if (rc == 0) | 2081 | if (rc == 0) |
2082 | cap_sys_admin = 1; | 2082 | cap_sys_admin = 1; |
2083 | 2083 | ||
2084 | return __vm_enough_memory(mm, pages, cap_sys_admin); | 2084 | return __vm_enough_memory(mm, pages, cap_sys_admin); |
2085 | } | 2085 | } |
2086 | 2086 | ||
2087 | /* binprm security operations */ | 2087 | /* binprm security operations */ |
2088 | 2088 | ||
2089 | static int selinux_bprm_set_creds(struct linux_binprm *bprm) | 2089 | static int selinux_bprm_set_creds(struct linux_binprm *bprm) |
2090 | { | 2090 | { |
2091 | const struct task_security_struct *old_tsec; | 2091 | const struct task_security_struct *old_tsec; |
2092 | struct task_security_struct *new_tsec; | 2092 | struct task_security_struct *new_tsec; |
2093 | struct inode_security_struct *isec; | 2093 | struct inode_security_struct *isec; |
2094 | struct common_audit_data ad; | 2094 | struct common_audit_data ad; |
2095 | struct inode *inode = file_inode(bprm->file); | 2095 | struct inode *inode = file_inode(bprm->file); |
2096 | int rc; | 2096 | int rc; |
2097 | 2097 | ||
2098 | rc = cap_bprm_set_creds(bprm); | 2098 | rc = cap_bprm_set_creds(bprm); |
2099 | if (rc) | 2099 | if (rc) |
2100 | return rc; | 2100 | return rc; |
2101 | 2101 | ||
2102 | /* SELinux context only depends on initial program or script and not | 2102 | /* SELinux context only depends on initial program or script and not |
2103 | * the script interpreter */ | 2103 | * the script interpreter */ |
2104 | if (bprm->cred_prepared) | 2104 | if (bprm->cred_prepared) |
2105 | return 0; | 2105 | return 0; |
2106 | 2106 | ||
2107 | old_tsec = current_security(); | 2107 | old_tsec = current_security(); |
2108 | new_tsec = bprm->cred->security; | 2108 | new_tsec = bprm->cred->security; |
2109 | isec = inode->i_security; | 2109 | isec = inode->i_security; |
2110 | 2110 | ||
2111 | /* Default to the current task SID. */ | 2111 | /* Default to the current task SID. */ |
2112 | new_tsec->sid = old_tsec->sid; | 2112 | new_tsec->sid = old_tsec->sid; |
2113 | new_tsec->osid = old_tsec->sid; | 2113 | new_tsec->osid = old_tsec->sid; |
2114 | 2114 | ||
2115 | /* Reset fs, key, and sock SIDs on execve. */ | 2115 | /* Reset fs, key, and sock SIDs on execve. */ |
2116 | new_tsec->create_sid = 0; | 2116 | new_tsec->create_sid = 0; |
2117 | new_tsec->keycreate_sid = 0; | 2117 | new_tsec->keycreate_sid = 0; |
2118 | new_tsec->sockcreate_sid = 0; | 2118 | new_tsec->sockcreate_sid = 0; |
2119 | 2119 | ||
2120 | if (old_tsec->exec_sid) { | 2120 | if (old_tsec->exec_sid) { |
2121 | new_tsec->sid = old_tsec->exec_sid; | 2121 | new_tsec->sid = old_tsec->exec_sid; |
2122 | /* Reset exec SID on execve. */ | 2122 | /* Reset exec SID on execve. */ |
2123 | new_tsec->exec_sid = 0; | 2123 | new_tsec->exec_sid = 0; |
2124 | 2124 | ||
2125 | /* | 2125 | /* |
2126 | * Minimize confusion: if no_new_privs and a transition is | 2126 | * Minimize confusion: if no_new_privs and a transition is |
2127 | * explicitly requested, then fail the exec. | 2127 | * explicitly requested, then fail the exec. |
2128 | */ | 2128 | */ |
2129 | if (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS) | 2129 | if (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS) |
2130 | return -EPERM; | 2130 | return -EPERM; |
2131 | } else { | 2131 | } else { |
2132 | /* Check for a default transition on this program. */ | 2132 | /* Check for a default transition on this program. */ |
2133 | rc = security_transition_sid(old_tsec->sid, isec->sid, | 2133 | rc = security_transition_sid(old_tsec->sid, isec->sid, |
2134 | SECCLASS_PROCESS, NULL, | 2134 | SECCLASS_PROCESS, NULL, |
2135 | &new_tsec->sid); | 2135 | &new_tsec->sid); |
2136 | if (rc) | 2136 | if (rc) |
2137 | return rc; | 2137 | return rc; |
2138 | } | 2138 | } |
2139 | 2139 | ||
2140 | ad.type = LSM_AUDIT_DATA_PATH; | 2140 | ad.type = LSM_AUDIT_DATA_PATH; |
2141 | ad.u.path = bprm->file->f_path; | 2141 | ad.u.path = bprm->file->f_path; |
2142 | 2142 | ||
2143 | if ((bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) || | 2143 | if ((bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) || |
2144 | (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS)) | 2144 | (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS)) |
2145 | new_tsec->sid = old_tsec->sid; | 2145 | new_tsec->sid = old_tsec->sid; |
2146 | 2146 | ||
2147 | if (new_tsec->sid == old_tsec->sid) { | 2147 | if (new_tsec->sid == old_tsec->sid) { |
2148 | rc = avc_has_perm(old_tsec->sid, isec->sid, | 2148 | rc = avc_has_perm(old_tsec->sid, isec->sid, |
2149 | SECCLASS_FILE, FILE__EXECUTE_NO_TRANS, &ad); | 2149 | SECCLASS_FILE, FILE__EXECUTE_NO_TRANS, &ad); |
2150 | if (rc) | 2150 | if (rc) |
2151 | return rc; | 2151 | return rc; |
2152 | } else { | 2152 | } else { |
2153 | /* Check permissions for the transition. */ | 2153 | /* Check permissions for the transition. */ |
2154 | rc = avc_has_perm(old_tsec->sid, new_tsec->sid, | 2154 | rc = avc_has_perm(old_tsec->sid, new_tsec->sid, |
2155 | SECCLASS_PROCESS, PROCESS__TRANSITION, &ad); | 2155 | SECCLASS_PROCESS, PROCESS__TRANSITION, &ad); |
2156 | if (rc) | 2156 | if (rc) |
2157 | return rc; | 2157 | return rc; |
2158 | 2158 | ||
2159 | rc = avc_has_perm(new_tsec->sid, isec->sid, | 2159 | rc = avc_has_perm(new_tsec->sid, isec->sid, |
2160 | SECCLASS_FILE, FILE__ENTRYPOINT, &ad); | 2160 | SECCLASS_FILE, FILE__ENTRYPOINT, &ad); |
2161 | if (rc) | 2161 | if (rc) |
2162 | return rc; | 2162 | return rc; |
2163 | 2163 | ||
2164 | /* Check for shared state */ | 2164 | /* Check for shared state */ |
2165 | if (bprm->unsafe & LSM_UNSAFE_SHARE) { | 2165 | if (bprm->unsafe & LSM_UNSAFE_SHARE) { |
2166 | rc = avc_has_perm(old_tsec->sid, new_tsec->sid, | 2166 | rc = avc_has_perm(old_tsec->sid, new_tsec->sid, |
2167 | SECCLASS_PROCESS, PROCESS__SHARE, | 2167 | SECCLASS_PROCESS, PROCESS__SHARE, |
2168 | NULL); | 2168 | NULL); |
2169 | if (rc) | 2169 | if (rc) |
2170 | return -EPERM; | 2170 | return -EPERM; |
2171 | } | 2171 | } |
2172 | 2172 | ||
2173 | /* Make sure that anyone attempting to ptrace over a task that | 2173 | /* Make sure that anyone attempting to ptrace over a task that |
2174 | * changes its SID has the appropriate permit */ | 2174 | * changes its SID has the appropriate permit */ |
2175 | if (bprm->unsafe & | 2175 | if (bprm->unsafe & |
2176 | (LSM_UNSAFE_PTRACE | LSM_UNSAFE_PTRACE_CAP)) { | 2176 | (LSM_UNSAFE_PTRACE | LSM_UNSAFE_PTRACE_CAP)) { |
2177 | struct task_struct *tracer; | 2177 | struct task_struct *tracer; |
2178 | struct task_security_struct *sec; | 2178 | struct task_security_struct *sec; |
2179 | u32 ptsid = 0; | 2179 | u32 ptsid = 0; |
2180 | 2180 | ||
2181 | rcu_read_lock(); | 2181 | rcu_read_lock(); |
2182 | tracer = ptrace_parent(current); | 2182 | tracer = ptrace_parent(current); |
2183 | if (likely(tracer != NULL)) { | 2183 | if (likely(tracer != NULL)) { |
2184 | sec = __task_cred(tracer)->security; | 2184 | sec = __task_cred(tracer)->security; |
2185 | ptsid = sec->sid; | 2185 | ptsid = sec->sid; |
2186 | } | 2186 | } |
2187 | rcu_read_unlock(); | 2187 | rcu_read_unlock(); |
2188 | 2188 | ||
2189 | if (ptsid != 0) { | 2189 | if (ptsid != 0) { |
2190 | rc = avc_has_perm(ptsid, new_tsec->sid, | 2190 | rc = avc_has_perm(ptsid, new_tsec->sid, |
2191 | SECCLASS_PROCESS, | 2191 | SECCLASS_PROCESS, |
2192 | PROCESS__PTRACE, NULL); | 2192 | PROCESS__PTRACE, NULL); |
2193 | if (rc) | 2193 | if (rc) |
2194 | return -EPERM; | 2194 | return -EPERM; |
2195 | } | 2195 | } |
2196 | } | 2196 | } |
2197 | 2197 | ||
2198 | /* Clear any possibly unsafe personality bits on exec: */ | 2198 | /* Clear any possibly unsafe personality bits on exec: */ |
2199 | bprm->per_clear |= PER_CLEAR_ON_SETID; | 2199 | bprm->per_clear |= PER_CLEAR_ON_SETID; |
2200 | } | 2200 | } |
2201 | 2201 | ||
2202 | return 0; | 2202 | return 0; |
2203 | } | 2203 | } |
2204 | 2204 | ||
2205 | static int selinux_bprm_secureexec(struct linux_binprm *bprm) | 2205 | static int selinux_bprm_secureexec(struct linux_binprm *bprm) |
2206 | { | 2206 | { |
2207 | const struct task_security_struct *tsec = current_security(); | 2207 | const struct task_security_struct *tsec = current_security(); |
2208 | u32 sid, osid; | 2208 | u32 sid, osid; |
2209 | int atsecure = 0; | 2209 | int atsecure = 0; |
2210 | 2210 | ||
2211 | sid = tsec->sid; | 2211 | sid = tsec->sid; |
2212 | osid = tsec->osid; | 2212 | osid = tsec->osid; |
2213 | 2213 | ||
2214 | if (osid != sid) { | 2214 | if (osid != sid) { |
2215 | /* Enable secure mode for SIDs transitions unless | 2215 | /* Enable secure mode for SIDs transitions unless |
2216 | the noatsecure permission is granted between | 2216 | the noatsecure permission is granted between |
2217 | the two SIDs, i.e. ahp returns 0. */ | 2217 | the two SIDs, i.e. ahp returns 0. */ |
2218 | atsecure = avc_has_perm(osid, sid, | 2218 | atsecure = avc_has_perm(osid, sid, |
2219 | SECCLASS_PROCESS, | 2219 | SECCLASS_PROCESS, |
2220 | PROCESS__NOATSECURE, NULL); | 2220 | PROCESS__NOATSECURE, NULL); |
2221 | } | 2221 | } |
2222 | 2222 | ||
2223 | return (atsecure || cap_bprm_secureexec(bprm)); | 2223 | return (atsecure || cap_bprm_secureexec(bprm)); |
2224 | } | 2224 | } |
2225 | 2225 | ||
/*
 * iterate_fd() callback: returns fd + 1 when file_has_perm() reports a
 * non-zero result for @file under the credentials passed in @p, and 0
 * otherwise.
 */
static int match_file(const void *p, struct file *file, unsigned fd)
{
	if (file_has_perm(p, file, file_to_av(file)))
		return fd + 1;

	return 0;
}
2230 | 2230 | ||
2231 | /* Derived from fs/exec.c:flush_old_files. */ | 2231 | /* Derived from fs/exec.c:flush_old_files. */ |
2232 | static inline void flush_unauthorized_files(const struct cred *cred, | 2232 | static inline void flush_unauthorized_files(const struct cred *cred, |
2233 | struct files_struct *files) | 2233 | struct files_struct *files) |
2234 | { | 2234 | { |
2235 | struct file *file, *devnull = NULL; | 2235 | struct file *file, *devnull = NULL; |
2236 | struct tty_struct *tty; | 2236 | struct tty_struct *tty; |
2237 | int drop_tty = 0; | 2237 | int drop_tty = 0; |
2238 | unsigned n; | 2238 | unsigned n; |
2239 | 2239 | ||
2240 | tty = get_current_tty(); | 2240 | tty = get_current_tty(); |
2241 | if (tty) { | 2241 | if (tty) { |
2242 | spin_lock(&tty_files_lock); | 2242 | spin_lock(&tty_files_lock); |
2243 | if (!list_empty(&tty->tty_files)) { | 2243 | if (!list_empty(&tty->tty_files)) { |
2244 | struct tty_file_private *file_priv; | 2244 | struct tty_file_private *file_priv; |
2245 | 2245 | ||
2246 | /* Revalidate access to controlling tty. | 2246 | /* Revalidate access to controlling tty. |
2247 | Use file_path_has_perm on the tty path directly | 2247 | Use file_path_has_perm on the tty path directly |
2248 | rather than using file_has_perm, as this particular | 2248 | rather than using file_has_perm, as this particular |
2249 | open file may belong to another process and we are | 2249 | open file may belong to another process and we are |
2250 | only interested in the inode-based check here. */ | 2250 | only interested in the inode-based check here. */ |
2251 | file_priv = list_first_entry(&tty->tty_files, | 2251 | file_priv = list_first_entry(&tty->tty_files, |
2252 | struct tty_file_private, list); | 2252 | struct tty_file_private, list); |
2253 | file = file_priv->file; | 2253 | file = file_priv->file; |
2254 | if (file_path_has_perm(cred, file, FILE__READ | FILE__WRITE)) | 2254 | if (file_path_has_perm(cred, file, FILE__READ | FILE__WRITE)) |
2255 | drop_tty = 1; | 2255 | drop_tty = 1; |
2256 | } | 2256 | } |
2257 | spin_unlock(&tty_files_lock); | 2257 | spin_unlock(&tty_files_lock); |
2258 | tty_kref_put(tty); | 2258 | tty_kref_put(tty); |
2259 | } | 2259 | } |
2260 | /* Reset controlling tty. */ | 2260 | /* Reset controlling tty. */ |
2261 | if (drop_tty) | 2261 | if (drop_tty) |
2262 | no_tty(); | 2262 | no_tty(); |
2263 | 2263 | ||
2264 | /* Revalidate access to inherited open files. */ | 2264 | /* Revalidate access to inherited open files. */ |
2265 | n = iterate_fd(files, 0, match_file, cred); | 2265 | n = iterate_fd(files, 0, match_file, cred); |
2266 | if (!n) /* none found? */ | 2266 | if (!n) /* none found? */ |
2267 | return; | 2267 | return; |
2268 | 2268 | ||
2269 | devnull = dentry_open(&selinux_null, O_RDWR, cred); | 2269 | devnull = dentry_open(&selinux_null, O_RDWR, cred); |
2270 | if (IS_ERR(devnull)) | 2270 | if (IS_ERR(devnull)) |
2271 | devnull = NULL; | 2271 | devnull = NULL; |
2272 | /* replace all the matching ones with this */ | 2272 | /* replace all the matching ones with this */ |
2273 | do { | 2273 | do { |
2274 | replace_fd(n - 1, devnull, 0); | 2274 | replace_fd(n - 1, devnull, 0); |
2275 | } while ((n = iterate_fd(files, n, match_file, cred)) != 0); | 2275 | } while ((n = iterate_fd(files, n, match_file, cred)) != 0); |
2276 | if (devnull) | 2276 | if (devnull) |
2277 | fput(devnull); | 2277 | fput(devnull); |
2278 | } | 2278 | } |
2279 | 2279 | ||
2280 | /* | 2280 | /* |
2281 | * Prepare a process for imminent new credential changes due to exec | 2281 | * Prepare a process for imminent new credential changes due to exec |
2282 | */ | 2282 | */ |
2283 | static void selinux_bprm_committing_creds(struct linux_binprm *bprm) | 2283 | static void selinux_bprm_committing_creds(struct linux_binprm *bprm) |
2284 | { | 2284 | { |
2285 | struct task_security_struct *new_tsec; | 2285 | struct task_security_struct *new_tsec; |
2286 | struct rlimit *rlim, *initrlim; | 2286 | struct rlimit *rlim, *initrlim; |
2287 | int rc, i; | 2287 | int rc, i; |
2288 | 2288 | ||
2289 | new_tsec = bprm->cred->security; | 2289 | new_tsec = bprm->cred->security; |
2290 | if (new_tsec->sid == new_tsec->osid) | 2290 | if (new_tsec->sid == new_tsec->osid) |
2291 | return; | 2291 | return; |
2292 | 2292 | ||
2293 | /* Close files for which the new task SID is not authorized. */ | 2293 | /* Close files for which the new task SID is not authorized. */ |
2294 | flush_unauthorized_files(bprm->cred, current->files); | 2294 | flush_unauthorized_files(bprm->cred, current->files); |
2295 | 2295 | ||
2296 | /* Always clear parent death signal on SID transitions. */ | 2296 | /* Always clear parent death signal on SID transitions. */ |
2297 | current->pdeath_signal = 0; | 2297 | current->pdeath_signal = 0; |
2298 | 2298 | ||
2299 | /* Check whether the new SID can inherit resource limits from the old | 2299 | /* Check whether the new SID can inherit resource limits from the old |
2300 | * SID. If not, reset all soft limits to the lower of the current | 2300 | * SID. If not, reset all soft limits to the lower of the current |
2301 | * task's hard limit and the init task's soft limit. | 2301 | * task's hard limit and the init task's soft limit. |
2302 | * | 2302 | * |
2303 | * Note that the setting of hard limits (even to lower them) can be | 2303 | * Note that the setting of hard limits (even to lower them) can be |
2304 | * controlled by the setrlimit check. The inclusion of the init task's | 2304 | * controlled by the setrlimit check. The inclusion of the init task's |
2305 | * soft limit into the computation is to avoid resetting soft limits | 2305 | * soft limit into the computation is to avoid resetting soft limits |
2306 | * higher than the default soft limit for cases where the default is | 2306 | * higher than the default soft limit for cases where the default is |
2307 | * lower than the hard limit, e.g. RLIMIT_CORE or RLIMIT_STACK. | 2307 | * lower than the hard limit, e.g. RLIMIT_CORE or RLIMIT_STACK. |
2308 | */ | 2308 | */ |
2309 | rc = avc_has_perm(new_tsec->osid, new_tsec->sid, SECCLASS_PROCESS, | 2309 | rc = avc_has_perm(new_tsec->osid, new_tsec->sid, SECCLASS_PROCESS, |
2310 | PROCESS__RLIMITINH, NULL); | 2310 | PROCESS__RLIMITINH, NULL); |
2311 | if (rc) { | 2311 | if (rc) { |
2312 | /* protect against do_prlimit() */ | 2312 | /* protect against do_prlimit() */ |
2313 | task_lock(current); | 2313 | task_lock(current); |
2314 | for (i = 0; i < RLIM_NLIMITS; i++) { | 2314 | for (i = 0; i < RLIM_NLIMITS; i++) { |
2315 | rlim = current->signal->rlim + i; | 2315 | rlim = current->signal->rlim + i; |
2316 | initrlim = init_task.signal->rlim + i; | 2316 | initrlim = init_task.signal->rlim + i; |
2317 | rlim->rlim_cur = min(rlim->rlim_max, initrlim->rlim_cur); | 2317 | rlim->rlim_cur = min(rlim->rlim_max, initrlim->rlim_cur); |
2318 | } | 2318 | } |
2319 | task_unlock(current); | 2319 | task_unlock(current); |
2320 | update_rlimit_cpu(current, rlimit(RLIMIT_CPU)); | 2320 | update_rlimit_cpu(current, rlimit(RLIMIT_CPU)); |
2321 | } | 2321 | } |
2322 | } | 2322 | } |
2323 | 2323 | ||
2324 | /* | 2324 | /* |
2325 | * Clean up the process immediately after the installation of new credentials | 2325 | * Clean up the process immediately after the installation of new credentials |
2326 | * due to exec | 2326 | * due to exec |
2327 | */ | 2327 | */ |
2328 | static void selinux_bprm_committed_creds(struct linux_binprm *bprm) | 2328 | static void selinux_bprm_committed_creds(struct linux_binprm *bprm) |
2329 | { | 2329 | { |
2330 | const struct task_security_struct *tsec = current_security(); | 2330 | const struct task_security_struct *tsec = current_security(); |
2331 | struct itimerval itimer; | 2331 | struct itimerval itimer; |
2332 | u32 osid, sid; | 2332 | u32 osid, sid; |
2333 | int rc, i; | 2333 | int rc, i; |
2334 | 2334 | ||
2335 | osid = tsec->osid; | 2335 | osid = tsec->osid; |
2336 | sid = tsec->sid; | 2336 | sid = tsec->sid; |
2337 | 2337 | ||
2338 | if (sid == osid) | 2338 | if (sid == osid) |
2339 | return; | 2339 | return; |
2340 | 2340 | ||
2341 | /* Check whether the new SID can inherit signal state from the old SID. | 2341 | /* Check whether the new SID can inherit signal state from the old SID. |
2342 | * If not, clear itimers to avoid subsequent signal generation and | 2342 | * If not, clear itimers to avoid subsequent signal generation and |
2343 | * flush and unblock signals. | 2343 | * flush and unblock signals. |
2344 | * | 2344 | * |
2345 | * This must occur _after_ the task SID has been updated so that any | 2345 | * This must occur _after_ the task SID has been updated so that any |
2346 | * kill done after the flush will be checked against the new SID. | 2346 | * kill done after the flush will be checked against the new SID. |
2347 | */ | 2347 | */ |
2348 | rc = avc_has_perm(osid, sid, SECCLASS_PROCESS, PROCESS__SIGINH, NULL); | 2348 | rc = avc_has_perm(osid, sid, SECCLASS_PROCESS, PROCESS__SIGINH, NULL); |
2349 | if (rc) { | 2349 | if (rc) { |
2350 | memset(&itimer, 0, sizeof itimer); | 2350 | memset(&itimer, 0, sizeof itimer); |
2351 | for (i = 0; i < 3; i++) | 2351 | for (i = 0; i < 3; i++) |
2352 | do_setitimer(i, &itimer, NULL); | 2352 | do_setitimer(i, &itimer, NULL); |
2353 | spin_lock_irq(¤t->sighand->siglock); | 2353 | spin_lock_irq(¤t->sighand->siglock); |
2354 | if (!(current->signal->flags & SIGNAL_GROUP_EXIT)) { | 2354 | if (!(current->signal->flags & SIGNAL_GROUP_EXIT)) { |
2355 | __flush_signals(current); | 2355 | __flush_signals(current); |
2356 | flush_signal_handlers(current, 1); | 2356 | flush_signal_handlers(current, 1); |
2357 | sigemptyset(¤t->blocked); | 2357 | sigemptyset(¤t->blocked); |
2358 | } | 2358 | } |
2359 | spin_unlock_irq(¤t->sighand->siglock); | 2359 | spin_unlock_irq(¤t->sighand->siglock); |
2360 | } | 2360 | } |
2361 | 2361 | ||
2362 | /* Wake up the parent if it is waiting so that it can recheck | 2362 | /* Wake up the parent if it is waiting so that it can recheck |
2363 | * wait permission to the new task SID. */ | 2363 | * wait permission to the new task SID. */ |
2364 | read_lock(&tasklist_lock); | 2364 | read_lock(&tasklist_lock); |
2365 | __wake_up_parent(current, current->real_parent); | 2365 | __wake_up_parent(current, current->real_parent); |
2366 | read_unlock(&tasklist_lock); | 2366 | read_unlock(&tasklist_lock); |
2367 | } | 2367 | } |
2368 | 2368 | ||
2369 | /* superblock security operations */ | 2369 | /* superblock security operations */ |
2370 | 2370 | ||
/* Superblock allocation hook: forwards to superblock_alloc_security(). */
static int selinux_sb_alloc_security(struct super_block *sb)
{
	int rc = superblock_alloc_security(sb);

	return rc;
}
2375 | 2375 | ||
/* Superblock free hook: forwards to superblock_free_security(). */
static void
selinux_sb_free_security(struct super_block *sb)
{
	superblock_free_security(sb);
}
2380 | 2380 | ||
/*
 * Return 1 if the first @plen bytes of @option equal @prefix, 0 otherwise.
 * An option shorter than the prefix (@olen < @plen) never matches.
 */
static inline int match_prefix(char *prefix, int plen, char *option, int olen)
{
	return plen <= olen && memcmp(prefix, option, plen) == 0;
}
2388 | 2388 | ||
2389 | static inline int selinux_option(char *option, int len) | 2389 | static inline int selinux_option(char *option, int len) |
2390 | { | 2390 | { |
2391 | return (match_prefix(CONTEXT_STR, sizeof(CONTEXT_STR)-1, option, len) || | 2391 | return (match_prefix(CONTEXT_STR, sizeof(CONTEXT_STR)-1, option, len) || |
2392 | match_prefix(FSCONTEXT_STR, sizeof(FSCONTEXT_STR)-1, option, len) || | 2392 | match_prefix(FSCONTEXT_STR, sizeof(FSCONTEXT_STR)-1, option, len) || |
2393 | match_prefix(DEFCONTEXT_STR, sizeof(DEFCONTEXT_STR)-1, option, len) || | 2393 | match_prefix(DEFCONTEXT_STR, sizeof(DEFCONTEXT_STR)-1, option, len) || |
2394 | match_prefix(ROOTCONTEXT_STR, sizeof(ROOTCONTEXT_STR)-1, option, len) || | 2394 | match_prefix(ROOTCONTEXT_STR, sizeof(ROOTCONTEXT_STR)-1, option, len) || |
2395 | match_prefix(LABELSUPP_STR, sizeof(LABELSUPP_STR)-1, option, len)); | 2395 | match_prefix(LABELSUPP_STR, sizeof(LABELSUPP_STR)-1, option, len)); |
2396 | } | 2396 | } |
2397 | 2397 | ||
/*
 * Append @len bytes of @from at the output cursor *@to and advance the cursor.
 * Every option after the first is preceded by a ',' separator; *@first is
 * cleared once the first option has been taken.  No terminating NUL is
 * written and the destination is not bounds-checked.
 */
static inline void take_option(char **to, char *from, int *first, int len)
{
	char *dst = *to;

	if (*first)
		*first = 0;
	else
		*dst++ = ',';

	memcpy(dst, from, len);
	*to = dst + len;
}
2408 | 2408 | ||
/*
 * Append the first @len bytes of @from at the output cursor *@to, dropping
 * every '"' character, and advance the cursor past what was written.
 * Every option after the first is preceded by a '|' separator; *@first is
 * cleared once the first option has been taken.  No terminating NUL is
 * written and the destination is not bounds-checked.
 */
static inline void take_selinux_option(char **to, char *from, int *first,
				       int len)
{
	char *dst = *to;
	int i;

	if (*first)
		*first = 0;
	else
		*dst++ = '|';

	for (i = 0; i < len; i++) {
		if (from[i] != '"')
			*dst++ = from[i];
	}

	*to = dst;
}
2429 | 2429 | ||
2430 | static int selinux_sb_copy_data(char *orig, char *copy) | 2430 | static int selinux_sb_copy_data(char *orig, char *copy) |
2431 | { | 2431 | { |
2432 | int fnosec, fsec, rc = 0; | 2432 | int fnosec, fsec, rc = 0; |
2433 | char *in_save, *in_curr, *in_end; | 2433 | char *in_save, *in_curr, *in_end; |
2434 | char *sec_curr, *nosec_save, *nosec; | 2434 | char *sec_curr, *nosec_save, *nosec; |
2435 | int open_quote = 0; | 2435 | int open_quote = 0; |
2436 | 2436 | ||
2437 | in_curr = orig; | 2437 | in_curr = orig; |
2438 | sec_curr = copy; | 2438 | sec_curr = copy; |
2439 | 2439 | ||
2440 | nosec = (char *)get_zeroed_page(GFP_KERNEL); | 2440 | nosec = (char *)get_zeroed_page(GFP_KERNEL); |
2441 | if (!nosec) { | 2441 | if (!nosec) { |
2442 | rc = -ENOMEM; | 2442 | rc = -ENOMEM; |
2443 | goto out; | 2443 | goto out; |
2444 | } | 2444 | } |
2445 | 2445 | ||
2446 | nosec_save = nosec; | 2446 | nosec_save = nosec; |
2447 | fnosec = fsec = 1; | 2447 | fnosec = fsec = 1; |
2448 | in_save = in_end = orig; | 2448 | in_save = in_end = orig; |
2449 | 2449 | ||
2450 | do { | 2450 | do { |
2451 | if (*in_end == '"') | 2451 | if (*in_end == '"') |
2452 | open_quote = !open_quote; | 2452 | open_quote = !open_quote; |
2453 | if ((*in_end == ',' && open_quote == 0) || | 2453 | if ((*in_end == ',' && open_quote == 0) || |
2454 | *in_end == '\0') { | 2454 | *in_end == '\0') { |
2455 | int len = in_end - in_curr; | 2455 | int len = in_end - in_curr; |
2456 | 2456 | ||
2457 | if (selinux_option(in_curr, len)) | 2457 | if (selinux_option(in_curr, len)) |
2458 | take_selinux_option(&sec_curr, in_curr, &fsec, len); | 2458 | take_selinux_option(&sec_curr, in_curr, &fsec, len); |
2459 | else | 2459 | else |
2460 | take_option(&nosec, in_curr, &fnosec, len); | 2460 | take_option(&nosec, in_curr, &fnosec, len); |
2461 | 2461 | ||
2462 | in_curr = in_end + 1; | 2462 | in_curr = in_end + 1; |
2463 | } | 2463 | } |
2464 | } while (*in_end++); | 2464 | } while (*in_end++); |
2465 | 2465 | ||
2466 | strcpy(in_save, nosec_save); | 2466 | strcpy(in_save, nosec_save); |
2467 | free_page((unsigned long)nosec_save); | 2467 | free_page((unsigned long)nosec_save); |
2468 | out: | 2468 | out: |
2469 | return rc; | 2469 | return rc; |
2470 | } | 2470 | } |
2471 | 2471 | ||
2472 | static int selinux_sb_remount(struct super_block *sb, void *data) | 2472 | static int selinux_sb_remount(struct super_block *sb, void *data) |
2473 | { | 2473 | { |
2474 | int rc, i, *flags; | 2474 | int rc, i, *flags; |
2475 | struct security_mnt_opts opts; | 2475 | struct security_mnt_opts opts; |
2476 | char *secdata, **mount_options; | 2476 | char *secdata, **mount_options; |
2477 | struct superblock_security_struct *sbsec = sb->s_security; | 2477 | struct superblock_security_struct *sbsec = sb->s_security; |
2478 | 2478 | ||
2479 | if (!(sbsec->flags & SE_SBINITIALIZED)) | 2479 | if (!(sbsec->flags & SE_SBINITIALIZED)) |
2480 | return 0; | 2480 | return 0; |
2481 | 2481 | ||
2482 | if (!data) | 2482 | if (!data) |
2483 | return 0; | 2483 | return 0; |
2484 | 2484 | ||
2485 | if (sb->s_type->fs_flags & FS_BINARY_MOUNTDATA) | 2485 | if (sb->s_type->fs_flags & FS_BINARY_MOUNTDATA) |
2486 | return 0; | 2486 | return 0; |
2487 | 2487 | ||
2488 | security_init_mnt_opts(&opts); | 2488 | security_init_mnt_opts(&opts); |
2489 | secdata = alloc_secdata(); | 2489 | secdata = alloc_secdata(); |
2490 | if (!secdata) | 2490 | if (!secdata) |
2491 | return -ENOMEM; | 2491 | return -ENOMEM; |
2492 | rc = selinux_sb_copy_data(data, secdata); | 2492 | rc = selinux_sb_copy_data(data, secdata); |
2493 | if (rc) | 2493 | if (rc) |
2494 | goto out_free_secdata; | 2494 | goto out_free_secdata; |
2495 | 2495 | ||
2496 | rc = selinux_parse_opts_str(secdata, &opts); | 2496 | rc = selinux_parse_opts_str(secdata, &opts); |
2497 | if (rc) | 2497 | if (rc) |
2498 | goto out_free_secdata; | 2498 | goto out_free_secdata; |
2499 | 2499 | ||
2500 | mount_options = opts.mnt_opts; | 2500 | mount_options = opts.mnt_opts; |
2501 | flags = opts.mnt_opts_flags; | 2501 | flags = opts.mnt_opts_flags; |
2502 | 2502 | ||
2503 | for (i = 0; i < opts.num_mnt_opts; i++) { | 2503 | for (i = 0; i < opts.num_mnt_opts; i++) { |
2504 | u32 sid; | 2504 | u32 sid; |
2505 | size_t len; | 2505 | size_t len; |
2506 | 2506 | ||
2507 | if (flags[i] == SBLABEL_MNT) | 2507 | if (flags[i] == SBLABEL_MNT) |
2508 | continue; | 2508 | continue; |
2509 | len = strlen(mount_options[i]); | 2509 | len = strlen(mount_options[i]); |
2510 | rc = security_context_to_sid(mount_options[i], len, &sid, | 2510 | rc = security_context_to_sid(mount_options[i], len, &sid, |
2511 | GFP_KERNEL); | 2511 | GFP_KERNEL); |
2512 | if (rc) { | 2512 | if (rc) { |
2513 | printk(KERN_WARNING "SELinux: security_context_to_sid" | 2513 | printk(KERN_WARNING "SELinux: security_context_to_sid" |
2514 | "(%s) failed for (dev %s, type %s) errno=%d\n", | 2514 | "(%s) failed for (dev %s, type %s) errno=%d\n", |
2515 | mount_options[i], sb->s_id, sb->s_type->name, rc); | 2515 | mount_options[i], sb->s_id, sb->s_type->name, rc); |
2516 | goto out_free_opts; | 2516 | goto out_free_opts; |
2517 | } | 2517 | } |
2518 | rc = -EINVAL; | 2518 | rc = -EINVAL; |
2519 | switch (flags[i]) { | 2519 | switch (flags[i]) { |
2520 | case FSCONTEXT_MNT: | 2520 | case FSCONTEXT_MNT: |
2521 | if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, sid)) | 2521 | if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, sid)) |
2522 | goto out_bad_option; | 2522 | goto out_bad_option; |
2523 | break; | 2523 | break; |
2524 | case CONTEXT_MNT: | 2524 | case CONTEXT_MNT: |
2525 | if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, sid)) | 2525 | if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, sid)) |
2526 | goto out_bad_option; | 2526 | goto out_bad_option; |
2527 | break; | 2527 | break; |
2528 | case ROOTCONTEXT_MNT: { | 2528 | case ROOTCONTEXT_MNT: { |
2529 | struct inode_security_struct *root_isec; | 2529 | struct inode_security_struct *root_isec; |
2530 | root_isec = sb->s_root->d_inode->i_security; | 2530 | root_isec = sb->s_root->d_inode->i_security; |
2531 | 2531 | ||
2532 | if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, sid)) | 2532 | if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, sid)) |
2533 | goto out_bad_option; | 2533 | goto out_bad_option; |
2534 | break; | 2534 | break; |
2535 | } | 2535 | } |
2536 | case DEFCONTEXT_MNT: | 2536 | case DEFCONTEXT_MNT: |
2537 | if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, sid)) | 2537 | if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, sid)) |
2538 | goto out_bad_option; | 2538 | goto out_bad_option; |
2539 | break; | 2539 | break; |
2540 | default: | 2540 | default: |
2541 | goto out_free_opts; | 2541 | goto out_free_opts; |
2542 | } | 2542 | } |
2543 | } | 2543 | } |
2544 | 2544 | ||
2545 | rc = 0; | 2545 | rc = 0; |
2546 | out_free_opts: | 2546 | out_free_opts: |
2547 | security_free_mnt_opts(&opts); | 2547 | security_free_mnt_opts(&opts); |
2548 | out_free_secdata: | 2548 | out_free_secdata: |
2549 | free_secdata(secdata); | 2549 | free_secdata(secdata); |
2550 | return rc; | 2550 | return rc; |
2551 | out_bad_option: | 2551 | out_bad_option: |
2552 | printk(KERN_WARNING "SELinux: unable to change security options " | 2552 | printk(KERN_WARNING "SELinux: unable to change security options " |
2553 | "during remount (dev %s, type=%s)\n", sb->s_id, | 2553 | "during remount (dev %s, type=%s)\n", sb->s_id, |
2554 | sb->s_type->name); | 2554 | sb->s_type->name); |
2555 | goto out_free_opts; | 2555 | goto out_free_opts; |
2556 | } | 2556 | } |
2557 | 2557 | ||
2558 | static int selinux_sb_kern_mount(struct super_block *sb, int flags, void *data) | 2558 | static int selinux_sb_kern_mount(struct super_block *sb, int flags, void *data) |
2559 | { | 2559 | { |
2560 | const struct cred *cred = current_cred(); | 2560 | const struct cred *cred = current_cred(); |
2561 | struct common_audit_data ad; | 2561 | struct common_audit_data ad; |
2562 | int rc; | 2562 | int rc; |
2563 | 2563 | ||
2564 | rc = superblock_doinit(sb, data); | 2564 | rc = superblock_doinit(sb, data); |
2565 | if (rc) | 2565 | if (rc) |
2566 | return rc; | 2566 | return rc; |
2567 | 2567 | ||
2568 | /* Allow all mounts performed by the kernel */ | 2568 | /* Allow all mounts performed by the kernel */ |
2569 | if (flags & MS_KERNMOUNT) | 2569 | if (flags & MS_KERNMOUNT) |
2570 | return 0; | 2570 | return 0; |
2571 | 2571 | ||
2572 | ad.type = LSM_AUDIT_DATA_DENTRY; | 2572 | ad.type = LSM_AUDIT_DATA_DENTRY; |
2573 | ad.u.dentry = sb->s_root; | 2573 | ad.u.dentry = sb->s_root; |
2574 | return superblock_has_perm(cred, sb, FILESYSTEM__MOUNT, &ad); | 2574 | return superblock_has_perm(cred, sb, FILESYSTEM__MOUNT, &ad); |
2575 | } | 2575 | } |
2576 | 2576 | ||
2577 | static int selinux_sb_statfs(struct dentry *dentry) | 2577 | static int selinux_sb_statfs(struct dentry *dentry) |
2578 | { | 2578 | { |
2579 | const struct cred *cred = current_cred(); | 2579 | const struct cred *cred = current_cred(); |
2580 | struct common_audit_data ad; | 2580 | struct common_audit_data ad; |
2581 | 2581 | ||
2582 | ad.type = LSM_AUDIT_DATA_DENTRY; | 2582 | ad.type = LSM_AUDIT_DATA_DENTRY; |
2583 | ad.u.dentry = dentry->d_sb->s_root; | 2583 | ad.u.dentry = dentry->d_sb->s_root; |
2584 | return superblock_has_perm(cred, dentry->d_sb, FILESYSTEM__GETATTR, &ad); | 2584 | return superblock_has_perm(cred, dentry->d_sb, FILESYSTEM__GETATTR, &ad); |
2585 | } | 2585 | } |
2586 | 2586 | ||
2587 | static int selinux_mount(const char *dev_name, | 2587 | static int selinux_mount(const char *dev_name, |
2588 | struct path *path, | 2588 | struct path *path, |
2589 | const char *type, | 2589 | const char *type, |
2590 | unsigned long flags, | 2590 | unsigned long flags, |
2591 | void *data) | 2591 | void *data) |
2592 | { | 2592 | { |
2593 | const struct cred *cred = current_cred(); | 2593 | const struct cred *cred = current_cred(); |
2594 | 2594 | ||
2595 | if (flags & MS_REMOUNT) | 2595 | if (flags & MS_REMOUNT) |
2596 | return superblock_has_perm(cred, path->dentry->d_sb, | 2596 | return superblock_has_perm(cred, path->dentry->d_sb, |
2597 | FILESYSTEM__REMOUNT, NULL); | 2597 | FILESYSTEM__REMOUNT, NULL); |
2598 | else | 2598 | else |
2599 | return path_has_perm(cred, path, FILE__MOUNTON); | 2599 | return path_has_perm(cred, path, FILE__MOUNTON); |
2600 | } | 2600 | } |
2601 | 2601 | ||
2602 | static int selinux_umount(struct vfsmount *mnt, int flags) | 2602 | static int selinux_umount(struct vfsmount *mnt, int flags) |
2603 | { | 2603 | { |
2604 | const struct cred *cred = current_cred(); | 2604 | const struct cred *cred = current_cred(); |
2605 | 2605 | ||
2606 | return superblock_has_perm(cred, mnt->mnt_sb, | 2606 | return superblock_has_perm(cred, mnt->mnt_sb, |
2607 | FILESYSTEM__UNMOUNT, NULL); | 2607 | FILESYSTEM__UNMOUNT, NULL); |
2608 | } | 2608 | } |
2609 | 2609 | ||
2610 | /* inode security operations */ | 2610 | /* inode security operations */ |
2611 | 2611 | ||
/* Inode allocation hook: forwards to inode_alloc_security(). */
static int selinux_inode_alloc_security(struct inode *inode)
{
	int rc = inode_alloc_security(inode);

	return rc;
}
2616 | 2616 | ||
/* Inode free hook: forwards to inode_free_security(). */
static void
selinux_inode_free_security(struct inode *inode)
{
	inode_free_security(inode);
}
2621 | 2621 | ||
2622 | static int selinux_dentry_init_security(struct dentry *dentry, int mode, | 2622 | static int selinux_dentry_init_security(struct dentry *dentry, int mode, |
2623 | struct qstr *name, void **ctx, | 2623 | struct qstr *name, void **ctx, |
2624 | u32 *ctxlen) | 2624 | u32 *ctxlen) |
2625 | { | 2625 | { |
2626 | const struct cred *cred = current_cred(); | 2626 | const struct cred *cred = current_cred(); |
2627 | struct task_security_struct *tsec; | 2627 | struct task_security_struct *tsec; |
2628 | struct inode_security_struct *dsec; | 2628 | struct inode_security_struct *dsec; |
2629 | struct superblock_security_struct *sbsec; | 2629 | struct superblock_security_struct *sbsec; |
2630 | struct inode *dir = dentry->d_parent->d_inode; | 2630 | struct inode *dir = dentry->d_parent->d_inode; |
2631 | u32 newsid; | 2631 | u32 newsid; |
2632 | int rc; | 2632 | int rc; |
2633 | 2633 | ||
2634 | tsec = cred->security; | 2634 | tsec = cred->security; |
2635 | dsec = dir->i_security; | 2635 | dsec = dir->i_security; |
2636 | sbsec = dir->i_sb->s_security; | 2636 | sbsec = dir->i_sb->s_security; |
2637 | 2637 | ||
2638 | if (tsec->create_sid && sbsec->behavior != SECURITY_FS_USE_MNTPOINT) { | 2638 | if (tsec->create_sid && sbsec->behavior != SECURITY_FS_USE_MNTPOINT) { |
2639 | newsid = tsec->create_sid; | 2639 | newsid = tsec->create_sid; |
2640 | } else { | 2640 | } else { |
2641 | rc = security_transition_sid(tsec->sid, dsec->sid, | 2641 | rc = security_transition_sid(tsec->sid, dsec->sid, |
2642 | inode_mode_to_security_class(mode), | 2642 | inode_mode_to_security_class(mode), |
2643 | name, | 2643 | name, |
2644 | &newsid); | 2644 | &newsid); |
2645 | if (rc) { | 2645 | if (rc) { |
2646 | printk(KERN_WARNING | 2646 | printk(KERN_WARNING |
2647 | "%s: security_transition_sid failed, rc=%d\n", | 2647 | "%s: security_transition_sid failed, rc=%d\n", |
2648 | __func__, -rc); | 2648 | __func__, -rc); |
2649 | return rc; | 2649 | return rc; |
2650 | } | 2650 | } |
2651 | } | 2651 | } |
2652 | 2652 | ||
2653 | return security_sid_to_context(newsid, (char **)ctx, ctxlen); | 2653 | return security_sid_to_context(newsid, (char **)ctx, ctxlen); |
2654 | } | 2654 | } |
2655 | 2655 | ||
2656 | static int selinux_inode_init_security(struct inode *inode, struct inode *dir, | 2656 | static int selinux_inode_init_security(struct inode *inode, struct inode *dir, |
2657 | const struct qstr *qstr, | 2657 | const struct qstr *qstr, |
2658 | const char **name, | 2658 | const char **name, |
2659 | void **value, size_t *len) | 2659 | void **value, size_t *len) |
2660 | { | 2660 | { |
2661 | const struct task_security_struct *tsec = current_security(); | 2661 | const struct task_security_struct *tsec = current_security(); |
2662 | struct inode_security_struct *dsec; | 2662 | struct inode_security_struct *dsec; |
2663 | struct superblock_security_struct *sbsec; | 2663 | struct superblock_security_struct *sbsec; |
2664 | u32 sid, newsid, clen; | 2664 | u32 sid, newsid, clen; |
2665 | int rc; | 2665 | int rc; |
2666 | char *context; | 2666 | char *context; |
2667 | 2667 | ||
2668 | dsec = dir->i_security; | 2668 | dsec = dir->i_security; |
2669 | sbsec = dir->i_sb->s_security; | 2669 | sbsec = dir->i_sb->s_security; |
2670 | 2670 | ||
2671 | sid = tsec->sid; | 2671 | sid = tsec->sid; |
2672 | newsid = tsec->create_sid; | 2672 | newsid = tsec->create_sid; |
2673 | 2673 | ||
2674 | if ((sbsec->flags & SE_SBINITIALIZED) && | 2674 | if ((sbsec->flags & SE_SBINITIALIZED) && |
2675 | (sbsec->behavior == SECURITY_FS_USE_MNTPOINT)) | 2675 | (sbsec->behavior == SECURITY_FS_USE_MNTPOINT)) |
2676 | newsid = sbsec->mntpoint_sid; | 2676 | newsid = sbsec->mntpoint_sid; |
2677 | else if (!newsid || !(sbsec->flags & SBLABEL_MNT)) { | 2677 | else if (!newsid || !(sbsec->flags & SBLABEL_MNT)) { |
2678 | rc = security_transition_sid(sid, dsec->sid, | 2678 | rc = security_transition_sid(sid, dsec->sid, |
2679 | inode_mode_to_security_class(inode->i_mode), | 2679 | inode_mode_to_security_class(inode->i_mode), |
2680 | qstr, &newsid); | 2680 | qstr, &newsid); |
2681 | if (rc) { | 2681 | if (rc) { |
2682 | printk(KERN_WARNING "%s: " | 2682 | printk(KERN_WARNING "%s: " |
2683 | "security_transition_sid failed, rc=%d (dev=%s " | 2683 | "security_transition_sid failed, rc=%d (dev=%s " |
2684 | "ino=%ld)\n", | 2684 | "ino=%ld)\n", |
2685 | __func__, | 2685 | __func__, |
2686 | -rc, inode->i_sb->s_id, inode->i_ino); | 2686 | -rc, inode->i_sb->s_id, inode->i_ino); |
2687 | return rc; | 2687 | return rc; |
2688 | } | 2688 | } |
2689 | } | 2689 | } |
2690 | 2690 | ||
2691 | /* Possibly defer initialization to selinux_complete_init. */ | 2691 | /* Possibly defer initialization to selinux_complete_init. */ |
2692 | if (sbsec->flags & SE_SBINITIALIZED) { | 2692 | if (sbsec->flags & SE_SBINITIALIZED) { |
2693 | struct inode_security_struct *isec = inode->i_security; | 2693 | struct inode_security_struct *isec = inode->i_security; |
2694 | isec->sclass = inode_mode_to_security_class(inode->i_mode); | 2694 | isec->sclass = inode_mode_to_security_class(inode->i_mode); |
2695 | isec->sid = newsid; | 2695 | isec->sid = newsid; |
2696 | isec->initialized = 1; | 2696 | isec->initialized = 1; |
2697 | } | 2697 | } |
2698 | 2698 | ||
2699 | if (!ss_initialized || !(sbsec->flags & SBLABEL_MNT)) | 2699 | if (!ss_initialized || !(sbsec->flags & SBLABEL_MNT)) |
2700 | return -EOPNOTSUPP; | 2700 | return -EOPNOTSUPP; |
2701 | 2701 | ||
2702 | if (name) | 2702 | if (name) |
2703 | *name = XATTR_SELINUX_SUFFIX; | 2703 | *name = XATTR_SELINUX_SUFFIX; |
2704 | 2704 | ||
2705 | if (value && len) { | 2705 | if (value && len) { |
2706 | rc = security_sid_to_context_force(newsid, &context, &clen); | 2706 | rc = security_sid_to_context_force(newsid, &context, &clen); |
2707 | if (rc) | 2707 | if (rc) |
2708 | return rc; | 2708 | return rc; |
2709 | *value = context; | 2709 | *value = context; |
2710 | *len = clen; | 2710 | *len = clen; |
2711 | } | 2711 | } |
2712 | 2712 | ||
2713 | return 0; | 2713 | return 0; |
2714 | } | 2714 | } |
2715 | 2715 | ||
2716 | static int selinux_inode_create(struct inode *dir, struct dentry *dentry, umode_t mode) | 2716 | static int selinux_inode_create(struct inode *dir, struct dentry *dentry, umode_t mode) |
2717 | { | 2717 | { |
2718 | return may_create(dir, dentry, SECCLASS_FILE); | 2718 | return may_create(dir, dentry, SECCLASS_FILE); |
2719 | } | 2719 | } |
2720 | 2720 | ||
2721 | static int selinux_inode_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) | 2721 | static int selinux_inode_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) |
2722 | { | 2722 | { |
2723 | return may_link(dir, old_dentry, MAY_LINK); | 2723 | return may_link(dir, old_dentry, MAY_LINK); |
2724 | } | 2724 | } |
2725 | 2725 | ||
2726 | static int selinux_inode_unlink(struct inode *dir, struct dentry *dentry) | 2726 | static int selinux_inode_unlink(struct inode *dir, struct dentry *dentry) |
2727 | { | 2727 | { |
2728 | return may_link(dir, dentry, MAY_UNLINK); | 2728 | return may_link(dir, dentry, MAY_UNLINK); |
2729 | } | 2729 | } |
2730 | 2730 | ||
2731 | static int selinux_inode_symlink(struct inode *dir, struct dentry *dentry, const char *name) | 2731 | static int selinux_inode_symlink(struct inode *dir, struct dentry *dentry, const char *name) |
2732 | { | 2732 | { |
2733 | return may_create(dir, dentry, SECCLASS_LNK_FILE); | 2733 | return may_create(dir, dentry, SECCLASS_LNK_FILE); |
2734 | } | 2734 | } |
2735 | 2735 | ||
2736 | static int selinux_inode_mkdir(struct inode *dir, struct dentry *dentry, umode_t mask) | 2736 | static int selinux_inode_mkdir(struct inode *dir, struct dentry *dentry, umode_t mask) |
2737 | { | 2737 | { |
2738 | return may_create(dir, dentry, SECCLASS_DIR); | 2738 | return may_create(dir, dentry, SECCLASS_DIR); |
2739 | } | 2739 | } |
2740 | 2740 | ||
2741 | static int selinux_inode_rmdir(struct inode *dir, struct dentry *dentry) | 2741 | static int selinux_inode_rmdir(struct inode *dir, struct dentry *dentry) |
2742 | { | 2742 | { |
2743 | return may_link(dir, dentry, MAY_RMDIR); | 2743 | return may_link(dir, dentry, MAY_RMDIR); |
2744 | } | 2744 | } |
2745 | 2745 | ||
2746 | static int selinux_inode_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) | 2746 | static int selinux_inode_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) |
2747 | { | 2747 | { |
2748 | return may_create(dir, dentry, inode_mode_to_security_class(mode)); | 2748 | return may_create(dir, dentry, inode_mode_to_security_class(mode)); |
2749 | } | 2749 | } |
2750 | 2750 | ||
/* Rename hook: forwards all four arguments unchanged to may_rename(). */
static int selinux_inode_rename(struct inode *old_inode,
				struct dentry *old_dentry,
				struct inode *new_inode,
				struct dentry *new_dentry)
{
	int rc = may_rename(old_inode, old_dentry, new_inode, new_dentry);

	return rc;
}
2756 | 2756 | ||
2757 | static int selinux_inode_readlink(struct dentry *dentry) | 2757 | static int selinux_inode_readlink(struct dentry *dentry) |
2758 | { | 2758 | { |
2759 | const struct cred *cred = current_cred(); | 2759 | const struct cred *cred = current_cred(); |
2760 | 2760 | ||
2761 | return dentry_has_perm(cred, dentry, FILE__READ); | 2761 | return dentry_has_perm(cred, dentry, FILE__READ); |
2762 | } | 2762 | } |
2763 | 2763 | ||
2764 | static int selinux_inode_follow_link(struct dentry *dentry, struct nameidata *nameidata) | 2764 | static int selinux_inode_follow_link(struct dentry *dentry, struct nameidata *nameidata) |
2765 | { | 2765 | { |
2766 | const struct cred *cred = current_cred(); | 2766 | const struct cred *cred = current_cred(); |
2767 | 2767 | ||
2768 | return dentry_has_perm(cred, dentry, FILE__READ); | 2768 | return dentry_has_perm(cred, dentry, FILE__READ); |
2769 | } | 2769 | } |
2770 | 2770 | ||
2771 | static noinline int audit_inode_permission(struct inode *inode, | 2771 | static noinline int audit_inode_permission(struct inode *inode, |
2772 | u32 perms, u32 audited, u32 denied, | 2772 | u32 perms, u32 audited, u32 denied, |
2773 | unsigned flags) | 2773 | unsigned flags) |
2774 | { | 2774 | { |
2775 | struct common_audit_data ad; | 2775 | struct common_audit_data ad; |
2776 | struct inode_security_struct *isec = inode->i_security; | 2776 | struct inode_security_struct *isec = inode->i_security; |
2777 | int rc; | 2777 | int rc; |
2778 | 2778 | ||
2779 | ad.type = LSM_AUDIT_DATA_INODE; | 2779 | ad.type = LSM_AUDIT_DATA_INODE; |
2780 | ad.u.inode = inode; | 2780 | ad.u.inode = inode; |
2781 | 2781 | ||
2782 | rc = slow_avc_audit(current_sid(), isec->sid, isec->sclass, perms, | 2782 | rc = slow_avc_audit(current_sid(), isec->sid, isec->sclass, perms, |
2783 | audited, denied, &ad, flags); | 2783 | audited, denied, &ad, flags); |
2784 | if (rc) | 2784 | if (rc) |
2785 | return rc; | 2785 | return rc; |
2786 | return 0; | 2786 | return 0; |
2787 | } | 2787 | } |
2788 | 2788 | ||
2789 | static int selinux_inode_permission(struct inode *inode, int mask) | 2789 | static int selinux_inode_permission(struct inode *inode, int mask) |
2790 | { | 2790 | { |
2791 | const struct cred *cred = current_cred(); | 2791 | const struct cred *cred = current_cred(); |
2792 | u32 perms; | 2792 | u32 perms; |
2793 | bool from_access; | 2793 | bool from_access; |
2794 | unsigned flags = mask & MAY_NOT_BLOCK; | 2794 | unsigned flags = mask & MAY_NOT_BLOCK; |
2795 | struct inode_security_struct *isec; | 2795 | struct inode_security_struct *isec; |
2796 | u32 sid; | 2796 | u32 sid; |
2797 | struct av_decision avd; | 2797 | struct av_decision avd; |
2798 | int rc, rc2; | 2798 | int rc, rc2; |
2799 | u32 audited, denied; | 2799 | u32 audited, denied; |
2800 | 2800 | ||
2801 | from_access = mask & MAY_ACCESS; | 2801 | from_access = mask & MAY_ACCESS; |
2802 | mask &= (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND); | 2802 | mask &= (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND); |
2803 | 2803 | ||
2804 | /* No permission to check. Existence test. */ | 2804 | /* No permission to check. Existence test. */ |
2805 | if (!mask) | 2805 | if (!mask) |
2806 | return 0; | 2806 | return 0; |
2807 | 2807 | ||
2808 | validate_creds(cred); | 2808 | validate_creds(cred); |
2809 | 2809 | ||
2810 | if (unlikely(IS_PRIVATE(inode))) | 2810 | if (unlikely(IS_PRIVATE(inode))) |
2811 | return 0; | 2811 | return 0; |
2812 | 2812 | ||
2813 | perms = file_mask_to_av(inode->i_mode, mask); | 2813 | perms = file_mask_to_av(inode->i_mode, mask); |
2814 | 2814 | ||
2815 | sid = cred_sid(cred); | 2815 | sid = cred_sid(cred); |
2816 | isec = inode->i_security; | 2816 | isec = inode->i_security; |
2817 | 2817 | ||
2818 | rc = avc_has_perm_noaudit(sid, isec->sid, isec->sclass, perms, 0, &avd); | 2818 | rc = avc_has_perm_noaudit(sid, isec->sid, isec->sclass, perms, 0, &avd); |
2819 | audited = avc_audit_required(perms, &avd, rc, | 2819 | audited = avc_audit_required(perms, &avd, rc, |
2820 | from_access ? FILE__AUDIT_ACCESS : 0, | 2820 | from_access ? FILE__AUDIT_ACCESS : 0, |
2821 | &denied); | 2821 | &denied); |
2822 | if (likely(!audited)) | 2822 | if (likely(!audited)) |
2823 | return rc; | 2823 | return rc; |
2824 | 2824 | ||
2825 | rc2 = audit_inode_permission(inode, perms, audited, denied, flags); | 2825 | rc2 = audit_inode_permission(inode, perms, audited, denied, flags); |
2826 | if (rc2) | 2826 | if (rc2) |
2827 | return rc2; | 2827 | return rc2; |
2828 | return rc; | 2828 | return rc; |
2829 | } | 2829 | } |
2830 | 2830 | ||
2831 | static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr) | 2831 | static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr) |
2832 | { | 2832 | { |
2833 | const struct cred *cred = current_cred(); | 2833 | const struct cred *cred = current_cred(); |
2834 | unsigned int ia_valid = iattr->ia_valid; | 2834 | unsigned int ia_valid = iattr->ia_valid; |
2835 | __u32 av = FILE__WRITE; | 2835 | __u32 av = FILE__WRITE; |
2836 | 2836 | ||
2837 | /* ATTR_FORCE is just used for ATTR_KILL_S[UG]ID. */ | 2837 | /* ATTR_FORCE is just used for ATTR_KILL_S[UG]ID. */ |
2838 | if (ia_valid & ATTR_FORCE) { | 2838 | if (ia_valid & ATTR_FORCE) { |
2839 | ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_MODE | | 2839 | ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_MODE | |
2840 | ATTR_FORCE); | 2840 | ATTR_FORCE); |
2841 | if (!ia_valid) | 2841 | if (!ia_valid) |
2842 | return 0; | 2842 | return 0; |
2843 | } | 2843 | } |
2844 | 2844 | ||
2845 | if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | | 2845 | if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | |
2846 | ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_TIMES_SET)) | 2846 | ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_TIMES_SET)) |
2847 | return dentry_has_perm(cred, dentry, FILE__SETATTR); | 2847 | return dentry_has_perm(cred, dentry, FILE__SETATTR); |
2848 | 2848 | ||
2849 | if (selinux_policycap_openperm && (ia_valid & ATTR_SIZE)) | 2849 | if (selinux_policycap_openperm && (ia_valid & ATTR_SIZE)) |
2850 | av |= FILE__OPEN; | 2850 | av |= FILE__OPEN; |
2851 | 2851 | ||
2852 | return dentry_has_perm(cred, dentry, av); | 2852 | return dentry_has_perm(cred, dentry, av); |
2853 | } | 2853 | } |
2854 | 2854 | ||
2855 | static int selinux_inode_getattr(struct vfsmount *mnt, struct dentry *dentry) | 2855 | static int selinux_inode_getattr(struct vfsmount *mnt, struct dentry *dentry) |
2856 | { | 2856 | { |
2857 | const struct cred *cred = current_cred(); | 2857 | const struct cred *cred = current_cred(); |
2858 | struct path path; | 2858 | struct path path; |
2859 | 2859 | ||
2860 | path.dentry = dentry; | 2860 | path.dentry = dentry; |
2861 | path.mnt = mnt; | 2861 | path.mnt = mnt; |
2862 | 2862 | ||
2863 | return path_has_perm(cred, &path, FILE__GETATTR); | 2863 | return path_has_perm(cred, &path, FILE__GETATTR); |
2864 | } | 2864 | } |
2865 | 2865 | ||
2866 | static int selinux_inode_setotherxattr(struct dentry *dentry, const char *name) | 2866 | static int selinux_inode_setotherxattr(struct dentry *dentry, const char *name) |
2867 | { | 2867 | { |
2868 | const struct cred *cred = current_cred(); | 2868 | const struct cred *cred = current_cred(); |
2869 | 2869 | ||
2870 | if (!strncmp(name, XATTR_SECURITY_PREFIX, | 2870 | if (!strncmp(name, XATTR_SECURITY_PREFIX, |
2871 | sizeof XATTR_SECURITY_PREFIX - 1)) { | 2871 | sizeof XATTR_SECURITY_PREFIX - 1)) { |
2872 | if (!strcmp(name, XATTR_NAME_CAPS)) { | 2872 | if (!strcmp(name, XATTR_NAME_CAPS)) { |
2873 | if (!capable(CAP_SETFCAP)) | 2873 | if (!capable(CAP_SETFCAP)) |
2874 | return -EPERM; | 2874 | return -EPERM; |
2875 | } else if (!capable(CAP_SYS_ADMIN)) { | 2875 | } else if (!capable(CAP_SYS_ADMIN)) { |
2876 | /* A different attribute in the security namespace. | 2876 | /* A different attribute in the security namespace. |
2877 | Restrict to administrator. */ | 2877 | Restrict to administrator. */ |
2878 | return -EPERM; | 2878 | return -EPERM; |
2879 | } | 2879 | } |
2880 | } | 2880 | } |
2881 | 2881 | ||
2882 | /* Not an attribute we recognize, so just check the | 2882 | /* Not an attribute we recognize, so just check the |
2883 | ordinary setattr permission. */ | 2883 | ordinary setattr permission. */ |
2884 | return dentry_has_perm(cred, dentry, FILE__SETATTR); | 2884 | return dentry_has_perm(cred, dentry, FILE__SETATTR); |
2885 | } | 2885 | } |
2886 | 2886 | ||
2887 | static int selinux_inode_setxattr(struct dentry *dentry, const char *name, | 2887 | static int selinux_inode_setxattr(struct dentry *dentry, const char *name, |
2888 | const void *value, size_t size, int flags) | 2888 | const void *value, size_t size, int flags) |
2889 | { | 2889 | { |
2890 | struct inode *inode = dentry->d_inode; | 2890 | struct inode *inode = dentry->d_inode; |
2891 | struct inode_security_struct *isec = inode->i_security; | 2891 | struct inode_security_struct *isec = inode->i_security; |
2892 | struct superblock_security_struct *sbsec; | 2892 | struct superblock_security_struct *sbsec; |
2893 | struct common_audit_data ad; | 2893 | struct common_audit_data ad; |
2894 | u32 newsid, sid = current_sid(); | 2894 | u32 newsid, sid = current_sid(); |
2895 | int rc = 0; | 2895 | int rc = 0; |
2896 | 2896 | ||
2897 | if (strcmp(name, XATTR_NAME_SELINUX)) | 2897 | if (strcmp(name, XATTR_NAME_SELINUX)) |
2898 | return selinux_inode_setotherxattr(dentry, name); | 2898 | return selinux_inode_setotherxattr(dentry, name); |
2899 | 2899 | ||
2900 | sbsec = inode->i_sb->s_security; | 2900 | sbsec = inode->i_sb->s_security; |
2901 | if (!(sbsec->flags & SBLABEL_MNT)) | 2901 | if (!(sbsec->flags & SBLABEL_MNT)) |
2902 | return -EOPNOTSUPP; | 2902 | return -EOPNOTSUPP; |
2903 | 2903 | ||
2904 | if (!inode_owner_or_capable(inode)) | 2904 | if (!inode_owner_or_capable(inode)) |
2905 | return -EPERM; | 2905 | return -EPERM; |
2906 | 2906 | ||
2907 | ad.type = LSM_AUDIT_DATA_DENTRY; | 2907 | ad.type = LSM_AUDIT_DATA_DENTRY; |
2908 | ad.u.dentry = dentry; | 2908 | ad.u.dentry = dentry; |
2909 | 2909 | ||
2910 | rc = avc_has_perm(sid, isec->sid, isec->sclass, | 2910 | rc = avc_has_perm(sid, isec->sid, isec->sclass, |
2911 | FILE__RELABELFROM, &ad); | 2911 | FILE__RELABELFROM, &ad); |
2912 | if (rc) | 2912 | if (rc) |
2913 | return rc; | 2913 | return rc; |
2914 | 2914 | ||
2915 | rc = security_context_to_sid(value, size, &newsid, GFP_KERNEL); | 2915 | rc = security_context_to_sid(value, size, &newsid, GFP_KERNEL); |
2916 | if (rc == -EINVAL) { | 2916 | if (rc == -EINVAL) { |
2917 | if (!capable(CAP_MAC_ADMIN)) { | 2917 | if (!capable(CAP_MAC_ADMIN)) { |
2918 | struct audit_buffer *ab; | 2918 | struct audit_buffer *ab; |
2919 | size_t audit_size; | 2919 | size_t audit_size; |
2920 | const char *str; | 2920 | const char *str; |
2921 | 2921 | ||
2922 | /* We strip a nul only if it is at the end, otherwise the | 2922 | /* We strip a nul only if it is at the end, otherwise the |
2923 | * context contains a nul and we should audit that */ | 2923 | * context contains a nul and we should audit that */ |
2924 | if (value) { | 2924 | if (value) { |
2925 | str = value; | 2925 | str = value; |
2926 | if (str[size - 1] == '\0') | 2926 | if (str[size - 1] == '\0') |
2927 | audit_size = size - 1; | 2927 | audit_size = size - 1; |
2928 | else | 2928 | else |
2929 | audit_size = size; | 2929 | audit_size = size; |
2930 | } else { | 2930 | } else { |
2931 | str = ""; | 2931 | str = ""; |
2932 | audit_size = 0; | 2932 | audit_size = 0; |
2933 | } | 2933 | } |
2934 | ab = audit_log_start(current->audit_context, GFP_ATOMIC, AUDIT_SELINUX_ERR); | 2934 | ab = audit_log_start(current->audit_context, GFP_ATOMIC, AUDIT_SELINUX_ERR); |
2935 | audit_log_format(ab, "op=setxattr invalid_context="); | 2935 | audit_log_format(ab, "op=setxattr invalid_context="); |
2936 | audit_log_n_untrustedstring(ab, value, audit_size); | 2936 | audit_log_n_untrustedstring(ab, value, audit_size); |
2937 | audit_log_end(ab); | 2937 | audit_log_end(ab); |
2938 | 2938 | ||
2939 | return rc; | 2939 | return rc; |
2940 | } | 2940 | } |
2941 | rc = security_context_to_sid_force(value, size, &newsid); | 2941 | rc = security_context_to_sid_force(value, size, &newsid); |
2942 | } | 2942 | } |
2943 | if (rc) | 2943 | if (rc) |
2944 | return rc; | 2944 | return rc; |
2945 | 2945 | ||
2946 | rc = avc_has_perm(sid, newsid, isec->sclass, | 2946 | rc = avc_has_perm(sid, newsid, isec->sclass, |
2947 | FILE__RELABELTO, &ad); | 2947 | FILE__RELABELTO, &ad); |
2948 | if (rc) | 2948 | if (rc) |
2949 | return rc; | 2949 | return rc; |
2950 | 2950 | ||
2951 | rc = security_validate_transition(isec->sid, newsid, sid, | 2951 | rc = security_validate_transition(isec->sid, newsid, sid, |
2952 | isec->sclass); | 2952 | isec->sclass); |
2953 | if (rc) | 2953 | if (rc) |
2954 | return rc; | 2954 | return rc; |
2955 | 2955 | ||
2956 | return avc_has_perm(newsid, | 2956 | return avc_has_perm(newsid, |
2957 | sbsec->sid, | 2957 | sbsec->sid, |
2958 | SECCLASS_FILESYSTEM, | 2958 | SECCLASS_FILESYSTEM, |
2959 | FILESYSTEM__ASSOCIATE, | 2959 | FILESYSTEM__ASSOCIATE, |
2960 | &ad); | 2960 | &ad); |
2961 | } | 2961 | } |
2962 | 2962 | ||
2963 | static void selinux_inode_post_setxattr(struct dentry *dentry, const char *name, | 2963 | static void selinux_inode_post_setxattr(struct dentry *dentry, const char *name, |
2964 | const void *value, size_t size, | 2964 | const void *value, size_t size, |
2965 | int flags) | 2965 | int flags) |
2966 | { | 2966 | { |
2967 | struct inode *inode = dentry->d_inode; | 2967 | struct inode *inode = dentry->d_inode; |
2968 | struct inode_security_struct *isec = inode->i_security; | 2968 | struct inode_security_struct *isec = inode->i_security; |
2969 | u32 newsid; | 2969 | u32 newsid; |
2970 | int rc; | 2970 | int rc; |
2971 | 2971 | ||
2972 | if (strcmp(name, XATTR_NAME_SELINUX)) { | 2972 | if (strcmp(name, XATTR_NAME_SELINUX)) { |
2973 | /* Not an attribute we recognize, so nothing to do. */ | 2973 | /* Not an attribute we recognize, so nothing to do. */ |
2974 | return; | 2974 | return; |
2975 | } | 2975 | } |
2976 | 2976 | ||
2977 | rc = security_context_to_sid_force(value, size, &newsid); | 2977 | rc = security_context_to_sid_force(value, size, &newsid); |
2978 | if (rc) { | 2978 | if (rc) { |
2979 | printk(KERN_ERR "SELinux: unable to map context to SID" | 2979 | printk(KERN_ERR "SELinux: unable to map context to SID" |
2980 | "for (%s, %lu), rc=%d\n", | 2980 | "for (%s, %lu), rc=%d\n", |
2981 | inode->i_sb->s_id, inode->i_ino, -rc); | 2981 | inode->i_sb->s_id, inode->i_ino, -rc); |
2982 | return; | 2982 | return; |
2983 | } | 2983 | } |
2984 | 2984 | ||
2985 | isec->sclass = inode_mode_to_security_class(inode->i_mode); | 2985 | isec->sclass = inode_mode_to_security_class(inode->i_mode); |
2986 | isec->sid = newsid; | 2986 | isec->sid = newsid; |
2987 | isec->initialized = 1; | 2987 | isec->initialized = 1; |
2988 | 2988 | ||
2989 | return; | 2989 | return; |
2990 | } | 2990 | } |
2991 | 2991 | ||
2992 | static int selinux_inode_getxattr(struct dentry *dentry, const char *name) | 2992 | static int selinux_inode_getxattr(struct dentry *dentry, const char *name) |
2993 | { | 2993 | { |
2994 | const struct cred *cred = current_cred(); | 2994 | const struct cred *cred = current_cred(); |
2995 | 2995 | ||
2996 | return dentry_has_perm(cred, dentry, FILE__GETATTR); | 2996 | return dentry_has_perm(cred, dentry, FILE__GETATTR); |
2997 | } | 2997 | } |
2998 | 2998 | ||
2999 | static int selinux_inode_listxattr(struct dentry *dentry) | 2999 | static int selinux_inode_listxattr(struct dentry *dentry) |
3000 | { | 3000 | { |
3001 | const struct cred *cred = current_cred(); | 3001 | const struct cred *cred = current_cred(); |
3002 | 3002 | ||
3003 | return dentry_has_perm(cred, dentry, FILE__GETATTR); | 3003 | return dentry_has_perm(cred, dentry, FILE__GETATTR); |
3004 | } | 3004 | } |
3005 | 3005 | ||
3006 | static int selinux_inode_removexattr(struct dentry *dentry, const char *name) | 3006 | static int selinux_inode_removexattr(struct dentry *dentry, const char *name) |
3007 | { | 3007 | { |
3008 | if (strcmp(name, XATTR_NAME_SELINUX)) | 3008 | if (strcmp(name, XATTR_NAME_SELINUX)) |
3009 | return selinux_inode_setotherxattr(dentry, name); | 3009 | return selinux_inode_setotherxattr(dentry, name); |
3010 | 3010 | ||
3011 | /* No one is allowed to remove a SELinux security label. | 3011 | /* No one is allowed to remove a SELinux security label. |
3012 | You can change the label, but all data must be labeled. */ | 3012 | You can change the label, but all data must be labeled. */ |
3013 | return -EACCES; | 3013 | return -EACCES; |
3014 | } | 3014 | } |
3015 | 3015 | ||
3016 | /* | 3016 | /* |
3017 | * Copy the inode security context value to the user. | 3017 | * Copy the inode security context value to the user. |
3018 | * | 3018 | * |
3019 | * Permission check is handled by selinux_inode_getxattr hook. | 3019 | * Permission check is handled by selinux_inode_getxattr hook. |
3020 | */ | 3020 | */ |
3021 | static int selinux_inode_getsecurity(const struct inode *inode, const char *name, void **buffer, bool alloc) | 3021 | static int selinux_inode_getsecurity(const struct inode *inode, const char *name, void **buffer, bool alloc) |
3022 | { | 3022 | { |
3023 | u32 size; | 3023 | u32 size; |
3024 | int error; | 3024 | int error; |
3025 | char *context = NULL; | 3025 | char *context = NULL; |
3026 | struct inode_security_struct *isec = inode->i_security; | 3026 | struct inode_security_struct *isec = inode->i_security; |
3027 | 3027 | ||
3028 | if (strcmp(name, XATTR_SELINUX_SUFFIX)) | 3028 | if (strcmp(name, XATTR_SELINUX_SUFFIX)) |
3029 | return -EOPNOTSUPP; | 3029 | return -EOPNOTSUPP; |
3030 | 3030 | ||
3031 | /* | 3031 | /* |
3032 | * If the caller has CAP_MAC_ADMIN, then get the raw context | 3032 | * If the caller has CAP_MAC_ADMIN, then get the raw context |
3033 | * value even if it is not defined by current policy; otherwise, | 3033 | * value even if it is not defined by current policy; otherwise, |
3034 | * use the in-core value under current policy. | 3034 | * use the in-core value under current policy. |
3035 | * Use the non-auditing forms of the permission checks since | 3035 | * Use the non-auditing forms of the permission checks since |
3036 | * getxattr may be called by unprivileged processes commonly | 3036 | * getxattr may be called by unprivileged processes commonly |
3037 | * and lack of permission just means that we fall back to the | 3037 | * and lack of permission just means that we fall back to the |
3038 | * in-core context value, not a denial. | 3038 | * in-core context value, not a denial. |
3039 | */ | 3039 | */ |
3040 | error = selinux_capable(current_cred(), &init_user_ns, CAP_MAC_ADMIN, | 3040 | error = selinux_capable(current_cred(), &init_user_ns, CAP_MAC_ADMIN, |
3041 | SECURITY_CAP_NOAUDIT); | 3041 | SECURITY_CAP_NOAUDIT); |
3042 | if (!error) | 3042 | if (!error) |
3043 | error = security_sid_to_context_force(isec->sid, &context, | 3043 | error = security_sid_to_context_force(isec->sid, &context, |
3044 | &size); | 3044 | &size); |
3045 | else | 3045 | else |
3046 | error = security_sid_to_context(isec->sid, &context, &size); | 3046 | error = security_sid_to_context(isec->sid, &context, &size); |
3047 | if (error) | 3047 | if (error) |
3048 | return error; | 3048 | return error; |
3049 | error = size; | 3049 | error = size; |
3050 | if (alloc) { | 3050 | if (alloc) { |
3051 | *buffer = context; | 3051 | *buffer = context; |
3052 | goto out_nofree; | 3052 | goto out_nofree; |
3053 | } | 3053 | } |
3054 | kfree(context); | 3054 | kfree(context); |
3055 | out_nofree: | 3055 | out_nofree: |
3056 | return error; | 3056 | return error; |
3057 | } | 3057 | } |
3058 | 3058 | ||
3059 | static int selinux_inode_setsecurity(struct inode *inode, const char *name, | 3059 | static int selinux_inode_setsecurity(struct inode *inode, const char *name, |
3060 | const void *value, size_t size, int flags) | 3060 | const void *value, size_t size, int flags) |
3061 | { | 3061 | { |
3062 | struct inode_security_struct *isec = inode->i_security; | 3062 | struct inode_security_struct *isec = inode->i_security; |
3063 | u32 newsid; | 3063 | u32 newsid; |
3064 | int rc; | 3064 | int rc; |
3065 | 3065 | ||
3066 | if (strcmp(name, XATTR_SELINUX_SUFFIX)) | 3066 | if (strcmp(name, XATTR_SELINUX_SUFFIX)) |
3067 | return -EOPNOTSUPP; | 3067 | return -EOPNOTSUPP; |
3068 | 3068 | ||
3069 | if (!value || !size) | 3069 | if (!value || !size) |
3070 | return -EACCES; | 3070 | return -EACCES; |
3071 | 3071 | ||
3072 | rc = security_context_to_sid((void *)value, size, &newsid, GFP_KERNEL); | 3072 | rc = security_context_to_sid((void *)value, size, &newsid, GFP_KERNEL); |
3073 | if (rc) | 3073 | if (rc) |
3074 | return rc; | 3074 | return rc; |
3075 | 3075 | ||
3076 | isec->sclass = inode_mode_to_security_class(inode->i_mode); | 3076 | isec->sclass = inode_mode_to_security_class(inode->i_mode); |
3077 | isec->sid = newsid; | 3077 | isec->sid = newsid; |
3078 | isec->initialized = 1; | 3078 | isec->initialized = 1; |
3079 | return 0; | 3079 | return 0; |
3080 | } | 3080 | } |
3081 | 3081 | ||
3082 | static int selinux_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer_size) | 3082 | static int selinux_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer_size) |
3083 | { | 3083 | { |
3084 | const int len = sizeof(XATTR_NAME_SELINUX); | 3084 | const int len = sizeof(XATTR_NAME_SELINUX); |
3085 | if (buffer && len <= buffer_size) | 3085 | if (buffer && len <= buffer_size) |
3086 | memcpy(buffer, XATTR_NAME_SELINUX, len); | 3086 | memcpy(buffer, XATTR_NAME_SELINUX, len); |
3087 | return len; | 3087 | return len; |
3088 | } | 3088 | } |
3089 | 3089 | ||
3090 | static void selinux_inode_getsecid(const struct inode *inode, u32 *secid) | 3090 | static void selinux_inode_getsecid(const struct inode *inode, u32 *secid) |
3091 | { | 3091 | { |
3092 | struct inode_security_struct *isec = inode->i_security; | 3092 | struct inode_security_struct *isec = inode->i_security; |
3093 | *secid = isec->sid; | 3093 | *secid = isec->sid; |
3094 | } | 3094 | } |
3095 | 3095 | ||
3096 | /* file security operations */ | 3096 | /* file security operations */ |
3097 | 3097 | ||
3098 | static int selinux_revalidate_file_permission(struct file *file, int mask) | 3098 | static int selinux_revalidate_file_permission(struct file *file, int mask) |
3099 | { | 3099 | { |
3100 | const struct cred *cred = current_cred(); | 3100 | const struct cred *cred = current_cred(); |
3101 | struct inode *inode = file_inode(file); | 3101 | struct inode *inode = file_inode(file); |
3102 | 3102 | ||
3103 | /* file_mask_to_av won't add FILE__WRITE if MAY_APPEND is set */ | 3103 | /* file_mask_to_av won't add FILE__WRITE if MAY_APPEND is set */ |
3104 | if ((file->f_flags & O_APPEND) && (mask & MAY_WRITE)) | 3104 | if ((file->f_flags & O_APPEND) && (mask & MAY_WRITE)) |
3105 | mask |= MAY_APPEND; | 3105 | mask |= MAY_APPEND; |
3106 | 3106 | ||
3107 | return file_has_perm(cred, file, | 3107 | return file_has_perm(cred, file, |
3108 | file_mask_to_av(inode->i_mode, mask)); | 3108 | file_mask_to_av(inode->i_mode, mask)); |
3109 | } | 3109 | } |
3110 | 3110 | ||
3111 | static int selinux_file_permission(struct file *file, int mask) | 3111 | static int selinux_file_permission(struct file *file, int mask) |
3112 | { | 3112 | { |
3113 | struct inode *inode = file_inode(file); | 3113 | struct inode *inode = file_inode(file); |
3114 | struct file_security_struct *fsec = file->f_security; | 3114 | struct file_security_struct *fsec = file->f_security; |
3115 | struct inode_security_struct *isec = inode->i_security; | 3115 | struct inode_security_struct *isec = inode->i_security; |
3116 | u32 sid = current_sid(); | 3116 | u32 sid = current_sid(); |
3117 | 3117 | ||
3118 | if (!mask) | 3118 | if (!mask) |
3119 | /* No permission to check. Existence test. */ | 3119 | /* No permission to check. Existence test. */ |
3120 | return 0; | 3120 | return 0; |
3121 | 3121 | ||
3122 | if (sid == fsec->sid && fsec->isid == isec->sid && | 3122 | if (sid == fsec->sid && fsec->isid == isec->sid && |
3123 | fsec->pseqno == avc_policy_seqno()) | 3123 | fsec->pseqno == avc_policy_seqno()) |
3124 | /* No change since file_open check. */ | 3124 | /* No change since file_open check. */ |
3125 | return 0; | 3125 | return 0; |
3126 | 3126 | ||
3127 | return selinux_revalidate_file_permission(file, mask); | 3127 | return selinux_revalidate_file_permission(file, mask); |
3128 | } | 3128 | } |
3129 | 3129 | ||
/* LSM hook: delegate to file_alloc_security() for @file and pass its result through. */
static int selinux_file_alloc_security(struct file *file)
{
	int rc = file_alloc_security(file);

	return rc;
}
3134 | 3134 | ||
/* LSM hook: delegate to file_free_security() for @file. */
static void selinux_file_free_security(struct file *file)
{
	file_free_security(file);
}
3139 | 3139 | ||
3140 | static int selinux_file_ioctl(struct file *file, unsigned int cmd, | 3140 | static int selinux_file_ioctl(struct file *file, unsigned int cmd, |
3141 | unsigned long arg) | 3141 | unsigned long arg) |
3142 | { | 3142 | { |
3143 | const struct cred *cred = current_cred(); | 3143 | const struct cred *cred = current_cred(); |
3144 | int error = 0; | 3144 | int error = 0; |
3145 | 3145 | ||
3146 | switch (cmd) { | 3146 | switch (cmd) { |
3147 | case FIONREAD: | 3147 | case FIONREAD: |
3148 | /* fall through */ | 3148 | /* fall through */ |
3149 | case FIBMAP: | 3149 | case FIBMAP: |
3150 | /* fall through */ | 3150 | /* fall through */ |
3151 | case FIGETBSZ: | 3151 | case FIGETBSZ: |
3152 | /* fall through */ | 3152 | /* fall through */ |
3153 | case FS_IOC_GETFLAGS: | 3153 | case FS_IOC_GETFLAGS: |
3154 | /* fall through */ | 3154 | /* fall through */ |
3155 | case FS_IOC_GETVERSION: | 3155 | case FS_IOC_GETVERSION: |
3156 | error = file_has_perm(cred, file, FILE__GETATTR); | 3156 | error = file_has_perm(cred, file, FILE__GETATTR); |
3157 | break; | 3157 | break; |
3158 | 3158 | ||
3159 | case FS_IOC_SETFLAGS: | 3159 | case FS_IOC_SETFLAGS: |
3160 | /* fall through */ | 3160 | /* fall through */ |
3161 | case FS_IOC_SETVERSION: | 3161 | case FS_IOC_SETVERSION: |
3162 | error = file_has_perm(cred, file, FILE__SETATTR); | 3162 | error = file_has_perm(cred, file, FILE__SETATTR); |
3163 | break; | 3163 | break; |
3164 | 3164 | ||
3165 | /* sys_ioctl() checks */ | 3165 | /* sys_ioctl() checks */ |
3166 | case FIONBIO: | 3166 | case FIONBIO: |
3167 | /* fall through */ | 3167 | /* fall through */ |
3168 | case FIOASYNC: | 3168 | case FIOASYNC: |
3169 | error = file_has_perm(cred, file, 0); | 3169 | error = file_has_perm(cred, file, 0); |
3170 | break; | 3170 | break; |
3171 | 3171 | ||
3172 | case KDSKBENT: | 3172 | case KDSKBENT: |
3173 | case KDSKBSENT: | 3173 | case KDSKBSENT: |
3174 | error = cred_has_capability(cred, CAP_SYS_TTY_CONFIG, | 3174 | error = cred_has_capability(cred, CAP_SYS_TTY_CONFIG, |
3175 | SECURITY_CAP_AUDIT); | 3175 | SECURITY_CAP_AUDIT); |
3176 | break; | 3176 | break; |
3177 | 3177 | ||
3178 | /* default case assumes that the command will go | 3178 | /* default case assumes that the command will go |
3179 | * to the file's ioctl() function. | 3179 | * to the file's ioctl() function. |
3180 | */ | 3180 | */ |
3181 | default: | 3181 | default: |
3182 | error = file_has_perm(cred, file, FILE__IOCTL); | 3182 | error = file_has_perm(cred, file, FILE__IOCTL); |
3183 | } | 3183 | } |
3184 | return error; | 3184 | return error; |
3185 | } | 3185 | } |
3186 | 3186 | ||
/*
 * When non-zero, file_map_prot_check() and selinux_file_mprotect() apply
 * their additional checks before memory is made executable.  It is only
 * read, never assigned, in the functions that follow.
 */
static int default_noexec;
3188 | 3188 | ||
3189 | static int file_map_prot_check(struct file *file, unsigned long prot, int shared) | 3189 | static int file_map_prot_check(struct file *file, unsigned long prot, int shared) |
3190 | { | 3190 | { |
3191 | const struct cred *cred = current_cred(); | 3191 | const struct cred *cred = current_cred(); |
3192 | int rc = 0; | 3192 | int rc = 0; |
3193 | 3193 | ||
3194 | if (default_noexec && | 3194 | if (default_noexec && |
3195 | (prot & PROT_EXEC) && (!file || (!shared && (prot & PROT_WRITE)))) { | 3195 | (prot & PROT_EXEC) && (!file || (!shared && (prot & PROT_WRITE)))) { |
3196 | /* | 3196 | /* |
3197 | * We are making executable an anonymous mapping or a | 3197 | * We are making executable an anonymous mapping or a |
3198 | * private file mapping that will also be writable. | 3198 | * private file mapping that will also be writable. |
3199 | * This has an additional check. | 3199 | * This has an additional check. |
3200 | */ | 3200 | */ |
3201 | rc = cred_has_perm(cred, cred, PROCESS__EXECMEM); | 3201 | rc = cred_has_perm(cred, cred, PROCESS__EXECMEM); |
3202 | if (rc) | 3202 | if (rc) |
3203 | goto error; | 3203 | goto error; |
3204 | } | 3204 | } |
3205 | 3205 | ||
3206 | if (file) { | 3206 | if (file) { |
3207 | /* read access is always possible with a mapping */ | 3207 | /* read access is always possible with a mapping */ |
3208 | u32 av = FILE__READ; | 3208 | u32 av = FILE__READ; |
3209 | 3209 | ||
3210 | /* write access only matters if the mapping is shared */ | 3210 | /* write access only matters if the mapping is shared */ |
3211 | if (shared && (prot & PROT_WRITE)) | 3211 | if (shared && (prot & PROT_WRITE)) |
3212 | av |= FILE__WRITE; | 3212 | av |= FILE__WRITE; |
3213 | 3213 | ||
3214 | if (prot & PROT_EXEC) | 3214 | if (prot & PROT_EXEC) |
3215 | av |= FILE__EXECUTE; | 3215 | av |= FILE__EXECUTE; |
3216 | 3216 | ||
3217 | return file_has_perm(cred, file, av); | 3217 | return file_has_perm(cred, file, av); |
3218 | } | 3218 | } |
3219 | 3219 | ||
3220 | error: | 3220 | error: |
3221 | return rc; | 3221 | return rc; |
3222 | } | 3222 | } |
3223 | 3223 | ||
3224 | static int selinux_mmap_addr(unsigned long addr) | 3224 | static int selinux_mmap_addr(unsigned long addr) |
3225 | { | 3225 | { |
3226 | int rc; | 3226 | int rc; |
3227 | 3227 | ||
3228 | /* do DAC check on address space usage */ | 3228 | /* do DAC check on address space usage */ |
3229 | rc = cap_mmap_addr(addr); | 3229 | rc = cap_mmap_addr(addr); |
3230 | if (rc) | 3230 | if (rc) |
3231 | return rc; | 3231 | return rc; |
3232 | 3232 | ||
3233 | if (addr < CONFIG_LSM_MMAP_MIN_ADDR) { | 3233 | if (addr < CONFIG_LSM_MMAP_MIN_ADDR) { |
3234 | u32 sid = current_sid(); | 3234 | u32 sid = current_sid(); |
3235 | rc = avc_has_perm(sid, sid, SECCLASS_MEMPROTECT, | 3235 | rc = avc_has_perm(sid, sid, SECCLASS_MEMPROTECT, |
3236 | MEMPROTECT__MMAP_ZERO, NULL); | 3236 | MEMPROTECT__MMAP_ZERO, NULL); |
3237 | } | 3237 | } |
3238 | 3238 | ||
3239 | return rc; | 3239 | return rc; |
3240 | } | 3240 | } |
3241 | 3241 | ||
3242 | static int selinux_mmap_file(struct file *file, unsigned long reqprot, | 3242 | static int selinux_mmap_file(struct file *file, unsigned long reqprot, |
3243 | unsigned long prot, unsigned long flags) | 3243 | unsigned long prot, unsigned long flags) |
3244 | { | 3244 | { |
3245 | if (selinux_checkreqprot) | 3245 | if (selinux_checkreqprot) |
3246 | prot = reqprot; | 3246 | prot = reqprot; |
3247 | 3247 | ||
3248 | return file_map_prot_check(file, prot, | 3248 | return file_map_prot_check(file, prot, |
3249 | (flags & MAP_TYPE) == MAP_SHARED); | 3249 | (flags & MAP_TYPE) == MAP_SHARED); |
3250 | } | 3250 | } |
3251 | 3251 | ||
3252 | static int selinux_file_mprotect(struct vm_area_struct *vma, | 3252 | static int selinux_file_mprotect(struct vm_area_struct *vma, |
3253 | unsigned long reqprot, | 3253 | unsigned long reqprot, |
3254 | unsigned long prot) | 3254 | unsigned long prot) |
3255 | { | 3255 | { |
3256 | const struct cred *cred = current_cred(); | 3256 | const struct cred *cred = current_cred(); |
3257 | 3257 | ||
3258 | if (selinux_checkreqprot) | 3258 | if (selinux_checkreqprot) |
3259 | prot = reqprot; | 3259 | prot = reqprot; |
3260 | 3260 | ||
3261 | if (default_noexec && | 3261 | if (default_noexec && |
3262 | (prot & PROT_EXEC) && !(vma->vm_flags & VM_EXEC)) { | 3262 | (prot & PROT_EXEC) && !(vma->vm_flags & VM_EXEC)) { |
3263 | int rc = 0; | 3263 | int rc = 0; |
3264 | if (vma->vm_start >= vma->vm_mm->start_brk && | 3264 | if (vma->vm_start >= vma->vm_mm->start_brk && |
3265 | vma->vm_end <= vma->vm_mm->brk) { | 3265 | vma->vm_end <= vma->vm_mm->brk) { |
3266 | rc = cred_has_perm(cred, cred, PROCESS__EXECHEAP); | 3266 | rc = cred_has_perm(cred, cred, PROCESS__EXECHEAP); |
3267 | } else if (!vma->vm_file && | 3267 | } else if (!vma->vm_file && |
3268 | vma->vm_start <= vma->vm_mm->start_stack && | 3268 | vma->vm_start <= vma->vm_mm->start_stack && |
3269 | vma->vm_end >= vma->vm_mm->start_stack) { | 3269 | vma->vm_end >= vma->vm_mm->start_stack) { |
3270 | rc = current_has_perm(current, PROCESS__EXECSTACK); | 3270 | rc = current_has_perm(current, PROCESS__EXECSTACK); |
3271 | } else if (vma->vm_file && vma->anon_vma) { | 3271 | } else if (vma->vm_file && vma->anon_vma) { |
3272 | /* | 3272 | /* |
3273 | * We are making executable a file mapping that has | 3273 | * We are making executable a file mapping that has |
3274 | * had some COW done. Since pages might have been | 3274 | * had some COW done. Since pages might have been |
3275 | * written, check ability to execute the possibly | 3275 | * written, check ability to execute the possibly |
3276 | * modified content. This typically should only | 3276 | * modified content. This typically should only |
3277 | * occur for text relocations. | 3277 | * occur for text relocations. |
3278 | */ | 3278 | */ |
3279 | rc = file_has_perm(cred, vma->vm_file, FILE__EXECMOD); | 3279 | rc = file_has_perm(cred, vma->vm_file, FILE__EXECMOD); |
3280 | } | 3280 | } |
3281 | if (rc) | 3281 | if (rc) |
3282 | return rc; | 3282 | return rc; |
3283 | } | 3283 | } |
3284 | 3284 | ||
3285 | return file_map_prot_check(vma->vm_file, prot, vma->vm_flags&VM_SHARED); | 3285 | return file_map_prot_check(vma->vm_file, prot, vma->vm_flags&VM_SHARED); |
3286 | } | 3286 | } |
3287 | 3287 | ||
3288 | static int selinux_file_lock(struct file *file, unsigned int cmd) | 3288 | static int selinux_file_lock(struct file *file, unsigned int cmd) |
3289 | { | 3289 | { |
3290 | const struct cred *cred = current_cred(); | 3290 | const struct cred *cred = current_cred(); |
3291 | 3291 | ||
3292 | return file_has_perm(cred, file, FILE__LOCK); | 3292 | return file_has_perm(cred, file, FILE__LOCK); |
3293 | } | 3293 | } |
3294 | 3294 | ||
3295 | static int selinux_file_fcntl(struct file *file, unsigned int cmd, | 3295 | static int selinux_file_fcntl(struct file *file, unsigned int cmd, |
3296 | unsigned long arg) | 3296 | unsigned long arg) |
3297 | { | 3297 | { |
3298 | const struct cred *cred = current_cred(); | 3298 | const struct cred *cred = current_cred(); |
3299 | int err = 0; | 3299 | int err = 0; |
3300 | 3300 | ||
3301 | switch (cmd) { | 3301 | switch (cmd) { |
3302 | case F_SETFL: | 3302 | case F_SETFL: |
3303 | if ((file->f_flags & O_APPEND) && !(arg & O_APPEND)) { | 3303 | if ((file->f_flags & O_APPEND) && !(arg & O_APPEND)) { |
3304 | err = file_has_perm(cred, file, FILE__WRITE); | 3304 | err = file_has_perm(cred, file, FILE__WRITE); |
3305 | break; | 3305 | break; |
3306 | } | 3306 | } |
3307 | /* fall through */ | 3307 | /* fall through */ |
3308 | case F_SETOWN: | 3308 | case F_SETOWN: |
3309 | case F_SETSIG: | 3309 | case F_SETSIG: |
3310 | case F_GETFL: | 3310 | case F_GETFL: |
3311 | case F_GETOWN: | 3311 | case F_GETOWN: |
3312 | case F_GETSIG: | 3312 | case F_GETSIG: |
3313 | case F_GETOWNER_UIDS: | 3313 | case F_GETOWNER_UIDS: |
3314 | /* Just check FD__USE permission */ | 3314 | /* Just check FD__USE permission */ |
3315 | err = file_has_perm(cred, file, 0); | 3315 | err = file_has_perm(cred, file, 0); |
3316 | break; | 3316 | break; |
3317 | case F_GETLK: | 3317 | case F_GETLK: |
3318 | case F_SETLK: | 3318 | case F_SETLK: |
3319 | case F_SETLKW: | 3319 | case F_SETLKW: |
3320 | case F_GETLKP: | 3320 | case F_OFD_GETLK: |
3321 | case F_SETLKP: | 3321 | case F_OFD_SETLK: |
3322 | case F_SETLKPW: | 3322 | case F_OFD_SETLKW: |
3323 | #if BITS_PER_LONG == 32 | 3323 | #if BITS_PER_LONG == 32 |
3324 | case F_GETLK64: | 3324 | case F_GETLK64: |
3325 | case F_SETLK64: | 3325 | case F_SETLK64: |
3326 | case F_SETLKW64: | 3326 | case F_SETLKW64: |
3327 | #endif | 3327 | #endif |
3328 | err = file_has_perm(cred, file, FILE__LOCK); | 3328 | err = file_has_perm(cred, file, FILE__LOCK); |
3329 | break; | 3329 | break; |
3330 | } | 3330 | } |
3331 | 3331 | ||
3332 | return err; | 3332 | return err; |
3333 | } | 3333 | } |
3334 | 3334 | ||
3335 | static int selinux_file_set_fowner(struct file *file) | 3335 | static int selinux_file_set_fowner(struct file *file) |
3336 | { | 3336 | { |
3337 | struct file_security_struct *fsec; | 3337 | struct file_security_struct *fsec; |
3338 | 3338 | ||
3339 | fsec = file->f_security; | 3339 | fsec = file->f_security; |
3340 | fsec->fown_sid = current_sid(); | 3340 | fsec->fown_sid = current_sid(); |
3341 | 3341 | ||
3342 | return 0; | 3342 | return 0; |
3343 | } | 3343 | } |
3344 | 3344 | ||
3345 | static int selinux_file_send_sigiotask(struct task_struct *tsk, | 3345 | static int selinux_file_send_sigiotask(struct task_struct *tsk, |
3346 | struct fown_struct *fown, int signum) | 3346 | struct fown_struct *fown, int signum) |
3347 | { | 3347 | { |
3348 | struct file *file; | 3348 | struct file *file; |
3349 | u32 sid = task_sid(tsk); | 3349 | u32 sid = task_sid(tsk); |
3350 | u32 perm; | 3350 | u32 perm; |
3351 | struct file_security_struct *fsec; | 3351 | struct file_security_struct *fsec; |
3352 | 3352 | ||
3353 | /* struct fown_struct is never outside the context of a struct file */ | 3353 | /* struct fown_struct is never outside the context of a struct file */ |
3354 | file = container_of(fown, struct file, f_owner); | 3354 | file = container_of(fown, struct file, f_owner); |
3355 | 3355 | ||
3356 | fsec = file->f_security; | 3356 | fsec = file->f_security; |
3357 | 3357 | ||
3358 | if (!signum) | 3358 | if (!signum) |
3359 | perm = signal_to_av(SIGIO); /* as per send_sigio_to_task */ | 3359 | perm = signal_to_av(SIGIO); /* as per send_sigio_to_task */ |
3360 | else | 3360 | else |
3361 | perm = signal_to_av(signum); | 3361 | perm = signal_to_av(signum); |
3362 | 3362 | ||
3363 | return avc_has_perm(fsec->fown_sid, sid, | 3363 | return avc_has_perm(fsec->fown_sid, sid, |
3364 | SECCLASS_PROCESS, perm, NULL); | 3364 | SECCLASS_PROCESS, perm, NULL); |
3365 | } | 3365 | } |
3366 | 3366 | ||
3367 | static int selinux_file_receive(struct file *file) | 3367 | static int selinux_file_receive(struct file *file) |
3368 | { | 3368 | { |
3369 | const struct cred *cred = current_cred(); | 3369 | const struct cred *cred = current_cred(); |
3370 | 3370 | ||
3371 | return file_has_perm(cred, file, file_to_av(file)); | 3371 | return file_has_perm(cred, file, file_to_av(file)); |
3372 | } | 3372 | } |
3373 | 3373 | ||
3374 | static int selinux_file_open(struct file *file, const struct cred *cred) | 3374 | static int selinux_file_open(struct file *file, const struct cred *cred) |
3375 | { | 3375 | { |
3376 | struct file_security_struct *fsec; | 3376 | struct file_security_struct *fsec; |
3377 | struct inode_security_struct *isec; | 3377 | struct inode_security_struct *isec; |
3378 | 3378 | ||
3379 | fsec = file->f_security; | 3379 | fsec = file->f_security; |
3380 | isec = file_inode(file)->i_security; | 3380 | isec = file_inode(file)->i_security; |
3381 | /* | 3381 | /* |
3382 | * Save inode label and policy sequence number | 3382 | * Save inode label and policy sequence number |
3383 | * at open-time so that selinux_file_permission | 3383 | * at open-time so that selinux_file_permission |
3384 | * can determine whether revalidation is necessary. | 3384 | * can determine whether revalidation is necessary. |
3385 | * Task label is already saved in the file security | 3385 | * Task label is already saved in the file security |
3386 | * struct as its SID. | 3386 | * struct as its SID. |
3387 | */ | 3387 | */ |
3388 | fsec->isid = isec->sid; | 3388 | fsec->isid = isec->sid; |
3389 | fsec->pseqno = avc_policy_seqno(); | 3389 | fsec->pseqno = avc_policy_seqno(); |
3390 | /* | 3390 | /* |
3391 | * Since the inode label or policy seqno may have changed | 3391 | * Since the inode label or policy seqno may have changed |
3392 | * between the selinux_inode_permission check and the saving | 3392 | * between the selinux_inode_permission check and the saving |
3393 | * of state above, recheck that access is still permitted. | 3393 | * of state above, recheck that access is still permitted. |
3394 | * Otherwise, access might never be revalidated against the | 3394 | * Otherwise, access might never be revalidated against the |
3395 | * new inode label or new policy. | 3395 | * new inode label or new policy. |
3396 | * This check is not redundant - do not remove. | 3396 | * This check is not redundant - do not remove. |
3397 | */ | 3397 | */ |
3398 | return file_path_has_perm(cred, file, open_file_to_av(file)); | 3398 | return file_path_has_perm(cred, file, open_file_to_av(file)); |
3399 | } | 3399 | } |
3400 | 3400 | ||
3401 | /* task security operations */ | 3401 | /* task security operations */ |
3402 | 3402 | ||
3403 | static int selinux_task_create(unsigned long clone_flags) | 3403 | static int selinux_task_create(unsigned long clone_flags) |
3404 | { | 3404 | { |
3405 | return current_has_perm(current, PROCESS__FORK); | 3405 | return current_has_perm(current, PROCESS__FORK); |
3406 | } | 3406 | } |
3407 | 3407 | ||
3408 | /* | 3408 | /* |
3409 | * allocate the SELinux part of blank credentials | 3409 | * allocate the SELinux part of blank credentials |
3410 | */ | 3410 | */ |
3411 | static int selinux_cred_alloc_blank(struct cred *cred, gfp_t gfp) | 3411 | static int selinux_cred_alloc_blank(struct cred *cred, gfp_t gfp) |
3412 | { | 3412 | { |
3413 | struct task_security_struct *tsec; | 3413 | struct task_security_struct *tsec; |
3414 | 3414 | ||
3415 | tsec = kzalloc(sizeof(struct task_security_struct), gfp); | 3415 | tsec = kzalloc(sizeof(struct task_security_struct), gfp); |
3416 | if (!tsec) | 3416 | if (!tsec) |
3417 | return -ENOMEM; | 3417 | return -ENOMEM; |
3418 | 3418 | ||
3419 | cred->security = tsec; | 3419 | cred->security = tsec; |
3420 | return 0; | 3420 | return 0; |
3421 | } | 3421 | } |
3422 | 3422 | ||
3423 | /* | 3423 | /* |
3424 | * detach and free the LSM part of a set of credentials | 3424 | * detach and free the LSM part of a set of credentials |
3425 | */ | 3425 | */ |
3426 | static void selinux_cred_free(struct cred *cred) | 3426 | static void selinux_cred_free(struct cred *cred) |
3427 | { | 3427 | { |
3428 | struct task_security_struct *tsec = cred->security; | 3428 | struct task_security_struct *tsec = cred->security; |
3429 | 3429 | ||
3430 | /* | 3430 | /* |
3431 | * cred->security == NULL if security_cred_alloc_blank() or | 3431 | * cred->security == NULL if security_cred_alloc_blank() or |
3432 | * security_prepare_creds() returned an error. | 3432 | * security_prepare_creds() returned an error. |
3433 | */ | 3433 | */ |
3434 | BUG_ON(cred->security && (unsigned long) cred->security < PAGE_SIZE); | 3434 | BUG_ON(cred->security && (unsigned long) cred->security < PAGE_SIZE); |
3435 | cred->security = (void *) 0x7UL; | 3435 | cred->security = (void *) 0x7UL; |
3436 | kfree(tsec); | 3436 | kfree(tsec); |
3437 | } | 3437 | } |
3438 | 3438 | ||
3439 | /* | 3439 | /* |
3440 | * prepare a new set of credentials for modification | 3440 | * prepare a new set of credentials for modification |
3441 | */ | 3441 | */ |
3442 | static int selinux_cred_prepare(struct cred *new, const struct cred *old, | 3442 | static int selinux_cred_prepare(struct cred *new, const struct cred *old, |
3443 | gfp_t gfp) | 3443 | gfp_t gfp) |
3444 | { | 3444 | { |
3445 | const struct task_security_struct *old_tsec; | 3445 | const struct task_security_struct *old_tsec; |
3446 | struct task_security_struct *tsec; | 3446 | struct task_security_struct *tsec; |
3447 | 3447 | ||
3448 | old_tsec = old->security; | 3448 | old_tsec = old->security; |
3449 | 3449 | ||
3450 | tsec = kmemdup(old_tsec, sizeof(struct task_security_struct), gfp); | 3450 | tsec = kmemdup(old_tsec, sizeof(struct task_security_struct), gfp); |
3451 | if (!tsec) | 3451 | if (!tsec) |
3452 | return -ENOMEM; | 3452 | return -ENOMEM; |
3453 | 3453 | ||
3454 | new->security = tsec; | 3454 | new->security = tsec; |
3455 | return 0; | 3455 | return 0; |
3456 | } | 3456 | } |
3457 | 3457 | ||
3458 | /* | 3458 | /* |
3459 | * transfer the SELinux data to a blank set of creds | 3459 | * transfer the SELinux data to a blank set of creds |
3460 | */ | 3460 | */ |
3461 | static void selinux_cred_transfer(struct cred *new, const struct cred *old) | 3461 | static void selinux_cred_transfer(struct cred *new, const struct cred *old) |
3462 | { | 3462 | { |
3463 | const struct task_security_struct *old_tsec = old->security; | 3463 | const struct task_security_struct *old_tsec = old->security; |
3464 | struct task_security_struct *tsec = new->security; | 3464 | struct task_security_struct *tsec = new->security; |
3465 | 3465 | ||
3466 | *tsec = *old_tsec; | 3466 | *tsec = *old_tsec; |
3467 | } | 3467 | } |
3468 | 3468 | ||
3469 | /* | 3469 | /* |
3470 | * set the security data for a kernel service | 3470 | * set the security data for a kernel service |
3471 | * - all the creation contexts are set to unlabelled | 3471 | * - all the creation contexts are set to unlabelled |
3472 | */ | 3472 | */ |
3473 | static int selinux_kernel_act_as(struct cred *new, u32 secid) | 3473 | static int selinux_kernel_act_as(struct cred *new, u32 secid) |
3474 | { | 3474 | { |
3475 | struct task_security_struct *tsec = new->security; | 3475 | struct task_security_struct *tsec = new->security; |
3476 | u32 sid = current_sid(); | 3476 | u32 sid = current_sid(); |
3477 | int ret; | 3477 | int ret; |
3478 | 3478 | ||
3479 | ret = avc_has_perm(sid, secid, | 3479 | ret = avc_has_perm(sid, secid, |
3480 | SECCLASS_KERNEL_SERVICE, | 3480 | SECCLASS_KERNEL_SERVICE, |
3481 | KERNEL_SERVICE__USE_AS_OVERRIDE, | 3481 | KERNEL_SERVICE__USE_AS_OVERRIDE, |
3482 | NULL); | 3482 | NULL); |
3483 | if (ret == 0) { | 3483 | if (ret == 0) { |
3484 | tsec->sid = secid; | 3484 | tsec->sid = secid; |
3485 | tsec->create_sid = 0; | 3485 | tsec->create_sid = 0; |
3486 | tsec->keycreate_sid = 0; | 3486 | tsec->keycreate_sid = 0; |
3487 | tsec->sockcreate_sid = 0; | 3487 | tsec->sockcreate_sid = 0; |
3488 | } | 3488 | } |
3489 | return ret; | 3489 | return ret; |
3490 | } | 3490 | } |
3491 | 3491 | ||
3492 | /* | 3492 | /* |
3493 | * set the file creation context in a security record to the same as the | 3493 | * set the file creation context in a security record to the same as the |
3494 | * objective context of the specified inode | 3494 | * objective context of the specified inode |
3495 | */ | 3495 | */ |
3496 | static int selinux_kernel_create_files_as(struct cred *new, struct inode *inode) | 3496 | static int selinux_kernel_create_files_as(struct cred *new, struct inode *inode) |
3497 | { | 3497 | { |
3498 | struct inode_security_struct *isec = inode->i_security; | 3498 | struct inode_security_struct *isec = inode->i_security; |
3499 | struct task_security_struct *tsec = new->security; | 3499 | struct task_security_struct *tsec = new->security; |
3500 | u32 sid = current_sid(); | 3500 | u32 sid = current_sid(); |
3501 | int ret; | 3501 | int ret; |
3502 | 3502 | ||
3503 | ret = avc_has_perm(sid, isec->sid, | 3503 | ret = avc_has_perm(sid, isec->sid, |
3504 | SECCLASS_KERNEL_SERVICE, | 3504 | SECCLASS_KERNEL_SERVICE, |
3505 | KERNEL_SERVICE__CREATE_FILES_AS, | 3505 | KERNEL_SERVICE__CREATE_FILES_AS, |
3506 | NULL); | 3506 | NULL); |
3507 | 3507 | ||
3508 | if (ret == 0) | 3508 | if (ret == 0) |
3509 | tsec->create_sid = isec->sid; | 3509 | tsec->create_sid = isec->sid; |
3510 | return ret; | 3510 | return ret; |
3511 | } | 3511 | } |
3512 | 3512 | ||
3513 | static int selinux_kernel_module_request(char *kmod_name) | 3513 | static int selinux_kernel_module_request(char *kmod_name) |
3514 | { | 3514 | { |
3515 | u32 sid; | 3515 | u32 sid; |
3516 | struct common_audit_data ad; | 3516 | struct common_audit_data ad; |
3517 | 3517 | ||
3518 | sid = task_sid(current); | 3518 | sid = task_sid(current); |
3519 | 3519 | ||
3520 | ad.type = LSM_AUDIT_DATA_KMOD; | 3520 | ad.type = LSM_AUDIT_DATA_KMOD; |
3521 | ad.u.kmod_name = kmod_name; | 3521 | ad.u.kmod_name = kmod_name; |
3522 | 3522 | ||
3523 | return avc_has_perm(sid, SECINITSID_KERNEL, SECCLASS_SYSTEM, | 3523 | return avc_has_perm(sid, SECINITSID_KERNEL, SECCLASS_SYSTEM, |
3524 | SYSTEM__MODULE_REQUEST, &ad); | 3524 | SYSTEM__MODULE_REQUEST, &ad); |
3525 | } | 3525 | } |
3526 | 3526 | ||
3527 | static int selinux_task_setpgid(struct task_struct *p, pid_t pgid) | 3527 | static int selinux_task_setpgid(struct task_struct *p, pid_t pgid) |
3528 | { | 3528 | { |
3529 | return current_has_perm(p, PROCESS__SETPGID); | 3529 | return current_has_perm(p, PROCESS__SETPGID); |
3530 | } | 3530 | } |
3531 | 3531 | ||
3532 | static int selinux_task_getpgid(struct task_struct *p) | 3532 | static int selinux_task_getpgid(struct task_struct *p) |
3533 | { | 3533 | { |
3534 | return current_has_perm(p, PROCESS__GETPGID); | 3534 | return current_has_perm(p, PROCESS__GETPGID); |
3535 | } | 3535 | } |
3536 | 3536 | ||
3537 | static int selinux_task_getsid(struct task_struct *p) | 3537 | static int selinux_task_getsid(struct task_struct *p) |
3538 | { | 3538 | { |
3539 | return current_has_perm(p, PROCESS__GETSESSION); | 3539 | return current_has_perm(p, PROCESS__GETSESSION); |
3540 | } | 3540 | } |
3541 | 3541 | ||
3542 | static void selinux_task_getsecid(struct task_struct *p, u32 *secid) | 3542 | static void selinux_task_getsecid(struct task_struct *p, u32 *secid) |
3543 | { | 3543 | { |
3544 | *secid = task_sid(p); | 3544 | *secid = task_sid(p); |
3545 | } | 3545 | } |
3546 | 3546 | ||
3547 | static int selinux_task_setnice(struct task_struct *p, int nice) | 3547 | static int selinux_task_setnice(struct task_struct *p, int nice) |
3548 | { | 3548 | { |
3549 | int rc; | 3549 | int rc; |
3550 | 3550 | ||
3551 | rc = cap_task_setnice(p, nice); | 3551 | rc = cap_task_setnice(p, nice); |
3552 | if (rc) | 3552 | if (rc) |
3553 | return rc; | 3553 | return rc; |
3554 | 3554 | ||
3555 | return current_has_perm(p, PROCESS__SETSCHED); | 3555 | return current_has_perm(p, PROCESS__SETSCHED); |
3556 | } | 3556 | } |
3557 | 3557 | ||
3558 | static int selinux_task_setioprio(struct task_struct *p, int ioprio) | 3558 | static int selinux_task_setioprio(struct task_struct *p, int ioprio) |
3559 | { | 3559 | { |
3560 | int rc; | 3560 | int rc; |
3561 | 3561 | ||
3562 | rc = cap_task_setioprio(p, ioprio); | 3562 | rc = cap_task_setioprio(p, ioprio); |
3563 | if (rc) | 3563 | if (rc) |
3564 | return rc; | 3564 | return rc; |
3565 | 3565 | ||
3566 | return current_has_perm(p, PROCESS__SETSCHED); | 3566 | return current_has_perm(p, PROCESS__SETSCHED); |
3567 | } | 3567 | } |
3568 | 3568 | ||
3569 | static int selinux_task_getioprio(struct task_struct *p) | 3569 | static int selinux_task_getioprio(struct task_struct *p) |
3570 | { | 3570 | { |
3571 | return current_has_perm(p, PROCESS__GETSCHED); | 3571 | return current_has_perm(p, PROCESS__GETSCHED); |
3572 | } | 3572 | } |
3573 | 3573 | ||
3574 | static int selinux_task_setrlimit(struct task_struct *p, unsigned int resource, | 3574 | static int selinux_task_setrlimit(struct task_struct *p, unsigned int resource, |
3575 | struct rlimit *new_rlim) | 3575 | struct rlimit *new_rlim) |
3576 | { | 3576 | { |
3577 | struct rlimit *old_rlim = p->signal->rlim + resource; | 3577 | struct rlimit *old_rlim = p->signal->rlim + resource; |
3578 | 3578 | ||
3579 | /* Control the ability to change the hard limit (whether | 3579 | /* Control the ability to change the hard limit (whether |
3580 | lowering or raising it), so that the hard limit can | 3580 | lowering or raising it), so that the hard limit can |
3581 | later be used as a safe reset point for the soft limit | 3581 | later be used as a safe reset point for the soft limit |
3582 | upon context transitions. See selinux_bprm_committing_creds. */ | 3582 | upon context transitions. See selinux_bprm_committing_creds. */ |
3583 | if (old_rlim->rlim_max != new_rlim->rlim_max) | 3583 | if (old_rlim->rlim_max != new_rlim->rlim_max) |
3584 | return current_has_perm(p, PROCESS__SETRLIMIT); | 3584 | return current_has_perm(p, PROCESS__SETRLIMIT); |
3585 | 3585 | ||
3586 | return 0; | 3586 | return 0; |
3587 | } | 3587 | } |
3588 | 3588 | ||
3589 | static int selinux_task_setscheduler(struct task_struct *p) | 3589 | static int selinux_task_setscheduler(struct task_struct *p) |
3590 | { | 3590 | { |
3591 | int rc; | 3591 | int rc; |
3592 | 3592 | ||
3593 | rc = cap_task_setscheduler(p); | 3593 | rc = cap_task_setscheduler(p); |
3594 | if (rc) | 3594 | if (rc) |
3595 | return rc; | 3595 | return rc; |
3596 | 3596 | ||
3597 | return current_has_perm(p, PROCESS__SETSCHED); | 3597 | return current_has_perm(p, PROCESS__SETSCHED); |
3598 | } | 3598 | } |
3599 | 3599 | ||
3600 | static int selinux_task_getscheduler(struct task_struct *p) | 3600 | static int selinux_task_getscheduler(struct task_struct *p) |
3601 | { | 3601 | { |
3602 | return current_has_perm(p, PROCESS__GETSCHED); | 3602 | return current_has_perm(p, PROCESS__GETSCHED); |
3603 | } | 3603 | } |
3604 | 3604 | ||
3605 | static int selinux_task_movememory(struct task_struct *p) | 3605 | static int selinux_task_movememory(struct task_struct *p) |
3606 | { | 3606 | { |
3607 | return current_has_perm(p, PROCESS__SETSCHED); | 3607 | return current_has_perm(p, PROCESS__SETSCHED); |
3608 | } | 3608 | } |
3609 | 3609 | ||
3610 | static int selinux_task_kill(struct task_struct *p, struct siginfo *info, | 3610 | static int selinux_task_kill(struct task_struct *p, struct siginfo *info, |
3611 | int sig, u32 secid) | 3611 | int sig, u32 secid) |
3612 | { | 3612 | { |
3613 | u32 perm; | 3613 | u32 perm; |
3614 | int rc; | 3614 | int rc; |
3615 | 3615 | ||
3616 | if (!sig) | 3616 | if (!sig) |
3617 | perm = PROCESS__SIGNULL; /* null signal; existence test */ | 3617 | perm = PROCESS__SIGNULL; /* null signal; existence test */ |
3618 | else | 3618 | else |
3619 | perm = signal_to_av(sig); | 3619 | perm = signal_to_av(sig); |
3620 | if (secid) | 3620 | if (secid) |
3621 | rc = avc_has_perm(secid, task_sid(p), | 3621 | rc = avc_has_perm(secid, task_sid(p), |
3622 | SECCLASS_PROCESS, perm, NULL); | 3622 | SECCLASS_PROCESS, perm, NULL); |
3623 | else | 3623 | else |
3624 | rc = current_has_perm(p, perm); | 3624 | rc = current_has_perm(p, perm); |
3625 | return rc; | 3625 | return rc; |
3626 | } | 3626 | } |
3627 | 3627 | ||
3628 | static int selinux_task_wait(struct task_struct *p) | 3628 | static int selinux_task_wait(struct task_struct *p) |
3629 | { | 3629 | { |
3630 | return task_has_perm(p, current, PROCESS__SIGCHLD); | 3630 | return task_has_perm(p, current, PROCESS__SIGCHLD); |
3631 | } | 3631 | } |
3632 | 3632 | ||
3633 | static void selinux_task_to_inode(struct task_struct *p, | 3633 | static void selinux_task_to_inode(struct task_struct *p, |
3634 | struct inode *inode) | 3634 | struct inode *inode) |
3635 | { | 3635 | { |
3636 | struct inode_security_struct *isec = inode->i_security; | 3636 | struct inode_security_struct *isec = inode->i_security; |
3637 | u32 sid = task_sid(p); | 3637 | u32 sid = task_sid(p); |
3638 | 3638 | ||
3639 | isec->sid = sid; | 3639 | isec->sid = sid; |
3640 | isec->initialized = 1; | 3640 | isec->initialized = 1; |
3641 | } | 3641 | } |
3642 | 3642 | ||
3643 | /* Returns error only if unable to parse addresses */ | 3643 | /* Returns error only if unable to parse addresses */ |
3644 | static int selinux_parse_skb_ipv4(struct sk_buff *skb, | 3644 | static int selinux_parse_skb_ipv4(struct sk_buff *skb, |
3645 | struct common_audit_data *ad, u8 *proto) | 3645 | struct common_audit_data *ad, u8 *proto) |
3646 | { | 3646 | { |
3647 | int offset, ihlen, ret = -EINVAL; | 3647 | int offset, ihlen, ret = -EINVAL; |
3648 | struct iphdr _iph, *ih; | 3648 | struct iphdr _iph, *ih; |
3649 | 3649 | ||
3650 | offset = skb_network_offset(skb); | 3650 | offset = skb_network_offset(skb); |
3651 | ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); | 3651 | ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); |
3652 | if (ih == NULL) | 3652 | if (ih == NULL) |
3653 | goto out; | 3653 | goto out; |
3654 | 3654 | ||
3655 | ihlen = ih->ihl * 4; | 3655 | ihlen = ih->ihl * 4; |
3656 | if (ihlen < sizeof(_iph)) | 3656 | if (ihlen < sizeof(_iph)) |
3657 | goto out; | 3657 | goto out; |
3658 | 3658 | ||
3659 | ad->u.net->v4info.saddr = ih->saddr; | 3659 | ad->u.net->v4info.saddr = ih->saddr; |
3660 | ad->u.net->v4info.daddr = ih->daddr; | 3660 | ad->u.net->v4info.daddr = ih->daddr; |
3661 | ret = 0; | 3661 | ret = 0; |
3662 | 3662 | ||
3663 | if (proto) | 3663 | if (proto) |
3664 | *proto = ih->protocol; | 3664 | *proto = ih->protocol; |
3665 | 3665 | ||
3666 | switch (ih->protocol) { | 3666 | switch (ih->protocol) { |
3667 | case IPPROTO_TCP: { | 3667 | case IPPROTO_TCP: { |
3668 | struct tcphdr _tcph, *th; | 3668 | struct tcphdr _tcph, *th; |
3669 | 3669 | ||
3670 | if (ntohs(ih->frag_off) & IP_OFFSET) | 3670 | if (ntohs(ih->frag_off) & IP_OFFSET) |
3671 | break; | 3671 | break; |
3672 | 3672 | ||
3673 | offset += ihlen; | 3673 | offset += ihlen; |
3674 | th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph); | 3674 | th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph); |
3675 | if (th == NULL) | 3675 | if (th == NULL) |
3676 | break; | 3676 | break; |
3677 | 3677 | ||
3678 | ad->u.net->sport = th->source; | 3678 | ad->u.net->sport = th->source; |
3679 | ad->u.net->dport = th->dest; | 3679 | ad->u.net->dport = th->dest; |
3680 | break; | 3680 | break; |
3681 | } | 3681 | } |
3682 | 3682 | ||
3683 | case IPPROTO_UDP: { | 3683 | case IPPROTO_UDP: { |
3684 | struct udphdr _udph, *uh; | 3684 | struct udphdr _udph, *uh; |
3685 | 3685 | ||
3686 | if (ntohs(ih->frag_off) & IP_OFFSET) | 3686 | if (ntohs(ih->frag_off) & IP_OFFSET) |
3687 | break; | 3687 | break; |
3688 | 3688 | ||
3689 | offset += ihlen; | 3689 | offset += ihlen; |
3690 | uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph); | 3690 | uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph); |
3691 | if (uh == NULL) | 3691 | if (uh == NULL) |
3692 | break; | 3692 | break; |
3693 | 3693 | ||
3694 | ad->u.net->sport = uh->source; | 3694 | ad->u.net->sport = uh->source; |
3695 | ad->u.net->dport = uh->dest; | 3695 | ad->u.net->dport = uh->dest; |
3696 | break; | 3696 | break; |
3697 | } | 3697 | } |
3698 | 3698 | ||
3699 | case IPPROTO_DCCP: { | 3699 | case IPPROTO_DCCP: { |
3700 | struct dccp_hdr _dccph, *dh; | 3700 | struct dccp_hdr _dccph, *dh; |
3701 | 3701 | ||
3702 | if (ntohs(ih->frag_off) & IP_OFFSET) | 3702 | if (ntohs(ih->frag_off) & IP_OFFSET) |
3703 | break; | 3703 | break; |
3704 | 3704 | ||
3705 | offset += ihlen; | 3705 | offset += ihlen; |
3706 | dh = skb_header_pointer(skb, offset, sizeof(_dccph), &_dccph); | 3706 | dh = skb_header_pointer(skb, offset, sizeof(_dccph), &_dccph); |
3707 | if (dh == NULL) | 3707 | if (dh == NULL) |
3708 | break; | 3708 | break; |
3709 | 3709 | ||
3710 | ad->u.net->sport = dh->dccph_sport; | 3710 | ad->u.net->sport = dh->dccph_sport; |
3711 | ad->u.net->dport = dh->dccph_dport; | 3711 | ad->u.net->dport = dh->dccph_dport; |
3712 | break; | 3712 | break; |
3713 | } | 3713 | } |
3714 | 3714 | ||
3715 | default: | 3715 | default: |
3716 | break; | 3716 | break; |
3717 | } | 3717 | } |
3718 | out: | 3718 | out: |
3719 | return ret; | 3719 | return ret; |
3720 | } | 3720 | } |
3721 | 3721 | ||
3722 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 3722 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
3723 | 3723 | ||
3724 | /* Returns error only if unable to parse addresses */ | 3724 | /* Returns error only if unable to parse addresses */ |
3725 | static int selinux_parse_skb_ipv6(struct sk_buff *skb, | 3725 | static int selinux_parse_skb_ipv6(struct sk_buff *skb, |
3726 | struct common_audit_data *ad, u8 *proto) | 3726 | struct common_audit_data *ad, u8 *proto) |
3727 | { | 3727 | { |
3728 | u8 nexthdr; | 3728 | u8 nexthdr; |
3729 | int ret = -EINVAL, offset; | 3729 | int ret = -EINVAL, offset; |
3730 | struct ipv6hdr _ipv6h, *ip6; | 3730 | struct ipv6hdr _ipv6h, *ip6; |
3731 | __be16 frag_off; | 3731 | __be16 frag_off; |
3732 | 3732 | ||
3733 | offset = skb_network_offset(skb); | 3733 | offset = skb_network_offset(skb); |
3734 | ip6 = skb_header_pointer(skb, offset, sizeof(_ipv6h), &_ipv6h); | 3734 | ip6 = skb_header_pointer(skb, offset, sizeof(_ipv6h), &_ipv6h); |
3735 | if (ip6 == NULL) | 3735 | if (ip6 == NULL) |
3736 | goto out; | 3736 | goto out; |
3737 | 3737 | ||
3738 | ad->u.net->v6info.saddr = ip6->saddr; | 3738 | ad->u.net->v6info.saddr = ip6->saddr; |
3739 | ad->u.net->v6info.daddr = ip6->daddr; | 3739 | ad->u.net->v6info.daddr = ip6->daddr; |
3740 | ret = 0; | 3740 | ret = 0; |
3741 | 3741 | ||
3742 | nexthdr = ip6->nexthdr; | 3742 | nexthdr = ip6->nexthdr; |
3743 | offset += sizeof(_ipv6h); | 3743 | offset += sizeof(_ipv6h); |
3744 | offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off); | 3744 | offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off); |
3745 | if (offset < 0) | 3745 | if (offset < 0) |
3746 | goto out; | 3746 | goto out; |
3747 | 3747 | ||
3748 | if (proto) | 3748 | if (proto) |
3749 | *proto = nexthdr; | 3749 | *proto = nexthdr; |
3750 | 3750 | ||
3751 | switch (nexthdr) { | 3751 | switch (nexthdr) { |
3752 | case IPPROTO_TCP: { | 3752 | case IPPROTO_TCP: { |
3753 | struct tcphdr _tcph, *th; | 3753 | struct tcphdr _tcph, *th; |
3754 | 3754 | ||
3755 | th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph); | 3755 | th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph); |
3756 | if (th == NULL) | 3756 | if (th == NULL) |
3757 | break; | 3757 | break; |
3758 | 3758 | ||
3759 | ad->u.net->sport = th->source; | 3759 | ad->u.net->sport = th->source; |
3760 | ad->u.net->dport = th->dest; | 3760 | ad->u.net->dport = th->dest; |
3761 | break; | 3761 | break; |
3762 | } | 3762 | } |
3763 | 3763 | ||
3764 | case IPPROTO_UDP: { | 3764 | case IPPROTO_UDP: { |
3765 | struct udphdr _udph, *uh; | 3765 | struct udphdr _udph, *uh; |
3766 | 3766 | ||
3767 | uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph); | 3767 | uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph); |
3768 | if (uh == NULL) | 3768 | if (uh == NULL) |
3769 | break; | 3769 | break; |
3770 | 3770 | ||
3771 | ad->u.net->sport = uh->source; | 3771 | ad->u.net->sport = uh->source; |
3772 | ad->u.net->dport = uh->dest; | 3772 | ad->u.net->dport = uh->dest; |
3773 | break; | 3773 | break; |
3774 | } | 3774 | } |
3775 | 3775 | ||
3776 | case IPPROTO_DCCP: { | 3776 | case IPPROTO_DCCP: { |
3777 | struct dccp_hdr _dccph, *dh; | 3777 | struct dccp_hdr _dccph, *dh; |
3778 | 3778 | ||
3779 | dh = skb_header_pointer(skb, offset, sizeof(_dccph), &_dccph); | 3779 | dh = skb_header_pointer(skb, offset, sizeof(_dccph), &_dccph); |
3780 | if (dh == NULL) | 3780 | if (dh == NULL) |
3781 | break; | 3781 | break; |
3782 | 3782 | ||
3783 | ad->u.net->sport = dh->dccph_sport; | 3783 | ad->u.net->sport = dh->dccph_sport; |
3784 | ad->u.net->dport = dh->dccph_dport; | 3784 | ad->u.net->dport = dh->dccph_dport; |
3785 | break; | 3785 | break; |
3786 | } | 3786 | } |
3787 | 3787 | ||
3788 | /* includes fragments */ | 3788 | /* includes fragments */ |
3789 | default: | 3789 | default: |
3790 | break; | 3790 | break; |
3791 | } | 3791 | } |
3792 | out: | 3792 | out: |
3793 | return ret; | 3793 | return ret; |
3794 | } | 3794 | } |
3795 | 3795 | ||
3796 | #endif /* IPV6 */ | 3796 | #endif /* IPV6 */ |
3797 | 3797 | ||
3798 | static int selinux_parse_skb(struct sk_buff *skb, struct common_audit_data *ad, | 3798 | static int selinux_parse_skb(struct sk_buff *skb, struct common_audit_data *ad, |
3799 | char **_addrp, int src, u8 *proto) | 3799 | char **_addrp, int src, u8 *proto) |
3800 | { | 3800 | { |
3801 | char *addrp; | 3801 | char *addrp; |
3802 | int ret; | 3802 | int ret; |
3803 | 3803 | ||
3804 | switch (ad->u.net->family) { | 3804 | switch (ad->u.net->family) { |
3805 | case PF_INET: | 3805 | case PF_INET: |
3806 | ret = selinux_parse_skb_ipv4(skb, ad, proto); | 3806 | ret = selinux_parse_skb_ipv4(skb, ad, proto); |
3807 | if (ret) | 3807 | if (ret) |
3808 | goto parse_error; | 3808 | goto parse_error; |
3809 | addrp = (char *)(src ? &ad->u.net->v4info.saddr : | 3809 | addrp = (char *)(src ? &ad->u.net->v4info.saddr : |
3810 | &ad->u.net->v4info.daddr); | 3810 | &ad->u.net->v4info.daddr); |
3811 | goto okay; | 3811 | goto okay; |
3812 | 3812 | ||
3813 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 3813 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
3814 | case PF_INET6: | 3814 | case PF_INET6: |
3815 | ret = selinux_parse_skb_ipv6(skb, ad, proto); | 3815 | ret = selinux_parse_skb_ipv6(skb, ad, proto); |
3816 | if (ret) | 3816 | if (ret) |
3817 | goto parse_error; | 3817 | goto parse_error; |
3818 | addrp = (char *)(src ? &ad->u.net->v6info.saddr : | 3818 | addrp = (char *)(src ? &ad->u.net->v6info.saddr : |
3819 | &ad->u.net->v6info.daddr); | 3819 | &ad->u.net->v6info.daddr); |
3820 | goto okay; | 3820 | goto okay; |
3821 | #endif /* IPV6 */ | 3821 | #endif /* IPV6 */ |
3822 | default: | 3822 | default: |
3823 | addrp = NULL; | 3823 | addrp = NULL; |
3824 | goto okay; | 3824 | goto okay; |
3825 | } | 3825 | } |
3826 | 3826 | ||
3827 | parse_error: | 3827 | parse_error: |
3828 | printk(KERN_WARNING | 3828 | printk(KERN_WARNING |
3829 | "SELinux: failure in selinux_parse_skb()," | 3829 | "SELinux: failure in selinux_parse_skb()," |
3830 | " unable to parse packet\n"); | 3830 | " unable to parse packet\n"); |
3831 | return ret; | 3831 | return ret; |
3832 | 3832 | ||
3833 | okay: | 3833 | okay: |
3834 | if (_addrp) | 3834 | if (_addrp) |
3835 | *_addrp = addrp; | 3835 | *_addrp = addrp; |
3836 | return 0; | 3836 | return 0; |
3837 | } | 3837 | } |
3838 | 3838 | ||
3839 | /** | 3839 | /** |
3840 | * selinux_skb_peerlbl_sid - Determine the peer label of a packet | 3840 | * selinux_skb_peerlbl_sid - Determine the peer label of a packet |
3841 | * @skb: the packet | 3841 | * @skb: the packet |
3842 | * @family: protocol family | 3842 | * @family: protocol family |
3843 | * @sid: the packet's peer label SID | 3843 | * @sid: the packet's peer label SID |
3844 | * | 3844 | * |
3845 | * Description: | 3845 | * Description: |
3846 | * Check the various different forms of network peer labeling and determine | 3846 | * Check the various different forms of network peer labeling and determine |
3847 | * the peer label/SID for the packet; most of the magic actually occurs in | 3847 | * the peer label/SID for the packet; most of the magic actually occurs in |
3848 | * the security server function security_net_peersid_cmp(). The function | 3848 | * the security server function security_net_peersid_cmp(). The function |
3849 | * returns zero if the value in @sid is valid (although it may be SECSID_NULL) | 3849 | * returns zero if the value in @sid is valid (although it may be SECSID_NULL) |
3850 | * or -EACCES if @sid is invalid due to inconsistencies with the different | 3850 | * or -EACCES if @sid is invalid due to inconsistencies with the different |
3851 | * peer labels. | 3851 | * peer labels. |
3852 | * | 3852 | * |
3853 | */ | 3853 | */ |
3854 | static int selinux_skb_peerlbl_sid(struct sk_buff *skb, u16 family, u32 *sid) | 3854 | static int selinux_skb_peerlbl_sid(struct sk_buff *skb, u16 family, u32 *sid) |
3855 | { | 3855 | { |
3856 | int err; | 3856 | int err; |
3857 | u32 xfrm_sid; | 3857 | u32 xfrm_sid; |
3858 | u32 nlbl_sid; | 3858 | u32 nlbl_sid; |
3859 | u32 nlbl_type; | 3859 | u32 nlbl_type; |
3860 | 3860 | ||
3861 | err = selinux_xfrm_skb_sid(skb, &xfrm_sid); | 3861 | err = selinux_xfrm_skb_sid(skb, &xfrm_sid); |
3862 | if (unlikely(err)) | 3862 | if (unlikely(err)) |
3863 | return -EACCES; | 3863 | return -EACCES; |
3864 | err = selinux_netlbl_skbuff_getsid(skb, family, &nlbl_type, &nlbl_sid); | 3864 | err = selinux_netlbl_skbuff_getsid(skb, family, &nlbl_type, &nlbl_sid); |
3865 | if (unlikely(err)) | 3865 | if (unlikely(err)) |
3866 | return -EACCES; | 3866 | return -EACCES; |
3867 | 3867 | ||
3868 | err = security_net_peersid_resolve(nlbl_sid, nlbl_type, xfrm_sid, sid); | 3868 | err = security_net_peersid_resolve(nlbl_sid, nlbl_type, xfrm_sid, sid); |
3869 | if (unlikely(err)) { | 3869 | if (unlikely(err)) { |
3870 | printk(KERN_WARNING | 3870 | printk(KERN_WARNING |
3871 | "SELinux: failure in selinux_skb_peerlbl_sid()," | 3871 | "SELinux: failure in selinux_skb_peerlbl_sid()," |
3872 | " unable to determine packet's peer label\n"); | 3872 | " unable to determine packet's peer label\n"); |
3873 | return -EACCES; | 3873 | return -EACCES; |
3874 | } | 3874 | } |
3875 | 3875 | ||
3876 | return 0; | 3876 | return 0; |
3877 | } | 3877 | } |
3878 | 3878 | ||
3879 | /** | 3879 | /** |
3880 | * selinux_conn_sid - Determine the child socket label for a connection | 3880 | * selinux_conn_sid - Determine the child socket label for a connection |
3881 | * @sk_sid: the parent socket's SID | 3881 | * @sk_sid: the parent socket's SID |
3882 | * @skb_sid: the packet's SID | 3882 | * @skb_sid: the packet's SID |
3883 | * @conn_sid: the resulting connection SID | 3883 | * @conn_sid: the resulting connection SID |
3884 | * | 3884 | * |
3885 | * If @skb_sid is valid then the user:role:type information from @sk_sid is | 3885 | * If @skb_sid is valid then the user:role:type information from @sk_sid is |
3886 | * combined with the MLS information from @skb_sid in order to create | 3886 | * combined with the MLS information from @skb_sid in order to create |
3887 | * @conn_sid. If @skb_sid is not valid then then @conn_sid is simply a copy | 3887 | * @conn_sid. If @skb_sid is not valid then then @conn_sid is simply a copy |
3888 | * of @sk_sid. Returns zero on success, negative values on failure. | 3888 | * of @sk_sid. Returns zero on success, negative values on failure. |
3889 | * | 3889 | * |
3890 | */ | 3890 | */ |
3891 | static int selinux_conn_sid(u32 sk_sid, u32 skb_sid, u32 *conn_sid) | 3891 | static int selinux_conn_sid(u32 sk_sid, u32 skb_sid, u32 *conn_sid) |
3892 | { | 3892 | { |
3893 | int err = 0; | 3893 | int err = 0; |
3894 | 3894 | ||
3895 | if (skb_sid != SECSID_NULL) | 3895 | if (skb_sid != SECSID_NULL) |
3896 | err = security_sid_mls_copy(sk_sid, skb_sid, conn_sid); | 3896 | err = security_sid_mls_copy(sk_sid, skb_sid, conn_sid); |
3897 | else | 3897 | else |
3898 | *conn_sid = sk_sid; | 3898 | *conn_sid = sk_sid; |
3899 | 3899 | ||
3900 | return err; | 3900 | return err; |
3901 | } | 3901 | } |
3902 | 3902 | ||
3903 | /* socket security operations */ | 3903 | /* socket security operations */ |
3904 | 3904 | ||
3905 | static int socket_sockcreate_sid(const struct task_security_struct *tsec, | 3905 | static int socket_sockcreate_sid(const struct task_security_struct *tsec, |
3906 | u16 secclass, u32 *socksid) | 3906 | u16 secclass, u32 *socksid) |
3907 | { | 3907 | { |
3908 | if (tsec->sockcreate_sid > SECSID_NULL) { | 3908 | if (tsec->sockcreate_sid > SECSID_NULL) { |
3909 | *socksid = tsec->sockcreate_sid; | 3909 | *socksid = tsec->sockcreate_sid; |
3910 | return 0; | 3910 | return 0; |
3911 | } | 3911 | } |
3912 | 3912 | ||
3913 | return security_transition_sid(tsec->sid, tsec->sid, secclass, NULL, | 3913 | return security_transition_sid(tsec->sid, tsec->sid, secclass, NULL, |
3914 | socksid); | 3914 | socksid); |
3915 | } | 3915 | } |
3916 | 3916 | ||
3917 | static int sock_has_perm(struct task_struct *task, struct sock *sk, u32 perms) | 3917 | static int sock_has_perm(struct task_struct *task, struct sock *sk, u32 perms) |
3918 | { | 3918 | { |
3919 | struct sk_security_struct *sksec = sk->sk_security; | 3919 | struct sk_security_struct *sksec = sk->sk_security; |
3920 | struct common_audit_data ad; | 3920 | struct common_audit_data ad; |
3921 | struct lsm_network_audit net = {0,}; | 3921 | struct lsm_network_audit net = {0,}; |
3922 | u32 tsid = task_sid(task); | 3922 | u32 tsid = task_sid(task); |
3923 | 3923 | ||
3924 | if (sksec->sid == SECINITSID_KERNEL) | 3924 | if (sksec->sid == SECINITSID_KERNEL) |
3925 | return 0; | 3925 | return 0; |
3926 | 3926 | ||
3927 | ad.type = LSM_AUDIT_DATA_NET; | 3927 | ad.type = LSM_AUDIT_DATA_NET; |
3928 | ad.u.net = &net; | 3928 | ad.u.net = &net; |
3929 | ad.u.net->sk = sk; | 3929 | ad.u.net->sk = sk; |
3930 | 3930 | ||
3931 | return avc_has_perm(tsid, sksec->sid, sksec->sclass, perms, &ad); | 3931 | return avc_has_perm(tsid, sksec->sid, sksec->sclass, perms, &ad); |
3932 | } | 3932 | } |
3933 | 3933 | ||
3934 | static int selinux_socket_create(int family, int type, | 3934 | static int selinux_socket_create(int family, int type, |
3935 | int protocol, int kern) | 3935 | int protocol, int kern) |
3936 | { | 3936 | { |
3937 | const struct task_security_struct *tsec = current_security(); | 3937 | const struct task_security_struct *tsec = current_security(); |
3938 | u32 newsid; | 3938 | u32 newsid; |
3939 | u16 secclass; | 3939 | u16 secclass; |
3940 | int rc; | 3940 | int rc; |
3941 | 3941 | ||
3942 | if (kern) | 3942 | if (kern) |
3943 | return 0; | 3943 | return 0; |
3944 | 3944 | ||
3945 | secclass = socket_type_to_security_class(family, type, protocol); | 3945 | secclass = socket_type_to_security_class(family, type, protocol); |
3946 | rc = socket_sockcreate_sid(tsec, secclass, &newsid); | 3946 | rc = socket_sockcreate_sid(tsec, secclass, &newsid); |
3947 | if (rc) | 3947 | if (rc) |
3948 | return rc; | 3948 | return rc; |
3949 | 3949 | ||
3950 | return avc_has_perm(tsec->sid, newsid, secclass, SOCKET__CREATE, NULL); | 3950 | return avc_has_perm(tsec->sid, newsid, secclass, SOCKET__CREATE, NULL); |
3951 | } | 3951 | } |
3952 | 3952 | ||
3953 | static int selinux_socket_post_create(struct socket *sock, int family, | 3953 | static int selinux_socket_post_create(struct socket *sock, int family, |
3954 | int type, int protocol, int kern) | 3954 | int type, int protocol, int kern) |
3955 | { | 3955 | { |
3956 | const struct task_security_struct *tsec = current_security(); | 3956 | const struct task_security_struct *tsec = current_security(); |
3957 | struct inode_security_struct *isec = SOCK_INODE(sock)->i_security; | 3957 | struct inode_security_struct *isec = SOCK_INODE(sock)->i_security; |
3958 | struct sk_security_struct *sksec; | 3958 | struct sk_security_struct *sksec; |
3959 | int err = 0; | 3959 | int err = 0; |
3960 | 3960 | ||
3961 | isec->sclass = socket_type_to_security_class(family, type, protocol); | 3961 | isec->sclass = socket_type_to_security_class(family, type, protocol); |
3962 | 3962 | ||
3963 | if (kern) | 3963 | if (kern) |
3964 | isec->sid = SECINITSID_KERNEL; | 3964 | isec->sid = SECINITSID_KERNEL; |
3965 | else { | 3965 | else { |
3966 | err = socket_sockcreate_sid(tsec, isec->sclass, &(isec->sid)); | 3966 | err = socket_sockcreate_sid(tsec, isec->sclass, &(isec->sid)); |
3967 | if (err) | 3967 | if (err) |
3968 | return err; | 3968 | return err; |
3969 | } | 3969 | } |
3970 | 3970 | ||
3971 | isec->initialized = 1; | 3971 | isec->initialized = 1; |
3972 | 3972 | ||
3973 | if (sock->sk) { | 3973 | if (sock->sk) { |
3974 | sksec = sock->sk->sk_security; | 3974 | sksec = sock->sk->sk_security; |
3975 | sksec->sid = isec->sid; | 3975 | sksec->sid = isec->sid; |
3976 | sksec->sclass = isec->sclass; | 3976 | sksec->sclass = isec->sclass; |
3977 | err = selinux_netlbl_socket_post_create(sock->sk, family); | 3977 | err = selinux_netlbl_socket_post_create(sock->sk, family); |
3978 | } | 3978 | } |
3979 | 3979 | ||
3980 | return err; | 3980 | return err; |
3981 | } | 3981 | } |
3982 | 3982 | ||
3983 | /* Range of port numbers used to automatically bind. | 3983 | /* Range of port numbers used to automatically bind. |
3984 | Need to determine whether we should perform a name_bind | 3984 | Need to determine whether we should perform a name_bind |
3985 | permission check between the socket and the port number. */ | 3985 | permission check between the socket and the port number. */ |
3986 | 3986 | ||
3987 | static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen) | 3987 | static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen) |
3988 | { | 3988 | { |
3989 | struct sock *sk = sock->sk; | 3989 | struct sock *sk = sock->sk; |
3990 | u16 family; | 3990 | u16 family; |
3991 | int err; | 3991 | int err; |
3992 | 3992 | ||
3993 | err = sock_has_perm(current, sk, SOCKET__BIND); | 3993 | err = sock_has_perm(current, sk, SOCKET__BIND); |
3994 | if (err) | 3994 | if (err) |
3995 | goto out; | 3995 | goto out; |
3996 | 3996 | ||
3997 | /* | 3997 | /* |
3998 | * If PF_INET or PF_INET6, check name_bind permission for the port. | 3998 | * If PF_INET or PF_INET6, check name_bind permission for the port. |
3999 | * Multiple address binding for SCTP is not supported yet: we just | 3999 | * Multiple address binding for SCTP is not supported yet: we just |
4000 | * check the first address now. | 4000 | * check the first address now. |
4001 | */ | 4001 | */ |
4002 | family = sk->sk_family; | 4002 | family = sk->sk_family; |
4003 | if (family == PF_INET || family == PF_INET6) { | 4003 | if (family == PF_INET || family == PF_INET6) { |
4004 | char *addrp; | 4004 | char *addrp; |
4005 | struct sk_security_struct *sksec = sk->sk_security; | 4005 | struct sk_security_struct *sksec = sk->sk_security; |
4006 | struct common_audit_data ad; | 4006 | struct common_audit_data ad; |
4007 | struct lsm_network_audit net = {0,}; | 4007 | struct lsm_network_audit net = {0,}; |
4008 | struct sockaddr_in *addr4 = NULL; | 4008 | struct sockaddr_in *addr4 = NULL; |
4009 | struct sockaddr_in6 *addr6 = NULL; | 4009 | struct sockaddr_in6 *addr6 = NULL; |
4010 | unsigned short snum; | 4010 | unsigned short snum; |
4011 | u32 sid, node_perm; | 4011 | u32 sid, node_perm; |
4012 | 4012 | ||
4013 | if (family == PF_INET) { | 4013 | if (family == PF_INET) { |
4014 | addr4 = (struct sockaddr_in *)address; | 4014 | addr4 = (struct sockaddr_in *)address; |
4015 | snum = ntohs(addr4->sin_port); | 4015 | snum = ntohs(addr4->sin_port); |
4016 | addrp = (char *)&addr4->sin_addr.s_addr; | 4016 | addrp = (char *)&addr4->sin_addr.s_addr; |
4017 | } else { | 4017 | } else { |
4018 | addr6 = (struct sockaddr_in6 *)address; | 4018 | addr6 = (struct sockaddr_in6 *)address; |
4019 | snum = ntohs(addr6->sin6_port); | 4019 | snum = ntohs(addr6->sin6_port); |
4020 | addrp = (char *)&addr6->sin6_addr.s6_addr; | 4020 | addrp = (char *)&addr6->sin6_addr.s6_addr; |
4021 | } | 4021 | } |
4022 | 4022 | ||
4023 | if (snum) { | 4023 | if (snum) { |
4024 | int low, high; | 4024 | int low, high; |
4025 | 4025 | ||
4026 | inet_get_local_port_range(sock_net(sk), &low, &high); | 4026 | inet_get_local_port_range(sock_net(sk), &low, &high); |
4027 | 4027 | ||
4028 | if (snum < max(PROT_SOCK, low) || snum > high) { | 4028 | if (snum < max(PROT_SOCK, low) || snum > high) { |
4029 | err = sel_netport_sid(sk->sk_protocol, | 4029 | err = sel_netport_sid(sk->sk_protocol, |
4030 | snum, &sid); | 4030 | snum, &sid); |
4031 | if (err) | 4031 | if (err) |
4032 | goto out; | 4032 | goto out; |
4033 | ad.type = LSM_AUDIT_DATA_NET; | 4033 | ad.type = LSM_AUDIT_DATA_NET; |
4034 | ad.u.net = &net; | 4034 | ad.u.net = &net; |
4035 | ad.u.net->sport = htons(snum); | 4035 | ad.u.net->sport = htons(snum); |
4036 | ad.u.net->family = family; | 4036 | ad.u.net->family = family; |
4037 | err = avc_has_perm(sksec->sid, sid, | 4037 | err = avc_has_perm(sksec->sid, sid, |
4038 | sksec->sclass, | 4038 | sksec->sclass, |
4039 | SOCKET__NAME_BIND, &ad); | 4039 | SOCKET__NAME_BIND, &ad); |
4040 | if (err) | 4040 | if (err) |
4041 | goto out; | 4041 | goto out; |
4042 | } | 4042 | } |
4043 | } | 4043 | } |
4044 | 4044 | ||
4045 | switch (sksec->sclass) { | 4045 | switch (sksec->sclass) { |
4046 | case SECCLASS_TCP_SOCKET: | 4046 | case SECCLASS_TCP_SOCKET: |
4047 | node_perm = TCP_SOCKET__NODE_BIND; | 4047 | node_perm = TCP_SOCKET__NODE_BIND; |
4048 | break; | 4048 | break; |
4049 | 4049 | ||
4050 | case SECCLASS_UDP_SOCKET: | 4050 | case SECCLASS_UDP_SOCKET: |
4051 | node_perm = UDP_SOCKET__NODE_BIND; | 4051 | node_perm = UDP_SOCKET__NODE_BIND; |
4052 | break; | 4052 | break; |
4053 | 4053 | ||
4054 | case SECCLASS_DCCP_SOCKET: | 4054 | case SECCLASS_DCCP_SOCKET: |
4055 | node_perm = DCCP_SOCKET__NODE_BIND; | 4055 | node_perm = DCCP_SOCKET__NODE_BIND; |
4056 | break; | 4056 | break; |
4057 | 4057 | ||
4058 | default: | 4058 | default: |
4059 | node_perm = RAWIP_SOCKET__NODE_BIND; | 4059 | node_perm = RAWIP_SOCKET__NODE_BIND; |
4060 | break; | 4060 | break; |
4061 | } | 4061 | } |
4062 | 4062 | ||
4063 | err = sel_netnode_sid(addrp, family, &sid); | 4063 | err = sel_netnode_sid(addrp, family, &sid); |
4064 | if (err) | 4064 | if (err) |
4065 | goto out; | 4065 | goto out; |
4066 | 4066 | ||
4067 | ad.type = LSM_AUDIT_DATA_NET; | 4067 | ad.type = LSM_AUDIT_DATA_NET; |
4068 | ad.u.net = &net; | 4068 | ad.u.net = &net; |
4069 | ad.u.net->sport = htons(snum); | 4069 | ad.u.net->sport = htons(snum); |
4070 | ad.u.net->family = family; | 4070 | ad.u.net->family = family; |
4071 | 4071 | ||
4072 | if (family == PF_INET) | 4072 | if (family == PF_INET) |
4073 | ad.u.net->v4info.saddr = addr4->sin_addr.s_addr; | 4073 | ad.u.net->v4info.saddr = addr4->sin_addr.s_addr; |
4074 | else | 4074 | else |
4075 | ad.u.net->v6info.saddr = addr6->sin6_addr; | 4075 | ad.u.net->v6info.saddr = addr6->sin6_addr; |
4076 | 4076 | ||
4077 | err = avc_has_perm(sksec->sid, sid, | 4077 | err = avc_has_perm(sksec->sid, sid, |
4078 | sksec->sclass, node_perm, &ad); | 4078 | sksec->sclass, node_perm, &ad); |
4079 | if (err) | 4079 | if (err) |
4080 | goto out; | 4080 | goto out; |
4081 | } | 4081 | } |
4082 | out: | 4082 | out: |
4083 | return err; | 4083 | return err; |
4084 | } | 4084 | } |
4085 | 4085 | ||
4086 | static int selinux_socket_connect(struct socket *sock, struct sockaddr *address, int addrlen) | 4086 | static int selinux_socket_connect(struct socket *sock, struct sockaddr *address, int addrlen) |
4087 | { | 4087 | { |
4088 | struct sock *sk = sock->sk; | 4088 | struct sock *sk = sock->sk; |
4089 | struct sk_security_struct *sksec = sk->sk_security; | 4089 | struct sk_security_struct *sksec = sk->sk_security; |
4090 | int err; | 4090 | int err; |
4091 | 4091 | ||
4092 | err = sock_has_perm(current, sk, SOCKET__CONNECT); | 4092 | err = sock_has_perm(current, sk, SOCKET__CONNECT); |
4093 | if (err) | 4093 | if (err) |
4094 | return err; | 4094 | return err; |
4095 | 4095 | ||
4096 | /* | 4096 | /* |
4097 | * If a TCP or DCCP socket, check name_connect permission for the port. | 4097 | * If a TCP or DCCP socket, check name_connect permission for the port. |
4098 | */ | 4098 | */ |
4099 | if (sksec->sclass == SECCLASS_TCP_SOCKET || | 4099 | if (sksec->sclass == SECCLASS_TCP_SOCKET || |
4100 | sksec->sclass == SECCLASS_DCCP_SOCKET) { | 4100 | sksec->sclass == SECCLASS_DCCP_SOCKET) { |
4101 | struct common_audit_data ad; | 4101 | struct common_audit_data ad; |
4102 | struct lsm_network_audit net = {0,}; | 4102 | struct lsm_network_audit net = {0,}; |
4103 | struct sockaddr_in *addr4 = NULL; | 4103 | struct sockaddr_in *addr4 = NULL; |
4104 | struct sockaddr_in6 *addr6 = NULL; | 4104 | struct sockaddr_in6 *addr6 = NULL; |
4105 | unsigned short snum; | 4105 | unsigned short snum; |
4106 | u32 sid, perm; | 4106 | u32 sid, perm; |
4107 | 4107 | ||
4108 | if (sk->sk_family == PF_INET) { | 4108 | if (sk->sk_family == PF_INET) { |
4109 | addr4 = (struct sockaddr_in *)address; | 4109 | addr4 = (struct sockaddr_in *)address; |
4110 | if (addrlen < sizeof(struct sockaddr_in)) | 4110 | if (addrlen < sizeof(struct sockaddr_in)) |
4111 | return -EINVAL; | 4111 | return -EINVAL; |
4112 | snum = ntohs(addr4->sin_port); | 4112 | snum = ntohs(addr4->sin_port); |
4113 | } else { | 4113 | } else { |
4114 | addr6 = (struct sockaddr_in6 *)address; | 4114 | addr6 = (struct sockaddr_in6 *)address; |
4115 | if (addrlen < SIN6_LEN_RFC2133) | 4115 | if (addrlen < SIN6_LEN_RFC2133) |
4116 | return -EINVAL; | 4116 | return -EINVAL; |
4117 | snum = ntohs(addr6->sin6_port); | 4117 | snum = ntohs(addr6->sin6_port); |
4118 | } | 4118 | } |
4119 | 4119 | ||
4120 | err = sel_netport_sid(sk->sk_protocol, snum, &sid); | 4120 | err = sel_netport_sid(sk->sk_protocol, snum, &sid); |
4121 | if (err) | 4121 | if (err) |
4122 | goto out; | 4122 | goto out; |
4123 | 4123 | ||
4124 | perm = (sksec->sclass == SECCLASS_TCP_SOCKET) ? | 4124 | perm = (sksec->sclass == SECCLASS_TCP_SOCKET) ? |
4125 | TCP_SOCKET__NAME_CONNECT : DCCP_SOCKET__NAME_CONNECT; | 4125 | TCP_SOCKET__NAME_CONNECT : DCCP_SOCKET__NAME_CONNECT; |
4126 | 4126 | ||
4127 | ad.type = LSM_AUDIT_DATA_NET; | 4127 | ad.type = LSM_AUDIT_DATA_NET; |
4128 | ad.u.net = &net; | 4128 | ad.u.net = &net; |
4129 | ad.u.net->dport = htons(snum); | 4129 | ad.u.net->dport = htons(snum); |
4130 | ad.u.net->family = sk->sk_family; | 4130 | ad.u.net->family = sk->sk_family; |
4131 | err = avc_has_perm(sksec->sid, sid, sksec->sclass, perm, &ad); | 4131 | err = avc_has_perm(sksec->sid, sid, sksec->sclass, perm, &ad); |
4132 | if (err) | 4132 | if (err) |
4133 | goto out; | 4133 | goto out; |
4134 | } | 4134 | } |
4135 | 4135 | ||
4136 | err = selinux_netlbl_socket_connect(sk, address); | 4136 | err = selinux_netlbl_socket_connect(sk, address); |
4137 | 4137 | ||
4138 | out: | 4138 | out: |
4139 | return err; | 4139 | return err; |
4140 | } | 4140 | } |
4141 | 4141 | ||
4142 | static int selinux_socket_listen(struct socket *sock, int backlog) | 4142 | static int selinux_socket_listen(struct socket *sock, int backlog) |
4143 | { | 4143 | { |
4144 | return sock_has_perm(current, sock->sk, SOCKET__LISTEN); | 4144 | return sock_has_perm(current, sock->sk, SOCKET__LISTEN); |
4145 | } | 4145 | } |
4146 | 4146 | ||
4147 | static int selinux_socket_accept(struct socket *sock, struct socket *newsock) | 4147 | static int selinux_socket_accept(struct socket *sock, struct socket *newsock) |
4148 | { | 4148 | { |
4149 | int err; | 4149 | int err; |
4150 | struct inode_security_struct *isec; | 4150 | struct inode_security_struct *isec; |
4151 | struct inode_security_struct *newisec; | 4151 | struct inode_security_struct *newisec; |
4152 | 4152 | ||
4153 | err = sock_has_perm(current, sock->sk, SOCKET__ACCEPT); | 4153 | err = sock_has_perm(current, sock->sk, SOCKET__ACCEPT); |
4154 | if (err) | 4154 | if (err) |
4155 | return err; | 4155 | return err; |
4156 | 4156 | ||
4157 | newisec = SOCK_INODE(newsock)->i_security; | 4157 | newisec = SOCK_INODE(newsock)->i_security; |
4158 | 4158 | ||
4159 | isec = SOCK_INODE(sock)->i_security; | 4159 | isec = SOCK_INODE(sock)->i_security; |
4160 | newisec->sclass = isec->sclass; | 4160 | newisec->sclass = isec->sclass; |
4161 | newisec->sid = isec->sid; | 4161 | newisec->sid = isec->sid; |
4162 | newisec->initialized = 1; | 4162 | newisec->initialized = 1; |
4163 | 4163 | ||
4164 | return 0; | 4164 | return 0; |
4165 | } | 4165 | } |
4166 | 4166 | ||
4167 | static int selinux_socket_sendmsg(struct socket *sock, struct msghdr *msg, | 4167 | static int selinux_socket_sendmsg(struct socket *sock, struct msghdr *msg, |
4168 | int size) | 4168 | int size) |
4169 | { | 4169 | { |
4170 | return sock_has_perm(current, sock->sk, SOCKET__WRITE); | 4170 | return sock_has_perm(current, sock->sk, SOCKET__WRITE); |
4171 | } | 4171 | } |
4172 | 4172 | ||
4173 | static int selinux_socket_recvmsg(struct socket *sock, struct msghdr *msg, | 4173 | static int selinux_socket_recvmsg(struct socket *sock, struct msghdr *msg, |
4174 | int size, int flags) | 4174 | int size, int flags) |
4175 | { | 4175 | { |
4176 | return sock_has_perm(current, sock->sk, SOCKET__READ); | 4176 | return sock_has_perm(current, sock->sk, SOCKET__READ); |
4177 | } | 4177 | } |
4178 | 4178 | ||
4179 | static int selinux_socket_getsockname(struct socket *sock) | 4179 | static int selinux_socket_getsockname(struct socket *sock) |
4180 | { | 4180 | { |
4181 | return sock_has_perm(current, sock->sk, SOCKET__GETATTR); | 4181 | return sock_has_perm(current, sock->sk, SOCKET__GETATTR); |
4182 | } | 4182 | } |
4183 | 4183 | ||
4184 | static int selinux_socket_getpeername(struct socket *sock) | 4184 | static int selinux_socket_getpeername(struct socket *sock) |
4185 | { | 4185 | { |
4186 | return sock_has_perm(current, sock->sk, SOCKET__GETATTR); | 4186 | return sock_has_perm(current, sock->sk, SOCKET__GETATTR); |
4187 | } | 4187 | } |
4188 | 4188 | ||
4189 | static int selinux_socket_setsockopt(struct socket *sock, int level, int optname) | 4189 | static int selinux_socket_setsockopt(struct socket *sock, int level, int optname) |
4190 | { | 4190 | { |
4191 | int err; | 4191 | int err; |
4192 | 4192 | ||
4193 | err = sock_has_perm(current, sock->sk, SOCKET__SETOPT); | 4193 | err = sock_has_perm(current, sock->sk, SOCKET__SETOPT); |
4194 | if (err) | 4194 | if (err) |
4195 | return err; | 4195 | return err; |
4196 | 4196 | ||
4197 | return selinux_netlbl_socket_setsockopt(sock, level, optname); | 4197 | return selinux_netlbl_socket_setsockopt(sock, level, optname); |
4198 | } | 4198 | } |
4199 | 4199 | ||
4200 | static int selinux_socket_getsockopt(struct socket *sock, int level, | 4200 | static int selinux_socket_getsockopt(struct socket *sock, int level, |
4201 | int optname) | 4201 | int optname) |
4202 | { | 4202 | { |
4203 | return sock_has_perm(current, sock->sk, SOCKET__GETOPT); | 4203 | return sock_has_perm(current, sock->sk, SOCKET__GETOPT); |
4204 | } | 4204 | } |
4205 | 4205 | ||
4206 | static int selinux_socket_shutdown(struct socket *sock, int how) | 4206 | static int selinux_socket_shutdown(struct socket *sock, int how) |
4207 | { | 4207 | { |
4208 | return sock_has_perm(current, sock->sk, SOCKET__SHUTDOWN); | 4208 | return sock_has_perm(current, sock->sk, SOCKET__SHUTDOWN); |
4209 | } | 4209 | } |
4210 | 4210 | ||
4211 | static int selinux_socket_unix_stream_connect(struct sock *sock, | 4211 | static int selinux_socket_unix_stream_connect(struct sock *sock, |
4212 | struct sock *other, | 4212 | struct sock *other, |
4213 | struct sock *newsk) | 4213 | struct sock *newsk) |
4214 | { | 4214 | { |
4215 | struct sk_security_struct *sksec_sock = sock->sk_security; | 4215 | struct sk_security_struct *sksec_sock = sock->sk_security; |
4216 | struct sk_security_struct *sksec_other = other->sk_security; | 4216 | struct sk_security_struct *sksec_other = other->sk_security; |
4217 | struct sk_security_struct *sksec_new = newsk->sk_security; | 4217 | struct sk_security_struct *sksec_new = newsk->sk_security; |
4218 | struct common_audit_data ad; | 4218 | struct common_audit_data ad; |
4219 | struct lsm_network_audit net = {0,}; | 4219 | struct lsm_network_audit net = {0,}; |
4220 | int err; | 4220 | int err; |
4221 | 4221 | ||
4222 | ad.type = LSM_AUDIT_DATA_NET; | 4222 | ad.type = LSM_AUDIT_DATA_NET; |
4223 | ad.u.net = &net; | 4223 | ad.u.net = &net; |
4224 | ad.u.net->sk = other; | 4224 | ad.u.net->sk = other; |
4225 | 4225 | ||
4226 | err = avc_has_perm(sksec_sock->sid, sksec_other->sid, | 4226 | err = avc_has_perm(sksec_sock->sid, sksec_other->sid, |
4227 | sksec_other->sclass, | 4227 | sksec_other->sclass, |
4228 | UNIX_STREAM_SOCKET__CONNECTTO, &ad); | 4228 | UNIX_STREAM_SOCKET__CONNECTTO, &ad); |
4229 | if (err) | 4229 | if (err) |
4230 | return err; | 4230 | return err; |
4231 | 4231 | ||
4232 | /* server child socket */ | 4232 | /* server child socket */ |
4233 | sksec_new->peer_sid = sksec_sock->sid; | 4233 | sksec_new->peer_sid = sksec_sock->sid; |
4234 | err = security_sid_mls_copy(sksec_other->sid, sksec_sock->sid, | 4234 | err = security_sid_mls_copy(sksec_other->sid, sksec_sock->sid, |
4235 | &sksec_new->sid); | 4235 | &sksec_new->sid); |
4236 | if (err) | 4236 | if (err) |
4237 | return err; | 4237 | return err; |
4238 | 4238 | ||
4239 | /* connecting socket */ | 4239 | /* connecting socket */ |
4240 | sksec_sock->peer_sid = sksec_new->sid; | 4240 | sksec_sock->peer_sid = sksec_new->sid; |
4241 | 4241 | ||
4242 | return 0; | 4242 | return 0; |
4243 | } | 4243 | } |
4244 | 4244 | ||
4245 | static int selinux_socket_unix_may_send(struct socket *sock, | 4245 | static int selinux_socket_unix_may_send(struct socket *sock, |
4246 | struct socket *other) | 4246 | struct socket *other) |
4247 | { | 4247 | { |
4248 | struct sk_security_struct *ssec = sock->sk->sk_security; | 4248 | struct sk_security_struct *ssec = sock->sk->sk_security; |
4249 | struct sk_security_struct *osec = other->sk->sk_security; | 4249 | struct sk_security_struct *osec = other->sk->sk_security; |
4250 | struct common_audit_data ad; | 4250 | struct common_audit_data ad; |
4251 | struct lsm_network_audit net = {0,}; | 4251 | struct lsm_network_audit net = {0,}; |
4252 | 4252 | ||
4253 | ad.type = LSM_AUDIT_DATA_NET; | 4253 | ad.type = LSM_AUDIT_DATA_NET; |
4254 | ad.u.net = &net; | 4254 | ad.u.net = &net; |
4255 | ad.u.net->sk = other->sk; | 4255 | ad.u.net->sk = other->sk; |
4256 | 4256 | ||
4257 | return avc_has_perm(ssec->sid, osec->sid, osec->sclass, SOCKET__SENDTO, | 4257 | return avc_has_perm(ssec->sid, osec->sid, osec->sclass, SOCKET__SENDTO, |
4258 | &ad); | 4258 | &ad); |
4259 | } | 4259 | } |
4260 | 4260 | ||
4261 | static int selinux_inet_sys_rcv_skb(int ifindex, char *addrp, u16 family, | 4261 | static int selinux_inet_sys_rcv_skb(int ifindex, char *addrp, u16 family, |
4262 | u32 peer_sid, | 4262 | u32 peer_sid, |
4263 | struct common_audit_data *ad) | 4263 | struct common_audit_data *ad) |
4264 | { | 4264 | { |
4265 | int err; | 4265 | int err; |
4266 | u32 if_sid; | 4266 | u32 if_sid; |
4267 | u32 node_sid; | 4267 | u32 node_sid; |
4268 | 4268 | ||
4269 | err = sel_netif_sid(ifindex, &if_sid); | 4269 | err = sel_netif_sid(ifindex, &if_sid); |
4270 | if (err) | 4270 | if (err) |
4271 | return err; | 4271 | return err; |
4272 | err = avc_has_perm(peer_sid, if_sid, | 4272 | err = avc_has_perm(peer_sid, if_sid, |
4273 | SECCLASS_NETIF, NETIF__INGRESS, ad); | 4273 | SECCLASS_NETIF, NETIF__INGRESS, ad); |
4274 | if (err) | 4274 | if (err) |
4275 | return err; | 4275 | return err; |
4276 | 4276 | ||
4277 | err = sel_netnode_sid(addrp, family, &node_sid); | 4277 | err = sel_netnode_sid(addrp, family, &node_sid); |
4278 | if (err) | 4278 | if (err) |
4279 | return err; | 4279 | return err; |
4280 | return avc_has_perm(peer_sid, node_sid, | 4280 | return avc_has_perm(peer_sid, node_sid, |
4281 | SECCLASS_NODE, NODE__RECVFROM, ad); | 4281 | SECCLASS_NODE, NODE__RECVFROM, ad); |
4282 | } | 4282 | } |
4283 | 4283 | ||
4284 | static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb, | 4284 | static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb, |
4285 | u16 family) | 4285 | u16 family) |
4286 | { | 4286 | { |
4287 | int err = 0; | 4287 | int err = 0; |
4288 | struct sk_security_struct *sksec = sk->sk_security; | 4288 | struct sk_security_struct *sksec = sk->sk_security; |
4289 | u32 sk_sid = sksec->sid; | 4289 | u32 sk_sid = sksec->sid; |
4290 | struct common_audit_data ad; | 4290 | struct common_audit_data ad; |
4291 | struct lsm_network_audit net = {0,}; | 4291 | struct lsm_network_audit net = {0,}; |
4292 | char *addrp; | 4292 | char *addrp; |
4293 | 4293 | ||
4294 | ad.type = LSM_AUDIT_DATA_NET; | 4294 | ad.type = LSM_AUDIT_DATA_NET; |
4295 | ad.u.net = &net; | 4295 | ad.u.net = &net; |
4296 | ad.u.net->netif = skb->skb_iif; | 4296 | ad.u.net->netif = skb->skb_iif; |
4297 | ad.u.net->family = family; | 4297 | ad.u.net->family = family; |
4298 | err = selinux_parse_skb(skb, &ad, &addrp, 1, NULL); | 4298 | err = selinux_parse_skb(skb, &ad, &addrp, 1, NULL); |
4299 | if (err) | 4299 | if (err) |
4300 | return err; | 4300 | return err; |
4301 | 4301 | ||
4302 | if (selinux_secmark_enabled()) { | 4302 | if (selinux_secmark_enabled()) { |
4303 | err = avc_has_perm(sk_sid, skb->secmark, SECCLASS_PACKET, | 4303 | err = avc_has_perm(sk_sid, skb->secmark, SECCLASS_PACKET, |
4304 | PACKET__RECV, &ad); | 4304 | PACKET__RECV, &ad); |
4305 | if (err) | 4305 | if (err) |
4306 | return err; | 4306 | return err; |
4307 | } | 4307 | } |
4308 | 4308 | ||
4309 | err = selinux_netlbl_sock_rcv_skb(sksec, skb, family, &ad); | 4309 | err = selinux_netlbl_sock_rcv_skb(sksec, skb, family, &ad); |
4310 | if (err) | 4310 | if (err) |
4311 | return err; | 4311 | return err; |
4312 | err = selinux_xfrm_sock_rcv_skb(sksec->sid, skb, &ad); | 4312 | err = selinux_xfrm_sock_rcv_skb(sksec->sid, skb, &ad); |
4313 | 4313 | ||
4314 | return err; | 4314 | return err; |
4315 | } | 4315 | } |
4316 | 4316 | ||
4317 | static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) | 4317 | static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) |
4318 | { | 4318 | { |
4319 | int err; | 4319 | int err; |
4320 | struct sk_security_struct *sksec = sk->sk_security; | 4320 | struct sk_security_struct *sksec = sk->sk_security; |
4321 | u16 family = sk->sk_family; | 4321 | u16 family = sk->sk_family; |
4322 | u32 sk_sid = sksec->sid; | 4322 | u32 sk_sid = sksec->sid; |
4323 | struct common_audit_data ad; | 4323 | struct common_audit_data ad; |
4324 | struct lsm_network_audit net = {0,}; | 4324 | struct lsm_network_audit net = {0,}; |
4325 | char *addrp; | 4325 | char *addrp; |
4326 | u8 secmark_active; | 4326 | u8 secmark_active; |
4327 | u8 peerlbl_active; | 4327 | u8 peerlbl_active; |
4328 | 4328 | ||
4329 | if (family != PF_INET && family != PF_INET6) | 4329 | if (family != PF_INET && family != PF_INET6) |
4330 | return 0; | 4330 | return 0; |
4331 | 4331 | ||
4332 | /* Handle mapped IPv4 packets arriving via IPv6 sockets */ | 4332 | /* Handle mapped IPv4 packets arriving via IPv6 sockets */ |
4333 | if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP)) | 4333 | if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP)) |
4334 | family = PF_INET; | 4334 | family = PF_INET; |
4335 | 4335 | ||
4336 | /* If any sort of compatibility mode is enabled then handoff processing | 4336 | /* If any sort of compatibility mode is enabled then handoff processing |
4337 | * to the selinux_sock_rcv_skb_compat() function to deal with the | 4337 | * to the selinux_sock_rcv_skb_compat() function to deal with the |
4338 | * special handling. We do this in an attempt to keep this function | 4338 | * special handling. We do this in an attempt to keep this function |
4339 | * as fast and as clean as possible. */ | 4339 | * as fast and as clean as possible. */ |
4340 | if (!selinux_policycap_netpeer) | 4340 | if (!selinux_policycap_netpeer) |
4341 | return selinux_sock_rcv_skb_compat(sk, skb, family); | 4341 | return selinux_sock_rcv_skb_compat(sk, skb, family); |
4342 | 4342 | ||
4343 | secmark_active = selinux_secmark_enabled(); | 4343 | secmark_active = selinux_secmark_enabled(); |
4344 | peerlbl_active = selinux_peerlbl_enabled(); | 4344 | peerlbl_active = selinux_peerlbl_enabled(); |
4345 | if (!secmark_active && !peerlbl_active) | 4345 | if (!secmark_active && !peerlbl_active) |
4346 | return 0; | 4346 | return 0; |
4347 | 4347 | ||
4348 | ad.type = LSM_AUDIT_DATA_NET; | 4348 | ad.type = LSM_AUDIT_DATA_NET; |
4349 | ad.u.net = &net; | 4349 | ad.u.net = &net; |
4350 | ad.u.net->netif = skb->skb_iif; | 4350 | ad.u.net->netif = skb->skb_iif; |
4351 | ad.u.net->family = family; | 4351 | ad.u.net->family = family; |
4352 | err = selinux_parse_skb(skb, &ad, &addrp, 1, NULL); | 4352 | err = selinux_parse_skb(skb, &ad, &addrp, 1, NULL); |
4353 | if (err) | 4353 | if (err) |
4354 | return err; | 4354 | return err; |
4355 | 4355 | ||
4356 | if (peerlbl_active) { | 4356 | if (peerlbl_active) { |
4357 | u32 peer_sid; | 4357 | u32 peer_sid; |
4358 | 4358 | ||
4359 | err = selinux_skb_peerlbl_sid(skb, family, &peer_sid); | 4359 | err = selinux_skb_peerlbl_sid(skb, family, &peer_sid); |
4360 | if (err) | 4360 | if (err) |
4361 | return err; | 4361 | return err; |
4362 | err = selinux_inet_sys_rcv_skb(skb->skb_iif, addrp, family, | 4362 | err = selinux_inet_sys_rcv_skb(skb->skb_iif, addrp, family, |
4363 | peer_sid, &ad); | 4363 | peer_sid, &ad); |
4364 | if (err) { | 4364 | if (err) { |
4365 | selinux_netlbl_err(skb, err, 0); | 4365 | selinux_netlbl_err(skb, err, 0); |
4366 | return err; | 4366 | return err; |
4367 | } | 4367 | } |
4368 | err = avc_has_perm(sk_sid, peer_sid, SECCLASS_PEER, | 4368 | err = avc_has_perm(sk_sid, peer_sid, SECCLASS_PEER, |
4369 | PEER__RECV, &ad); | 4369 | PEER__RECV, &ad); |
4370 | if (err) { | 4370 | if (err) { |
4371 | selinux_netlbl_err(skb, err, 0); | 4371 | selinux_netlbl_err(skb, err, 0); |
4372 | return err; | 4372 | return err; |
4373 | } | 4373 | } |
4374 | } | 4374 | } |
4375 | 4375 | ||
4376 | if (secmark_active) { | 4376 | if (secmark_active) { |
4377 | err = avc_has_perm(sk_sid, skb->secmark, SECCLASS_PACKET, | 4377 | err = avc_has_perm(sk_sid, skb->secmark, SECCLASS_PACKET, |
4378 | PACKET__RECV, &ad); | 4378 | PACKET__RECV, &ad); |
4379 | if (err) | 4379 | if (err) |
4380 | return err; | 4380 | return err; |
4381 | } | 4381 | } |
4382 | 4382 | ||
4383 | return err; | 4383 | return err; |
4384 | } | 4384 | } |
4385 | 4385 | ||
4386 | static int selinux_socket_getpeersec_stream(struct socket *sock, char __user *optval, | 4386 | static int selinux_socket_getpeersec_stream(struct socket *sock, char __user *optval, |
4387 | int __user *optlen, unsigned len) | 4387 | int __user *optlen, unsigned len) |
4388 | { | 4388 | { |
4389 | int err = 0; | 4389 | int err = 0; |
4390 | char *scontext; | 4390 | char *scontext; |
4391 | u32 scontext_len; | 4391 | u32 scontext_len; |
4392 | struct sk_security_struct *sksec = sock->sk->sk_security; | 4392 | struct sk_security_struct *sksec = sock->sk->sk_security; |
4393 | u32 peer_sid = SECSID_NULL; | 4393 | u32 peer_sid = SECSID_NULL; |
4394 | 4394 | ||
4395 | if (sksec->sclass == SECCLASS_UNIX_STREAM_SOCKET || | 4395 | if (sksec->sclass == SECCLASS_UNIX_STREAM_SOCKET || |
4396 | sksec->sclass == SECCLASS_TCP_SOCKET) | 4396 | sksec->sclass == SECCLASS_TCP_SOCKET) |
4397 | peer_sid = sksec->peer_sid; | 4397 | peer_sid = sksec->peer_sid; |
4398 | if (peer_sid == SECSID_NULL) | 4398 | if (peer_sid == SECSID_NULL) |
4399 | return -ENOPROTOOPT; | 4399 | return -ENOPROTOOPT; |
4400 | 4400 | ||
4401 | err = security_sid_to_context(peer_sid, &scontext, &scontext_len); | 4401 | err = security_sid_to_context(peer_sid, &scontext, &scontext_len); |
4402 | if (err) | 4402 | if (err) |
4403 | return err; | 4403 | return err; |
4404 | 4404 | ||
4405 | if (scontext_len > len) { | 4405 | if (scontext_len > len) { |
4406 | err = -ERANGE; | 4406 | err = -ERANGE; |
4407 | goto out_len; | 4407 | goto out_len; |
4408 | } | 4408 | } |
4409 | 4409 | ||
4410 | if (copy_to_user(optval, scontext, scontext_len)) | 4410 | if (copy_to_user(optval, scontext, scontext_len)) |
4411 | err = -EFAULT; | 4411 | err = -EFAULT; |
4412 | 4412 | ||
4413 | out_len: | 4413 | out_len: |
4414 | if (put_user(scontext_len, optlen)) | 4414 | if (put_user(scontext_len, optlen)) |
4415 | err = -EFAULT; | 4415 | err = -EFAULT; |
4416 | kfree(scontext); | 4416 | kfree(scontext); |
4417 | return err; | 4417 | return err; |
4418 | } | 4418 | } |
4419 | 4419 | ||
4420 | static int selinux_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid) | 4420 | static int selinux_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid) |
4421 | { | 4421 | { |
4422 | u32 peer_secid = SECSID_NULL; | 4422 | u32 peer_secid = SECSID_NULL; |
4423 | u16 family; | 4423 | u16 family; |
4424 | 4424 | ||
4425 | if (skb && skb->protocol == htons(ETH_P_IP)) | 4425 | if (skb && skb->protocol == htons(ETH_P_IP)) |
4426 | family = PF_INET; | 4426 | family = PF_INET; |
4427 | else if (skb && skb->protocol == htons(ETH_P_IPV6)) | 4427 | else if (skb && skb->protocol == htons(ETH_P_IPV6)) |
4428 | family = PF_INET6; | 4428 | family = PF_INET6; |
4429 | else if (sock) | 4429 | else if (sock) |
4430 | family = sock->sk->sk_family; | 4430 | family = sock->sk->sk_family; |
4431 | else | 4431 | else |
4432 | goto out; | 4432 | goto out; |
4433 | 4433 | ||
4434 | if (sock && family == PF_UNIX) | 4434 | if (sock && family == PF_UNIX) |
4435 | selinux_inode_getsecid(SOCK_INODE(sock), &peer_secid); | 4435 | selinux_inode_getsecid(SOCK_INODE(sock), &peer_secid); |
4436 | else if (skb) | 4436 | else if (skb) |
4437 | selinux_skb_peerlbl_sid(skb, family, &peer_secid); | 4437 | selinux_skb_peerlbl_sid(skb, family, &peer_secid); |
4438 | 4438 | ||
4439 | out: | 4439 | out: |
4440 | *secid = peer_secid; | 4440 | *secid = peer_secid; |
4441 | if (peer_secid == SECSID_NULL) | 4441 | if (peer_secid == SECSID_NULL) |
4442 | return -EINVAL; | 4442 | return -EINVAL; |
4443 | return 0; | 4443 | return 0; |
4444 | } | 4444 | } |
4445 | 4445 | ||
4446 | static int selinux_sk_alloc_security(struct sock *sk, int family, gfp_t priority) | 4446 | static int selinux_sk_alloc_security(struct sock *sk, int family, gfp_t priority) |
4447 | { | 4447 | { |
4448 | struct sk_security_struct *sksec; | 4448 | struct sk_security_struct *sksec; |
4449 | 4449 | ||
4450 | sksec = kzalloc(sizeof(*sksec), priority); | 4450 | sksec = kzalloc(sizeof(*sksec), priority); |
4451 | if (!sksec) | 4451 | if (!sksec) |
4452 | return -ENOMEM; | 4452 | return -ENOMEM; |
4453 | 4453 | ||
4454 | sksec->peer_sid = SECINITSID_UNLABELED; | 4454 | sksec->peer_sid = SECINITSID_UNLABELED; |
4455 | sksec->sid = SECINITSID_UNLABELED; | 4455 | sksec->sid = SECINITSID_UNLABELED; |
4456 | selinux_netlbl_sk_security_reset(sksec); | 4456 | selinux_netlbl_sk_security_reset(sksec); |
4457 | sk->sk_security = sksec; | 4457 | sk->sk_security = sksec; |
4458 | 4458 | ||
4459 | return 0; | 4459 | return 0; |
4460 | } | 4460 | } |
4461 | 4461 | ||
4462 | static void selinux_sk_free_security(struct sock *sk) | 4462 | static void selinux_sk_free_security(struct sock *sk) |
4463 | { | 4463 | { |
4464 | struct sk_security_struct *sksec = sk->sk_security; | 4464 | struct sk_security_struct *sksec = sk->sk_security; |
4465 | 4465 | ||
4466 | sk->sk_security = NULL; | 4466 | sk->sk_security = NULL; |
4467 | selinux_netlbl_sk_security_free(sksec); | 4467 | selinux_netlbl_sk_security_free(sksec); |
4468 | kfree(sksec); | 4468 | kfree(sksec); |
4469 | } | 4469 | } |
4470 | 4470 | ||
4471 | static void selinux_sk_clone_security(const struct sock *sk, struct sock *newsk) | 4471 | static void selinux_sk_clone_security(const struct sock *sk, struct sock *newsk) |
4472 | { | 4472 | { |
4473 | struct sk_security_struct *sksec = sk->sk_security; | 4473 | struct sk_security_struct *sksec = sk->sk_security; |
4474 | struct sk_security_struct *newsksec = newsk->sk_security; | 4474 | struct sk_security_struct *newsksec = newsk->sk_security; |
4475 | 4475 | ||
4476 | newsksec->sid = sksec->sid; | 4476 | newsksec->sid = sksec->sid; |
4477 | newsksec->peer_sid = sksec->peer_sid; | 4477 | newsksec->peer_sid = sksec->peer_sid; |
4478 | newsksec->sclass = sksec->sclass; | 4478 | newsksec->sclass = sksec->sclass; |
4479 | 4479 | ||
4480 | selinux_netlbl_sk_security_reset(newsksec); | 4480 | selinux_netlbl_sk_security_reset(newsksec); |
4481 | } | 4481 | } |
4482 | 4482 | ||
4483 | static void selinux_sk_getsecid(struct sock *sk, u32 *secid) | 4483 | static void selinux_sk_getsecid(struct sock *sk, u32 *secid) |
4484 | { | 4484 | { |
4485 | if (!sk) | 4485 | if (!sk) |
4486 | *secid = SECINITSID_ANY_SOCKET; | 4486 | *secid = SECINITSID_ANY_SOCKET; |
4487 | else { | 4487 | else { |
4488 | struct sk_security_struct *sksec = sk->sk_security; | 4488 | struct sk_security_struct *sksec = sk->sk_security; |
4489 | 4489 | ||
4490 | *secid = sksec->sid; | 4490 | *secid = sksec->sid; |
4491 | } | 4491 | } |
4492 | } | 4492 | } |
4493 | 4493 | ||
4494 | static void selinux_sock_graft(struct sock *sk, struct socket *parent) | 4494 | static void selinux_sock_graft(struct sock *sk, struct socket *parent) |
4495 | { | 4495 | { |
4496 | struct inode_security_struct *isec = SOCK_INODE(parent)->i_security; | 4496 | struct inode_security_struct *isec = SOCK_INODE(parent)->i_security; |
4497 | struct sk_security_struct *sksec = sk->sk_security; | 4497 | struct sk_security_struct *sksec = sk->sk_security; |
4498 | 4498 | ||
4499 | if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6 || | 4499 | if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6 || |
4500 | sk->sk_family == PF_UNIX) | 4500 | sk->sk_family == PF_UNIX) |
4501 | isec->sid = sksec->sid; | 4501 | isec->sid = sksec->sid; |
4502 | sksec->sclass = isec->sclass; | 4502 | sksec->sclass = isec->sclass; |
4503 | } | 4503 | } |
4504 | 4504 | ||
4505 | static int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb, | 4505 | static int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb, |
4506 | struct request_sock *req) | 4506 | struct request_sock *req) |
4507 | { | 4507 | { |
4508 | struct sk_security_struct *sksec = sk->sk_security; | 4508 | struct sk_security_struct *sksec = sk->sk_security; |
4509 | int err; | 4509 | int err; |
4510 | u16 family = req->rsk_ops->family; | 4510 | u16 family = req->rsk_ops->family; |
4511 | u32 connsid; | 4511 | u32 connsid; |
4512 | u32 peersid; | 4512 | u32 peersid; |
4513 | 4513 | ||
4514 | err = selinux_skb_peerlbl_sid(skb, family, &peersid); | 4514 | err = selinux_skb_peerlbl_sid(skb, family, &peersid); |
4515 | if (err) | 4515 | if (err) |
4516 | return err; | 4516 | return err; |
4517 | err = selinux_conn_sid(sksec->sid, peersid, &connsid); | 4517 | err = selinux_conn_sid(sksec->sid, peersid, &connsid); |
4518 | if (err) | 4518 | if (err) |
4519 | return err; | 4519 | return err; |
4520 | req->secid = connsid; | 4520 | req->secid = connsid; |
4521 | req->peer_secid = peersid; | 4521 | req->peer_secid = peersid; |
4522 | 4522 | ||
4523 | return selinux_netlbl_inet_conn_request(req, family); | 4523 | return selinux_netlbl_inet_conn_request(req, family); |
4524 | } | 4524 | } |
4525 | 4525 | ||
4526 | static void selinux_inet_csk_clone(struct sock *newsk, | 4526 | static void selinux_inet_csk_clone(struct sock *newsk, |
4527 | const struct request_sock *req) | 4527 | const struct request_sock *req) |
4528 | { | 4528 | { |
4529 | struct sk_security_struct *newsksec = newsk->sk_security; | 4529 | struct sk_security_struct *newsksec = newsk->sk_security; |
4530 | 4530 | ||
4531 | newsksec->sid = req->secid; | 4531 | newsksec->sid = req->secid; |
4532 | newsksec->peer_sid = req->peer_secid; | 4532 | newsksec->peer_sid = req->peer_secid; |
4533 | /* NOTE: Ideally, we should also get the isec->sid for the | 4533 | /* NOTE: Ideally, we should also get the isec->sid for the |
4534 | new socket in sync, but we don't have the isec available yet. | 4534 | new socket in sync, but we don't have the isec available yet. |
4535 | So we will wait until sock_graft to do it, by which | 4535 | So we will wait until sock_graft to do it, by which |
4536 | time it will have been created and available. */ | 4536 | time it will have been created and available. */ |
4537 | 4537 | ||
4538 | /* We don't need to take any sort of lock here as we are the only | 4538 | /* We don't need to take any sort of lock here as we are the only |
4539 | * thread with access to newsksec */ | 4539 | * thread with access to newsksec */ |
4540 | selinux_netlbl_inet_csk_clone(newsk, req->rsk_ops->family); | 4540 | selinux_netlbl_inet_csk_clone(newsk, req->rsk_ops->family); |
4541 | } | 4541 | } |
4542 | 4542 | ||
4543 | static void selinux_inet_conn_established(struct sock *sk, struct sk_buff *skb) | 4543 | static void selinux_inet_conn_established(struct sock *sk, struct sk_buff *skb) |
4544 | { | 4544 | { |
4545 | u16 family = sk->sk_family; | 4545 | u16 family = sk->sk_family; |
4546 | struct sk_security_struct *sksec = sk->sk_security; | 4546 | struct sk_security_struct *sksec = sk->sk_security; |
4547 | 4547 | ||
4548 | /* handle mapped IPv4 packets arriving via IPv6 sockets */ | 4548 | /* handle mapped IPv4 packets arriving via IPv6 sockets */ |
4549 | if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP)) | 4549 | if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP)) |
4550 | family = PF_INET; | 4550 | family = PF_INET; |
4551 | 4551 | ||
4552 | selinux_skb_peerlbl_sid(skb, family, &sksec->peer_sid); | 4552 | selinux_skb_peerlbl_sid(skb, family, &sksec->peer_sid); |
4553 | } | 4553 | } |
4554 | 4554 | ||
/*
 * Make @sk the owner of @skb by handing both to skb_set_owner_w().
 */
static void selinux_skb_owned_by(struct sk_buff *skb, struct sock *sk)
{
	skb_set_owner_w(skb, sk);
}
4559 | 4559 | ||
4560 | static int selinux_secmark_relabel_packet(u32 sid) | 4560 | static int selinux_secmark_relabel_packet(u32 sid) |
4561 | { | 4561 | { |
4562 | const struct task_security_struct *__tsec; | 4562 | const struct task_security_struct *__tsec; |
4563 | u32 tsid; | 4563 | u32 tsid; |
4564 | 4564 | ||
4565 | __tsec = current_security(); | 4565 | __tsec = current_security(); |
4566 | tsid = __tsec->sid; | 4566 | tsid = __tsec->sid; |
4567 | 4567 | ||
4568 | return avc_has_perm(tsid, sid, SECCLASS_PACKET, PACKET__RELABELTO, NULL); | 4568 | return avc_has_perm(tsid, sid, SECCLASS_PACKET, PACKET__RELABELTO, NULL); |
4569 | } | 4569 | } |
4570 | 4570 | ||
4571 | static void selinux_secmark_refcount_inc(void) | 4571 | static void selinux_secmark_refcount_inc(void) |
4572 | { | 4572 | { |
4573 | atomic_inc(&selinux_secmark_refcount); | 4573 | atomic_inc(&selinux_secmark_refcount); |
4574 | } | 4574 | } |
4575 | 4575 | ||
4576 | static void selinux_secmark_refcount_dec(void) | 4576 | static void selinux_secmark_refcount_dec(void) |
4577 | { | 4577 | { |
4578 | atomic_dec(&selinux_secmark_refcount); | 4578 | atomic_dec(&selinux_secmark_refcount); |
4579 | } | 4579 | } |
4580 | 4580 | ||
4581 | static void selinux_req_classify_flow(const struct request_sock *req, | 4581 | static void selinux_req_classify_flow(const struct request_sock *req, |
4582 | struct flowi *fl) | 4582 | struct flowi *fl) |
4583 | { | 4583 | { |
4584 | fl->flowi_secid = req->secid; | 4584 | fl->flowi_secid = req->secid; |
4585 | } | 4585 | } |
4586 | 4586 | ||
4587 | static int selinux_tun_dev_alloc_security(void **security) | 4587 | static int selinux_tun_dev_alloc_security(void **security) |
4588 | { | 4588 | { |
4589 | struct tun_security_struct *tunsec; | 4589 | struct tun_security_struct *tunsec; |
4590 | 4590 | ||
4591 | tunsec = kzalloc(sizeof(*tunsec), GFP_KERNEL); | 4591 | tunsec = kzalloc(sizeof(*tunsec), GFP_KERNEL); |
4592 | if (!tunsec) | 4592 | if (!tunsec) |
4593 | return -ENOMEM; | 4593 | return -ENOMEM; |
4594 | tunsec->sid = current_sid(); | 4594 | tunsec->sid = current_sid(); |
4595 | 4595 | ||
4596 | *security = tunsec; | 4596 | *security = tunsec; |
4597 | return 0; | 4597 | return 0; |
4598 | } | 4598 | } |
4599 | 4599 | ||
/* Free the TUN security blob passed in @security. */
static void selinux_tun_dev_free_security(void *security)
{
	kfree(security);
}
4604 | 4604 | ||
4605 | static int selinux_tun_dev_create(void) | 4605 | static int selinux_tun_dev_create(void) |
4606 | { | 4606 | { |
4607 | u32 sid = current_sid(); | 4607 | u32 sid = current_sid(); |
4608 | 4608 | ||
4609 | /* we aren't taking into account the "sockcreate" SID since the socket | 4609 | /* we aren't taking into account the "sockcreate" SID since the socket |
4610 | * that is being created here is not a socket in the traditional sense, | 4610 | * that is being created here is not a socket in the traditional sense, |
4611 | * instead it is a private sock, accessible only to the kernel, and | 4611 | * instead it is a private sock, accessible only to the kernel, and |
4612 | * representing a wide range of network traffic spanning multiple | 4612 | * representing a wide range of network traffic spanning multiple |
4613 | * connections unlike traditional sockets - check the TUN driver to | 4613 | * connections unlike traditional sockets - check the TUN driver to |
4614 | * get a better understanding of why this socket is special */ | 4614 | * get a better understanding of why this socket is special */ |
4615 | 4615 | ||
4616 | return avc_has_perm(sid, sid, SECCLASS_TUN_SOCKET, TUN_SOCKET__CREATE, | 4616 | return avc_has_perm(sid, sid, SECCLASS_TUN_SOCKET, TUN_SOCKET__CREATE, |
4617 | NULL); | 4617 | NULL); |
4618 | } | 4618 | } |
4619 | 4619 | ||
4620 | static int selinux_tun_dev_attach_queue(void *security) | 4620 | static int selinux_tun_dev_attach_queue(void *security) |
4621 | { | 4621 | { |
4622 | struct tun_security_struct *tunsec = security; | 4622 | struct tun_security_struct *tunsec = security; |
4623 | 4623 | ||
4624 | return avc_has_perm(current_sid(), tunsec->sid, SECCLASS_TUN_SOCKET, | 4624 | return avc_has_perm(current_sid(), tunsec->sid, SECCLASS_TUN_SOCKET, |
4625 | TUN_SOCKET__ATTACH_QUEUE, NULL); | 4625 | TUN_SOCKET__ATTACH_QUEUE, NULL); |
4626 | } | 4626 | } |
4627 | 4627 | ||
4628 | static int selinux_tun_dev_attach(struct sock *sk, void *security) | 4628 | static int selinux_tun_dev_attach(struct sock *sk, void *security) |
4629 | { | 4629 | { |
4630 | struct tun_security_struct *tunsec = security; | 4630 | struct tun_security_struct *tunsec = security; |
4631 | struct sk_security_struct *sksec = sk->sk_security; | 4631 | struct sk_security_struct *sksec = sk->sk_security; |
4632 | 4632 | ||
4633 | /* we don't currently perform any NetLabel based labeling here and it | 4633 | /* we don't currently perform any NetLabel based labeling here and it |
4634 | * isn't clear that we would want to do so anyway; while we could apply | 4634 | * isn't clear that we would want to do so anyway; while we could apply |
4635 | * labeling without the support of the TUN user the resulting labeled | 4635 | * labeling without the support of the TUN user the resulting labeled |
4636 | * traffic from the other end of the connection would almost certainly | 4636 | * traffic from the other end of the connection would almost certainly |
4637 | * cause confusion to the TUN user that had no idea network labeling | 4637 | * cause confusion to the TUN user that had no idea network labeling |
4638 | * protocols were being used */ | 4638 | * protocols were being used */ |
4639 | 4639 | ||
4640 | sksec->sid = tunsec->sid; | 4640 | sksec->sid = tunsec->sid; |
4641 | sksec->sclass = SECCLASS_TUN_SOCKET; | 4641 | sksec->sclass = SECCLASS_TUN_SOCKET; |
4642 | 4642 | ||
4643 | return 0; | 4643 | return 0; |
4644 | } | 4644 | } |
4645 | 4645 | ||
4646 | static int selinux_tun_dev_open(void *security) | 4646 | static int selinux_tun_dev_open(void *security) |
4647 | { | 4647 | { |
4648 | struct tun_security_struct *tunsec = security; | 4648 | struct tun_security_struct *tunsec = security; |
4649 | u32 sid = current_sid(); | 4649 | u32 sid = current_sid(); |
4650 | int err; | 4650 | int err; |
4651 | 4651 | ||
4652 | err = avc_has_perm(sid, tunsec->sid, SECCLASS_TUN_SOCKET, | 4652 | err = avc_has_perm(sid, tunsec->sid, SECCLASS_TUN_SOCKET, |
4653 | TUN_SOCKET__RELABELFROM, NULL); | 4653 | TUN_SOCKET__RELABELFROM, NULL); |
4654 | if (err) | 4654 | if (err) |
4655 | return err; | 4655 | return err; |
4656 | err = avc_has_perm(sid, sid, SECCLASS_TUN_SOCKET, | 4656 | err = avc_has_perm(sid, sid, SECCLASS_TUN_SOCKET, |
4657 | TUN_SOCKET__RELABELTO, NULL); | 4657 | TUN_SOCKET__RELABELTO, NULL); |
4658 | if (err) | 4658 | if (err) |
4659 | return err; | 4659 | return err; |
4660 | tunsec->sid = sid; | 4660 | tunsec->sid = sid; |
4661 | 4661 | ||
4662 | return 0; | 4662 | return 0; |
4663 | } | 4663 | } |
4664 | 4664 | ||
4665 | static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb) | 4665 | static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb) |
4666 | { | 4666 | { |
4667 | int err = 0; | 4667 | int err = 0; |
4668 | u32 perm; | 4668 | u32 perm; |
4669 | struct nlmsghdr *nlh; | 4669 | struct nlmsghdr *nlh; |
4670 | struct sk_security_struct *sksec = sk->sk_security; | 4670 | struct sk_security_struct *sksec = sk->sk_security; |
4671 | 4671 | ||
4672 | if (skb->len < NLMSG_HDRLEN) { | 4672 | if (skb->len < NLMSG_HDRLEN) { |
4673 | err = -EINVAL; | 4673 | err = -EINVAL; |
4674 | goto out; | 4674 | goto out; |
4675 | } | 4675 | } |
4676 | nlh = nlmsg_hdr(skb); | 4676 | nlh = nlmsg_hdr(skb); |
4677 | 4677 | ||
4678 | err = selinux_nlmsg_lookup(sksec->sclass, nlh->nlmsg_type, &perm); | 4678 | err = selinux_nlmsg_lookup(sksec->sclass, nlh->nlmsg_type, &perm); |
4679 | if (err) { | 4679 | if (err) { |
4680 | if (err == -EINVAL) { | 4680 | if (err == -EINVAL) { |
4681 | audit_log(current->audit_context, GFP_KERNEL, AUDIT_SELINUX_ERR, | 4681 | audit_log(current->audit_context, GFP_KERNEL, AUDIT_SELINUX_ERR, |
4682 | "SELinux: unrecognized netlink message" | 4682 | "SELinux: unrecognized netlink message" |
4683 | " type=%hu for sclass=%hu\n", | 4683 | " type=%hu for sclass=%hu\n", |
4684 | nlh->nlmsg_type, sksec->sclass); | 4684 | nlh->nlmsg_type, sksec->sclass); |
4685 | if (!selinux_enforcing || security_get_allow_unknown()) | 4685 | if (!selinux_enforcing || security_get_allow_unknown()) |
4686 | err = 0; | 4686 | err = 0; |
4687 | } | 4687 | } |
4688 | 4688 | ||
4689 | /* Ignore */ | 4689 | /* Ignore */ |
4690 | if (err == -ENOENT) | 4690 | if (err == -ENOENT) |
4691 | err = 0; | 4691 | err = 0; |
4692 | goto out; | 4692 | goto out; |
4693 | } | 4693 | } |
4694 | 4694 | ||
4695 | err = sock_has_perm(current, sk, perm); | 4695 | err = sock_has_perm(current, sk, perm); |
4696 | out: | 4696 | out: |
4697 | return err; | 4697 | return err; |
4698 | } | 4698 | } |
4699 | 4699 | ||
4700 | #ifdef CONFIG_NETFILTER | 4700 | #ifdef CONFIG_NETFILTER |
4701 | 4701 | ||
4702 | static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex, | 4702 | static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex, |
4703 | u16 family) | 4703 | u16 family) |
4704 | { | 4704 | { |
4705 | int err; | 4705 | int err; |
4706 | char *addrp; | 4706 | char *addrp; |
4707 | u32 peer_sid; | 4707 | u32 peer_sid; |
4708 | struct common_audit_data ad; | 4708 | struct common_audit_data ad; |
4709 | struct lsm_network_audit net = {0,}; | 4709 | struct lsm_network_audit net = {0,}; |
4710 | u8 secmark_active; | 4710 | u8 secmark_active; |
4711 | u8 netlbl_active; | 4711 | u8 netlbl_active; |
4712 | u8 peerlbl_active; | 4712 | u8 peerlbl_active; |
4713 | 4713 | ||
4714 | if (!selinux_policycap_netpeer) | 4714 | if (!selinux_policycap_netpeer) |
4715 | return NF_ACCEPT; | 4715 | return NF_ACCEPT; |
4716 | 4716 | ||
4717 | secmark_active = selinux_secmark_enabled(); | 4717 | secmark_active = selinux_secmark_enabled(); |
4718 | netlbl_active = netlbl_enabled(); | 4718 | netlbl_active = netlbl_enabled(); |
4719 | peerlbl_active = selinux_peerlbl_enabled(); | 4719 | peerlbl_active = selinux_peerlbl_enabled(); |
4720 | if (!secmark_active && !peerlbl_active) | 4720 | if (!secmark_active && !peerlbl_active) |
4721 | return NF_ACCEPT; | 4721 | return NF_ACCEPT; |
4722 | 4722 | ||
4723 | if (selinux_skb_peerlbl_sid(skb, family, &peer_sid) != 0) | 4723 | if (selinux_skb_peerlbl_sid(skb, family, &peer_sid) != 0) |
4724 | return NF_DROP; | 4724 | return NF_DROP; |
4725 | 4725 | ||
4726 | ad.type = LSM_AUDIT_DATA_NET; | 4726 | ad.type = LSM_AUDIT_DATA_NET; |
4727 | ad.u.net = &net; | 4727 | ad.u.net = &net; |
4728 | ad.u.net->netif = ifindex; | 4728 | ad.u.net->netif = ifindex; |
4729 | ad.u.net->family = family; | 4729 | ad.u.net->family = family; |
4730 | if (selinux_parse_skb(skb, &ad, &addrp, 1, NULL) != 0) | 4730 | if (selinux_parse_skb(skb, &ad, &addrp, 1, NULL) != 0) |
4731 | return NF_DROP; | 4731 | return NF_DROP; |
4732 | 4732 | ||
4733 | if (peerlbl_active) { | 4733 | if (peerlbl_active) { |
4734 | err = selinux_inet_sys_rcv_skb(ifindex, addrp, family, | 4734 | err = selinux_inet_sys_rcv_skb(ifindex, addrp, family, |
4735 | peer_sid, &ad); | 4735 | peer_sid, &ad); |
4736 | if (err) { | 4736 | if (err) { |
4737 | selinux_netlbl_err(skb, err, 1); | 4737 | selinux_netlbl_err(skb, err, 1); |
4738 | return NF_DROP; | 4738 | return NF_DROP; |
4739 | } | 4739 | } |
4740 | } | 4740 | } |
4741 | 4741 | ||
4742 | if (secmark_active) | 4742 | if (secmark_active) |
4743 | if (avc_has_perm(peer_sid, skb->secmark, | 4743 | if (avc_has_perm(peer_sid, skb->secmark, |
4744 | SECCLASS_PACKET, PACKET__FORWARD_IN, &ad)) | 4744 | SECCLASS_PACKET, PACKET__FORWARD_IN, &ad)) |
4745 | return NF_DROP; | 4745 | return NF_DROP; |
4746 | 4746 | ||
4747 | if (netlbl_active) | 4747 | if (netlbl_active) |
4748 | /* we do this in the FORWARD path and not the POST_ROUTING | 4748 | /* we do this in the FORWARD path and not the POST_ROUTING |
4749 | * path because we want to make sure we apply the necessary | 4749 | * path because we want to make sure we apply the necessary |
4750 | * labeling before IPsec is applied so we can leverage AH | 4750 | * labeling before IPsec is applied so we can leverage AH |
4751 | * protection */ | 4751 | * protection */ |
4752 | if (selinux_netlbl_skbuff_setsid(skb, family, peer_sid) != 0) | 4752 | if (selinux_netlbl_skbuff_setsid(skb, family, peer_sid) != 0) |
4753 | return NF_DROP; | 4753 | return NF_DROP; |
4754 | 4754 | ||
4755 | return NF_ACCEPT; | 4755 | return NF_ACCEPT; |
4756 | } | 4756 | } |
4757 | 4757 | ||
4758 | static unsigned int selinux_ipv4_forward(const struct nf_hook_ops *ops, | 4758 | static unsigned int selinux_ipv4_forward(const struct nf_hook_ops *ops, |
4759 | struct sk_buff *skb, | 4759 | struct sk_buff *skb, |
4760 | const struct net_device *in, | 4760 | const struct net_device *in, |
4761 | const struct net_device *out, | 4761 | const struct net_device *out, |
4762 | int (*okfn)(struct sk_buff *)) | 4762 | int (*okfn)(struct sk_buff *)) |
4763 | { | 4763 | { |
4764 | return selinux_ip_forward(skb, in->ifindex, PF_INET); | 4764 | return selinux_ip_forward(skb, in->ifindex, PF_INET); |
4765 | } | 4765 | } |
4766 | 4766 | ||
4767 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 4767 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
4768 | static unsigned int selinux_ipv6_forward(const struct nf_hook_ops *ops, | 4768 | static unsigned int selinux_ipv6_forward(const struct nf_hook_ops *ops, |
4769 | struct sk_buff *skb, | 4769 | struct sk_buff *skb, |
4770 | const struct net_device *in, | 4770 | const struct net_device *in, |
4771 | const struct net_device *out, | 4771 | const struct net_device *out, |
4772 | int (*okfn)(struct sk_buff *)) | 4772 | int (*okfn)(struct sk_buff *)) |
4773 | { | 4773 | { |
4774 | return selinux_ip_forward(skb, in->ifindex, PF_INET6); | 4774 | return selinux_ip_forward(skb, in->ifindex, PF_INET6); |
4775 | } | 4775 | } |
4776 | #endif /* IPV6 */ | 4776 | #endif /* IPV6 */ |
4777 | 4777 | ||
4778 | static unsigned int selinux_ip_output(struct sk_buff *skb, | 4778 | static unsigned int selinux_ip_output(struct sk_buff *skb, |
4779 | u16 family) | 4779 | u16 family) |
4780 | { | 4780 | { |
4781 | struct sock *sk; | 4781 | struct sock *sk; |
4782 | u32 sid; | 4782 | u32 sid; |
4783 | 4783 | ||
4784 | if (!netlbl_enabled()) | 4784 | if (!netlbl_enabled()) |
4785 | return NF_ACCEPT; | 4785 | return NF_ACCEPT; |
4786 | 4786 | ||
4787 | /* we do this in the LOCAL_OUT path and not the POST_ROUTING path | 4787 | /* we do this in the LOCAL_OUT path and not the POST_ROUTING path |
4788 | * because we want to make sure we apply the necessary labeling | 4788 | * because we want to make sure we apply the necessary labeling |
4789 | * before IPsec is applied so we can leverage AH protection */ | 4789 | * before IPsec is applied so we can leverage AH protection */ |
4790 | sk = skb->sk; | 4790 | sk = skb->sk; |
4791 | if (sk) { | 4791 | if (sk) { |
4792 | struct sk_security_struct *sksec; | 4792 | struct sk_security_struct *sksec; |
4793 | 4793 | ||
4794 | if (sk->sk_state == TCP_LISTEN) | 4794 | if (sk->sk_state == TCP_LISTEN) |
4795 | /* if the socket is the listening state then this | 4795 | /* if the socket is the listening state then this |
4796 | * packet is a SYN-ACK packet which means it needs to | 4796 | * packet is a SYN-ACK packet which means it needs to |
4797 | * be labeled based on the connection/request_sock and | 4797 | * be labeled based on the connection/request_sock and |
4798 | * not the parent socket. unfortunately, we can't | 4798 | * not the parent socket. unfortunately, we can't |
4799 | * lookup the request_sock yet as it isn't queued on | 4799 | * lookup the request_sock yet as it isn't queued on |
4800 | * the parent socket until after the SYN-ACK is sent. | 4800 | * the parent socket until after the SYN-ACK is sent. |
4801 | * the "solution" is to simply pass the packet as-is | 4801 | * the "solution" is to simply pass the packet as-is |
4802 | * as any IP option based labeling should be copied | 4802 | * as any IP option based labeling should be copied |
4803 | * from the initial connection request (in the IP | 4803 | * from the initial connection request (in the IP |
4804 | * layer). it is far from ideal, but until we get a | 4804 | * layer). it is far from ideal, but until we get a |
4805 | * security label in the packet itself this is the | 4805 | * security label in the packet itself this is the |
4806 | * best we can do. */ | 4806 | * best we can do. */ |
4807 | return NF_ACCEPT; | 4807 | return NF_ACCEPT; |
4808 | 4808 | ||
4809 | /* standard practice, label using the parent socket */ | 4809 | /* standard practice, label using the parent socket */ |
4810 | sksec = sk->sk_security; | 4810 | sksec = sk->sk_security; |
4811 | sid = sksec->sid; | 4811 | sid = sksec->sid; |
4812 | } else | 4812 | } else |
4813 | sid = SECINITSID_KERNEL; | 4813 | sid = SECINITSID_KERNEL; |
4814 | if (selinux_netlbl_skbuff_setsid(skb, family, sid) != 0) | 4814 | if (selinux_netlbl_skbuff_setsid(skb, family, sid) != 0) |
4815 | return NF_DROP; | 4815 | return NF_DROP; |
4816 | 4816 | ||
4817 | return NF_ACCEPT; | 4817 | return NF_ACCEPT; |
4818 | } | 4818 | } |
4819 | 4819 | ||
4820 | static unsigned int selinux_ipv4_output(const struct nf_hook_ops *ops, | 4820 | static unsigned int selinux_ipv4_output(const struct nf_hook_ops *ops, |
4821 | struct sk_buff *skb, | 4821 | struct sk_buff *skb, |
4822 | const struct net_device *in, | 4822 | const struct net_device *in, |
4823 | const struct net_device *out, | 4823 | const struct net_device *out, |
4824 | int (*okfn)(struct sk_buff *)) | 4824 | int (*okfn)(struct sk_buff *)) |
4825 | { | 4825 | { |
4826 | return selinux_ip_output(skb, PF_INET); | 4826 | return selinux_ip_output(skb, PF_INET); |
4827 | } | 4827 | } |
4828 | 4828 | ||
4829 | static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb, | 4829 | static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb, |
4830 | int ifindex, | 4830 | int ifindex, |
4831 | u16 family) | 4831 | u16 family) |
4832 | { | 4832 | { |
4833 | struct sock *sk = skb->sk; | 4833 | struct sock *sk = skb->sk; |
4834 | struct sk_security_struct *sksec; | 4834 | struct sk_security_struct *sksec; |
4835 | struct common_audit_data ad; | 4835 | struct common_audit_data ad; |
4836 | struct lsm_network_audit net = {0,}; | 4836 | struct lsm_network_audit net = {0,}; |
4837 | char *addrp; | 4837 | char *addrp; |
4838 | u8 proto; | 4838 | u8 proto; |
4839 | 4839 | ||
4840 | if (sk == NULL) | 4840 | if (sk == NULL) |
4841 | return NF_ACCEPT; | 4841 | return NF_ACCEPT; |
4842 | sksec = sk->sk_security; | 4842 | sksec = sk->sk_security; |
4843 | 4843 | ||
4844 | ad.type = LSM_AUDIT_DATA_NET; | 4844 | ad.type = LSM_AUDIT_DATA_NET; |
4845 | ad.u.net = &net; | 4845 | ad.u.net = &net; |
4846 | ad.u.net->netif = ifindex; | 4846 | ad.u.net->netif = ifindex; |
4847 | ad.u.net->family = family; | 4847 | ad.u.net->family = family; |
4848 | if (selinux_parse_skb(skb, &ad, &addrp, 0, &proto)) | 4848 | if (selinux_parse_skb(skb, &ad, &addrp, 0, &proto)) |
4849 | return NF_DROP; | 4849 | return NF_DROP; |
4850 | 4850 | ||
4851 | if (selinux_secmark_enabled()) | 4851 | if (selinux_secmark_enabled()) |
4852 | if (avc_has_perm(sksec->sid, skb->secmark, | 4852 | if (avc_has_perm(sksec->sid, skb->secmark, |
4853 | SECCLASS_PACKET, PACKET__SEND, &ad)) | 4853 | SECCLASS_PACKET, PACKET__SEND, &ad)) |
4854 | return NF_DROP_ERR(-ECONNREFUSED); | 4854 | return NF_DROP_ERR(-ECONNREFUSED); |
4855 | 4855 | ||
4856 | if (selinux_xfrm_postroute_last(sksec->sid, skb, &ad, proto)) | 4856 | if (selinux_xfrm_postroute_last(sksec->sid, skb, &ad, proto)) |
4857 | return NF_DROP_ERR(-ECONNREFUSED); | 4857 | return NF_DROP_ERR(-ECONNREFUSED); |
4858 | 4858 | ||
4859 | return NF_ACCEPT; | 4859 | return NF_ACCEPT; |
4860 | } | 4860 | } |
4861 | 4861 | ||
4862 | static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex, | 4862 | static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex, |
4863 | u16 family) | 4863 | u16 family) |
4864 | { | 4864 | { |
4865 | u32 secmark_perm; | 4865 | u32 secmark_perm; |
4866 | u32 peer_sid; | 4866 | u32 peer_sid; |
4867 | struct sock *sk; | 4867 | struct sock *sk; |
4868 | struct common_audit_data ad; | 4868 | struct common_audit_data ad; |
4869 | struct lsm_network_audit net = {0,}; | 4869 | struct lsm_network_audit net = {0,}; |
4870 | char *addrp; | 4870 | char *addrp; |
4871 | u8 secmark_active; | 4871 | u8 secmark_active; |
4872 | u8 peerlbl_active; | 4872 | u8 peerlbl_active; |
4873 | 4873 | ||
4874 | /* If any sort of compatibility mode is enabled then handoff processing | 4874 | /* If any sort of compatibility mode is enabled then handoff processing |
4875 | * to the selinux_ip_postroute_compat() function to deal with the | 4875 | * to the selinux_ip_postroute_compat() function to deal with the |
4876 | * special handling. We do this in an attempt to keep this function | 4876 | * special handling. We do this in an attempt to keep this function |
4877 | * as fast and as clean as possible. */ | 4877 | * as fast and as clean as possible. */ |
4878 | if (!selinux_policycap_netpeer) | 4878 | if (!selinux_policycap_netpeer) |
4879 | return selinux_ip_postroute_compat(skb, ifindex, family); | 4879 | return selinux_ip_postroute_compat(skb, ifindex, family); |
4880 | 4880 | ||
4881 | secmark_active = selinux_secmark_enabled(); | 4881 | secmark_active = selinux_secmark_enabled(); |
4882 | peerlbl_active = selinux_peerlbl_enabled(); | 4882 | peerlbl_active = selinux_peerlbl_enabled(); |
4883 | if (!secmark_active && !peerlbl_active) | 4883 | if (!secmark_active && !peerlbl_active) |
4884 | return NF_ACCEPT; | 4884 | return NF_ACCEPT; |
4885 | 4885 | ||
4886 | sk = skb->sk; | 4886 | sk = skb->sk; |
4887 | 4887 | ||
4888 | #ifdef CONFIG_XFRM | 4888 | #ifdef CONFIG_XFRM |
4889 | /* If skb->dst->xfrm is non-NULL then the packet is undergoing an IPsec | 4889 | /* If skb->dst->xfrm is non-NULL then the packet is undergoing an IPsec |
4890 | * packet transformation so allow the packet to pass without any checks | 4890 | * packet transformation so allow the packet to pass without any checks |
4891 | * since we'll have another chance to perform access control checks | 4891 | * since we'll have another chance to perform access control checks |
4892 | * when the packet is on it's final way out. | 4892 | * when the packet is on it's final way out. |
4893 | * NOTE: there appear to be some IPv6 multicast cases where skb->dst | 4893 | * NOTE: there appear to be some IPv6 multicast cases where skb->dst |
4894 | * is NULL, in this case go ahead and apply access control. | 4894 | * is NULL, in this case go ahead and apply access control. |
4895 | * NOTE: if this is a local socket (skb->sk != NULL) that is in the | 4895 | * NOTE: if this is a local socket (skb->sk != NULL) that is in the |
4896 | * TCP listening state we cannot wait until the XFRM processing | 4896 | * TCP listening state we cannot wait until the XFRM processing |
4897 | * is done as we will miss out on the SA label if we do; | 4897 | * is done as we will miss out on the SA label if we do; |
4898 | * unfortunately, this means more work, but it is only once per | 4898 | * unfortunately, this means more work, but it is only once per |
4899 | * connection. */ | 4899 | * connection. */ |
4900 | if (skb_dst(skb) != NULL && skb_dst(skb)->xfrm != NULL && | 4900 | if (skb_dst(skb) != NULL && skb_dst(skb)->xfrm != NULL && |
4901 | !(sk != NULL && sk->sk_state == TCP_LISTEN)) | 4901 | !(sk != NULL && sk->sk_state == TCP_LISTEN)) |
4902 | return NF_ACCEPT; | 4902 | return NF_ACCEPT; |
4903 | #endif | 4903 | #endif |
4904 | 4904 | ||
4905 | if (sk == NULL) { | 4905 | if (sk == NULL) { |
4906 | /* Without an associated socket the packet is either coming | 4906 | /* Without an associated socket the packet is either coming |
4907 | * from the kernel or it is being forwarded; check the packet | 4907 | * from the kernel or it is being forwarded; check the packet |
4908 | * to determine which and if the packet is being forwarded | 4908 | * to determine which and if the packet is being forwarded |
4909 | * query the packet directly to determine the security label. */ | 4909 | * query the packet directly to determine the security label. */ |
4910 | if (skb->skb_iif) { | 4910 | if (skb->skb_iif) { |
4911 | secmark_perm = PACKET__FORWARD_OUT; | 4911 | secmark_perm = PACKET__FORWARD_OUT; |
4912 | if (selinux_skb_peerlbl_sid(skb, family, &peer_sid)) | 4912 | if (selinux_skb_peerlbl_sid(skb, family, &peer_sid)) |
4913 | return NF_DROP; | 4913 | return NF_DROP; |
4914 | } else { | 4914 | } else { |
4915 | secmark_perm = PACKET__SEND; | 4915 | secmark_perm = PACKET__SEND; |
4916 | peer_sid = SECINITSID_KERNEL; | 4916 | peer_sid = SECINITSID_KERNEL; |
4917 | } | 4917 | } |
4918 | } else if (sk->sk_state == TCP_LISTEN) { | 4918 | } else if (sk->sk_state == TCP_LISTEN) { |
4919 | /* Locally generated packet but the associated socket is in the | 4919 | /* Locally generated packet but the associated socket is in the |
4920 | * listening state which means this is a SYN-ACK packet. In | 4920 | * listening state which means this is a SYN-ACK packet. In |
4921 | * this particular case the correct security label is assigned | 4921 | * this particular case the correct security label is assigned |
4922 | * to the connection/request_sock but unfortunately we can't | 4922 | * to the connection/request_sock but unfortunately we can't |
4923 | * query the request_sock as it isn't queued on the parent | 4923 | * query the request_sock as it isn't queued on the parent |
4924 | * socket until after the SYN-ACK packet is sent; the only | 4924 | * socket until after the SYN-ACK packet is sent; the only |
4925 | * viable choice is to regenerate the label like we do in | 4925 | * viable choice is to regenerate the label like we do in |
4926 | * selinux_inet_conn_request(). See also selinux_ip_output() | 4926 | * selinux_inet_conn_request(). See also selinux_ip_output() |
4927 | * for similar problems. */ | 4927 | * for similar problems. */ |
4928 | u32 skb_sid; | 4928 | u32 skb_sid; |
4929 | struct sk_security_struct *sksec = sk->sk_security; | 4929 | struct sk_security_struct *sksec = sk->sk_security; |
4930 | if (selinux_skb_peerlbl_sid(skb, family, &skb_sid)) | 4930 | if (selinux_skb_peerlbl_sid(skb, family, &skb_sid)) |
4931 | return NF_DROP; | 4931 | return NF_DROP; |
4932 | /* At this point, if the returned skb peerlbl is SECSID_NULL | 4932 | /* At this point, if the returned skb peerlbl is SECSID_NULL |
4933 | * and the packet has been through at least one XFRM | 4933 | * and the packet has been through at least one XFRM |
4934 | * transformation then we must be dealing with the "final" | 4934 | * transformation then we must be dealing with the "final" |
4935 | * form of labeled IPsec packet; since we've already applied | 4935 | * form of labeled IPsec packet; since we've already applied |
4936 | * all of our access controls on this packet we can safely | 4936 | * all of our access controls on this packet we can safely |
4937 | * pass the packet. */ | 4937 | * pass the packet. */ |
4938 | if (skb_sid == SECSID_NULL) { | 4938 | if (skb_sid == SECSID_NULL) { |
4939 | switch (family) { | 4939 | switch (family) { |
4940 | case PF_INET: | 4940 | case PF_INET: |
4941 | if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) | 4941 | if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) |
4942 | return NF_ACCEPT; | 4942 | return NF_ACCEPT; |
4943 | break; | 4943 | break; |
4944 | case PF_INET6: | 4944 | case PF_INET6: |
4945 | if (IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) | 4945 | if (IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) |
4946 | return NF_ACCEPT; | 4946 | return NF_ACCEPT; |
4947 | default: | 4947 | default: |
4948 | return NF_DROP_ERR(-ECONNREFUSED); | 4948 | return NF_DROP_ERR(-ECONNREFUSED); |
4949 | } | 4949 | } |
4950 | } | 4950 | } |
4951 | if (selinux_conn_sid(sksec->sid, skb_sid, &peer_sid)) | 4951 | if (selinux_conn_sid(sksec->sid, skb_sid, &peer_sid)) |
4952 | return NF_DROP; | 4952 | return NF_DROP; |
4953 | secmark_perm = PACKET__SEND; | 4953 | secmark_perm = PACKET__SEND; |
4954 | } else { | 4954 | } else { |
4955 | /* Locally generated packet, fetch the security label from the | 4955 | /* Locally generated packet, fetch the security label from the |
4956 | * associated socket. */ | 4956 | * associated socket. */ |
4957 | struct sk_security_struct *sksec = sk->sk_security; | 4957 | struct sk_security_struct *sksec = sk->sk_security; |
4958 | peer_sid = sksec->sid; | 4958 | peer_sid = sksec->sid; |
4959 | secmark_perm = PACKET__SEND; | 4959 | secmark_perm = PACKET__SEND; |
4960 | } | 4960 | } |
4961 | 4961 | ||
4962 | ad.type = LSM_AUDIT_DATA_NET; | 4962 | ad.type = LSM_AUDIT_DATA_NET; |
4963 | ad.u.net = &net; | 4963 | ad.u.net = &net; |
4964 | ad.u.net->netif = ifindex; | 4964 | ad.u.net->netif = ifindex; |
4965 | ad.u.net->family = family; | 4965 | ad.u.net->family = family; |
4966 | if (selinux_parse_skb(skb, &ad, &addrp, 0, NULL)) | 4966 | if (selinux_parse_skb(skb, &ad, &addrp, 0, NULL)) |
4967 | return NF_DROP; | 4967 | return NF_DROP; |
4968 | 4968 | ||
4969 | if (secmark_active) | 4969 | if (secmark_active) |
4970 | if (avc_has_perm(peer_sid, skb->secmark, | 4970 | if (avc_has_perm(peer_sid, skb->secmark, |
4971 | SECCLASS_PACKET, secmark_perm, &ad)) | 4971 | SECCLASS_PACKET, secmark_perm, &ad)) |
4972 | return NF_DROP_ERR(-ECONNREFUSED); | 4972 | return NF_DROP_ERR(-ECONNREFUSED); |
4973 | 4973 | ||
4974 | if (peerlbl_active) { | 4974 | if (peerlbl_active) { |
4975 | u32 if_sid; | 4975 | u32 if_sid; |
4976 | u32 node_sid; | 4976 | u32 node_sid; |
4977 | 4977 | ||
4978 | if (sel_netif_sid(ifindex, &if_sid)) | 4978 | if (sel_netif_sid(ifindex, &if_sid)) |
4979 | return NF_DROP; | 4979 | return NF_DROP; |
4980 | if (avc_has_perm(peer_sid, if_sid, | 4980 | if (avc_has_perm(peer_sid, if_sid, |
4981 | SECCLASS_NETIF, NETIF__EGRESS, &ad)) | 4981 | SECCLASS_NETIF, NETIF__EGRESS, &ad)) |
4982 | return NF_DROP_ERR(-ECONNREFUSED); | 4982 | return NF_DROP_ERR(-ECONNREFUSED); |
4983 | 4983 | ||
4984 | if (sel_netnode_sid(addrp, family, &node_sid)) | 4984 | if (sel_netnode_sid(addrp, family, &node_sid)) |
4985 | return NF_DROP; | 4985 | return NF_DROP; |
4986 | if (avc_has_perm(peer_sid, node_sid, | 4986 | if (avc_has_perm(peer_sid, node_sid, |
4987 | SECCLASS_NODE, NODE__SENDTO, &ad)) | 4987 | SECCLASS_NODE, NODE__SENDTO, &ad)) |
4988 | return NF_DROP_ERR(-ECONNREFUSED); | 4988 | return NF_DROP_ERR(-ECONNREFUSED); |
4989 | } | 4989 | } |
4990 | 4990 | ||
4991 | return NF_ACCEPT; | 4991 | return NF_ACCEPT; |
4992 | } | 4992 | } |
4993 | 4993 | ||
4994 | static unsigned int selinux_ipv4_postroute(const struct nf_hook_ops *ops, | 4994 | static unsigned int selinux_ipv4_postroute(const struct nf_hook_ops *ops, |
4995 | struct sk_buff *skb, | 4995 | struct sk_buff *skb, |
4996 | const struct net_device *in, | 4996 | const struct net_device *in, |
4997 | const struct net_device *out, | 4997 | const struct net_device *out, |
4998 | int (*okfn)(struct sk_buff *)) | 4998 | int (*okfn)(struct sk_buff *)) |
4999 | { | 4999 | { |
5000 | return selinux_ip_postroute(skb, out->ifindex, PF_INET); | 5000 | return selinux_ip_postroute(skb, out->ifindex, PF_INET); |
5001 | } | 5001 | } |
5002 | 5002 | ||
5003 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 5003 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
5004 | static unsigned int selinux_ipv6_postroute(const struct nf_hook_ops *ops, | 5004 | static unsigned int selinux_ipv6_postroute(const struct nf_hook_ops *ops, |
5005 | struct sk_buff *skb, | 5005 | struct sk_buff *skb, |
5006 | const struct net_device *in, | 5006 | const struct net_device *in, |
5007 | const struct net_device *out, | 5007 | const struct net_device *out, |
5008 | int (*okfn)(struct sk_buff *)) | 5008 | int (*okfn)(struct sk_buff *)) |
5009 | { | 5009 | { |
5010 | return selinux_ip_postroute(skb, out->ifindex, PF_INET6); | 5010 | return selinux_ip_postroute(skb, out->ifindex, PF_INET6); |
5011 | } | 5011 | } |
5012 | #endif /* IPV6 */ | 5012 | #endif /* IPV6 */ |
5013 | 5013 | ||
5014 | #endif /* CONFIG_NETFILTER */ | 5014 | #endif /* CONFIG_NETFILTER */ |
5015 | 5015 | ||
/* Netlink send check: cap_netlink_send() is consulted first and any error
 * it reports is returned as-is; otherwise the result of
 * selinux_nlmsg_perm() is returned. */
static int selinux_netlink_send(struct sock *sk, struct sk_buff *skb)
{
	int rc = cap_netlink_send(sk, skb);

	if (rc != 0)
		return rc;

	return selinux_nlmsg_perm(sk, skb);
}
5026 | 5026 | ||
5027 | static int ipc_alloc_security(struct task_struct *task, | 5027 | static int ipc_alloc_security(struct task_struct *task, |
5028 | struct kern_ipc_perm *perm, | 5028 | struct kern_ipc_perm *perm, |
5029 | u16 sclass) | 5029 | u16 sclass) |
5030 | { | 5030 | { |
5031 | struct ipc_security_struct *isec; | 5031 | struct ipc_security_struct *isec; |
5032 | u32 sid; | 5032 | u32 sid; |
5033 | 5033 | ||
5034 | isec = kzalloc(sizeof(struct ipc_security_struct), GFP_KERNEL); | 5034 | isec = kzalloc(sizeof(struct ipc_security_struct), GFP_KERNEL); |
5035 | if (!isec) | 5035 | if (!isec) |
5036 | return -ENOMEM; | 5036 | return -ENOMEM; |
5037 | 5037 | ||
5038 | sid = task_sid(task); | 5038 | sid = task_sid(task); |
5039 | isec->sclass = sclass; | 5039 | isec->sclass = sclass; |
5040 | isec->sid = sid; | 5040 | isec->sid = sid; |
5041 | perm->security = isec; | 5041 | perm->security = isec; |
5042 | 5042 | ||
5043 | return 0; | 5043 | return 0; |
5044 | } | 5044 | } |
5045 | 5045 | ||
5046 | static void ipc_free_security(struct kern_ipc_perm *perm) | 5046 | static void ipc_free_security(struct kern_ipc_perm *perm) |
5047 | { | 5047 | { |
5048 | struct ipc_security_struct *isec = perm->security; | 5048 | struct ipc_security_struct *isec = perm->security; |
5049 | perm->security = NULL; | 5049 | perm->security = NULL; |
5050 | kfree(isec); | 5050 | kfree(isec); |
5051 | } | 5051 | } |
5052 | 5052 | ||
5053 | static int msg_msg_alloc_security(struct msg_msg *msg) | 5053 | static int msg_msg_alloc_security(struct msg_msg *msg) |
5054 | { | 5054 | { |
5055 | struct msg_security_struct *msec; | 5055 | struct msg_security_struct *msec; |
5056 | 5056 | ||
5057 | msec = kzalloc(sizeof(struct msg_security_struct), GFP_KERNEL); | 5057 | msec = kzalloc(sizeof(struct msg_security_struct), GFP_KERNEL); |
5058 | if (!msec) | 5058 | if (!msec) |
5059 | return -ENOMEM; | 5059 | return -ENOMEM; |
5060 | 5060 | ||
5061 | msec->sid = SECINITSID_UNLABELED; | 5061 | msec->sid = SECINITSID_UNLABELED; |
5062 | msg->security = msec; | 5062 | msg->security = msec; |
5063 | 5063 | ||
5064 | return 0; | 5064 | return 0; |
5065 | } | 5065 | } |
5066 | 5066 | ||
5067 | static void msg_msg_free_security(struct msg_msg *msg) | 5067 | static void msg_msg_free_security(struct msg_msg *msg) |
5068 | { | 5068 | { |
5069 | struct msg_security_struct *msec = msg->security; | 5069 | struct msg_security_struct *msec = msg->security; |
5070 | 5070 | ||
5071 | msg->security = NULL; | 5071 | msg->security = NULL; |
5072 | kfree(msec); | 5072 | kfree(msec); |
5073 | } | 5073 | } |
5074 | 5074 | ||
5075 | static int ipc_has_perm(struct kern_ipc_perm *ipc_perms, | 5075 | static int ipc_has_perm(struct kern_ipc_perm *ipc_perms, |
5076 | u32 perms) | 5076 | u32 perms) |
5077 | { | 5077 | { |
5078 | struct ipc_security_struct *isec; | 5078 | struct ipc_security_struct *isec; |
5079 | struct common_audit_data ad; | 5079 | struct common_audit_data ad; |
5080 | u32 sid = current_sid(); | 5080 | u32 sid = current_sid(); |
5081 | 5081 | ||
5082 | isec = ipc_perms->security; | 5082 | isec = ipc_perms->security; |
5083 | 5083 | ||
5084 | ad.type = LSM_AUDIT_DATA_IPC; | 5084 | ad.type = LSM_AUDIT_DATA_IPC; |
5085 | ad.u.ipc_id = ipc_perms->key; | 5085 | ad.u.ipc_id = ipc_perms->key; |
5086 | 5086 | ||
5087 | return avc_has_perm(sid, isec->sid, isec->sclass, perms, &ad); | 5087 | return avc_has_perm(sid, isec->sid, isec->sclass, perms, &ad); |
5088 | } | 5088 | } |
5089 | 5089 | ||
/* Hook wrapper: allocates the security blob for @msg via
 * msg_msg_alloc_security() and returns its result. */
static int selinux_msg_msg_alloc_security(struct msg_msg *msg)
{
	int rc = msg_msg_alloc_security(msg);

	return rc;
}
5094 | 5094 | ||
/* Hook wrapper: releases the security blob attached to @msg via
 * msg_msg_free_security(). */
static void selinux_msg_msg_free_security(struct msg_msg *msg)
{
	msg_msg_free_security(msg);
	return;
}
5099 | 5099 | ||
5100 | /* message queue security operations */ | 5100 | /* message queue security operations */ |
5101 | static int selinux_msg_queue_alloc_security(struct msg_queue *msq) | 5101 | static int selinux_msg_queue_alloc_security(struct msg_queue *msq) |
5102 | { | 5102 | { |
5103 | struct ipc_security_struct *isec; | 5103 | struct ipc_security_struct *isec; |
5104 | struct common_audit_data ad; | 5104 | struct common_audit_data ad; |
5105 | u32 sid = current_sid(); | 5105 | u32 sid = current_sid(); |
5106 | int rc; | 5106 | int rc; |
5107 | 5107 | ||
5108 | rc = ipc_alloc_security(current, &msq->q_perm, SECCLASS_MSGQ); | 5108 | rc = ipc_alloc_security(current, &msq->q_perm, SECCLASS_MSGQ); |
5109 | if (rc) | 5109 | if (rc) |
5110 | return rc; | 5110 | return rc; |
5111 | 5111 | ||
5112 | isec = msq->q_perm.security; | 5112 | isec = msq->q_perm.security; |
5113 | 5113 | ||
5114 | ad.type = LSM_AUDIT_DATA_IPC; | 5114 | ad.type = LSM_AUDIT_DATA_IPC; |
5115 | ad.u.ipc_id = msq->q_perm.key; | 5115 | ad.u.ipc_id = msq->q_perm.key; |
5116 | 5116 | ||
5117 | rc = avc_has_perm(sid, isec->sid, SECCLASS_MSGQ, | 5117 | rc = avc_has_perm(sid, isec->sid, SECCLASS_MSGQ, |
5118 | MSGQ__CREATE, &ad); | 5118 | MSGQ__CREATE, &ad); |
5119 | if (rc) { | 5119 | if (rc) { |
5120 | ipc_free_security(&msq->q_perm); | 5120 | ipc_free_security(&msq->q_perm); |
5121 | return rc; | 5121 | return rc; |
5122 | } | 5122 | } |
5123 | return 0; | 5123 | return 0; |
5124 | } | 5124 | } |
5125 | 5125 | ||
5126 | static void selinux_msg_queue_free_security(struct msg_queue *msq) | 5126 | static void selinux_msg_queue_free_security(struct msg_queue *msq) |
5127 | { | 5127 | { |
5128 | ipc_free_security(&msq->q_perm); | 5128 | ipc_free_security(&msq->q_perm); |
5129 | } | 5129 | } |
5130 | 5130 | ||
5131 | static int selinux_msg_queue_associate(struct msg_queue *msq, int msqflg) | 5131 | static int selinux_msg_queue_associate(struct msg_queue *msq, int msqflg) |
5132 | { | 5132 | { |
5133 | struct ipc_security_struct *isec; | 5133 | struct ipc_security_struct *isec; |
5134 | struct common_audit_data ad; | 5134 | struct common_audit_data ad; |
5135 | u32 sid = current_sid(); | 5135 | u32 sid = current_sid(); |
5136 | 5136 | ||
5137 | isec = msq->q_perm.security; | 5137 | isec = msq->q_perm.security; |
5138 | 5138 | ||
5139 | ad.type = LSM_AUDIT_DATA_IPC; | 5139 | ad.type = LSM_AUDIT_DATA_IPC; |
5140 | ad.u.ipc_id = msq->q_perm.key; | 5140 | ad.u.ipc_id = msq->q_perm.key; |
5141 | 5141 | ||
5142 | return avc_has_perm(sid, isec->sid, SECCLASS_MSGQ, | 5142 | return avc_has_perm(sid, isec->sid, SECCLASS_MSGQ, |
5143 | MSGQ__ASSOCIATE, &ad); | 5143 | MSGQ__ASSOCIATE, &ad); |
5144 | } | 5144 | } |
5145 | 5145 | ||
5146 | static int selinux_msg_queue_msgctl(struct msg_queue *msq, int cmd) | 5146 | static int selinux_msg_queue_msgctl(struct msg_queue *msq, int cmd) |
5147 | { | 5147 | { |
5148 | int err; | 5148 | int err; |
5149 | int perms; | 5149 | int perms; |
5150 | 5150 | ||
5151 | switch (cmd) { | 5151 | switch (cmd) { |
5152 | case IPC_INFO: | 5152 | case IPC_INFO: |
5153 | case MSG_INFO: | 5153 | case MSG_INFO: |
5154 | /* No specific object, just general system-wide information. */ | 5154 | /* No specific object, just general system-wide information. */ |
5155 | return task_has_system(current, SYSTEM__IPC_INFO); | 5155 | return task_has_system(current, SYSTEM__IPC_INFO); |
5156 | case IPC_STAT: | 5156 | case IPC_STAT: |
5157 | case MSG_STAT: | 5157 | case MSG_STAT: |
5158 | perms = MSGQ__GETATTR | MSGQ__ASSOCIATE; | 5158 | perms = MSGQ__GETATTR | MSGQ__ASSOCIATE; |
5159 | break; | 5159 | break; |
5160 | case IPC_SET: | 5160 | case IPC_SET: |
5161 | perms = MSGQ__SETATTR; | 5161 | perms = MSGQ__SETATTR; |
5162 | break; | 5162 | break; |
5163 | case IPC_RMID: | 5163 | case IPC_RMID: |
5164 | perms = MSGQ__DESTROY; | 5164 | perms = MSGQ__DESTROY; |
5165 | break; | 5165 | break; |
5166 | default: | 5166 | default: |
5167 | return 0; | 5167 | return 0; |
5168 | } | 5168 | } |
5169 | 5169 | ||
5170 | err = ipc_has_perm(&msq->q_perm, perms); | 5170 | err = ipc_has_perm(&msq->q_perm, perms); |
5171 | return err; | 5171 | return err; |
5172 | } | 5172 | } |
5173 | 5173 | ||
5174 | static int selinux_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg, int msqflg) | 5174 | static int selinux_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg, int msqflg) |
5175 | { | 5175 | { |
5176 | struct ipc_security_struct *isec; | 5176 | struct ipc_security_struct *isec; |
5177 | struct msg_security_struct *msec; | 5177 | struct msg_security_struct *msec; |
5178 | struct common_audit_data ad; | 5178 | struct common_audit_data ad; |
5179 | u32 sid = current_sid(); | 5179 | u32 sid = current_sid(); |
5180 | int rc; | 5180 | int rc; |
5181 | 5181 | ||
5182 | isec = msq->q_perm.security; | 5182 | isec = msq->q_perm.security; |
5183 | msec = msg->security; | 5183 | msec = msg->security; |
5184 | 5184 | ||
5185 | /* | 5185 | /* |
5186 | * First time through, need to assign label to the message | 5186 | * First time through, need to assign label to the message |
5187 | */ | 5187 | */ |
5188 | if (msec->sid == SECINITSID_UNLABELED) { | 5188 | if (msec->sid == SECINITSID_UNLABELED) { |
5189 | /* | 5189 | /* |
5190 | * Compute new sid based on current process and | 5190 | * Compute new sid based on current process and |
5191 | * message queue this message will be stored in | 5191 | * message queue this message will be stored in |
5192 | */ | 5192 | */ |
5193 | rc = security_transition_sid(sid, isec->sid, SECCLASS_MSG, | 5193 | rc = security_transition_sid(sid, isec->sid, SECCLASS_MSG, |
5194 | NULL, &msec->sid); | 5194 | NULL, &msec->sid); |
5195 | if (rc) | 5195 | if (rc) |
5196 | return rc; | 5196 | return rc; |
5197 | } | 5197 | } |
5198 | 5198 | ||
5199 | ad.type = LSM_AUDIT_DATA_IPC; | 5199 | ad.type = LSM_AUDIT_DATA_IPC; |
5200 | ad.u.ipc_id = msq->q_perm.key; | 5200 | ad.u.ipc_id = msq->q_perm.key; |
5201 | 5201 | ||
5202 | /* Can this process write to the queue? */ | 5202 | /* Can this process write to the queue? */ |
5203 | rc = avc_has_perm(sid, isec->sid, SECCLASS_MSGQ, | 5203 | rc = avc_has_perm(sid, isec->sid, SECCLASS_MSGQ, |
5204 | MSGQ__WRITE, &ad); | 5204 | MSGQ__WRITE, &ad); |
5205 | if (!rc) | 5205 | if (!rc) |
5206 | /* Can this process send the message */ | 5206 | /* Can this process send the message */ |
5207 | rc = avc_has_perm(sid, msec->sid, SECCLASS_MSG, | 5207 | rc = avc_has_perm(sid, msec->sid, SECCLASS_MSG, |
5208 | MSG__SEND, &ad); | 5208 | MSG__SEND, &ad); |
5209 | if (!rc) | 5209 | if (!rc) |
5210 | /* Can the message be put in the queue? */ | 5210 | /* Can the message be put in the queue? */ |
5211 | rc = avc_has_perm(msec->sid, isec->sid, SECCLASS_MSGQ, | 5211 | rc = avc_has_perm(msec->sid, isec->sid, SECCLASS_MSGQ, |
5212 | MSGQ__ENQUEUE, &ad); | 5212 | MSGQ__ENQUEUE, &ad); |
5213 | 5213 | ||
5214 | return rc; | 5214 | return rc; |
5215 | } | 5215 | } |
5216 | 5216 | ||
5217 | static int selinux_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg, | 5217 | static int selinux_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg, |
5218 | struct task_struct *target, | 5218 | struct task_struct *target, |
5219 | long type, int mode) | 5219 | long type, int mode) |
5220 | { | 5220 | { |
5221 | struct ipc_security_struct *isec; | 5221 | struct ipc_security_struct *isec; |
5222 | struct msg_security_struct *msec; | 5222 | struct msg_security_struct *msec; |
5223 | struct common_audit_data ad; | 5223 | struct common_audit_data ad; |
5224 | u32 sid = task_sid(target); | 5224 | u32 sid = task_sid(target); |
5225 | int rc; | 5225 | int rc; |
5226 | 5226 | ||
5227 | isec = msq->q_perm.security; | 5227 | isec = msq->q_perm.security; |
5228 | msec = msg->security; | 5228 | msec = msg->security; |
5229 | 5229 | ||
5230 | ad.type = LSM_AUDIT_DATA_IPC; | 5230 | ad.type = LSM_AUDIT_DATA_IPC; |
5231 | ad.u.ipc_id = msq->q_perm.key; | 5231 | ad.u.ipc_id = msq->q_perm.key; |
5232 | 5232 | ||
5233 | rc = avc_has_perm(sid, isec->sid, | 5233 | rc = avc_has_perm(sid, isec->sid, |
5234 | SECCLASS_MSGQ, MSGQ__READ, &ad); | 5234 | SECCLASS_MSGQ, MSGQ__READ, &ad); |
5235 | if (!rc) | 5235 | if (!rc) |
5236 | rc = avc_has_perm(sid, msec->sid, | 5236 | rc = avc_has_perm(sid, msec->sid, |
5237 | SECCLASS_MSG, MSG__RECEIVE, &ad); | 5237 | SECCLASS_MSG, MSG__RECEIVE, &ad); |
5238 | return rc; | 5238 | return rc; |
5239 | } | 5239 | } |
5240 | 5240 | ||
5241 | /* Shared Memory security operations */ | 5241 | /* Shared Memory security operations */ |
5242 | static int selinux_shm_alloc_security(struct shmid_kernel *shp) | 5242 | static int selinux_shm_alloc_security(struct shmid_kernel *shp) |
5243 | { | 5243 | { |
5244 | struct ipc_security_struct *isec; | 5244 | struct ipc_security_struct *isec; |
5245 | struct common_audit_data ad; | 5245 | struct common_audit_data ad; |
5246 | u32 sid = current_sid(); | 5246 | u32 sid = current_sid(); |
5247 | int rc; | 5247 | int rc; |
5248 | 5248 | ||
5249 | rc = ipc_alloc_security(current, &shp->shm_perm, SECCLASS_SHM); | 5249 | rc = ipc_alloc_security(current, &shp->shm_perm, SECCLASS_SHM); |
5250 | if (rc) | 5250 | if (rc) |
5251 | return rc; | 5251 | return rc; |
5252 | 5252 | ||
5253 | isec = shp->shm_perm.security; | 5253 | isec = shp->shm_perm.security; |
5254 | 5254 | ||
5255 | ad.type = LSM_AUDIT_DATA_IPC; | 5255 | ad.type = LSM_AUDIT_DATA_IPC; |
5256 | ad.u.ipc_id = shp->shm_perm.key; | 5256 | ad.u.ipc_id = shp->shm_perm.key; |
5257 | 5257 | ||
5258 | rc = avc_has_perm(sid, isec->sid, SECCLASS_SHM, | 5258 | rc = avc_has_perm(sid, isec->sid, SECCLASS_SHM, |
5259 | SHM__CREATE, &ad); | 5259 | SHM__CREATE, &ad); |
5260 | if (rc) { | 5260 | if (rc) { |
5261 | ipc_free_security(&shp->shm_perm); | 5261 | ipc_free_security(&shp->shm_perm); |
5262 | return rc; | 5262 | return rc; |
5263 | } | 5263 | } |
5264 | return 0; | 5264 | return 0; |
5265 | } | 5265 | } |
5266 | 5266 | ||
5267 | static void selinux_shm_free_security(struct shmid_kernel *shp) | 5267 | static void selinux_shm_free_security(struct shmid_kernel *shp) |
5268 | { | 5268 | { |
5269 | ipc_free_security(&shp->shm_perm); | 5269 | ipc_free_security(&shp->shm_perm); |
5270 | } | 5270 | } |
5271 | 5271 | ||
5272 | static int selinux_shm_associate(struct shmid_kernel *shp, int shmflg) | 5272 | static int selinux_shm_associate(struct shmid_kernel *shp, int shmflg) |
5273 | { | 5273 | { |
5274 | struct ipc_security_struct *isec; | 5274 | struct ipc_security_struct *isec; |
5275 | struct common_audit_data ad; | 5275 | struct common_audit_data ad; |
5276 | u32 sid = current_sid(); | 5276 | u32 sid = current_sid(); |
5277 | 5277 | ||
5278 | isec = shp->shm_perm.security; | 5278 | isec = shp->shm_perm.security; |
5279 | 5279 | ||
5280 | ad.type = LSM_AUDIT_DATA_IPC; | 5280 | ad.type = LSM_AUDIT_DATA_IPC; |
5281 | ad.u.ipc_id = shp->shm_perm.key; | 5281 | ad.u.ipc_id = shp->shm_perm.key; |
5282 | 5282 | ||
5283 | return avc_has_perm(sid, isec->sid, SECCLASS_SHM, | 5283 | return avc_has_perm(sid, isec->sid, SECCLASS_SHM, |
5284 | SHM__ASSOCIATE, &ad); | 5284 | SHM__ASSOCIATE, &ad); |
5285 | } | 5285 | } |
5286 | 5286 | ||
5287 | /* Note, at this point, shp is locked down */ | 5287 | /* Note, at this point, shp is locked down */ |
5288 | static int selinux_shm_shmctl(struct shmid_kernel *shp, int cmd) | 5288 | static int selinux_shm_shmctl(struct shmid_kernel *shp, int cmd) |
5289 | { | 5289 | { |
5290 | int perms; | 5290 | int perms; |
5291 | int err; | 5291 | int err; |
5292 | 5292 | ||
5293 | switch (cmd) { | 5293 | switch (cmd) { |
5294 | case IPC_INFO: | 5294 | case IPC_INFO: |
5295 | case SHM_INFO: | 5295 | case SHM_INFO: |
5296 | /* No specific object, just general system-wide information. */ | 5296 | /* No specific object, just general system-wide information. */ |
5297 | return task_has_system(current, SYSTEM__IPC_INFO); | 5297 | return task_has_system(current, SYSTEM__IPC_INFO); |
5298 | case IPC_STAT: | 5298 | case IPC_STAT: |
5299 | case SHM_STAT: | 5299 | case SHM_STAT: |
5300 | perms = SHM__GETATTR | SHM__ASSOCIATE; | 5300 | perms = SHM__GETATTR | SHM__ASSOCIATE; |
5301 | break; | 5301 | break; |
5302 | case IPC_SET: | 5302 | case IPC_SET: |
5303 | perms = SHM__SETATTR; | 5303 | perms = SHM__SETATTR; |
5304 | break; | 5304 | break; |
5305 | case SHM_LOCK: | 5305 | case SHM_LOCK: |
5306 | case SHM_UNLOCK: | 5306 | case SHM_UNLOCK: |
5307 | perms = SHM__LOCK; | 5307 | perms = SHM__LOCK; |
5308 | break; | 5308 | break; |
5309 | case IPC_RMID: | 5309 | case IPC_RMID: |
5310 | perms = SHM__DESTROY; | 5310 | perms = SHM__DESTROY; |
5311 | break; | 5311 | break; |
5312 | default: | 5312 | default: |
5313 | return 0; | 5313 | return 0; |
5314 | } | 5314 | } |
5315 | 5315 | ||
5316 | err = ipc_has_perm(&shp->shm_perm, perms); | 5316 | err = ipc_has_perm(&shp->shm_perm, perms); |
5317 | return err; | 5317 | return err; |
5318 | } | 5318 | } |
5319 | 5319 | ||
5320 | static int selinux_shm_shmat(struct shmid_kernel *shp, | 5320 | static int selinux_shm_shmat(struct shmid_kernel *shp, |
5321 | char __user *shmaddr, int shmflg) | 5321 | char __user *shmaddr, int shmflg) |
5322 | { | 5322 | { |
5323 | u32 perms; | 5323 | u32 perms; |
5324 | 5324 | ||
5325 | if (shmflg & SHM_RDONLY) | 5325 | if (shmflg & SHM_RDONLY) |
5326 | perms = SHM__READ; | 5326 | perms = SHM__READ; |
5327 | else | 5327 | else |
5328 | perms = SHM__READ | SHM__WRITE; | 5328 | perms = SHM__READ | SHM__WRITE; |
5329 | 5329 | ||
5330 | return ipc_has_perm(&shp->shm_perm, perms); | 5330 | return ipc_has_perm(&shp->shm_perm, perms); |
5331 | } | 5331 | } |
5332 | 5332 | ||
5333 | /* Semaphore security operations */ | 5333 | /* Semaphore security operations */ |
5334 | static int selinux_sem_alloc_security(struct sem_array *sma) | 5334 | static int selinux_sem_alloc_security(struct sem_array *sma) |
5335 | { | 5335 | { |
5336 | struct ipc_security_struct *isec; | 5336 | struct ipc_security_struct *isec; |
5337 | struct common_audit_data ad; | 5337 | struct common_audit_data ad; |
5338 | u32 sid = current_sid(); | 5338 | u32 sid = current_sid(); |
5339 | int rc; | 5339 | int rc; |
5340 | 5340 | ||
5341 | rc = ipc_alloc_security(current, &sma->sem_perm, SECCLASS_SEM); | 5341 | rc = ipc_alloc_security(current, &sma->sem_perm, SECCLASS_SEM); |
5342 | if (rc) | 5342 | if (rc) |
5343 | return rc; | 5343 | return rc; |
5344 | 5344 | ||
5345 | isec = sma->sem_perm.security; | 5345 | isec = sma->sem_perm.security; |
5346 | 5346 | ||
5347 | ad.type = LSM_AUDIT_DATA_IPC; | 5347 | ad.type = LSM_AUDIT_DATA_IPC; |
5348 | ad.u.ipc_id = sma->sem_perm.key; | 5348 | ad.u.ipc_id = sma->sem_perm.key; |
5349 | 5349 | ||
5350 | rc = avc_has_perm(sid, isec->sid, SECCLASS_SEM, | 5350 | rc = avc_has_perm(sid, isec->sid, SECCLASS_SEM, |
5351 | SEM__CREATE, &ad); | 5351 | SEM__CREATE, &ad); |
5352 | if (rc) { | 5352 | if (rc) { |
5353 | ipc_free_security(&sma->sem_perm); | 5353 | ipc_free_security(&sma->sem_perm); |
5354 | return rc; | 5354 | return rc; |
5355 | } | 5355 | } |
5356 | return 0; | 5356 | return 0; |
5357 | } | 5357 | } |
5358 | 5358 | ||
5359 | static void selinux_sem_free_security(struct sem_array *sma) | 5359 | static void selinux_sem_free_security(struct sem_array *sma) |
5360 | { | 5360 | { |
5361 | ipc_free_security(&sma->sem_perm); | 5361 | ipc_free_security(&sma->sem_perm); |
5362 | } | 5362 | } |
5363 | 5363 | ||
5364 | static int selinux_sem_associate(struct sem_array *sma, int semflg) | 5364 | static int selinux_sem_associate(struct sem_array *sma, int semflg) |
5365 | { | 5365 | { |
5366 | struct ipc_security_struct *isec; | 5366 | struct ipc_security_struct *isec; |
5367 | struct common_audit_data ad; | 5367 | struct common_audit_data ad; |
5368 | u32 sid = current_sid(); | 5368 | u32 sid = current_sid(); |
5369 | 5369 | ||
5370 | isec = sma->sem_perm.security; | 5370 | isec = sma->sem_perm.security; |
5371 | 5371 | ||
5372 | ad.type = LSM_AUDIT_DATA_IPC; | 5372 | ad.type = LSM_AUDIT_DATA_IPC; |
5373 | ad.u.ipc_id = sma->sem_perm.key; | 5373 | ad.u.ipc_id = sma->sem_perm.key; |
5374 | 5374 | ||
5375 | return avc_has_perm(sid, isec->sid, SECCLASS_SEM, | 5375 | return avc_has_perm(sid, isec->sid, SECCLASS_SEM, |
5376 | SEM__ASSOCIATE, &ad); | 5376 | SEM__ASSOCIATE, &ad); |
5377 | } | 5377 | } |
5378 | 5378 | ||
5379 | /* Note, at this point, sma is locked down */ | 5379 | /* Note, at this point, sma is locked down */ |
5380 | static int selinux_sem_semctl(struct sem_array *sma, int cmd) | 5380 | static int selinux_sem_semctl(struct sem_array *sma, int cmd) |
5381 | { | 5381 | { |
5382 | int err; | 5382 | int err; |
5383 | u32 perms; | 5383 | u32 perms; |
5384 | 5384 | ||
5385 | switch (cmd) { | 5385 | switch (cmd) { |
5386 | case IPC_INFO: | 5386 | case IPC_INFO: |
5387 | case SEM_INFO: | 5387 | case SEM_INFO: |
5388 | /* No specific object, just general system-wide information. */ | 5388 | /* No specific object, just general system-wide information. */ |
5389 | return task_has_system(current, SYSTEM__IPC_INFO); | 5389 | return task_has_system(current, SYSTEM__IPC_INFO); |
5390 | case GETPID: | 5390 | case GETPID: |
5391 | case GETNCNT: | 5391 | case GETNCNT: |
5392 | case GETZCNT: | 5392 | case GETZCNT: |
5393 | perms = SEM__GETATTR; | 5393 | perms = SEM__GETATTR; |
5394 | break; | 5394 | break; |
5395 | case GETVAL: | 5395 | case GETVAL: |
5396 | case GETALL: | 5396 | case GETALL: |
5397 | perms = SEM__READ; | 5397 | perms = SEM__READ; |
5398 | break; | 5398 | break; |
5399 | case SETVAL: | 5399 | case SETVAL: |
5400 | case SETALL: | 5400 | case SETALL: |
5401 | perms = SEM__WRITE; | 5401 | perms = SEM__WRITE; |
5402 | break; | 5402 | break; |
5403 | case IPC_RMID: | 5403 | case IPC_RMID: |
5404 | perms = SEM__DESTROY; | 5404 | perms = SEM__DESTROY; |
5405 | break; | 5405 | break; |
5406 | case IPC_SET: | 5406 | case IPC_SET: |
5407 | perms = SEM__SETATTR; | 5407 | perms = SEM__SETATTR; |
5408 | break; | 5408 | break; |
5409 | case IPC_STAT: | 5409 | case IPC_STAT: |
5410 | case SEM_STAT: | 5410 | case SEM_STAT: |
5411 | perms = SEM__GETATTR | SEM__ASSOCIATE; | 5411 | perms = SEM__GETATTR | SEM__ASSOCIATE; |
5412 | break; | 5412 | break; |
5413 | default: | 5413 | default: |
5414 | return 0; | 5414 | return 0; |
5415 | } | 5415 | } |
5416 | 5416 | ||
5417 | err = ipc_has_perm(&sma->sem_perm, perms); | 5417 | err = ipc_has_perm(&sma->sem_perm, perms); |
5418 | return err; | 5418 | return err; |
5419 | } | 5419 | } |
5420 | 5420 | ||
5421 | static int selinux_sem_semop(struct sem_array *sma, | 5421 | static int selinux_sem_semop(struct sem_array *sma, |
5422 | struct sembuf *sops, unsigned nsops, int alter) | 5422 | struct sembuf *sops, unsigned nsops, int alter) |
5423 | { | 5423 | { |
5424 | u32 perms; | 5424 | u32 perms; |
5425 | 5425 | ||
5426 | if (alter) | 5426 | if (alter) |
5427 | perms = SEM__READ | SEM__WRITE; | 5427 | perms = SEM__READ | SEM__WRITE; |
5428 | else | 5428 | else |
5429 | perms = SEM__READ; | 5429 | perms = SEM__READ; |
5430 | 5430 | ||
5431 | return ipc_has_perm(&sma->sem_perm, perms); | 5431 | return ipc_has_perm(&sma->sem_perm, perms); |
5432 | } | 5432 | } |
5433 | 5433 | ||
5434 | static int selinux_ipc_permission(struct kern_ipc_perm *ipcp, short flag) | 5434 | static int selinux_ipc_permission(struct kern_ipc_perm *ipcp, short flag) |
5435 | { | 5435 | { |
5436 | u32 av = 0; | 5436 | u32 av = 0; |
5437 | 5437 | ||
5438 | av = 0; | 5438 | av = 0; |
5439 | if (flag & S_IRUGO) | 5439 | if (flag & S_IRUGO) |
5440 | av |= IPC__UNIX_READ; | 5440 | av |= IPC__UNIX_READ; |
5441 | if (flag & S_IWUGO) | 5441 | if (flag & S_IWUGO) |
5442 | av |= IPC__UNIX_WRITE; | 5442 | av |= IPC__UNIX_WRITE; |
5443 | 5443 | ||
5444 | if (av == 0) | 5444 | if (av == 0) |
5445 | return 0; | 5445 | return 0; |
5446 | 5446 | ||
5447 | return ipc_has_perm(ipcp, av); | 5447 | return ipc_has_perm(ipcp, av); |
5448 | } | 5448 | } |
5449 | 5449 | ||
5450 | static void selinux_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid) | 5450 | static void selinux_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid) |
5451 | { | 5451 | { |
5452 | struct ipc_security_struct *isec = ipcp->security; | 5452 | struct ipc_security_struct *isec = ipcp->security; |
5453 | *secid = isec->sid; | 5453 | *secid = isec->sid; |
5454 | } | 5454 | } |
5455 | 5455 | ||
/*
 * Initialise the inode's security state from the dentry when a dentry is
 * instantiated.  Nothing is done when @inode is NULL.
 */
static void selinux_d_instantiate(struct dentry *dentry, struct inode *inode)
{
	if (!inode)
		return;

	inode_doinit_with_dentry(inode, dentry);
}
5461 | 5461 | ||
5462 | static int selinux_getprocattr(struct task_struct *p, | 5462 | static int selinux_getprocattr(struct task_struct *p, |
5463 | char *name, char **value) | 5463 | char *name, char **value) |
5464 | { | 5464 | { |
5465 | const struct task_security_struct *__tsec; | 5465 | const struct task_security_struct *__tsec; |
5466 | u32 sid; | 5466 | u32 sid; |
5467 | int error; | 5467 | int error; |
5468 | unsigned len; | 5468 | unsigned len; |
5469 | 5469 | ||
5470 | if (current != p) { | 5470 | if (current != p) { |
5471 | error = current_has_perm(p, PROCESS__GETATTR); | 5471 | error = current_has_perm(p, PROCESS__GETATTR); |
5472 | if (error) | 5472 | if (error) |
5473 | return error; | 5473 | return error; |
5474 | } | 5474 | } |
5475 | 5475 | ||
5476 | rcu_read_lock(); | 5476 | rcu_read_lock(); |
5477 | __tsec = __task_cred(p)->security; | 5477 | __tsec = __task_cred(p)->security; |
5478 | 5478 | ||
5479 | if (!strcmp(name, "current")) | 5479 | if (!strcmp(name, "current")) |
5480 | sid = __tsec->sid; | 5480 | sid = __tsec->sid; |
5481 | else if (!strcmp(name, "prev")) | 5481 | else if (!strcmp(name, "prev")) |
5482 | sid = __tsec->osid; | 5482 | sid = __tsec->osid; |
5483 | else if (!strcmp(name, "exec")) | 5483 | else if (!strcmp(name, "exec")) |
5484 | sid = __tsec->exec_sid; | 5484 | sid = __tsec->exec_sid; |
5485 | else if (!strcmp(name, "fscreate")) | 5485 | else if (!strcmp(name, "fscreate")) |
5486 | sid = __tsec->create_sid; | 5486 | sid = __tsec->create_sid; |
5487 | else if (!strcmp(name, "keycreate")) | 5487 | else if (!strcmp(name, "keycreate")) |
5488 | sid = __tsec->keycreate_sid; | 5488 | sid = __tsec->keycreate_sid; |
5489 | else if (!strcmp(name, "sockcreate")) | 5489 | else if (!strcmp(name, "sockcreate")) |
5490 | sid = __tsec->sockcreate_sid; | 5490 | sid = __tsec->sockcreate_sid; |
5491 | else | 5491 | else |
5492 | goto invalid; | 5492 | goto invalid; |
5493 | rcu_read_unlock(); | 5493 | rcu_read_unlock(); |
5494 | 5494 | ||
5495 | if (!sid) | 5495 | if (!sid) |
5496 | return 0; | 5496 | return 0; |
5497 | 5497 | ||
5498 | error = security_sid_to_context(sid, value, &len); | 5498 | error = security_sid_to_context(sid, value, &len); |
5499 | if (error) | 5499 | if (error) |
5500 | return error; | 5500 | return error; |
5501 | return len; | 5501 | return len; |
5502 | 5502 | ||
5503 | invalid: | 5503 | invalid: |
5504 | rcu_read_unlock(); | 5504 | rcu_read_unlock(); |
5505 | return -EINVAL; | 5505 | return -EINVAL; |
5506 | } | 5506 | } |
5507 | 5507 | ||
5508 | static int selinux_setprocattr(struct task_struct *p, | 5508 | static int selinux_setprocattr(struct task_struct *p, |
5509 | char *name, void *value, size_t size) | 5509 | char *name, void *value, size_t size) |
5510 | { | 5510 | { |
5511 | struct task_security_struct *tsec; | 5511 | struct task_security_struct *tsec; |
5512 | struct task_struct *tracer; | 5512 | struct task_struct *tracer; |
5513 | struct cred *new; | 5513 | struct cred *new; |
5514 | u32 sid = 0, ptsid; | 5514 | u32 sid = 0, ptsid; |
5515 | int error; | 5515 | int error; |
5516 | char *str = value; | 5516 | char *str = value; |
5517 | 5517 | ||
5518 | if (current != p) { | 5518 | if (current != p) { |
5519 | /* SELinux only allows a process to change its own | 5519 | /* SELinux only allows a process to change its own |
5520 | security attributes. */ | 5520 | security attributes. */ |
5521 | return -EACCES; | 5521 | return -EACCES; |
5522 | } | 5522 | } |
5523 | 5523 | ||
5524 | /* | 5524 | /* |
5525 | * Basic control over ability to set these attributes at all. | 5525 | * Basic control over ability to set these attributes at all. |
5526 | * current == p, but we'll pass them separately in case the | 5526 | * current == p, but we'll pass them separately in case the |
5527 | * above restriction is ever removed. | 5527 | * above restriction is ever removed. |
5528 | */ | 5528 | */ |
5529 | if (!strcmp(name, "exec")) | 5529 | if (!strcmp(name, "exec")) |
5530 | error = current_has_perm(p, PROCESS__SETEXEC); | 5530 | error = current_has_perm(p, PROCESS__SETEXEC); |
5531 | else if (!strcmp(name, "fscreate")) | 5531 | else if (!strcmp(name, "fscreate")) |
5532 | error = current_has_perm(p, PROCESS__SETFSCREATE); | 5532 | error = current_has_perm(p, PROCESS__SETFSCREATE); |
5533 | else if (!strcmp(name, "keycreate")) | 5533 | else if (!strcmp(name, "keycreate")) |
5534 | error = current_has_perm(p, PROCESS__SETKEYCREATE); | 5534 | error = current_has_perm(p, PROCESS__SETKEYCREATE); |
5535 | else if (!strcmp(name, "sockcreate")) | 5535 | else if (!strcmp(name, "sockcreate")) |
5536 | error = current_has_perm(p, PROCESS__SETSOCKCREATE); | 5536 | error = current_has_perm(p, PROCESS__SETSOCKCREATE); |
5537 | else if (!strcmp(name, "current")) | 5537 | else if (!strcmp(name, "current")) |
5538 | error = current_has_perm(p, PROCESS__SETCURRENT); | 5538 | error = current_has_perm(p, PROCESS__SETCURRENT); |
5539 | else | 5539 | else |
5540 | error = -EINVAL; | 5540 | error = -EINVAL; |
5541 | if (error) | 5541 | if (error) |
5542 | return error; | 5542 | return error; |
5543 | 5543 | ||
5544 | /* Obtain a SID for the context, if one was specified. */ | 5544 | /* Obtain a SID for the context, if one was specified. */ |
5545 | if (size && str[1] && str[1] != '\n') { | 5545 | if (size && str[1] && str[1] != '\n') { |
5546 | if (str[size-1] == '\n') { | 5546 | if (str[size-1] == '\n') { |
5547 | str[size-1] = 0; | 5547 | str[size-1] = 0; |
5548 | size--; | 5548 | size--; |
5549 | } | 5549 | } |
5550 | error = security_context_to_sid(value, size, &sid, GFP_KERNEL); | 5550 | error = security_context_to_sid(value, size, &sid, GFP_KERNEL); |
5551 | if (error == -EINVAL && !strcmp(name, "fscreate")) { | 5551 | if (error == -EINVAL && !strcmp(name, "fscreate")) { |
5552 | if (!capable(CAP_MAC_ADMIN)) { | 5552 | if (!capable(CAP_MAC_ADMIN)) { |
5553 | struct audit_buffer *ab; | 5553 | struct audit_buffer *ab; |
5554 | size_t audit_size; | 5554 | size_t audit_size; |
5555 | 5555 | ||
5556 | /* We strip a nul only if it is at the end, otherwise the | 5556 | /* We strip a nul only if it is at the end, otherwise the |
5557 | * context contains a nul and we should audit that */ | 5557 | * context contains a nul and we should audit that */ |
5558 | if (str[size - 1] == '\0') | 5558 | if (str[size - 1] == '\0') |
5559 | audit_size = size - 1; | 5559 | audit_size = size - 1; |
5560 | else | 5560 | else |
5561 | audit_size = size; | 5561 | audit_size = size; |
5562 | ab = audit_log_start(current->audit_context, GFP_ATOMIC, AUDIT_SELINUX_ERR); | 5562 | ab = audit_log_start(current->audit_context, GFP_ATOMIC, AUDIT_SELINUX_ERR); |
5563 | audit_log_format(ab, "op=fscreate invalid_context="); | 5563 | audit_log_format(ab, "op=fscreate invalid_context="); |
5564 | audit_log_n_untrustedstring(ab, value, audit_size); | 5564 | audit_log_n_untrustedstring(ab, value, audit_size); |
5565 | audit_log_end(ab); | 5565 | audit_log_end(ab); |
5566 | 5566 | ||
5567 | return error; | 5567 | return error; |
5568 | } | 5568 | } |
5569 | error = security_context_to_sid_force(value, size, | 5569 | error = security_context_to_sid_force(value, size, |
5570 | &sid); | 5570 | &sid); |
5571 | } | 5571 | } |
5572 | if (error) | 5572 | if (error) |
5573 | return error; | 5573 | return error; |
5574 | } | 5574 | } |
5575 | 5575 | ||
5576 | new = prepare_creds(); | 5576 | new = prepare_creds(); |
5577 | if (!new) | 5577 | if (!new) |
5578 | return -ENOMEM; | 5578 | return -ENOMEM; |
5579 | 5579 | ||
5580 | /* Permission checking based on the specified context is | 5580 | /* Permission checking based on the specified context is |
5581 | performed during the actual operation (execve, | 5581 | performed during the actual operation (execve, |
5582 | open/mkdir/...), when we know the full context of the | 5582 | open/mkdir/...), when we know the full context of the |
5583 | operation. See selinux_bprm_set_creds for the execve | 5583 | operation. See selinux_bprm_set_creds for the execve |
5584 | checks and may_create for the file creation checks. The | 5584 | checks and may_create for the file creation checks. The |
5585 | operation will then fail if the context is not permitted. */ | 5585 | operation will then fail if the context is not permitted. */ |
5586 | tsec = new->security; | 5586 | tsec = new->security; |
5587 | if (!strcmp(name, "exec")) { | 5587 | if (!strcmp(name, "exec")) { |
5588 | tsec->exec_sid = sid; | 5588 | tsec->exec_sid = sid; |
5589 | } else if (!strcmp(name, "fscreate")) { | 5589 | } else if (!strcmp(name, "fscreate")) { |
5590 | tsec->create_sid = sid; | 5590 | tsec->create_sid = sid; |
5591 | } else if (!strcmp(name, "keycreate")) { | 5591 | } else if (!strcmp(name, "keycreate")) { |
5592 | error = may_create_key(sid, p); | 5592 | error = may_create_key(sid, p); |
5593 | if (error) | 5593 | if (error) |
5594 | goto abort_change; | 5594 | goto abort_change; |
5595 | tsec->keycreate_sid = sid; | 5595 | tsec->keycreate_sid = sid; |
5596 | } else if (!strcmp(name, "sockcreate")) { | 5596 | } else if (!strcmp(name, "sockcreate")) { |
5597 | tsec->sockcreate_sid = sid; | 5597 | tsec->sockcreate_sid = sid; |
5598 | } else if (!strcmp(name, "current")) { | 5598 | } else if (!strcmp(name, "current")) { |
5599 | error = -EINVAL; | 5599 | error = -EINVAL; |
5600 | if (sid == 0) | 5600 | if (sid == 0) |
5601 | goto abort_change; | 5601 | goto abort_change; |
5602 | 5602 | ||
5603 | /* Only allow single threaded processes to change context */ | 5603 | /* Only allow single threaded processes to change context */ |
5604 | error = -EPERM; | 5604 | error = -EPERM; |
5605 | if (!current_is_single_threaded()) { | 5605 | if (!current_is_single_threaded()) { |
5606 | error = security_bounded_transition(tsec->sid, sid); | 5606 | error = security_bounded_transition(tsec->sid, sid); |
5607 | if (error) | 5607 | if (error) |
5608 | goto abort_change; | 5608 | goto abort_change; |
5609 | } | 5609 | } |
5610 | 5610 | ||
5611 | /* Check permissions for the transition. */ | 5611 | /* Check permissions for the transition. */ |
5612 | error = avc_has_perm(tsec->sid, sid, SECCLASS_PROCESS, | 5612 | error = avc_has_perm(tsec->sid, sid, SECCLASS_PROCESS, |
5613 | PROCESS__DYNTRANSITION, NULL); | 5613 | PROCESS__DYNTRANSITION, NULL); |
5614 | if (error) | 5614 | if (error) |
5615 | goto abort_change; | 5615 | goto abort_change; |
5616 | 5616 | ||
5617 | /* Check for ptracing, and update the task SID if ok. | 5617 | /* Check for ptracing, and update the task SID if ok. |
5618 | Otherwise, leave SID unchanged and fail. */ | 5618 | Otherwise, leave SID unchanged and fail. */ |
5619 | ptsid = 0; | 5619 | ptsid = 0; |
5620 | rcu_read_lock(); | 5620 | rcu_read_lock(); |
5621 | tracer = ptrace_parent(p); | 5621 | tracer = ptrace_parent(p); |
5622 | if (tracer) | 5622 | if (tracer) |
5623 | ptsid = task_sid(tracer); | 5623 | ptsid = task_sid(tracer); |
5624 | rcu_read_unlock(); | 5624 | rcu_read_unlock(); |
5625 | 5625 | ||
5626 | if (tracer) { | 5626 | if (tracer) { |
5627 | error = avc_has_perm(ptsid, sid, SECCLASS_PROCESS, | 5627 | error = avc_has_perm(ptsid, sid, SECCLASS_PROCESS, |
5628 | PROCESS__PTRACE, NULL); | 5628 | PROCESS__PTRACE, NULL); |
5629 | if (error) | 5629 | if (error) |
5630 | goto abort_change; | 5630 | goto abort_change; |
5631 | } | 5631 | } |
5632 | 5632 | ||
5633 | tsec->sid = sid; | 5633 | tsec->sid = sid; |
5634 | } else { | 5634 | } else { |
5635 | error = -EINVAL; | 5635 | error = -EINVAL; |
5636 | goto abort_change; | 5636 | goto abort_change; |
5637 | } | 5637 | } |
5638 | 5638 | ||
5639 | commit_creds(new); | 5639 | commit_creds(new); |
5640 | return size; | 5640 | return size; |
5641 | 5641 | ||
5642 | abort_change: | 5642 | abort_change: |
5643 | abort_creds(new); | 5643 | abort_creds(new); |
5644 | return error; | 5644 | return error; |
5645 | } | 5645 | } |
5646 | 5646 | ||
5647 | static int selinux_ismaclabel(const char *name) | 5647 | static int selinux_ismaclabel(const char *name) |
5648 | { | 5648 | { |
5649 | return (strcmp(name, XATTR_SELINUX_SUFFIX) == 0); | 5649 | return (strcmp(name, XATTR_SELINUX_SUFFIX) == 0); |
5650 | } | 5650 | } |
5651 | 5651 | ||
5652 | static int selinux_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) | 5652 | static int selinux_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) |
5653 | { | 5653 | { |
5654 | return security_sid_to_context(secid, secdata, seclen); | 5654 | return security_sid_to_context(secid, secdata, seclen); |
5655 | } | 5655 | } |
5656 | 5656 | ||
5657 | static int selinux_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid) | 5657 | static int selinux_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid) |
5658 | { | 5658 | { |
5659 | return security_context_to_sid(secdata, seclen, secid, GFP_KERNEL); | 5659 | return security_context_to_sid(secdata, seclen, secid, GFP_KERNEL); |
5660 | } | 5660 | } |
5661 | 5661 | ||
5662 | static void selinux_release_secctx(char *secdata, u32 seclen) | 5662 | static void selinux_release_secctx(char *secdata, u32 seclen) |
5663 | { | 5663 | { |
5664 | kfree(secdata); | 5664 | kfree(secdata); |
5665 | } | 5665 | } |
5666 | 5666 | ||
5667 | /* | 5667 | /* |
5668 | * called with inode->i_mutex locked | 5668 | * called with inode->i_mutex locked |
5669 | */ | 5669 | */ |
5670 | static int selinux_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen) | 5670 | static int selinux_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen) |
5671 | { | 5671 | { |
5672 | return selinux_inode_setsecurity(inode, XATTR_SELINUX_SUFFIX, ctx, ctxlen, 0); | 5672 | return selinux_inode_setsecurity(inode, XATTR_SELINUX_SUFFIX, ctx, ctxlen, 0); |
5673 | } | 5673 | } |
5674 | 5674 | ||
5675 | /* | 5675 | /* |
5676 | * called with inode->i_mutex locked | 5676 | * called with inode->i_mutex locked |
5677 | */ | 5677 | */ |
5678 | static int selinux_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen) | 5678 | static int selinux_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen) |
5679 | { | 5679 | { |
5680 | return __vfs_setxattr_noperm(dentry, XATTR_NAME_SELINUX, ctx, ctxlen, 0); | 5680 | return __vfs_setxattr_noperm(dentry, XATTR_NAME_SELINUX, ctx, ctxlen, 0); |
5681 | } | 5681 | } |
5682 | 5682 | ||
5683 | static int selinux_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen) | 5683 | static int selinux_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen) |
5684 | { | 5684 | { |
5685 | int len = 0; | 5685 | int len = 0; |
5686 | len = selinux_inode_getsecurity(inode, XATTR_SELINUX_SUFFIX, | 5686 | len = selinux_inode_getsecurity(inode, XATTR_SELINUX_SUFFIX, |
5687 | ctx, true); | 5687 | ctx, true); |
5688 | if (len < 0) | 5688 | if (len < 0) |
5689 | return len; | 5689 | return len; |
5690 | *ctxlen = len; | 5690 | *ctxlen = len; |
5691 | return 0; | 5691 | return 0; |
5692 | } | 5692 | } |
5693 | #ifdef CONFIG_KEYS | 5693 | #ifdef CONFIG_KEYS |
5694 | 5694 | ||
5695 | static int selinux_key_alloc(struct key *k, const struct cred *cred, | 5695 | static int selinux_key_alloc(struct key *k, const struct cred *cred, |
5696 | unsigned long flags) | 5696 | unsigned long flags) |
5697 | { | 5697 | { |
5698 | const struct task_security_struct *tsec; | 5698 | const struct task_security_struct *tsec; |
5699 | struct key_security_struct *ksec; | 5699 | struct key_security_struct *ksec; |
5700 | 5700 | ||
5701 | ksec = kzalloc(sizeof(struct key_security_struct), GFP_KERNEL); | 5701 | ksec = kzalloc(sizeof(struct key_security_struct), GFP_KERNEL); |
5702 | if (!ksec) | 5702 | if (!ksec) |
5703 | return -ENOMEM; | 5703 | return -ENOMEM; |
5704 | 5704 | ||
5705 | tsec = cred->security; | 5705 | tsec = cred->security; |
5706 | if (tsec->keycreate_sid) | 5706 | if (tsec->keycreate_sid) |
5707 | ksec->sid = tsec->keycreate_sid; | 5707 | ksec->sid = tsec->keycreate_sid; |
5708 | else | 5708 | else |
5709 | ksec->sid = tsec->sid; | 5709 | ksec->sid = tsec->sid; |
5710 | 5710 | ||
5711 | k->security = ksec; | 5711 | k->security = ksec; |
5712 | return 0; | 5712 | return 0; |
5713 | } | 5713 | } |
5714 | 5714 | ||
5715 | static void selinux_key_free(struct key *k) | 5715 | static void selinux_key_free(struct key *k) |
5716 | { | 5716 | { |
5717 | struct key_security_struct *ksec = k->security; | 5717 | struct key_security_struct *ksec = k->security; |
5718 | 5718 | ||
5719 | k->security = NULL; | 5719 | k->security = NULL; |
5720 | kfree(ksec); | 5720 | kfree(ksec); |
5721 | } | 5721 | } |
5722 | 5722 | ||
5723 | static int selinux_key_permission(key_ref_t key_ref, | 5723 | static int selinux_key_permission(key_ref_t key_ref, |
5724 | const struct cred *cred, | 5724 | const struct cred *cred, |
5725 | key_perm_t perm) | 5725 | key_perm_t perm) |
5726 | { | 5726 | { |
5727 | struct key *key; | 5727 | struct key *key; |
5728 | struct key_security_struct *ksec; | 5728 | struct key_security_struct *ksec; |
5729 | u32 sid; | 5729 | u32 sid; |
5730 | 5730 | ||
5731 | /* if no specific permissions are requested, we skip the | 5731 | /* if no specific permissions are requested, we skip the |
5732 | permission check. No serious, additional covert channels | 5732 | permission check. No serious, additional covert channels |
5733 | appear to be created. */ | 5733 | appear to be created. */ |
5734 | if (perm == 0) | 5734 | if (perm == 0) |
5735 | return 0; | 5735 | return 0; |
5736 | 5736 | ||
5737 | sid = cred_sid(cred); | 5737 | sid = cred_sid(cred); |
5738 | 5738 | ||
5739 | key = key_ref_to_ptr(key_ref); | 5739 | key = key_ref_to_ptr(key_ref); |
5740 | ksec = key->security; | 5740 | ksec = key->security; |
5741 | 5741 | ||
5742 | return avc_has_perm(sid, ksec->sid, SECCLASS_KEY, perm, NULL); | 5742 | return avc_has_perm(sid, ksec->sid, SECCLASS_KEY, perm, NULL); |
5743 | } | 5743 | } |
5744 | 5744 | ||
5745 | static int selinux_key_getsecurity(struct key *key, char **_buffer) | 5745 | static int selinux_key_getsecurity(struct key *key, char **_buffer) |
5746 | { | 5746 | { |
5747 | struct key_security_struct *ksec = key->security; | 5747 | struct key_security_struct *ksec = key->security; |
5748 | char *context = NULL; | 5748 | char *context = NULL; |
5749 | unsigned len; | 5749 | unsigned len; |
5750 | int rc; | 5750 | int rc; |
5751 | 5751 | ||
5752 | rc = security_sid_to_context(ksec->sid, &context, &len); | 5752 | rc = security_sid_to_context(ksec->sid, &context, &len); |
5753 | if (!rc) | 5753 | if (!rc) |
5754 | rc = len; | 5754 | rc = len; |
5755 | *_buffer = context; | 5755 | *_buffer = context; |
5756 | return rc; | 5756 | return rc; |
5757 | } | 5757 | } |
5758 | 5758 | ||
5759 | #endif | 5759 | #endif |
5760 | 5760 | ||
5761 | static struct security_operations selinux_ops = { | 5761 | static struct security_operations selinux_ops = { |
5762 | .name = "selinux", | 5762 | .name = "selinux", |
5763 | 5763 | ||
5764 | .ptrace_access_check = selinux_ptrace_access_check, | 5764 | .ptrace_access_check = selinux_ptrace_access_check, |
5765 | .ptrace_traceme = selinux_ptrace_traceme, | 5765 | .ptrace_traceme = selinux_ptrace_traceme, |
5766 | .capget = selinux_capget, | 5766 | .capget = selinux_capget, |
5767 | .capset = selinux_capset, | 5767 | .capset = selinux_capset, |
5768 | .capable = selinux_capable, | 5768 | .capable = selinux_capable, |
5769 | .quotactl = selinux_quotactl, | 5769 | .quotactl = selinux_quotactl, |
5770 | .quota_on = selinux_quota_on, | 5770 | .quota_on = selinux_quota_on, |
5771 | .syslog = selinux_syslog, | 5771 | .syslog = selinux_syslog, |
5772 | .vm_enough_memory = selinux_vm_enough_memory, | 5772 | .vm_enough_memory = selinux_vm_enough_memory, |
5773 | 5773 | ||
5774 | .netlink_send = selinux_netlink_send, | 5774 | .netlink_send = selinux_netlink_send, |
5775 | 5775 | ||
5776 | .bprm_set_creds = selinux_bprm_set_creds, | 5776 | .bprm_set_creds = selinux_bprm_set_creds, |
5777 | .bprm_committing_creds = selinux_bprm_committing_creds, | 5777 | .bprm_committing_creds = selinux_bprm_committing_creds, |
5778 | .bprm_committed_creds = selinux_bprm_committed_creds, | 5778 | .bprm_committed_creds = selinux_bprm_committed_creds, |
5779 | .bprm_secureexec = selinux_bprm_secureexec, | 5779 | .bprm_secureexec = selinux_bprm_secureexec, |
5780 | 5780 | ||
5781 | .sb_alloc_security = selinux_sb_alloc_security, | 5781 | .sb_alloc_security = selinux_sb_alloc_security, |
5782 | .sb_free_security = selinux_sb_free_security, | 5782 | .sb_free_security = selinux_sb_free_security, |
5783 | .sb_copy_data = selinux_sb_copy_data, | 5783 | .sb_copy_data = selinux_sb_copy_data, |
5784 | .sb_remount = selinux_sb_remount, | 5784 | .sb_remount = selinux_sb_remount, |
5785 | .sb_kern_mount = selinux_sb_kern_mount, | 5785 | .sb_kern_mount = selinux_sb_kern_mount, |
5786 | .sb_show_options = selinux_sb_show_options, | 5786 | .sb_show_options = selinux_sb_show_options, |
5787 | .sb_statfs = selinux_sb_statfs, | 5787 | .sb_statfs = selinux_sb_statfs, |
5788 | .sb_mount = selinux_mount, | 5788 | .sb_mount = selinux_mount, |
5789 | .sb_umount = selinux_umount, | 5789 | .sb_umount = selinux_umount, |
5790 | .sb_set_mnt_opts = selinux_set_mnt_opts, | 5790 | .sb_set_mnt_opts = selinux_set_mnt_opts, |
5791 | .sb_clone_mnt_opts = selinux_sb_clone_mnt_opts, | 5791 | .sb_clone_mnt_opts = selinux_sb_clone_mnt_opts, |
5792 | .sb_parse_opts_str = selinux_parse_opts_str, | 5792 | .sb_parse_opts_str = selinux_parse_opts_str, |
5793 | 5793 | ||
5794 | .dentry_init_security = selinux_dentry_init_security, | 5794 | .dentry_init_security = selinux_dentry_init_security, |
5795 | 5795 | ||
5796 | .inode_alloc_security = selinux_inode_alloc_security, | 5796 | .inode_alloc_security = selinux_inode_alloc_security, |
5797 | .inode_free_security = selinux_inode_free_security, | 5797 | .inode_free_security = selinux_inode_free_security, |
5798 | .inode_init_security = selinux_inode_init_security, | 5798 | .inode_init_security = selinux_inode_init_security, |
5799 | .inode_create = selinux_inode_create, | 5799 | .inode_create = selinux_inode_create, |
5800 | .inode_link = selinux_inode_link, | 5800 | .inode_link = selinux_inode_link, |
5801 | .inode_unlink = selinux_inode_unlink, | 5801 | .inode_unlink = selinux_inode_unlink, |
5802 | .inode_symlink = selinux_inode_symlink, | 5802 | .inode_symlink = selinux_inode_symlink, |
5803 | .inode_mkdir = selinux_inode_mkdir, | 5803 | .inode_mkdir = selinux_inode_mkdir, |
5804 | .inode_rmdir = selinux_inode_rmdir, | 5804 | .inode_rmdir = selinux_inode_rmdir, |
5805 | .inode_mknod = selinux_inode_mknod, | 5805 | .inode_mknod = selinux_inode_mknod, |
5806 | .inode_rename = selinux_inode_rename, | 5806 | .inode_rename = selinux_inode_rename, |
5807 | .inode_readlink = selinux_inode_readlink, | 5807 | .inode_readlink = selinux_inode_readlink, |
5808 | .inode_follow_link = selinux_inode_follow_link, | 5808 | .inode_follow_link = selinux_inode_follow_link, |
5809 | .inode_permission = selinux_inode_permission, | 5809 | .inode_permission = selinux_inode_permission, |
5810 | .inode_setattr = selinux_inode_setattr, | 5810 | .inode_setattr = selinux_inode_setattr, |
5811 | .inode_getattr = selinux_inode_getattr, | 5811 | .inode_getattr = selinux_inode_getattr, |
5812 | .inode_setxattr = selinux_inode_setxattr, | 5812 | .inode_setxattr = selinux_inode_setxattr, |
5813 | .inode_post_setxattr = selinux_inode_post_setxattr, | 5813 | .inode_post_setxattr = selinux_inode_post_setxattr, |
5814 | .inode_getxattr = selinux_inode_getxattr, | 5814 | .inode_getxattr = selinux_inode_getxattr, |
5815 | .inode_listxattr = selinux_inode_listxattr, | 5815 | .inode_listxattr = selinux_inode_listxattr, |
5816 | .inode_removexattr = selinux_inode_removexattr, | 5816 | .inode_removexattr = selinux_inode_removexattr, |
5817 | .inode_getsecurity = selinux_inode_getsecurity, | 5817 | .inode_getsecurity = selinux_inode_getsecurity, |
5818 | .inode_setsecurity = selinux_inode_setsecurity, | 5818 | .inode_setsecurity = selinux_inode_setsecurity, |
5819 | .inode_listsecurity = selinux_inode_listsecurity, | 5819 | .inode_listsecurity = selinux_inode_listsecurity, |
5820 | .inode_getsecid = selinux_inode_getsecid, | 5820 | .inode_getsecid = selinux_inode_getsecid, |
5821 | 5821 | ||
5822 | .file_permission = selinux_file_permission, | 5822 | .file_permission = selinux_file_permission, |
5823 | .file_alloc_security = selinux_file_alloc_security, | 5823 | .file_alloc_security = selinux_file_alloc_security, |
5824 | .file_free_security = selinux_file_free_security, | 5824 | .file_free_security = selinux_file_free_security, |
5825 | .file_ioctl = selinux_file_ioctl, | 5825 | .file_ioctl = selinux_file_ioctl, |
5826 | .mmap_file = selinux_mmap_file, | 5826 | .mmap_file = selinux_mmap_file, |
5827 | .mmap_addr = selinux_mmap_addr, | 5827 | .mmap_addr = selinux_mmap_addr, |
5828 | .file_mprotect = selinux_file_mprotect, | 5828 | .file_mprotect = selinux_file_mprotect, |
5829 | .file_lock = selinux_file_lock, | 5829 | .file_lock = selinux_file_lock, |
5830 | .file_fcntl = selinux_file_fcntl, | 5830 | .file_fcntl = selinux_file_fcntl, |
5831 | .file_set_fowner = selinux_file_set_fowner, | 5831 | .file_set_fowner = selinux_file_set_fowner, |
5832 | .file_send_sigiotask = selinux_file_send_sigiotask, | 5832 | .file_send_sigiotask = selinux_file_send_sigiotask, |
5833 | .file_receive = selinux_file_receive, | 5833 | .file_receive = selinux_file_receive, |
5834 | 5834 | ||
5835 | .file_open = selinux_file_open, | 5835 | .file_open = selinux_file_open, |
5836 | 5836 | ||
5837 | .task_create = selinux_task_create, | 5837 | .task_create = selinux_task_create, |
5838 | .cred_alloc_blank = selinux_cred_alloc_blank, | 5838 | .cred_alloc_blank = selinux_cred_alloc_blank, |
5839 | .cred_free = selinux_cred_free, | 5839 | .cred_free = selinux_cred_free, |
5840 | .cred_prepare = selinux_cred_prepare, | 5840 | .cred_prepare = selinux_cred_prepare, |
5841 | .cred_transfer = selinux_cred_transfer, | 5841 | .cred_transfer = selinux_cred_transfer, |
5842 | .kernel_act_as = selinux_kernel_act_as, | 5842 | .kernel_act_as = selinux_kernel_act_as, |
5843 | .kernel_create_files_as = selinux_kernel_create_files_as, | 5843 | .kernel_create_files_as = selinux_kernel_create_files_as, |
5844 | .kernel_module_request = selinux_kernel_module_request, | 5844 | .kernel_module_request = selinux_kernel_module_request, |
5845 | .task_setpgid = selinux_task_setpgid, | 5845 | .task_setpgid = selinux_task_setpgid, |
5846 | .task_getpgid = selinux_task_getpgid, | 5846 | .task_getpgid = selinux_task_getpgid, |
5847 | .task_getsid = selinux_task_getsid, | 5847 | .task_getsid = selinux_task_getsid, |
5848 | .task_getsecid = selinux_task_getsecid, | 5848 | .task_getsecid = selinux_task_getsecid, |
5849 | .task_setnice = selinux_task_setnice, | 5849 | .task_setnice = selinux_task_setnice, |
5850 | .task_setioprio = selinux_task_setioprio, | 5850 | .task_setioprio = selinux_task_setioprio, |
5851 | .task_getioprio = selinux_task_getioprio, | 5851 | .task_getioprio = selinux_task_getioprio, |
5852 | .task_setrlimit = selinux_task_setrlimit, | 5852 | .task_setrlimit = selinux_task_setrlimit, |
5853 | .task_setscheduler = selinux_task_setscheduler, | 5853 | .task_setscheduler = selinux_task_setscheduler, |
5854 | .task_getscheduler = selinux_task_getscheduler, | 5854 | .task_getscheduler = selinux_task_getscheduler, |
5855 | .task_movememory = selinux_task_movememory, | 5855 | .task_movememory = selinux_task_movememory, |
5856 | .task_kill = selinux_task_kill, | 5856 | .task_kill = selinux_task_kill, |
5857 | .task_wait = selinux_task_wait, | 5857 | .task_wait = selinux_task_wait, |
5858 | .task_to_inode = selinux_task_to_inode, | 5858 | .task_to_inode = selinux_task_to_inode, |
5859 | 5859 | ||
5860 | .ipc_permission = selinux_ipc_permission, | 5860 | .ipc_permission = selinux_ipc_permission, |
5861 | .ipc_getsecid = selinux_ipc_getsecid, | 5861 | .ipc_getsecid = selinux_ipc_getsecid, |
5862 | 5862 | ||
5863 | .msg_msg_alloc_security = selinux_msg_msg_alloc_security, | 5863 | .msg_msg_alloc_security = selinux_msg_msg_alloc_security, |
5864 | .msg_msg_free_security = selinux_msg_msg_free_security, | 5864 | .msg_msg_free_security = selinux_msg_msg_free_security, |
5865 | 5865 | ||
5866 | .msg_queue_alloc_security = selinux_msg_queue_alloc_security, | 5866 | .msg_queue_alloc_security = selinux_msg_queue_alloc_security, |
5867 | .msg_queue_free_security = selinux_msg_queue_free_security, | 5867 | .msg_queue_free_security = selinux_msg_queue_free_security, |
5868 | .msg_queue_associate = selinux_msg_queue_associate, | 5868 | .msg_queue_associate = selinux_msg_queue_associate, |
5869 | .msg_queue_msgctl = selinux_msg_queue_msgctl, | 5869 | .msg_queue_msgctl = selinux_msg_queue_msgctl, |
5870 | .msg_queue_msgsnd = selinux_msg_queue_msgsnd, | 5870 | .msg_queue_msgsnd = selinux_msg_queue_msgsnd, |
5871 | .msg_queue_msgrcv = selinux_msg_queue_msgrcv, | 5871 | .msg_queue_msgrcv = selinux_msg_queue_msgrcv, |
5872 | 5872 | ||
5873 | .shm_alloc_security = selinux_shm_alloc_security, | 5873 | .shm_alloc_security = selinux_shm_alloc_security, |
5874 | .shm_free_security = selinux_shm_free_security, | 5874 | .shm_free_security = selinux_shm_free_security, |
5875 | .shm_associate = selinux_shm_associate, | 5875 | .shm_associate = selinux_shm_associate, |
5876 | .shm_shmctl = selinux_shm_shmctl, | 5876 | .shm_shmctl = selinux_shm_shmctl, |
5877 | .shm_shmat = selinux_shm_shmat, | 5877 | .shm_shmat = selinux_shm_shmat, |
5878 | 5878 | ||
5879 | .sem_alloc_security = selinux_sem_alloc_security, | 5879 | .sem_alloc_security = selinux_sem_alloc_security, |
5880 | .sem_free_security = selinux_sem_free_security, | 5880 | .sem_free_security = selinux_sem_free_security, |
5881 | .sem_associate = selinux_sem_associate, | 5881 | .sem_associate = selinux_sem_associate, |
5882 | .sem_semctl = selinux_sem_semctl, | 5882 | .sem_semctl = selinux_sem_semctl, |
5883 | .sem_semop = selinux_sem_semop, | 5883 | .sem_semop = selinux_sem_semop, |
5884 | 5884 | ||
5885 | .d_instantiate = selinux_d_instantiate, | 5885 | .d_instantiate = selinux_d_instantiate, |
5886 | 5886 | ||
5887 | .getprocattr = selinux_getprocattr, | 5887 | .getprocattr = selinux_getprocattr, |
5888 | .setprocattr = selinux_setprocattr, | 5888 | .setprocattr = selinux_setprocattr, |
5889 | 5889 | ||
5890 | .ismaclabel = selinux_ismaclabel, | 5890 | .ismaclabel = selinux_ismaclabel, |
5891 | .secid_to_secctx = selinux_secid_to_secctx, | 5891 | .secid_to_secctx = selinux_secid_to_secctx, |
5892 | .secctx_to_secid = selinux_secctx_to_secid, | 5892 | .secctx_to_secid = selinux_secctx_to_secid, |
5893 | .release_secctx = selinux_release_secctx, | 5893 | .release_secctx = selinux_release_secctx, |
5894 | .inode_notifysecctx = selinux_inode_notifysecctx, | 5894 | .inode_notifysecctx = selinux_inode_notifysecctx, |
5895 | .inode_setsecctx = selinux_inode_setsecctx, | 5895 | .inode_setsecctx = selinux_inode_setsecctx, |
5896 | .inode_getsecctx = selinux_inode_getsecctx, | 5896 | .inode_getsecctx = selinux_inode_getsecctx, |
5897 | 5897 | ||
5898 | .unix_stream_connect = selinux_socket_unix_stream_connect, | 5898 | .unix_stream_connect = selinux_socket_unix_stream_connect, |
5899 | .unix_may_send = selinux_socket_unix_may_send, | 5899 | .unix_may_send = selinux_socket_unix_may_send, |
5900 | 5900 | ||
5901 | .socket_create = selinux_socket_create, | 5901 | .socket_create = selinux_socket_create, |
5902 | .socket_post_create = selinux_socket_post_create, | 5902 | .socket_post_create = selinux_socket_post_create, |
5903 | .socket_bind = selinux_socket_bind, | 5903 | .socket_bind = selinux_socket_bind, |
5904 | .socket_connect = selinux_socket_connect, | 5904 | .socket_connect = selinux_socket_connect, |
5905 | .socket_listen = selinux_socket_listen, | 5905 | .socket_listen = selinux_socket_listen, |
5906 | .socket_accept = selinux_socket_accept, | 5906 | .socket_accept = selinux_socket_accept, |
5907 | .socket_sendmsg = selinux_socket_sendmsg, | 5907 | .socket_sendmsg = selinux_socket_sendmsg, |
5908 | .socket_recvmsg = selinux_socket_recvmsg, | 5908 | .socket_recvmsg = selinux_socket_recvmsg, |
5909 | .socket_getsockname = selinux_socket_getsockname, | 5909 | .socket_getsockname = selinux_socket_getsockname, |
5910 | .socket_getpeername = selinux_socket_getpeername, | 5910 | .socket_getpeername = selinux_socket_getpeername, |
5911 | .socket_getsockopt = selinux_socket_getsockopt, | 5911 | .socket_getsockopt = selinux_socket_getsockopt, |
5912 | .socket_setsockopt = selinux_socket_setsockopt, | 5912 | .socket_setsockopt = selinux_socket_setsockopt, |
5913 | .socket_shutdown = selinux_socket_shutdown, | 5913 | .socket_shutdown = selinux_socket_shutdown, |
5914 | .socket_sock_rcv_skb = selinux_socket_sock_rcv_skb, | 5914 | .socket_sock_rcv_skb = selinux_socket_sock_rcv_skb, |
5915 | .socket_getpeersec_stream = selinux_socket_getpeersec_stream, | 5915 | .socket_getpeersec_stream = selinux_socket_getpeersec_stream, |
5916 | .socket_getpeersec_dgram = selinux_socket_getpeersec_dgram, | 5916 | .socket_getpeersec_dgram = selinux_socket_getpeersec_dgram, |
5917 | .sk_alloc_security = selinux_sk_alloc_security, | 5917 | .sk_alloc_security = selinux_sk_alloc_security, |
5918 | .sk_free_security = selinux_sk_free_security, | 5918 | .sk_free_security = selinux_sk_free_security, |
5919 | .sk_clone_security = selinux_sk_clone_security, | 5919 | .sk_clone_security = selinux_sk_clone_security, |
5920 | .sk_getsecid = selinux_sk_getsecid, | 5920 | .sk_getsecid = selinux_sk_getsecid, |
5921 | .sock_graft = selinux_sock_graft, | 5921 | .sock_graft = selinux_sock_graft, |
5922 | .inet_conn_request = selinux_inet_conn_request, | 5922 | .inet_conn_request = selinux_inet_conn_request, |
5923 | .inet_csk_clone = selinux_inet_csk_clone, | 5923 | .inet_csk_clone = selinux_inet_csk_clone, |
5924 | .inet_conn_established = selinux_inet_conn_established, | 5924 | .inet_conn_established = selinux_inet_conn_established, |
5925 | .secmark_relabel_packet = selinux_secmark_relabel_packet, | 5925 | .secmark_relabel_packet = selinux_secmark_relabel_packet, |
5926 | .secmark_refcount_inc = selinux_secmark_refcount_inc, | 5926 | .secmark_refcount_inc = selinux_secmark_refcount_inc, |
5927 | .secmark_refcount_dec = selinux_secmark_refcount_dec, | 5927 | .secmark_refcount_dec = selinux_secmark_refcount_dec, |
5928 | .req_classify_flow = selinux_req_classify_flow, | 5928 | .req_classify_flow = selinux_req_classify_flow, |
5929 | .tun_dev_alloc_security = selinux_tun_dev_alloc_security, | 5929 | .tun_dev_alloc_security = selinux_tun_dev_alloc_security, |
5930 | .tun_dev_free_security = selinux_tun_dev_free_security, | 5930 | .tun_dev_free_security = selinux_tun_dev_free_security, |
5931 | .tun_dev_create = selinux_tun_dev_create, | 5931 | .tun_dev_create = selinux_tun_dev_create, |
5932 | .tun_dev_attach_queue = selinux_tun_dev_attach_queue, | 5932 | .tun_dev_attach_queue = selinux_tun_dev_attach_queue, |
5933 | .tun_dev_attach = selinux_tun_dev_attach, | 5933 | .tun_dev_attach = selinux_tun_dev_attach, |
5934 | .tun_dev_open = selinux_tun_dev_open, | 5934 | .tun_dev_open = selinux_tun_dev_open, |
5935 | .skb_owned_by = selinux_skb_owned_by, | 5935 | .skb_owned_by = selinux_skb_owned_by, |
5936 | 5936 | ||
5937 | #ifdef CONFIG_SECURITY_NETWORK_XFRM | 5937 | #ifdef CONFIG_SECURITY_NETWORK_XFRM |
5938 | .xfrm_policy_alloc_security = selinux_xfrm_policy_alloc, | 5938 | .xfrm_policy_alloc_security = selinux_xfrm_policy_alloc, |
5939 | .xfrm_policy_clone_security = selinux_xfrm_policy_clone, | 5939 | .xfrm_policy_clone_security = selinux_xfrm_policy_clone, |
5940 | .xfrm_policy_free_security = selinux_xfrm_policy_free, | 5940 | .xfrm_policy_free_security = selinux_xfrm_policy_free, |
5941 | .xfrm_policy_delete_security = selinux_xfrm_policy_delete, | 5941 | .xfrm_policy_delete_security = selinux_xfrm_policy_delete, |
5942 | .xfrm_state_alloc = selinux_xfrm_state_alloc, | 5942 | .xfrm_state_alloc = selinux_xfrm_state_alloc, |
5943 | .xfrm_state_alloc_acquire = selinux_xfrm_state_alloc_acquire, | 5943 | .xfrm_state_alloc_acquire = selinux_xfrm_state_alloc_acquire, |
5944 | .xfrm_state_free_security = selinux_xfrm_state_free, | 5944 | .xfrm_state_free_security = selinux_xfrm_state_free, |
5945 | .xfrm_state_delete_security = selinux_xfrm_state_delete, | 5945 | .xfrm_state_delete_security = selinux_xfrm_state_delete, |
5946 | .xfrm_policy_lookup = selinux_xfrm_policy_lookup, | 5946 | .xfrm_policy_lookup = selinux_xfrm_policy_lookup, |
5947 | .xfrm_state_pol_flow_match = selinux_xfrm_state_pol_flow_match, | 5947 | .xfrm_state_pol_flow_match = selinux_xfrm_state_pol_flow_match, |
5948 | .xfrm_decode_session = selinux_xfrm_decode_session, | 5948 | .xfrm_decode_session = selinux_xfrm_decode_session, |
5949 | #endif | 5949 | #endif |
5950 | 5950 | ||
5951 | #ifdef CONFIG_KEYS | 5951 | #ifdef CONFIG_KEYS |
5952 | .key_alloc = selinux_key_alloc, | 5952 | .key_alloc = selinux_key_alloc, |
5953 | .key_free = selinux_key_free, | 5953 | .key_free = selinux_key_free, |
5954 | .key_permission = selinux_key_permission, | 5954 | .key_permission = selinux_key_permission, |
5955 | .key_getsecurity = selinux_key_getsecurity, | 5955 | .key_getsecurity = selinux_key_getsecurity, |
5956 | #endif | 5956 | #endif |
5957 | 5957 | ||
5958 | #ifdef CONFIG_AUDIT | 5958 | #ifdef CONFIG_AUDIT |
5959 | .audit_rule_init = selinux_audit_rule_init, | 5959 | .audit_rule_init = selinux_audit_rule_init, |
5960 | .audit_rule_known = selinux_audit_rule_known, | 5960 | .audit_rule_known = selinux_audit_rule_known, |
5961 | .audit_rule_match = selinux_audit_rule_match, | 5961 | .audit_rule_match = selinux_audit_rule_match, |
5962 | .audit_rule_free = selinux_audit_rule_free, | 5962 | .audit_rule_free = selinux_audit_rule_free, |
5963 | #endif | 5963 | #endif |
5964 | }; | 5964 | }; |
5965 | 5965 | ||
5966 | static __init int selinux_init(void) | 5966 | static __init int selinux_init(void) |
5967 | { | 5967 | { |
5968 | if (!security_module_enable(&selinux_ops)) { | 5968 | if (!security_module_enable(&selinux_ops)) { |
5969 | selinux_enabled = 0; | 5969 | selinux_enabled = 0; |
5970 | return 0; | 5970 | return 0; |
5971 | } | 5971 | } |
5972 | 5972 | ||
5973 | if (!selinux_enabled) { | 5973 | if (!selinux_enabled) { |
5974 | printk(KERN_INFO "SELinux: Disabled at boot.\n"); | 5974 | printk(KERN_INFO "SELinux: Disabled at boot.\n"); |
5975 | return 0; | 5975 | return 0; |
5976 | } | 5976 | } |
5977 | 5977 | ||
5978 | printk(KERN_INFO "SELinux: Initializing.\n"); | 5978 | printk(KERN_INFO "SELinux: Initializing.\n"); |
5979 | 5979 | ||
5980 | /* Set the security state for the initial task. */ | 5980 | /* Set the security state for the initial task. */ |
5981 | cred_init_security(); | 5981 | cred_init_security(); |
5982 | 5982 | ||
5983 | default_noexec = !(VM_DATA_DEFAULT_FLAGS & VM_EXEC); | 5983 | default_noexec = !(VM_DATA_DEFAULT_FLAGS & VM_EXEC); |
5984 | 5984 | ||
5985 | sel_inode_cache = kmem_cache_create("selinux_inode_security", | 5985 | sel_inode_cache = kmem_cache_create("selinux_inode_security", |
5986 | sizeof(struct inode_security_struct), | 5986 | sizeof(struct inode_security_struct), |
5987 | 0, SLAB_PANIC, NULL); | 5987 | 0, SLAB_PANIC, NULL); |
5988 | avc_init(); | 5988 | avc_init(); |
5989 | 5989 | ||
5990 | if (register_security(&selinux_ops)) | 5990 | if (register_security(&selinux_ops)) |
5991 | panic("SELinux: Unable to register with kernel.\n"); | 5991 | panic("SELinux: Unable to register with kernel.\n"); |
5992 | 5992 | ||
5993 | if (selinux_enforcing) | 5993 | if (selinux_enforcing) |
5994 | printk(KERN_DEBUG "SELinux: Starting in enforcing mode\n"); | 5994 | printk(KERN_DEBUG "SELinux: Starting in enforcing mode\n"); |
5995 | else | 5995 | else |
5996 | printk(KERN_DEBUG "SELinux: Starting in permissive mode\n"); | 5996 | printk(KERN_DEBUG "SELinux: Starting in permissive mode\n"); |
5997 | 5997 | ||
5998 | return 0; | 5998 | return 0; |
5999 | } | 5999 | } |
6000 | 6000 | ||
6001 | static void delayed_superblock_init(struct super_block *sb, void *unused) | 6001 | static void delayed_superblock_init(struct super_block *sb, void *unused) |
6002 | { | 6002 | { |
6003 | superblock_doinit(sb, NULL); | 6003 | superblock_doinit(sb, NULL); |
6004 | } | 6004 | } |
6005 | 6005 | ||
6006 | void selinux_complete_init(void) | 6006 | void selinux_complete_init(void) |
6007 | { | 6007 | { |
6008 | printk(KERN_DEBUG "SELinux: Completing initialization.\n"); | 6008 | printk(KERN_DEBUG "SELinux: Completing initialization.\n"); |
6009 | 6009 | ||
6010 | /* Set up any superblocks initialized prior to the policy load. */ | 6010 | /* Set up any superblocks initialized prior to the policy load. */ |
6011 | printk(KERN_DEBUG "SELinux: Setting up existing superblocks.\n"); | 6011 | printk(KERN_DEBUG "SELinux: Setting up existing superblocks.\n"); |
6012 | iterate_supers(delayed_superblock_init, NULL); | 6012 | iterate_supers(delayed_superblock_init, NULL); |
6013 | } | 6013 | } |
6014 | 6014 | ||
6015 | /* SELinux requires early initialization in order to label | 6015 | /* SELinux requires early initialization in order to label |
6016 | all processes and objects when they are created. */ | 6016 | all processes and objects when they are created. */ |
6017 | security_initcall(selinux_init); | 6017 | security_initcall(selinux_init); |
6018 | 6018 | ||
6019 | #if defined(CONFIG_NETFILTER) | 6019 | #if defined(CONFIG_NETFILTER) |
6020 | 6020 | ||
6021 | static struct nf_hook_ops selinux_ipv4_ops[] = { | 6021 | static struct nf_hook_ops selinux_ipv4_ops[] = { |
6022 | { | 6022 | { |
6023 | .hook = selinux_ipv4_postroute, | 6023 | .hook = selinux_ipv4_postroute, |
6024 | .owner = THIS_MODULE, | 6024 | .owner = THIS_MODULE, |
6025 | .pf = NFPROTO_IPV4, | 6025 | .pf = NFPROTO_IPV4, |
6026 | .hooknum = NF_INET_POST_ROUTING, | 6026 | .hooknum = NF_INET_POST_ROUTING, |
6027 | .priority = NF_IP_PRI_SELINUX_LAST, | 6027 | .priority = NF_IP_PRI_SELINUX_LAST, |
6028 | }, | 6028 | }, |
6029 | { | 6029 | { |
6030 | .hook = selinux_ipv4_forward, | 6030 | .hook = selinux_ipv4_forward, |
6031 | .owner = THIS_MODULE, | 6031 | .owner = THIS_MODULE, |
6032 | .pf = NFPROTO_IPV4, | 6032 | .pf = NFPROTO_IPV4, |
6033 | .hooknum = NF_INET_FORWARD, | 6033 | .hooknum = NF_INET_FORWARD, |
6034 | .priority = NF_IP_PRI_SELINUX_FIRST, | 6034 | .priority = NF_IP_PRI_SELINUX_FIRST, |
6035 | }, | 6035 | }, |
6036 | { | 6036 | { |
6037 | .hook = selinux_ipv4_output, | 6037 | .hook = selinux_ipv4_output, |
6038 | .owner = THIS_MODULE, | 6038 | .owner = THIS_MODULE, |
6039 | .pf = NFPROTO_IPV4, | 6039 | .pf = NFPROTO_IPV4, |
6040 | .hooknum = NF_INET_LOCAL_OUT, | 6040 | .hooknum = NF_INET_LOCAL_OUT, |
6041 | .priority = NF_IP_PRI_SELINUX_FIRST, | 6041 | .priority = NF_IP_PRI_SELINUX_FIRST, |
6042 | } | 6042 | } |
6043 | }; | 6043 | }; |
6044 | 6044 | ||
6045 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 6045 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
6046 | 6046 | ||
6047 | static struct nf_hook_ops selinux_ipv6_ops[] = { | 6047 | static struct nf_hook_ops selinux_ipv6_ops[] = { |
6048 | { | 6048 | { |
6049 | .hook = selinux_ipv6_postroute, | 6049 | .hook = selinux_ipv6_postroute, |
6050 | .owner = THIS_MODULE, | 6050 | .owner = THIS_MODULE, |
6051 | .pf = NFPROTO_IPV6, | 6051 | .pf = NFPROTO_IPV6, |
6052 | .hooknum = NF_INET_POST_ROUTING, | 6052 | .hooknum = NF_INET_POST_ROUTING, |
6053 | .priority = NF_IP6_PRI_SELINUX_LAST, | 6053 | .priority = NF_IP6_PRI_SELINUX_LAST, |
6054 | }, | 6054 | }, |
6055 | { | 6055 | { |
6056 | .hook = selinux_ipv6_forward, | 6056 | .hook = selinux_ipv6_forward, |
6057 | .owner = THIS_MODULE, | 6057 | .owner = THIS_MODULE, |
6058 | .pf = NFPROTO_IPV6, | 6058 | .pf = NFPROTO_IPV6, |
6059 | .hooknum = NF_INET_FORWARD, | 6059 | .hooknum = NF_INET_FORWARD, |
6060 | .priority = NF_IP6_PRI_SELINUX_FIRST, | 6060 | .priority = NF_IP6_PRI_SELINUX_FIRST, |
6061 | } | 6061 | } |
6062 | }; | 6062 | }; |
6063 | 6063 | ||
6064 | #endif /* IPV6 */ | 6064 | #endif /* IPV6 */ |
6065 | 6065 | ||
6066 | static int __init selinux_nf_ip_init(void) | 6066 | static int __init selinux_nf_ip_init(void) |
6067 | { | 6067 | { |
6068 | int err = 0; | 6068 | int err = 0; |
6069 | 6069 | ||
6070 | if (!selinux_enabled) | 6070 | if (!selinux_enabled) |
6071 | goto out; | 6071 | goto out; |
6072 | 6072 | ||
6073 | printk(KERN_DEBUG "SELinux: Registering netfilter hooks\n"); | 6073 | printk(KERN_DEBUG "SELinux: Registering netfilter hooks\n"); |
6074 | 6074 | ||
6075 | err = nf_register_hooks(selinux_ipv4_ops, ARRAY_SIZE(selinux_ipv4_ops)); | 6075 | err = nf_register_hooks(selinux_ipv4_ops, ARRAY_SIZE(selinux_ipv4_ops)); |
6076 | if (err) | 6076 | if (err) |
6077 | panic("SELinux: nf_register_hooks for IPv4: error %d\n", err); | 6077 | panic("SELinux: nf_register_hooks for IPv4: error %d\n", err); |
6078 | 6078 | ||
6079 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 6079 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
6080 | err = nf_register_hooks(selinux_ipv6_ops, ARRAY_SIZE(selinux_ipv6_ops)); | 6080 | err = nf_register_hooks(selinux_ipv6_ops, ARRAY_SIZE(selinux_ipv6_ops)); |
6081 | if (err) | 6081 | if (err) |
6082 | panic("SELinux: nf_register_hooks for IPv6: error %d\n", err); | 6082 | panic("SELinux: nf_register_hooks for IPv6: error %d\n", err); |
6083 | #endif /* IPV6 */ | 6083 | #endif /* IPV6 */ |
6084 | 6084 | ||
6085 | out: | 6085 | out: |
6086 | return err; | 6086 | return err; |
6087 | } | 6087 | } |
6088 | 6088 | ||
6089 | __initcall(selinux_nf_ip_init); | 6089 | __initcall(selinux_nf_ip_init); |
6090 | 6090 | ||
6091 | #ifdef CONFIG_SECURITY_SELINUX_DISABLE | 6091 | #ifdef CONFIG_SECURITY_SELINUX_DISABLE |
6092 | static void selinux_nf_ip_exit(void) | 6092 | static void selinux_nf_ip_exit(void) |
6093 | { | 6093 | { |
6094 | printk(KERN_DEBUG "SELinux: Unregistering netfilter hooks\n"); | 6094 | printk(KERN_DEBUG "SELinux: Unregistering netfilter hooks\n"); |
6095 | 6095 | ||
6096 | nf_unregister_hooks(selinux_ipv4_ops, ARRAY_SIZE(selinux_ipv4_ops)); | 6096 | nf_unregister_hooks(selinux_ipv4_ops, ARRAY_SIZE(selinux_ipv4_ops)); |
6097 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 6097 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
6098 | nf_unregister_hooks(selinux_ipv6_ops, ARRAY_SIZE(selinux_ipv6_ops)); | 6098 | nf_unregister_hooks(selinux_ipv6_ops, ARRAY_SIZE(selinux_ipv6_ops)); |
6099 | #endif /* IPV6 */ | 6099 | #endif /* IPV6 */ |
6100 | } | 6100 | } |
6101 | #endif | 6101 | #endif |
6102 | 6102 | ||
6103 | #else /* CONFIG_NETFILTER */ | 6103 | #else /* CONFIG_NETFILTER */ |
6104 | 6104 | ||
6105 | #ifdef CONFIG_SECURITY_SELINUX_DISABLE | 6105 | #ifdef CONFIG_SECURITY_SELINUX_DISABLE |
6106 | #define selinux_nf_ip_exit() | 6106 | #define selinux_nf_ip_exit() |
6107 | #endif | 6107 | #endif |
6108 | 6108 | ||
6109 | #endif /* CONFIG_NETFILTER */ | 6109 | #endif /* CONFIG_NETFILTER */ |
6110 | 6110 | ||
6111 | #ifdef CONFIG_SECURITY_SELINUX_DISABLE | 6111 | #ifdef CONFIG_SECURITY_SELINUX_DISABLE |
6112 | static int selinux_disabled; | 6112 | static int selinux_disabled; |
6113 | 6113 | ||
6114 | int selinux_disable(void) | 6114 | int selinux_disable(void) |
6115 | { | 6115 | { |
6116 | if (ss_initialized) { | 6116 | if (ss_initialized) { |
6117 | /* Not permitted after initial policy load. */ | 6117 | /* Not permitted after initial policy load. */ |
6118 | return -EINVAL; | 6118 | return -EINVAL; |
6119 | } | 6119 | } |
6120 | 6120 | ||
6121 | if (selinux_disabled) { | 6121 | if (selinux_disabled) { |
6122 | /* Only do this once. */ | 6122 | /* Only do this once. */ |
6123 | return -EINVAL; | 6123 | return -EINVAL; |
6124 | } | 6124 | } |
6125 | 6125 | ||
6126 | printk(KERN_INFO "SELinux: Disabled at runtime.\n"); | 6126 | printk(KERN_INFO "SELinux: Disabled at runtime.\n"); |
6127 | 6127 | ||
6128 | selinux_disabled = 1; | 6128 | selinux_disabled = 1; |
6129 | selinux_enabled = 0; | 6129 | selinux_enabled = 0; |
6130 | 6130 | ||
6131 | reset_security_ops(); | 6131 | reset_security_ops(); |
6132 | 6132 | ||
6133 | /* Try to destroy the avc node cache */ | 6133 | /* Try to destroy the avc node cache */ |
6134 | avc_disable(); | 6134 | avc_disable(); |
6135 | 6135 | ||
6136 | /* Unregister netfilter hooks. */ | 6136 | /* Unregister netfilter hooks. */ |
6137 | selinux_nf_ip_exit(); | 6137 | selinux_nf_ip_exit(); |
6138 | 6138 | ||
6139 | /* Unregister selinuxfs. */ | 6139 | /* Unregister selinuxfs. */ |
6140 | exit_sel_fs(); | 6140 | exit_sel_fs(); |
6141 | 6141 | ||
6142 | return 0; | 6142 | return 0; |
6143 | } | 6143 | } |
6144 | #endif | 6144 | #endif |
6145 | 6145 |