Commit 625bba662c17e917e164dc37f61aebbc49987ed6

Authored by Linus Torvalds

Merge tag 'locks-v3.15-2' of git://git.samba.org/jlayton/linux

Pull file locking fixes from Jeff Layton:
 "File locking related bugfixes for v3.15 (pile #2)

   - fix for a long-standing bug in __break_lease that can cause soft
     lockups
   - renaming of file-private locks to "open file description" locks,
     and of their command macros to more visually distinct names

  The fix for __break_lease is also in the pile of patches for which
  Bruce sent a pull request, but I assume that your merge procedure will
  handle that correctly.

  For the other patches, I don't like the fact that we need to rename
  this stuff at this late stage, but it should be settled now
  (hopefully)"

* tag 'locks-v3.15-2' of git://git.samba.org/jlayton/linux:
  locks: rename FL_FILE_PVT and IS_FILE_PVT to use "*_OFDLCK" instead
  locks: rename file-private locks to "open file description locks"
  locks: allow __break_lease to sleep even when break_time is 0

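For context on the rename: after this series, "file-private" locks are called "open file description" (OFD) locks, and the fcntl command macros F_GETLKP/F_SETLKP/F_SETLKPW become F_OFD_GETLK/F_OFD_SETLK/F_OFD_SETLKW. The snippet below is a minimal userspace sketch, not part of this commit, showing how the renamed commands are used; it assumes a 3.15+ kernel and libc headers that expose the F_OFD_* macros (with glibc, _GNU_SOURCE), and the path /tmp/ofd-demo is an arbitrary example.

/*
 * Hypothetical userspace example (not from this commit): take an OFD
 * write lock on the first 100 bytes of a file with F_OFD_SETLK.
 * OFD locks belong to the open file description rather than the
 * process, and the kernel requires l_pid == 0 for these commands.
 */
#define _GNU_SOURCE		/* for the F_OFD_* macros in glibc */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	struct flock fl;
	int fd = open("/tmp/ofd-demo", O_RDWR | O_CREAT, 0644);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&fl, 0, sizeof(fl));
	fl.l_type = F_WRLCK;	/* exclusive (write) lock */
	fl.l_whence = SEEK_SET;
	fl.l_start = 0;
	fl.l_len = 100;		/* bytes 0..99 */
	fl.l_pid = 0;		/* must be 0 for OFD lock commands */

	if (fcntl(fd, F_OFD_SETLK, &fl) == -1) {	/* non-blocking acquire */
		perror("fcntl(F_OFD_SETLK)");
		close(fd);
		return 1;
	}

	printf("OFD lock acquired; it is released when all descriptors\n"
	       "sharing this open file description are closed\n");
	close(fd);
	return 0;
}

Unlike a traditional POSIX record lock, the lock above is not dropped when the process closes some other descriptor for the same file, which is the main motivation for the feature; OFD and POSIX locks still conflict with each other as usual.
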
Showing 7 changed files (inline diff)

arch/arm/kernel/sys_oabi-compat.c
1 /* 1 /*
2 * arch/arm/kernel/sys_oabi-compat.c 2 * arch/arm/kernel/sys_oabi-compat.c
3 * 3 *
4 * Compatibility wrappers for syscalls that are used from 4 * Compatibility wrappers for syscalls that are used from
5 * old ABI user space binaries with an EABI kernel. 5 * old ABI user space binaries with an EABI kernel.
6 * 6 *
7 * Author: Nicolas Pitre 7 * Author: Nicolas Pitre
8 * Created: Oct 7, 2005 8 * Created: Oct 7, 2005
9 * Copyright: MontaVista Software, Inc. 9 * Copyright: MontaVista Software, Inc.
10 * 10 *
11 * This program is free software; you can redistribute it and/or modify 11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as 12 * it under the terms of the GNU General Public License version 2 as
13 * published by the Free Software Foundation. 13 * published by the Free Software Foundation.
14 */ 14 */
15 15
16 /* 16 /*
17 * The legacy ABI and the new ARM EABI have different rules making some 17 * The legacy ABI and the new ARM EABI have different rules making some
18 * syscalls incompatible especially with structure arguments. 18 * syscalls incompatible especially with structure arguments.
19 * Most notably, Eabi says 64-bit members should be 64-bit aligned instead of 19 * Most notably, Eabi says 64-bit members should be 64-bit aligned instead of
20 * simply word aligned. EABI also pads structures to the size of the largest 20 * simply word aligned. EABI also pads structures to the size of the largest
21 * member it contains instead of the invariant 32-bit. 21 * member it contains instead of the invariant 32-bit.
22 * 22 *
23 * The following syscalls are affected: 23 * The following syscalls are affected:
24 * 24 *
25 * sys_stat64: 25 * sys_stat64:
26 * sys_lstat64: 26 * sys_lstat64:
27 * sys_fstat64: 27 * sys_fstat64:
28 * sys_fstatat64: 28 * sys_fstatat64:
29 * 29 *
30 * struct stat64 has different sizes and some members are shifted 30 * struct stat64 has different sizes and some members are shifted
31 * Compatibility wrappers are needed for them and provided below. 31 * Compatibility wrappers are needed for them and provided below.
32 * 32 *
33 * sys_fcntl64: 33 * sys_fcntl64:
34 * 34 *
35 * struct flock64 has different sizes and some members are shifted 35 * struct flock64 has different sizes and some members are shifted
36 * A compatibility wrapper is needed and provided below. 36 * A compatibility wrapper is needed and provided below.
37 * 37 *
38 * sys_statfs64: 38 * sys_statfs64:
39 * sys_fstatfs64: 39 * sys_fstatfs64:
40 * 40 *
41 * struct statfs64 has extra padding with EABI growing its size from 41 * struct statfs64 has extra padding with EABI growing its size from
42 * 84 to 88. This struct is now __attribute__((packed,aligned(4))) 42 * 84 to 88. This struct is now __attribute__((packed,aligned(4)))
43 * with a small assembly wrapper to force the sz argument to 84 if it is 88 43 * with a small assembly wrapper to force the sz argument to 84 if it is 88
44 * to avoid copying the extra padding over user space unexpecting it. 44 * to avoid copying the extra padding over user space unexpecting it.
45 * 45 *
46 * sys_newuname: 46 * sys_newuname:
47 * 47 *
48 * struct new_utsname has no padding with EABI. No problem there. 48 * struct new_utsname has no padding with EABI. No problem there.
49 * 49 *
50 * sys_epoll_ctl: 50 * sys_epoll_ctl:
51 * sys_epoll_wait: 51 * sys_epoll_wait:
52 * 52 *
53 * struct epoll_event has its second member shifted also affecting the 53 * struct epoll_event has its second member shifted also affecting the
54 * structure size. Compatibility wrappers are needed and provided below. 54 * structure size. Compatibility wrappers are needed and provided below.
55 * 55 *
56 * sys_ipc: 56 * sys_ipc:
57 * sys_semop: 57 * sys_semop:
58 * sys_semtimedop: 58 * sys_semtimedop:
59 * 59 *
60 * struct sembuf loses its padding with EABI. Since arrays of them are 60 * struct sembuf loses its padding with EABI. Since arrays of them are
61 * used they have to be copyed to remove the padding. Compatibility wrappers 61 * used they have to be copyed to remove the padding. Compatibility wrappers
62 * provided below. 62 * provided below.
63 * 63 *
64 * sys_bind: 64 * sys_bind:
65 * sys_connect: 65 * sys_connect:
66 * sys_sendmsg: 66 * sys_sendmsg:
67 * sys_sendto: 67 * sys_sendto:
68 * sys_socketcall: 68 * sys_socketcall:
69 * 69 *
70 * struct sockaddr_un loses its padding with EABI. Since the size of the 70 * struct sockaddr_un loses its padding with EABI. Since the size of the
71 * structure is used as a validation test in unix_mkname(), we need to 71 * structure is used as a validation test in unix_mkname(), we need to
72 * change the length argument to 110 whenever it is 112. Compatibility 72 * change the length argument to 110 whenever it is 112. Compatibility
73 * wrappers provided below. 73 * wrappers provided below.
74 */ 74 */
75 75
76 #include <linux/syscalls.h> 76 #include <linux/syscalls.h>
77 #include <linux/errno.h> 77 #include <linux/errno.h>
78 #include <linux/fs.h> 78 #include <linux/fs.h>
79 #include <linux/fcntl.h> 79 #include <linux/fcntl.h>
80 #include <linux/eventpoll.h> 80 #include <linux/eventpoll.h>
81 #include <linux/sem.h> 81 #include <linux/sem.h>
82 #include <linux/socket.h> 82 #include <linux/socket.h>
83 #include <linux/net.h> 83 #include <linux/net.h>
84 #include <linux/ipc.h> 84 #include <linux/ipc.h>
85 #include <linux/uaccess.h> 85 #include <linux/uaccess.h>
86 #include <linux/slab.h> 86 #include <linux/slab.h>
87 87
88 struct oldabi_stat64 { 88 struct oldabi_stat64 {
89 unsigned long long st_dev; 89 unsigned long long st_dev;
90 unsigned int __pad1; 90 unsigned int __pad1;
91 unsigned long __st_ino; 91 unsigned long __st_ino;
92 unsigned int st_mode; 92 unsigned int st_mode;
93 unsigned int st_nlink; 93 unsigned int st_nlink;
94 94
95 unsigned long st_uid; 95 unsigned long st_uid;
96 unsigned long st_gid; 96 unsigned long st_gid;
97 97
98 unsigned long long st_rdev; 98 unsigned long long st_rdev;
99 unsigned int __pad2; 99 unsigned int __pad2;
100 100
101 long long st_size; 101 long long st_size;
102 unsigned long st_blksize; 102 unsigned long st_blksize;
103 unsigned long long st_blocks; 103 unsigned long long st_blocks;
104 104
105 unsigned long st_atime; 105 unsigned long st_atime;
106 unsigned long st_atime_nsec; 106 unsigned long st_atime_nsec;
107 107
108 unsigned long st_mtime; 108 unsigned long st_mtime;
109 unsigned long st_mtime_nsec; 109 unsigned long st_mtime_nsec;
110 110
111 unsigned long st_ctime; 111 unsigned long st_ctime;
112 unsigned long st_ctime_nsec; 112 unsigned long st_ctime_nsec;
113 113
114 unsigned long long st_ino; 114 unsigned long long st_ino;
115 } __attribute__ ((packed,aligned(4))); 115 } __attribute__ ((packed,aligned(4)));
116 116
117 static long cp_oldabi_stat64(struct kstat *stat, 117 static long cp_oldabi_stat64(struct kstat *stat,
118 struct oldabi_stat64 __user *statbuf) 118 struct oldabi_stat64 __user *statbuf)
119 { 119 {
120 struct oldabi_stat64 tmp; 120 struct oldabi_stat64 tmp;
121 121
122 tmp.st_dev = huge_encode_dev(stat->dev); 122 tmp.st_dev = huge_encode_dev(stat->dev);
123 tmp.__pad1 = 0; 123 tmp.__pad1 = 0;
124 tmp.__st_ino = stat->ino; 124 tmp.__st_ino = stat->ino;
125 tmp.st_mode = stat->mode; 125 tmp.st_mode = stat->mode;
126 tmp.st_nlink = stat->nlink; 126 tmp.st_nlink = stat->nlink;
127 tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid); 127 tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid);
128 tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid); 128 tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid);
129 tmp.st_rdev = huge_encode_dev(stat->rdev); 129 tmp.st_rdev = huge_encode_dev(stat->rdev);
130 tmp.st_size = stat->size; 130 tmp.st_size = stat->size;
131 tmp.st_blocks = stat->blocks; 131 tmp.st_blocks = stat->blocks;
132 tmp.__pad2 = 0; 132 tmp.__pad2 = 0;
133 tmp.st_blksize = stat->blksize; 133 tmp.st_blksize = stat->blksize;
134 tmp.st_atime = stat->atime.tv_sec; 134 tmp.st_atime = stat->atime.tv_sec;
135 tmp.st_atime_nsec = stat->atime.tv_nsec; 135 tmp.st_atime_nsec = stat->atime.tv_nsec;
136 tmp.st_mtime = stat->mtime.tv_sec; 136 tmp.st_mtime = stat->mtime.tv_sec;
137 tmp.st_mtime_nsec = stat->mtime.tv_nsec; 137 tmp.st_mtime_nsec = stat->mtime.tv_nsec;
138 tmp.st_ctime = stat->ctime.tv_sec; 138 tmp.st_ctime = stat->ctime.tv_sec;
139 tmp.st_ctime_nsec = stat->ctime.tv_nsec; 139 tmp.st_ctime_nsec = stat->ctime.tv_nsec;
140 tmp.st_ino = stat->ino; 140 tmp.st_ino = stat->ino;
141 return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; 141 return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? -EFAULT : 0;
142 } 142 }
143 143
144 asmlinkage long sys_oabi_stat64(const char __user * filename, 144 asmlinkage long sys_oabi_stat64(const char __user * filename,
145 struct oldabi_stat64 __user * statbuf) 145 struct oldabi_stat64 __user * statbuf)
146 { 146 {
147 struct kstat stat; 147 struct kstat stat;
148 int error = vfs_stat(filename, &stat); 148 int error = vfs_stat(filename, &stat);
149 if (!error) 149 if (!error)
150 error = cp_oldabi_stat64(&stat, statbuf); 150 error = cp_oldabi_stat64(&stat, statbuf);
151 return error; 151 return error;
152 } 152 }
153 153
154 asmlinkage long sys_oabi_lstat64(const char __user * filename, 154 asmlinkage long sys_oabi_lstat64(const char __user * filename,
155 struct oldabi_stat64 __user * statbuf) 155 struct oldabi_stat64 __user * statbuf)
156 { 156 {
157 struct kstat stat; 157 struct kstat stat;
158 int error = vfs_lstat(filename, &stat); 158 int error = vfs_lstat(filename, &stat);
159 if (!error) 159 if (!error)
160 error = cp_oldabi_stat64(&stat, statbuf); 160 error = cp_oldabi_stat64(&stat, statbuf);
161 return error; 161 return error;
162 } 162 }
163 163
164 asmlinkage long sys_oabi_fstat64(unsigned long fd, 164 asmlinkage long sys_oabi_fstat64(unsigned long fd,
165 struct oldabi_stat64 __user * statbuf) 165 struct oldabi_stat64 __user * statbuf)
166 { 166 {
167 struct kstat stat; 167 struct kstat stat;
168 int error = vfs_fstat(fd, &stat); 168 int error = vfs_fstat(fd, &stat);
169 if (!error) 169 if (!error)
170 error = cp_oldabi_stat64(&stat, statbuf); 170 error = cp_oldabi_stat64(&stat, statbuf);
171 return error; 171 return error;
172 } 172 }
173 173
174 asmlinkage long sys_oabi_fstatat64(int dfd, 174 asmlinkage long sys_oabi_fstatat64(int dfd,
175 const char __user *filename, 175 const char __user *filename,
176 struct oldabi_stat64 __user *statbuf, 176 struct oldabi_stat64 __user *statbuf,
177 int flag) 177 int flag)
178 { 178 {
179 struct kstat stat; 179 struct kstat stat;
180 int error; 180 int error;
181 181
182 error = vfs_fstatat(dfd, filename, &stat, flag); 182 error = vfs_fstatat(dfd, filename, &stat, flag);
183 if (error) 183 if (error)
184 return error; 184 return error;
185 return cp_oldabi_stat64(&stat, statbuf); 185 return cp_oldabi_stat64(&stat, statbuf);
186 } 186 }
187 187
188 struct oabi_flock64 { 188 struct oabi_flock64 {
189 short l_type; 189 short l_type;
190 short l_whence; 190 short l_whence;
191 loff_t l_start; 191 loff_t l_start;
192 loff_t l_len; 192 loff_t l_len;
193 pid_t l_pid; 193 pid_t l_pid;
194 } __attribute__ ((packed,aligned(4))); 194 } __attribute__ ((packed,aligned(4)));
195 195
196 asmlinkage long sys_oabi_fcntl64(unsigned int fd, unsigned int cmd, 196 asmlinkage long sys_oabi_fcntl64(unsigned int fd, unsigned int cmd,
197 unsigned long arg) 197 unsigned long arg)
198 { 198 {
199 struct oabi_flock64 user; 199 struct oabi_flock64 user;
200 struct flock64 kernel; 200 struct flock64 kernel;
201 mm_segment_t fs = USER_DS; /* initialized to kill a warning */ 201 mm_segment_t fs = USER_DS; /* initialized to kill a warning */
202 unsigned long local_arg = arg; 202 unsigned long local_arg = arg;
203 int ret; 203 int ret;
204 204
205 switch (cmd) { 205 switch (cmd) {
206 case F_GETLKP: 206 case F_OFD_GETLK:
207 case F_SETLKP: 207 case F_OFD_SETLK:
208 case F_SETLKPW: 208 case F_OFD_SETLKW:
209 case F_GETLK64: 209 case F_GETLK64:
210 case F_SETLK64: 210 case F_SETLK64:
211 case F_SETLKW64: 211 case F_SETLKW64:
212 if (copy_from_user(&user, (struct oabi_flock64 __user *)arg, 212 if (copy_from_user(&user, (struct oabi_flock64 __user *)arg,
213 sizeof(user))) 213 sizeof(user)))
214 return -EFAULT; 214 return -EFAULT;
215 kernel.l_type = user.l_type; 215 kernel.l_type = user.l_type;
216 kernel.l_whence = user.l_whence; 216 kernel.l_whence = user.l_whence;
217 kernel.l_start = user.l_start; 217 kernel.l_start = user.l_start;
218 kernel.l_len = user.l_len; 218 kernel.l_len = user.l_len;
219 kernel.l_pid = user.l_pid; 219 kernel.l_pid = user.l_pid;
220 local_arg = (unsigned long)&kernel; 220 local_arg = (unsigned long)&kernel;
221 fs = get_fs(); 221 fs = get_fs();
222 set_fs(KERNEL_DS); 222 set_fs(KERNEL_DS);
223 } 223 }
224 224
225 ret = sys_fcntl64(fd, cmd, local_arg); 225 ret = sys_fcntl64(fd, cmd, local_arg);
226 226
227 switch (cmd) { 227 switch (cmd) {
228 case F_GETLK64: 228 case F_GETLK64:
229 if (!ret) { 229 if (!ret) {
230 user.l_type = kernel.l_type; 230 user.l_type = kernel.l_type;
231 user.l_whence = kernel.l_whence; 231 user.l_whence = kernel.l_whence;
232 user.l_start = kernel.l_start; 232 user.l_start = kernel.l_start;
233 user.l_len = kernel.l_len; 233 user.l_len = kernel.l_len;
234 user.l_pid = kernel.l_pid; 234 user.l_pid = kernel.l_pid;
235 if (copy_to_user((struct oabi_flock64 __user *)arg, 235 if (copy_to_user((struct oabi_flock64 __user *)arg,
236 &user, sizeof(user))) 236 &user, sizeof(user)))
237 ret = -EFAULT; 237 ret = -EFAULT;
238 } 238 }
239 case F_SETLK64: 239 case F_SETLK64:
240 case F_SETLKW64: 240 case F_SETLKW64:
241 set_fs(fs); 241 set_fs(fs);
242 } 242 }
243 243
244 return ret; 244 return ret;
245 } 245 }
246 246
247 struct oabi_epoll_event { 247 struct oabi_epoll_event {
248 __u32 events; 248 __u32 events;
249 __u64 data; 249 __u64 data;
250 } __attribute__ ((packed,aligned(4))); 250 } __attribute__ ((packed,aligned(4)));
251 251
252 asmlinkage long sys_oabi_epoll_ctl(int epfd, int op, int fd, 252 asmlinkage long sys_oabi_epoll_ctl(int epfd, int op, int fd,
253 struct oabi_epoll_event __user *event) 253 struct oabi_epoll_event __user *event)
254 { 254 {
255 struct oabi_epoll_event user; 255 struct oabi_epoll_event user;
256 struct epoll_event kernel; 256 struct epoll_event kernel;
257 mm_segment_t fs; 257 mm_segment_t fs;
258 long ret; 258 long ret;
259 259
260 if (op == EPOLL_CTL_DEL) 260 if (op == EPOLL_CTL_DEL)
261 return sys_epoll_ctl(epfd, op, fd, NULL); 261 return sys_epoll_ctl(epfd, op, fd, NULL);
262 if (copy_from_user(&user, event, sizeof(user))) 262 if (copy_from_user(&user, event, sizeof(user)))
263 return -EFAULT; 263 return -EFAULT;
264 kernel.events = user.events; 264 kernel.events = user.events;
265 kernel.data = user.data; 265 kernel.data = user.data;
266 fs = get_fs(); 266 fs = get_fs();
267 set_fs(KERNEL_DS); 267 set_fs(KERNEL_DS);
268 ret = sys_epoll_ctl(epfd, op, fd, &kernel); 268 ret = sys_epoll_ctl(epfd, op, fd, &kernel);
269 set_fs(fs); 269 set_fs(fs);
270 return ret; 270 return ret;
271 } 271 }
272 272
273 asmlinkage long sys_oabi_epoll_wait(int epfd, 273 asmlinkage long sys_oabi_epoll_wait(int epfd,
274 struct oabi_epoll_event __user *events, 274 struct oabi_epoll_event __user *events,
275 int maxevents, int timeout) 275 int maxevents, int timeout)
276 { 276 {
277 struct epoll_event *kbuf; 277 struct epoll_event *kbuf;
278 mm_segment_t fs; 278 mm_segment_t fs;
279 long ret, err, i; 279 long ret, err, i;
280 280
281 if (maxevents <= 0 || maxevents > (INT_MAX/sizeof(struct epoll_event))) 281 if (maxevents <= 0 || maxevents > (INT_MAX/sizeof(struct epoll_event)))
282 return -EINVAL; 282 return -EINVAL;
283 kbuf = kmalloc(sizeof(*kbuf) * maxevents, GFP_KERNEL); 283 kbuf = kmalloc(sizeof(*kbuf) * maxevents, GFP_KERNEL);
284 if (!kbuf) 284 if (!kbuf)
285 return -ENOMEM; 285 return -ENOMEM;
286 fs = get_fs(); 286 fs = get_fs();
287 set_fs(KERNEL_DS); 287 set_fs(KERNEL_DS);
288 ret = sys_epoll_wait(epfd, kbuf, maxevents, timeout); 288 ret = sys_epoll_wait(epfd, kbuf, maxevents, timeout);
289 set_fs(fs); 289 set_fs(fs);
290 err = 0; 290 err = 0;
291 for (i = 0; i < ret; i++) { 291 for (i = 0; i < ret; i++) {
292 __put_user_error(kbuf[i].events, &events->events, err); 292 __put_user_error(kbuf[i].events, &events->events, err);
293 __put_user_error(kbuf[i].data, &events->data, err); 293 __put_user_error(kbuf[i].data, &events->data, err);
294 events++; 294 events++;
295 } 295 }
296 kfree(kbuf); 296 kfree(kbuf);
297 return err ? -EFAULT : ret; 297 return err ? -EFAULT : ret;
298 } 298 }
299 299
300 struct oabi_sembuf { 300 struct oabi_sembuf {
301 unsigned short sem_num; 301 unsigned short sem_num;
302 short sem_op; 302 short sem_op;
303 short sem_flg; 303 short sem_flg;
304 unsigned short __pad; 304 unsigned short __pad;
305 }; 305 };
306 306
307 asmlinkage long sys_oabi_semtimedop(int semid, 307 asmlinkage long sys_oabi_semtimedop(int semid,
308 struct oabi_sembuf __user *tsops, 308 struct oabi_sembuf __user *tsops,
309 unsigned nsops, 309 unsigned nsops,
310 const struct timespec __user *timeout) 310 const struct timespec __user *timeout)
311 { 311 {
312 struct sembuf *sops; 312 struct sembuf *sops;
313 struct timespec local_timeout; 313 struct timespec local_timeout;
314 long err; 314 long err;
315 int i; 315 int i;
316 316
317 if (nsops < 1 || nsops > SEMOPM) 317 if (nsops < 1 || nsops > SEMOPM)
318 return -EINVAL; 318 return -EINVAL;
319 sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL); 319 sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL);
320 if (!sops) 320 if (!sops)
321 return -ENOMEM; 321 return -ENOMEM;
322 err = 0; 322 err = 0;
323 for (i = 0; i < nsops; i++) { 323 for (i = 0; i < nsops; i++) {
324 __get_user_error(sops[i].sem_num, &tsops->sem_num, err); 324 __get_user_error(sops[i].sem_num, &tsops->sem_num, err);
325 __get_user_error(sops[i].sem_op, &tsops->sem_op, err); 325 __get_user_error(sops[i].sem_op, &tsops->sem_op, err);
326 __get_user_error(sops[i].sem_flg, &tsops->sem_flg, err); 326 __get_user_error(sops[i].sem_flg, &tsops->sem_flg, err);
327 tsops++; 327 tsops++;
328 } 328 }
329 if (timeout) { 329 if (timeout) {
330 /* copy this as well before changing domain protection */ 330 /* copy this as well before changing domain protection */
331 err |= copy_from_user(&local_timeout, timeout, sizeof(*timeout)); 331 err |= copy_from_user(&local_timeout, timeout, sizeof(*timeout));
332 timeout = &local_timeout; 332 timeout = &local_timeout;
333 } 333 }
334 if (err) { 334 if (err) {
335 err = -EFAULT; 335 err = -EFAULT;
336 } else { 336 } else {
337 mm_segment_t fs = get_fs(); 337 mm_segment_t fs = get_fs();
338 set_fs(KERNEL_DS); 338 set_fs(KERNEL_DS);
339 err = sys_semtimedop(semid, sops, nsops, timeout); 339 err = sys_semtimedop(semid, sops, nsops, timeout);
340 set_fs(fs); 340 set_fs(fs);
341 } 341 }
342 kfree(sops); 342 kfree(sops);
343 return err; 343 return err;
344 } 344 }
345 345
346 asmlinkage long sys_oabi_semop(int semid, struct oabi_sembuf __user *tsops, 346 asmlinkage long sys_oabi_semop(int semid, struct oabi_sembuf __user *tsops,
347 unsigned nsops) 347 unsigned nsops)
348 { 348 {
349 return sys_oabi_semtimedop(semid, tsops, nsops, NULL); 349 return sys_oabi_semtimedop(semid, tsops, nsops, NULL);
350 } 350 }
351 351
352 asmlinkage int sys_oabi_ipc(uint call, int first, int second, int third, 352 asmlinkage int sys_oabi_ipc(uint call, int first, int second, int third,
353 void __user *ptr, long fifth) 353 void __user *ptr, long fifth)
354 { 354 {
355 switch (call & 0xffff) { 355 switch (call & 0xffff) {
356 case SEMOP: 356 case SEMOP:
357 return sys_oabi_semtimedop(first, 357 return sys_oabi_semtimedop(first,
358 (struct oabi_sembuf __user *)ptr, 358 (struct oabi_sembuf __user *)ptr,
359 second, NULL); 359 second, NULL);
360 case SEMTIMEDOP: 360 case SEMTIMEDOP:
361 return sys_oabi_semtimedop(first, 361 return sys_oabi_semtimedop(first,
362 (struct oabi_sembuf __user *)ptr, 362 (struct oabi_sembuf __user *)ptr,
363 second, 363 second,
364 (const struct timespec __user *)fifth); 364 (const struct timespec __user *)fifth);
365 default: 365 default:
366 return sys_ipc(call, first, second, third, ptr, fifth); 366 return sys_ipc(call, first, second, third, ptr, fifth);
367 } 367 }
368 } 368 }
369 369
370 asmlinkage long sys_oabi_bind(int fd, struct sockaddr __user *addr, int addrlen) 370 asmlinkage long sys_oabi_bind(int fd, struct sockaddr __user *addr, int addrlen)
371 { 371 {
372 sa_family_t sa_family; 372 sa_family_t sa_family;
373 if (addrlen == 112 && 373 if (addrlen == 112 &&
374 get_user(sa_family, &addr->sa_family) == 0 && 374 get_user(sa_family, &addr->sa_family) == 0 &&
375 sa_family == AF_UNIX) 375 sa_family == AF_UNIX)
376 addrlen = 110; 376 addrlen = 110;
377 return sys_bind(fd, addr, addrlen); 377 return sys_bind(fd, addr, addrlen);
378 } 378 }
379 379
380 asmlinkage long sys_oabi_connect(int fd, struct sockaddr __user *addr, int addrlen) 380 asmlinkage long sys_oabi_connect(int fd, struct sockaddr __user *addr, int addrlen)
381 { 381 {
382 sa_family_t sa_family; 382 sa_family_t sa_family;
383 if (addrlen == 112 && 383 if (addrlen == 112 &&
384 get_user(sa_family, &addr->sa_family) == 0 && 384 get_user(sa_family, &addr->sa_family) == 0 &&
385 sa_family == AF_UNIX) 385 sa_family == AF_UNIX)
386 addrlen = 110; 386 addrlen = 110;
387 return sys_connect(fd, addr, addrlen); 387 return sys_connect(fd, addr, addrlen);
388 } 388 }
389 389
390 asmlinkage long sys_oabi_sendto(int fd, void __user *buff, 390 asmlinkage long sys_oabi_sendto(int fd, void __user *buff,
391 size_t len, unsigned flags, 391 size_t len, unsigned flags,
392 struct sockaddr __user *addr, 392 struct sockaddr __user *addr,
393 int addrlen) 393 int addrlen)
394 { 394 {
395 sa_family_t sa_family; 395 sa_family_t sa_family;
396 if (addrlen == 112 && 396 if (addrlen == 112 &&
397 get_user(sa_family, &addr->sa_family) == 0 && 397 get_user(sa_family, &addr->sa_family) == 0 &&
398 sa_family == AF_UNIX) 398 sa_family == AF_UNIX)
399 addrlen = 110; 399 addrlen = 110;
400 return sys_sendto(fd, buff, len, flags, addr, addrlen); 400 return sys_sendto(fd, buff, len, flags, addr, addrlen);
401 } 401 }
402 402
403 asmlinkage long sys_oabi_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) 403 asmlinkage long sys_oabi_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
404 { 404 {
405 struct sockaddr __user *addr; 405 struct sockaddr __user *addr;
406 int msg_namelen; 406 int msg_namelen;
407 sa_family_t sa_family; 407 sa_family_t sa_family;
408 if (msg && 408 if (msg &&
409 get_user(msg_namelen, &msg->msg_namelen) == 0 && 409 get_user(msg_namelen, &msg->msg_namelen) == 0 &&
410 msg_namelen == 112 && 410 msg_namelen == 112 &&
411 get_user(addr, &msg->msg_name) == 0 && 411 get_user(addr, &msg->msg_name) == 0 &&
412 get_user(sa_family, &addr->sa_family) == 0 && 412 get_user(sa_family, &addr->sa_family) == 0 &&
413 sa_family == AF_UNIX) 413 sa_family == AF_UNIX)
414 { 414 {
415 /* 415 /*
416 * HACK ALERT: there is a limit to how much backward bending 416 * HACK ALERT: there is a limit to how much backward bending
417 * we should do for what is actually a transitional 417 * we should do for what is actually a transitional
418 * compatibility layer. This already has known flaws with 418 * compatibility layer. This already has known flaws with
419 * a few ioctls that we don't intend to fix. Therefore 419 * a few ioctls that we don't intend to fix. Therefore
420 * consider this blatent hack as another one... and take care 420 * consider this blatent hack as another one... and take care
421 * to run for cover. In most cases it will "just work fine". 421 * to run for cover. In most cases it will "just work fine".
422 * If it doesn't, well, tough. 422 * If it doesn't, well, tough.
423 */ 423 */
424 put_user(110, &msg->msg_namelen); 424 put_user(110, &msg->msg_namelen);
425 } 425 }
426 return sys_sendmsg(fd, msg, flags); 426 return sys_sendmsg(fd, msg, flags);
427 } 427 }
428 428
429 asmlinkage long sys_oabi_socketcall(int call, unsigned long __user *args) 429 asmlinkage long sys_oabi_socketcall(int call, unsigned long __user *args)
430 { 430 {
431 unsigned long r = -EFAULT, a[6]; 431 unsigned long r = -EFAULT, a[6];
432 432
433 switch (call) { 433 switch (call) {
434 case SYS_BIND: 434 case SYS_BIND:
435 if (copy_from_user(a, args, 3 * sizeof(long)) == 0) 435 if (copy_from_user(a, args, 3 * sizeof(long)) == 0)
436 r = sys_oabi_bind(a[0], (struct sockaddr __user *)a[1], a[2]); 436 r = sys_oabi_bind(a[0], (struct sockaddr __user *)a[1], a[2]);
437 break; 437 break;
438 case SYS_CONNECT: 438 case SYS_CONNECT:
439 if (copy_from_user(a, args, 3 * sizeof(long)) == 0) 439 if (copy_from_user(a, args, 3 * sizeof(long)) == 0)
440 r = sys_oabi_connect(a[0], (struct sockaddr __user *)a[1], a[2]); 440 r = sys_oabi_connect(a[0], (struct sockaddr __user *)a[1], a[2]);
441 break; 441 break;
442 case SYS_SENDTO: 442 case SYS_SENDTO:
443 if (copy_from_user(a, args, 6 * sizeof(long)) == 0) 443 if (copy_from_user(a, args, 6 * sizeof(long)) == 0)
444 r = sys_oabi_sendto(a[0], (void __user *)a[1], a[2], a[3], 444 r = sys_oabi_sendto(a[0], (void __user *)a[1], a[2], a[3],
445 (struct sockaddr __user *)a[4], a[5]); 445 (struct sockaddr __user *)a[4], a[5]);
446 break; 446 break;
447 case SYS_SENDMSG: 447 case SYS_SENDMSG:
448 if (copy_from_user(a, args, 3 * sizeof(long)) == 0) 448 if (copy_from_user(a, args, 3 * sizeof(long)) == 0)
449 r = sys_oabi_sendmsg(a[0], (struct msghdr __user *)a[1], a[2]); 449 r = sys_oabi_sendmsg(a[0], (struct msghdr __user *)a[1], a[2]);
450 break; 450 break;
451 default: 451 default:
452 r = sys_socketcall(call, args); 452 r = sys_socketcall(call, args);
453 } 453 }
454 454
455 return r; 455 return r;
456 } 456 }
457 457
fs/compat.c
1 /* 1 /*
2 * linux/fs/compat.c 2 * linux/fs/compat.c
3 * 3 *
4 * Kernel compatibililty routines for e.g. 32 bit syscall support 4 * Kernel compatibililty routines for e.g. 32 bit syscall support
5 * on 64 bit kernels. 5 * on 64 bit kernels.
6 * 6 *
7 * Copyright (C) 2002 Stephen Rothwell, IBM Corporation 7 * Copyright (C) 2002 Stephen Rothwell, IBM Corporation
8 * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com) 8 * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com)
9 * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) 9 * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be)
10 * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs 10 * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs
11 * Copyright (C) 2003 Pavel Machek (pavel@ucw.cz) 11 * Copyright (C) 2003 Pavel Machek (pavel@ucw.cz)
12 * 12 *
13 * This program is free software; you can redistribute it and/or modify 13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License version 2 as 14 * it under the terms of the GNU General Public License version 2 as
15 * published by the Free Software Foundation. 15 * published by the Free Software Foundation.
16 */ 16 */
17 17
18 #include <linux/stddef.h> 18 #include <linux/stddef.h>
19 #include <linux/kernel.h> 19 #include <linux/kernel.h>
20 #include <linux/linkage.h> 20 #include <linux/linkage.h>
21 #include <linux/compat.h> 21 #include <linux/compat.h>
22 #include <linux/errno.h> 22 #include <linux/errno.h>
23 #include <linux/time.h> 23 #include <linux/time.h>
24 #include <linux/fs.h> 24 #include <linux/fs.h>
25 #include <linux/fcntl.h> 25 #include <linux/fcntl.h>
26 #include <linux/namei.h> 26 #include <linux/namei.h>
27 #include <linux/file.h> 27 #include <linux/file.h>
28 #include <linux/fdtable.h> 28 #include <linux/fdtable.h>
29 #include <linux/vfs.h> 29 #include <linux/vfs.h>
30 #include <linux/ioctl.h> 30 #include <linux/ioctl.h>
31 #include <linux/init.h> 31 #include <linux/init.h>
32 #include <linux/ncp_mount.h> 32 #include <linux/ncp_mount.h>
33 #include <linux/nfs4_mount.h> 33 #include <linux/nfs4_mount.h>
34 #include <linux/syscalls.h> 34 #include <linux/syscalls.h>
35 #include <linux/ctype.h> 35 #include <linux/ctype.h>
36 #include <linux/dirent.h> 36 #include <linux/dirent.h>
37 #include <linux/fsnotify.h> 37 #include <linux/fsnotify.h>
38 #include <linux/highuid.h> 38 #include <linux/highuid.h>
39 #include <linux/personality.h> 39 #include <linux/personality.h>
40 #include <linux/rwsem.h> 40 #include <linux/rwsem.h>
41 #include <linux/tsacct_kern.h> 41 #include <linux/tsacct_kern.h>
42 #include <linux/security.h> 42 #include <linux/security.h>
43 #include <linux/highmem.h> 43 #include <linux/highmem.h>
44 #include <linux/signal.h> 44 #include <linux/signal.h>
45 #include <linux/poll.h> 45 #include <linux/poll.h>
46 #include <linux/mm.h> 46 #include <linux/mm.h>
47 #include <linux/fs_struct.h> 47 #include <linux/fs_struct.h>
48 #include <linux/slab.h> 48 #include <linux/slab.h>
49 #include <linux/pagemap.h> 49 #include <linux/pagemap.h>
50 #include <linux/aio.h> 50 #include <linux/aio.h>
51 51
52 #include <asm/uaccess.h> 52 #include <asm/uaccess.h>
53 #include <asm/mmu_context.h> 53 #include <asm/mmu_context.h>
54 #include <asm/ioctls.h> 54 #include <asm/ioctls.h>
55 #include "internal.h" 55 #include "internal.h"
56 56
57 int compat_log = 1; 57 int compat_log = 1;
58 58
59 int compat_printk(const char *fmt, ...) 59 int compat_printk(const char *fmt, ...)
60 { 60 {
61 va_list ap; 61 va_list ap;
62 int ret; 62 int ret;
63 if (!compat_log) 63 if (!compat_log)
64 return 0; 64 return 0;
65 va_start(ap, fmt); 65 va_start(ap, fmt);
66 ret = vprintk(fmt, ap); 66 ret = vprintk(fmt, ap);
67 va_end(ap); 67 va_end(ap);
68 return ret; 68 return ret;
69 } 69 }
70 70
71 /* 71 /*
72 * Not all architectures have sys_utime, so implement this in terms 72 * Not all architectures have sys_utime, so implement this in terms
73 * of sys_utimes. 73 * of sys_utimes.
74 */ 74 */
75 COMPAT_SYSCALL_DEFINE2(utime, const char __user *, filename, 75 COMPAT_SYSCALL_DEFINE2(utime, const char __user *, filename,
76 struct compat_utimbuf __user *, t) 76 struct compat_utimbuf __user *, t)
77 { 77 {
78 struct timespec tv[2]; 78 struct timespec tv[2];
79 79
80 if (t) { 80 if (t) {
81 if (get_user(tv[0].tv_sec, &t->actime) || 81 if (get_user(tv[0].tv_sec, &t->actime) ||
82 get_user(tv[1].tv_sec, &t->modtime)) 82 get_user(tv[1].tv_sec, &t->modtime))
83 return -EFAULT; 83 return -EFAULT;
84 tv[0].tv_nsec = 0; 84 tv[0].tv_nsec = 0;
85 tv[1].tv_nsec = 0; 85 tv[1].tv_nsec = 0;
86 } 86 }
87 return do_utimes(AT_FDCWD, filename, t ? tv : NULL, 0); 87 return do_utimes(AT_FDCWD, filename, t ? tv : NULL, 0);
88 } 88 }
89 89
90 COMPAT_SYSCALL_DEFINE4(utimensat, unsigned int, dfd, const char __user *, filename, struct compat_timespec __user *, t, int, flags) 90 COMPAT_SYSCALL_DEFINE4(utimensat, unsigned int, dfd, const char __user *, filename, struct compat_timespec __user *, t, int, flags)
91 { 91 {
92 struct timespec tv[2]; 92 struct timespec tv[2];
93 93
94 if (t) { 94 if (t) {
95 if (compat_get_timespec(&tv[0], &t[0]) || 95 if (compat_get_timespec(&tv[0], &t[0]) ||
96 compat_get_timespec(&tv[1], &t[1])) 96 compat_get_timespec(&tv[1], &t[1]))
97 return -EFAULT; 97 return -EFAULT;
98 98
99 if (tv[0].tv_nsec == UTIME_OMIT && tv[1].tv_nsec == UTIME_OMIT) 99 if (tv[0].tv_nsec == UTIME_OMIT && tv[1].tv_nsec == UTIME_OMIT)
100 return 0; 100 return 0;
101 } 101 }
102 return do_utimes(dfd, filename, t ? tv : NULL, flags); 102 return do_utimes(dfd, filename, t ? tv : NULL, flags);
103 } 103 }
104 104
105 COMPAT_SYSCALL_DEFINE3(futimesat, unsigned int, dfd, const char __user *, filename, struct compat_timeval __user *, t) 105 COMPAT_SYSCALL_DEFINE3(futimesat, unsigned int, dfd, const char __user *, filename, struct compat_timeval __user *, t)
106 { 106 {
107 struct timespec tv[2]; 107 struct timespec tv[2];
108 108
109 if (t) { 109 if (t) {
110 if (get_user(tv[0].tv_sec, &t[0].tv_sec) || 110 if (get_user(tv[0].tv_sec, &t[0].tv_sec) ||
111 get_user(tv[0].tv_nsec, &t[0].tv_usec) || 111 get_user(tv[0].tv_nsec, &t[0].tv_usec) ||
112 get_user(tv[1].tv_sec, &t[1].tv_sec) || 112 get_user(tv[1].tv_sec, &t[1].tv_sec) ||
113 get_user(tv[1].tv_nsec, &t[1].tv_usec)) 113 get_user(tv[1].tv_nsec, &t[1].tv_usec))
114 return -EFAULT; 114 return -EFAULT;
115 if (tv[0].tv_nsec >= 1000000 || tv[0].tv_nsec < 0 || 115 if (tv[0].tv_nsec >= 1000000 || tv[0].tv_nsec < 0 ||
116 tv[1].tv_nsec >= 1000000 || tv[1].tv_nsec < 0) 116 tv[1].tv_nsec >= 1000000 || tv[1].tv_nsec < 0)
117 return -EINVAL; 117 return -EINVAL;
118 tv[0].tv_nsec *= 1000; 118 tv[0].tv_nsec *= 1000;
119 tv[1].tv_nsec *= 1000; 119 tv[1].tv_nsec *= 1000;
120 } 120 }
121 return do_utimes(dfd, filename, t ? tv : NULL, 0); 121 return do_utimes(dfd, filename, t ? tv : NULL, 0);
122 } 122 }
123 123
124 COMPAT_SYSCALL_DEFINE2(utimes, const char __user *, filename, struct compat_timeval __user *, t) 124 COMPAT_SYSCALL_DEFINE2(utimes, const char __user *, filename, struct compat_timeval __user *, t)
125 { 125 {
126 return compat_sys_futimesat(AT_FDCWD, filename, t); 126 return compat_sys_futimesat(AT_FDCWD, filename, t);
127 } 127 }
128 128
129 static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) 129 static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf)
130 { 130 {
131 struct compat_stat tmp; 131 struct compat_stat tmp;
132 132
133 if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev)) 133 if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev))
134 return -EOVERFLOW; 134 return -EOVERFLOW;
135 135
136 memset(&tmp, 0, sizeof(tmp)); 136 memset(&tmp, 0, sizeof(tmp));
137 tmp.st_dev = old_encode_dev(stat->dev); 137 tmp.st_dev = old_encode_dev(stat->dev);
138 tmp.st_ino = stat->ino; 138 tmp.st_ino = stat->ino;
139 if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) 139 if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino)
140 return -EOVERFLOW; 140 return -EOVERFLOW;
141 tmp.st_mode = stat->mode; 141 tmp.st_mode = stat->mode;
142 tmp.st_nlink = stat->nlink; 142 tmp.st_nlink = stat->nlink;
143 if (tmp.st_nlink != stat->nlink) 143 if (tmp.st_nlink != stat->nlink)
144 return -EOVERFLOW; 144 return -EOVERFLOW;
145 SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); 145 SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid));
146 SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); 146 SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid));
147 tmp.st_rdev = old_encode_dev(stat->rdev); 147 tmp.st_rdev = old_encode_dev(stat->rdev);
148 if ((u64) stat->size > MAX_NON_LFS) 148 if ((u64) stat->size > MAX_NON_LFS)
149 return -EOVERFLOW; 149 return -EOVERFLOW;
150 tmp.st_size = stat->size; 150 tmp.st_size = stat->size;
151 tmp.st_atime = stat->atime.tv_sec; 151 tmp.st_atime = stat->atime.tv_sec;
152 tmp.st_atime_nsec = stat->atime.tv_nsec; 152 tmp.st_atime_nsec = stat->atime.tv_nsec;
153 tmp.st_mtime = stat->mtime.tv_sec; 153 tmp.st_mtime = stat->mtime.tv_sec;
154 tmp.st_mtime_nsec = stat->mtime.tv_nsec; 154 tmp.st_mtime_nsec = stat->mtime.tv_nsec;
155 tmp.st_ctime = stat->ctime.tv_sec; 155 tmp.st_ctime = stat->ctime.tv_sec;
156 tmp.st_ctime_nsec = stat->ctime.tv_nsec; 156 tmp.st_ctime_nsec = stat->ctime.tv_nsec;
157 tmp.st_blocks = stat->blocks; 157 tmp.st_blocks = stat->blocks;
158 tmp.st_blksize = stat->blksize; 158 tmp.st_blksize = stat->blksize;
159 return copy_to_user(ubuf, &tmp, sizeof(tmp)) ? -EFAULT : 0; 159 return copy_to_user(ubuf, &tmp, sizeof(tmp)) ? -EFAULT : 0;
160 } 160 }
161 161
162 COMPAT_SYSCALL_DEFINE2(newstat, const char __user *, filename, 162 COMPAT_SYSCALL_DEFINE2(newstat, const char __user *, filename,
163 struct compat_stat __user *, statbuf) 163 struct compat_stat __user *, statbuf)
164 { 164 {
165 struct kstat stat; 165 struct kstat stat;
166 int error; 166 int error;
167 167
168 error = vfs_stat(filename, &stat); 168 error = vfs_stat(filename, &stat);
169 if (error) 169 if (error)
170 return error; 170 return error;
171 return cp_compat_stat(&stat, statbuf); 171 return cp_compat_stat(&stat, statbuf);
172 } 172 }
173 173
174 COMPAT_SYSCALL_DEFINE2(newlstat, const char __user *, filename, 174 COMPAT_SYSCALL_DEFINE2(newlstat, const char __user *, filename,
175 struct compat_stat __user *, statbuf) 175 struct compat_stat __user *, statbuf)
176 { 176 {
177 struct kstat stat; 177 struct kstat stat;
178 int error; 178 int error;
179 179
180 error = vfs_lstat(filename, &stat); 180 error = vfs_lstat(filename, &stat);
181 if (error) 181 if (error)
182 return error; 182 return error;
183 return cp_compat_stat(&stat, statbuf); 183 return cp_compat_stat(&stat, statbuf);
184 } 184 }
185 185
186 #ifndef __ARCH_WANT_STAT64 186 #ifndef __ARCH_WANT_STAT64
187 COMPAT_SYSCALL_DEFINE4(newfstatat, unsigned int, dfd, 187 COMPAT_SYSCALL_DEFINE4(newfstatat, unsigned int, dfd,
188 const char __user *, filename, 188 const char __user *, filename,
189 struct compat_stat __user *, statbuf, int, flag) 189 struct compat_stat __user *, statbuf, int, flag)
190 { 190 {
191 struct kstat stat; 191 struct kstat stat;
192 int error; 192 int error;
193 193
194 error = vfs_fstatat(dfd, filename, &stat, flag); 194 error = vfs_fstatat(dfd, filename, &stat, flag);
195 if (error) 195 if (error)
196 return error; 196 return error;
197 return cp_compat_stat(&stat, statbuf); 197 return cp_compat_stat(&stat, statbuf);
198 } 198 }
199 #endif 199 #endif
200 200
201 COMPAT_SYSCALL_DEFINE2(newfstat, unsigned int, fd, 201 COMPAT_SYSCALL_DEFINE2(newfstat, unsigned int, fd,
202 struct compat_stat __user *, statbuf) 202 struct compat_stat __user *, statbuf)
203 { 203 {
204 struct kstat stat; 204 struct kstat stat;
205 int error = vfs_fstat(fd, &stat); 205 int error = vfs_fstat(fd, &stat);
206 206
207 if (!error) 207 if (!error)
208 error = cp_compat_stat(&stat, statbuf); 208 error = cp_compat_stat(&stat, statbuf);
209 return error; 209 return error;
210 } 210 }
211 211
212 static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *kbuf) 212 static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *kbuf)
213 { 213 {
214 214
215 if (sizeof ubuf->f_blocks == 4) { 215 if (sizeof ubuf->f_blocks == 4) {
216 if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail | 216 if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail |
217 kbuf->f_bsize | kbuf->f_frsize) & 0xffffffff00000000ULL) 217 kbuf->f_bsize | kbuf->f_frsize) & 0xffffffff00000000ULL)
218 return -EOVERFLOW; 218 return -EOVERFLOW;
219 /* f_files and f_ffree may be -1; it's okay 219 /* f_files and f_ffree may be -1; it's okay
220 * to stuff that into 32 bits */ 220 * to stuff that into 32 bits */
221 if (kbuf->f_files != 0xffffffffffffffffULL 221 if (kbuf->f_files != 0xffffffffffffffffULL
222 && (kbuf->f_files & 0xffffffff00000000ULL)) 222 && (kbuf->f_files & 0xffffffff00000000ULL))
223 return -EOVERFLOW; 223 return -EOVERFLOW;
224 if (kbuf->f_ffree != 0xffffffffffffffffULL 224 if (kbuf->f_ffree != 0xffffffffffffffffULL
225 && (kbuf->f_ffree & 0xffffffff00000000ULL)) 225 && (kbuf->f_ffree & 0xffffffff00000000ULL))
226 return -EOVERFLOW; 226 return -EOVERFLOW;
227 } 227 }
228 if (!access_ok(VERIFY_WRITE, ubuf, sizeof(*ubuf)) || 228 if (!access_ok(VERIFY_WRITE, ubuf, sizeof(*ubuf)) ||
229 __put_user(kbuf->f_type, &ubuf->f_type) || 229 __put_user(kbuf->f_type, &ubuf->f_type) ||
230 __put_user(kbuf->f_bsize, &ubuf->f_bsize) || 230 __put_user(kbuf->f_bsize, &ubuf->f_bsize) ||
231 __put_user(kbuf->f_blocks, &ubuf->f_blocks) || 231 __put_user(kbuf->f_blocks, &ubuf->f_blocks) ||
232 __put_user(kbuf->f_bfree, &ubuf->f_bfree) || 232 __put_user(kbuf->f_bfree, &ubuf->f_bfree) ||
233 __put_user(kbuf->f_bavail, &ubuf->f_bavail) || 233 __put_user(kbuf->f_bavail, &ubuf->f_bavail) ||
234 __put_user(kbuf->f_files, &ubuf->f_files) || 234 __put_user(kbuf->f_files, &ubuf->f_files) ||
235 __put_user(kbuf->f_ffree, &ubuf->f_ffree) || 235 __put_user(kbuf->f_ffree, &ubuf->f_ffree) ||
236 __put_user(kbuf->f_namelen, &ubuf->f_namelen) || 236 __put_user(kbuf->f_namelen, &ubuf->f_namelen) ||
237 __put_user(kbuf->f_fsid.val[0], &ubuf->f_fsid.val[0]) || 237 __put_user(kbuf->f_fsid.val[0], &ubuf->f_fsid.val[0]) ||
238 __put_user(kbuf->f_fsid.val[1], &ubuf->f_fsid.val[1]) || 238 __put_user(kbuf->f_fsid.val[1], &ubuf->f_fsid.val[1]) ||
239 __put_user(kbuf->f_frsize, &ubuf->f_frsize) || 239 __put_user(kbuf->f_frsize, &ubuf->f_frsize) ||
240 __put_user(kbuf->f_flags, &ubuf->f_flags) || 240 __put_user(kbuf->f_flags, &ubuf->f_flags) ||
241 __clear_user(ubuf->f_spare, sizeof(ubuf->f_spare))) 241 __clear_user(ubuf->f_spare, sizeof(ubuf->f_spare)))
242 return -EFAULT; 242 return -EFAULT;
243 return 0; 243 return 0;
244 } 244 }
245 245
246 /* 246 /*
247 * The following statfs calls are copies of code from fs/statfs.c and 247 * The following statfs calls are copies of code from fs/statfs.c and
248 * should be checked against those from time to time 248 * should be checked against those from time to time
249 */ 249 */
250 COMPAT_SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct compat_statfs __user *, buf) 250 COMPAT_SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct compat_statfs __user *, buf)
251 { 251 {
252 struct kstatfs tmp; 252 struct kstatfs tmp;
253 int error = user_statfs(pathname, &tmp); 253 int error = user_statfs(pathname, &tmp);
254 if (!error) 254 if (!error)
255 error = put_compat_statfs(buf, &tmp); 255 error = put_compat_statfs(buf, &tmp);
256 return error; 256 return error;
257 } 257 }
258 258
259 COMPAT_SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct compat_statfs __user *, buf) 259 COMPAT_SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct compat_statfs __user *, buf)
260 { 260 {
261 struct kstatfs tmp; 261 struct kstatfs tmp;
262 int error = fd_statfs(fd, &tmp); 262 int error = fd_statfs(fd, &tmp);
263 if (!error) 263 if (!error)
264 error = put_compat_statfs(buf, &tmp); 264 error = put_compat_statfs(buf, &tmp);
265 return error; 265 return error;
266 } 266 }
267 267
268 static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstatfs *kbuf) 268 static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstatfs *kbuf)
269 { 269 {
270 if (sizeof ubuf->f_blocks == 4) { 270 if (sizeof ubuf->f_blocks == 4) {
271 if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail | 271 if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail |
272 kbuf->f_bsize | kbuf->f_frsize) & 0xffffffff00000000ULL) 272 kbuf->f_bsize | kbuf->f_frsize) & 0xffffffff00000000ULL)
273 return -EOVERFLOW; 273 return -EOVERFLOW;
274 /* f_files and f_ffree may be -1; it's okay 274 /* f_files and f_ffree may be -1; it's okay
275 * to stuff that into 32 bits */ 275 * to stuff that into 32 bits */
276 if (kbuf->f_files != 0xffffffffffffffffULL 276 if (kbuf->f_files != 0xffffffffffffffffULL
277 && (kbuf->f_files & 0xffffffff00000000ULL)) 277 && (kbuf->f_files & 0xffffffff00000000ULL))
278 return -EOVERFLOW; 278 return -EOVERFLOW;
279 if (kbuf->f_ffree != 0xffffffffffffffffULL 279 if (kbuf->f_ffree != 0xffffffffffffffffULL
280 && (kbuf->f_ffree & 0xffffffff00000000ULL)) 280 && (kbuf->f_ffree & 0xffffffff00000000ULL))
281 return -EOVERFLOW; 281 return -EOVERFLOW;
282 } 282 }
283 if (!access_ok(VERIFY_WRITE, ubuf, sizeof(*ubuf)) || 283 if (!access_ok(VERIFY_WRITE, ubuf, sizeof(*ubuf)) ||
284 __put_user(kbuf->f_type, &ubuf->f_type) || 284 __put_user(kbuf->f_type, &ubuf->f_type) ||
285 __put_user(kbuf->f_bsize, &ubuf->f_bsize) || 285 __put_user(kbuf->f_bsize, &ubuf->f_bsize) ||
286 __put_user(kbuf->f_blocks, &ubuf->f_blocks) || 286 __put_user(kbuf->f_blocks, &ubuf->f_blocks) ||
287 __put_user(kbuf->f_bfree, &ubuf->f_bfree) || 287 __put_user(kbuf->f_bfree, &ubuf->f_bfree) ||
288 __put_user(kbuf->f_bavail, &ubuf->f_bavail) || 288 __put_user(kbuf->f_bavail, &ubuf->f_bavail) ||
289 __put_user(kbuf->f_files, &ubuf->f_files) || 289 __put_user(kbuf->f_files, &ubuf->f_files) ||
290 __put_user(kbuf->f_ffree, &ubuf->f_ffree) || 290 __put_user(kbuf->f_ffree, &ubuf->f_ffree) ||
291 __put_user(kbuf->f_namelen, &ubuf->f_namelen) || 291 __put_user(kbuf->f_namelen, &ubuf->f_namelen) ||
292 __put_user(kbuf->f_fsid.val[0], &ubuf->f_fsid.val[0]) || 292 __put_user(kbuf->f_fsid.val[0], &ubuf->f_fsid.val[0]) ||
293 __put_user(kbuf->f_fsid.val[1], &ubuf->f_fsid.val[1]) || 293 __put_user(kbuf->f_fsid.val[1], &ubuf->f_fsid.val[1]) ||
294 __put_user(kbuf->f_frsize, &ubuf->f_frsize) || 294 __put_user(kbuf->f_frsize, &ubuf->f_frsize) ||
295 __put_user(kbuf->f_flags, &ubuf->f_flags) || 295 __put_user(kbuf->f_flags, &ubuf->f_flags) ||
296 __clear_user(ubuf->f_spare, sizeof(ubuf->f_spare))) 296 __clear_user(ubuf->f_spare, sizeof(ubuf->f_spare)))
297 return -EFAULT; 297 return -EFAULT;
298 return 0; 298 return 0;
299 } 299 }
300 300
301 COMPAT_SYSCALL_DEFINE3(statfs64, const char __user *, pathname, compat_size_t, sz, struct compat_statfs64 __user *, buf) 301 COMPAT_SYSCALL_DEFINE3(statfs64, const char __user *, pathname, compat_size_t, sz, struct compat_statfs64 __user *, buf)
302 { 302 {
303 struct kstatfs tmp; 303 struct kstatfs tmp;
304 int error; 304 int error;
305 305
306 if (sz != sizeof(*buf)) 306 if (sz != sizeof(*buf))
307 return -EINVAL; 307 return -EINVAL;
308 308
309 error = user_statfs(pathname, &tmp); 309 error = user_statfs(pathname, &tmp);
310 if (!error) 310 if (!error)
311 error = put_compat_statfs64(buf, &tmp); 311 error = put_compat_statfs64(buf, &tmp);
312 return error; 312 return error;
313 } 313 }
314 314
315 COMPAT_SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, compat_size_t, sz, struct compat_statfs64 __user *, buf) 315 COMPAT_SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, compat_size_t, sz, struct compat_statfs64 __user *, buf)
316 { 316 {
317 struct kstatfs tmp; 317 struct kstatfs tmp;
318 int error; 318 int error;
319 319
320 if (sz != sizeof(*buf)) 320 if (sz != sizeof(*buf))
321 return -EINVAL; 321 return -EINVAL;
322 322
323 error = fd_statfs(fd, &tmp); 323 error = fd_statfs(fd, &tmp);
324 if (!error) 324 if (!error)
325 error = put_compat_statfs64(buf, &tmp); 325 error = put_compat_statfs64(buf, &tmp);
326 return error; 326 return error;
327 } 327 }
328 328
329 /* 329 /*
330 * This is a copy of sys_ustat, just dealing with a structure layout. 330 * This is a copy of sys_ustat, just dealing with a structure layout.
331 * Given how simple this syscall is that apporach is more maintainable 331 * Given how simple this syscall is that apporach is more maintainable
332 * than the various conversion hacks. 332 * than the various conversion hacks.
333 */ 333 */
334 COMPAT_SYSCALL_DEFINE2(ustat, unsigned, dev, struct compat_ustat __user *, u) 334 COMPAT_SYSCALL_DEFINE2(ustat, unsigned, dev, struct compat_ustat __user *, u)
335 { 335 {
336 struct compat_ustat tmp; 336 struct compat_ustat tmp;
337 struct kstatfs sbuf; 337 struct kstatfs sbuf;
338 int err = vfs_ustat(new_decode_dev(dev), &sbuf); 338 int err = vfs_ustat(new_decode_dev(dev), &sbuf);
339 if (err) 339 if (err)
340 return err; 340 return err;
341 341
342 memset(&tmp, 0, sizeof(struct compat_ustat)); 342 memset(&tmp, 0, sizeof(struct compat_ustat));
343 tmp.f_tfree = sbuf.f_bfree; 343 tmp.f_tfree = sbuf.f_bfree;
344 tmp.f_tinode = sbuf.f_ffree; 344 tmp.f_tinode = sbuf.f_ffree;
345 if (copy_to_user(u, &tmp, sizeof(struct compat_ustat))) 345 if (copy_to_user(u, &tmp, sizeof(struct compat_ustat)))
346 return -EFAULT; 346 return -EFAULT;
347 return 0; 347 return 0;
348 } 348 }
349 349
350 static int get_compat_flock(struct flock *kfl, struct compat_flock __user *ufl) 350 static int get_compat_flock(struct flock *kfl, struct compat_flock __user *ufl)
351 { 351 {
352 if (!access_ok(VERIFY_READ, ufl, sizeof(*ufl)) || 352 if (!access_ok(VERIFY_READ, ufl, sizeof(*ufl)) ||
353 __get_user(kfl->l_type, &ufl->l_type) || 353 __get_user(kfl->l_type, &ufl->l_type) ||
354 __get_user(kfl->l_whence, &ufl->l_whence) || 354 __get_user(kfl->l_whence, &ufl->l_whence) ||
355 __get_user(kfl->l_start, &ufl->l_start) || 355 __get_user(kfl->l_start, &ufl->l_start) ||
356 __get_user(kfl->l_len, &ufl->l_len) || 356 __get_user(kfl->l_len, &ufl->l_len) ||
357 __get_user(kfl->l_pid, &ufl->l_pid)) 357 __get_user(kfl->l_pid, &ufl->l_pid))
358 return -EFAULT; 358 return -EFAULT;
359 return 0; 359 return 0;
360 } 360 }
361 361
362 static int put_compat_flock(struct flock *kfl, struct compat_flock __user *ufl) 362 static int put_compat_flock(struct flock *kfl, struct compat_flock __user *ufl)
363 { 363 {
364 if (!access_ok(VERIFY_WRITE, ufl, sizeof(*ufl)) || 364 if (!access_ok(VERIFY_WRITE, ufl, sizeof(*ufl)) ||
365 __put_user(kfl->l_type, &ufl->l_type) || 365 __put_user(kfl->l_type, &ufl->l_type) ||
366 __put_user(kfl->l_whence, &ufl->l_whence) || 366 __put_user(kfl->l_whence, &ufl->l_whence) ||
367 __put_user(kfl->l_start, &ufl->l_start) || 367 __put_user(kfl->l_start, &ufl->l_start) ||
368 __put_user(kfl->l_len, &ufl->l_len) || 368 __put_user(kfl->l_len, &ufl->l_len) ||
369 __put_user(kfl->l_pid, &ufl->l_pid)) 369 __put_user(kfl->l_pid, &ufl->l_pid))
370 return -EFAULT; 370 return -EFAULT;
371 return 0; 371 return 0;
372 } 372 }
373 373
374 #ifndef HAVE_ARCH_GET_COMPAT_FLOCK64 374 #ifndef HAVE_ARCH_GET_COMPAT_FLOCK64
375 static int get_compat_flock64(struct flock *kfl, struct compat_flock64 __user *ufl) 375 static int get_compat_flock64(struct flock *kfl, struct compat_flock64 __user *ufl)
376 { 376 {
377 if (!access_ok(VERIFY_READ, ufl, sizeof(*ufl)) || 377 if (!access_ok(VERIFY_READ, ufl, sizeof(*ufl)) ||
378 __get_user(kfl->l_type, &ufl->l_type) || 378 __get_user(kfl->l_type, &ufl->l_type) ||
379 __get_user(kfl->l_whence, &ufl->l_whence) || 379 __get_user(kfl->l_whence, &ufl->l_whence) ||
380 __get_user(kfl->l_start, &ufl->l_start) || 380 __get_user(kfl->l_start, &ufl->l_start) ||
381 __get_user(kfl->l_len, &ufl->l_len) || 381 __get_user(kfl->l_len, &ufl->l_len) ||
382 __get_user(kfl->l_pid, &ufl->l_pid)) 382 __get_user(kfl->l_pid, &ufl->l_pid))
383 return -EFAULT; 383 return -EFAULT;
384 return 0; 384 return 0;
385 } 385 }
386 #endif 386 #endif
387 387
388 #ifndef HAVE_ARCH_PUT_COMPAT_FLOCK64 388 #ifndef HAVE_ARCH_PUT_COMPAT_FLOCK64
389 static int put_compat_flock64(struct flock *kfl, struct compat_flock64 __user *ufl) 389 static int put_compat_flock64(struct flock *kfl, struct compat_flock64 __user *ufl)
390 { 390 {
391 if (!access_ok(VERIFY_WRITE, ufl, sizeof(*ufl)) || 391 if (!access_ok(VERIFY_WRITE, ufl, sizeof(*ufl)) ||
392 __put_user(kfl->l_type, &ufl->l_type) || 392 __put_user(kfl->l_type, &ufl->l_type) ||
393 __put_user(kfl->l_whence, &ufl->l_whence) || 393 __put_user(kfl->l_whence, &ufl->l_whence) ||
394 __put_user(kfl->l_start, &ufl->l_start) || 394 __put_user(kfl->l_start, &ufl->l_start) ||
395 __put_user(kfl->l_len, &ufl->l_len) || 395 __put_user(kfl->l_len, &ufl->l_len) ||
396 __put_user(kfl->l_pid, &ufl->l_pid)) 396 __put_user(kfl->l_pid, &ufl->l_pid))
397 return -EFAULT; 397 return -EFAULT;
398 return 0; 398 return 0;
399 } 399 }
400 #endif 400 #endif
401 401
402 static unsigned int 402 static unsigned int
403 convert_fcntl_cmd(unsigned int cmd) 403 convert_fcntl_cmd(unsigned int cmd)
404 { 404 {
405 switch (cmd) { 405 switch (cmd) {
406 case F_GETLK64: 406 case F_GETLK64:
407 return F_GETLK; 407 return F_GETLK;
408 case F_SETLK64: 408 case F_SETLK64:
409 return F_SETLK; 409 return F_SETLK;
410 case F_SETLKW64: 410 case F_SETLKW64:
411 return F_SETLKW; 411 return F_SETLKW;
412 } 412 }
413 413
414 return cmd; 414 return cmd;
415 } 415 }
416 416
417 COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, 417 COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
418 compat_ulong_t, arg) 418 compat_ulong_t, arg)
419 { 419 {
420 mm_segment_t old_fs; 420 mm_segment_t old_fs;
421 struct flock f; 421 struct flock f;
422 long ret; 422 long ret;
423 unsigned int conv_cmd; 423 unsigned int conv_cmd;
424 424
425 switch (cmd) { 425 switch (cmd) {
426 case F_GETLK: 426 case F_GETLK:
427 case F_SETLK: 427 case F_SETLK:
428 case F_SETLKW: 428 case F_SETLKW:
429 ret = get_compat_flock(&f, compat_ptr(arg)); 429 ret = get_compat_flock(&f, compat_ptr(arg));
430 if (ret != 0) 430 if (ret != 0)
431 break; 431 break;
432 old_fs = get_fs(); 432 old_fs = get_fs();
433 set_fs(KERNEL_DS); 433 set_fs(KERNEL_DS);
434 ret = sys_fcntl(fd, cmd, (unsigned long)&f); 434 ret = sys_fcntl(fd, cmd, (unsigned long)&f);
435 set_fs(old_fs); 435 set_fs(old_fs);
436 if (cmd == F_GETLK && ret == 0) { 436 if (cmd == F_GETLK && ret == 0) {
437 /* GETLK was successful and we need to return the data... 437 /* GETLK was successful and we need to return the data...
438 * but it needs to fit in the compat structure. 438 * but it needs to fit in the compat structure.
439 * l_start shouldn't be too big, unless the original 439 * l_start shouldn't be too big, unless the original
440 * start + end is greater than COMPAT_OFF_T_MAX, in which 440 * start + end is greater than COMPAT_OFF_T_MAX, in which
441 * case the app was asking for trouble, so we return 441 * case the app was asking for trouble, so we return
442 * -EOVERFLOW in that case. 442 * -EOVERFLOW in that case.
443 * l_len could be too big, in which case we just truncate it, 443 * l_len could be too big, in which case we just truncate it,
444 * and only allow the app to see that part of the conflicting 444 * and only allow the app to see that part of the conflicting
445 * lock that might make sense to it anyway 445 * lock that might make sense to it anyway
446 */ 446 */
447 447
448 if (f.l_start > COMPAT_OFF_T_MAX) 448 if (f.l_start > COMPAT_OFF_T_MAX)
449 ret = -EOVERFLOW; 449 ret = -EOVERFLOW;
450 if (f.l_len > COMPAT_OFF_T_MAX) 450 if (f.l_len > COMPAT_OFF_T_MAX)
451 f.l_len = COMPAT_OFF_T_MAX; 451 f.l_len = COMPAT_OFF_T_MAX;
452 if (ret == 0) 452 if (ret == 0)
453 ret = put_compat_flock(&f, compat_ptr(arg)); 453 ret = put_compat_flock(&f, compat_ptr(arg));
454 } 454 }
455 break; 455 break;
456 456
457 case F_GETLK64: 457 case F_GETLK64:
458 case F_SETLK64: 458 case F_SETLK64:
459 case F_SETLKW64: 459 case F_SETLKW64:
460 case F_GETLKP: 460 case F_OFD_GETLK:
461 case F_SETLKP: 461 case F_OFD_SETLK:
462 case F_SETLKPW: 462 case F_OFD_SETLKW:
463 ret = get_compat_flock64(&f, compat_ptr(arg)); 463 ret = get_compat_flock64(&f, compat_ptr(arg));
464 if (ret != 0) 464 if (ret != 0)
465 break; 465 break;
466 old_fs = get_fs(); 466 old_fs = get_fs();
467 set_fs(KERNEL_DS); 467 set_fs(KERNEL_DS);
468 conv_cmd = convert_fcntl_cmd(cmd); 468 conv_cmd = convert_fcntl_cmd(cmd);
469 ret = sys_fcntl(fd, conv_cmd, (unsigned long)&f); 469 ret = sys_fcntl(fd, conv_cmd, (unsigned long)&f);
470 set_fs(old_fs); 470 set_fs(old_fs);
471 if ((conv_cmd == F_GETLK || conv_cmd == F_GETLKP) && ret == 0) { 471 if ((conv_cmd == F_GETLK || conv_cmd == F_OFD_GETLK) && ret == 0) {
472 /* need to return lock information - see above for commentary */ 472 /* need to return lock information - see above for commentary */
473 if (f.l_start > COMPAT_LOFF_T_MAX) 473 if (f.l_start > COMPAT_LOFF_T_MAX)
474 ret = -EOVERFLOW; 474 ret = -EOVERFLOW;
475 if (f.l_len > COMPAT_LOFF_T_MAX) 475 if (f.l_len > COMPAT_LOFF_T_MAX)
476 f.l_len = COMPAT_LOFF_T_MAX; 476 f.l_len = COMPAT_LOFF_T_MAX;
477 if (ret == 0) 477 if (ret == 0)
478 ret = put_compat_flock64(&f, compat_ptr(arg)); 478 ret = put_compat_flock64(&f, compat_ptr(arg));
479 } 479 }
480 break; 480 break;
481 481
482 default: 482 default:
483 ret = sys_fcntl(fd, cmd, arg); 483 ret = sys_fcntl(fd, cmd, arg);
484 break; 484 break;
485 } 485 }
486 return ret; 486 return ret;
487 } 487 }
488 488
489 COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, 489 COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
490 compat_ulong_t, arg) 490 compat_ulong_t, arg)
491 { 491 {
492 switch (cmd) { 492 switch (cmd) {
493 case F_GETLK64: 493 case F_GETLK64:
494 case F_SETLK64: 494 case F_SETLK64:
495 case F_SETLKW64: 495 case F_SETLKW64:
496 case F_GETLKP: 496 case F_OFD_GETLK:
497 case F_SETLKP: 497 case F_OFD_SETLK:
498 case F_SETLKPW: 498 case F_OFD_SETLKW:
499 return -EINVAL; 499 return -EINVAL;
500 } 500 }
501 return compat_sys_fcntl64(fd, cmd, arg); 501 return compat_sys_fcntl64(fd, cmd, arg);
502 } 502 }
503 503
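[Editorial aside, not part of the diff] The -EINVAL cases above exist because the plain compat fcntl() entry only understands the small struct flock; the *64 commands and the new F_OFD_* commands carry a struct flock64 and therefore have to arrive through fcntl64(). Seen from userspace, an open file description lock looks like the sketch below. It assumes a glibc recent enough to define F_OFD_SETLK (added alongside kernel 3.15) and keeps error handling minimal.

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/stat.h>
    #include <unistd.h>

    int main(void)
    {
        int fd = open("/tmp/ofd-demo", O_RDWR | O_CREAT, 0600);
        struct flock fl = {
            .l_type   = F_WRLCK,   /* exclusive lock                    */
            .l_whence = SEEK_SET,
            .l_start  = 0,
            .l_len    = 0,         /* zero length means "to end of file" */
            .l_pid    = 0,         /* must be zero for the OFD commands  */
        };

        if (fd < 0 || fcntl(fd, F_OFD_SETLK, &fl) == -1) {
            perror("F_OFD_SETLK");
            return 1;
        }
        puts("open file description lock acquired");
        close(fd);                 /* last close of this description drops it */
        return 0;
    }

Unlike a traditional POSIX lock, the lock belongs to the open file description, so it is shared by every thread and duplicated descriptor that refers to it, and it is dropped only when the last of them is closed.
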
504 COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_reqs, u32 __user *, ctx32p) 504 COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_reqs, u32 __user *, ctx32p)
505 { 505 {
506 long ret; 506 long ret;
507 aio_context_t ctx64; 507 aio_context_t ctx64;
508 508
509 mm_segment_t oldfs = get_fs(); 509 mm_segment_t oldfs = get_fs();
510 if (unlikely(get_user(ctx64, ctx32p))) 510 if (unlikely(get_user(ctx64, ctx32p)))
511 return -EFAULT; 511 return -EFAULT;
512 512
513 set_fs(KERNEL_DS); 513 set_fs(KERNEL_DS);
514 /* The __user pointer cast is valid because of the set_fs() */ 514 /* The __user pointer cast is valid because of the set_fs() */
515 ret = sys_io_setup(nr_reqs, (aio_context_t __user *) &ctx64); 515 ret = sys_io_setup(nr_reqs, (aio_context_t __user *) &ctx64);
516 set_fs(oldfs); 516 set_fs(oldfs);
517 /* truncating is ok because it's a user address */ 517 /* truncating is ok because it's a user address */
518 if (!ret) 518 if (!ret)
519 ret = put_user((u32) ctx64, ctx32p); 519 ret = put_user((u32) ctx64, ctx32p);
520 return ret; 520 return ret;
521 } 521 }
522 522
523 COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id, 523 COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id,
524 compat_long_t, min_nr, 524 compat_long_t, min_nr,
525 compat_long_t, nr, 525 compat_long_t, nr,
526 struct io_event __user *, events, 526 struct io_event __user *, events,
527 struct compat_timespec __user *, timeout) 527 struct compat_timespec __user *, timeout)
528 { 528 {
529 struct timespec t; 529 struct timespec t;
530 struct timespec __user *ut = NULL; 530 struct timespec __user *ut = NULL;
531 531
532 if (timeout) { 532 if (timeout) {
533 if (compat_get_timespec(&t, timeout)) 533 if (compat_get_timespec(&t, timeout))
534 return -EFAULT; 534 return -EFAULT;
535 535
536 ut = compat_alloc_user_space(sizeof(*ut)); 536 ut = compat_alloc_user_space(sizeof(*ut));
537 if (copy_to_user(ut, &t, sizeof(t)) ) 537 if (copy_to_user(ut, &t, sizeof(t)) )
538 return -EFAULT; 538 return -EFAULT;
539 } 539 }
540 return sys_io_getevents(ctx_id, min_nr, nr, events, ut); 540 return sys_io_getevents(ctx_id, min_nr, nr, events, ut);
541 } 541 }
542 542
543 /* A write operation does a read from user space and vice versa */ 543 /* A write operation does a read from user space and vice versa */
544 #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) 544 #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
545 545
546 ssize_t compat_rw_copy_check_uvector(int type, 546 ssize_t compat_rw_copy_check_uvector(int type,
547 const struct compat_iovec __user *uvector, unsigned long nr_segs, 547 const struct compat_iovec __user *uvector, unsigned long nr_segs,
548 unsigned long fast_segs, struct iovec *fast_pointer, 548 unsigned long fast_segs, struct iovec *fast_pointer,
549 struct iovec **ret_pointer) 549 struct iovec **ret_pointer)
550 { 550 {
551 compat_ssize_t tot_len; 551 compat_ssize_t tot_len;
552 struct iovec *iov = *ret_pointer = fast_pointer; 552 struct iovec *iov = *ret_pointer = fast_pointer;
553 ssize_t ret = 0; 553 ssize_t ret = 0;
554 int seg; 554 int seg;
555 555
556 /* 556 /*
557 * SuS says "The readv() function *may* fail if the iovcnt argument 557 * SuS says "The readv() function *may* fail if the iovcnt argument
558 * was less than or equal to 0, or greater than {IOV_MAX}." Linux has 558 * was less than or equal to 0, or greater than {IOV_MAX}." Linux has
559 * traditionally returned zero for zero segments, so... 559 * traditionally returned zero for zero segments, so...
560 */ 560 */
561 if (nr_segs == 0) 561 if (nr_segs == 0)
562 goto out; 562 goto out;
563 563
564 ret = -EINVAL; 564 ret = -EINVAL;
565 if (nr_segs > UIO_MAXIOV || nr_segs < 0) 565 if (nr_segs > UIO_MAXIOV || nr_segs < 0)
566 goto out; 566 goto out;
567 if (nr_segs > fast_segs) { 567 if (nr_segs > fast_segs) {
568 ret = -ENOMEM; 568 ret = -ENOMEM;
569 iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); 569 iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
570 if (iov == NULL) 570 if (iov == NULL)
571 goto out; 571 goto out;
572 } 572 }
573 *ret_pointer = iov; 573 *ret_pointer = iov;
574 574
575 ret = -EFAULT; 575 ret = -EFAULT;
576 if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector))) 576 if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector)))
577 goto out; 577 goto out;
578 578
579 /* 579 /*
580 * Single unix specification: 580 * Single unix specification:
581 * We should return -EINVAL if an element length is negative or does 581 * We should return -EINVAL if an element length is negative or does
582 * not fit in an ssize_t. 582 * not fit in an ssize_t.
583 * 583 *
584 * In Linux, the total length is limited to MAX_RW_COUNT, there is 584 * In Linux, the total length is limited to MAX_RW_COUNT, there is
585 * no overflow possibility. 585 * no overflow possibility.
586 */ 586 */
587 tot_len = 0; 587 tot_len = 0;
588 ret = -EINVAL; 588 ret = -EINVAL;
589 for (seg = 0; seg < nr_segs; seg++) { 589 for (seg = 0; seg < nr_segs; seg++) {
590 compat_uptr_t buf; 590 compat_uptr_t buf;
591 compat_ssize_t len; 591 compat_ssize_t len;
592 592
593 if (__get_user(len, &uvector->iov_len) || 593 if (__get_user(len, &uvector->iov_len) ||
594 __get_user(buf, &uvector->iov_base)) { 594 __get_user(buf, &uvector->iov_base)) {
595 ret = -EFAULT; 595 ret = -EFAULT;
596 goto out; 596 goto out;
597 } 597 }
598 if (len < 0) /* size_t not fitting in compat_ssize_t .. */ 598 if (len < 0) /* size_t not fitting in compat_ssize_t .. */
599 goto out; 599 goto out;
600 if (type >= 0 && 600 if (type >= 0 &&
601 !access_ok(vrfy_dir(type), compat_ptr(buf), len)) { 601 !access_ok(vrfy_dir(type), compat_ptr(buf), len)) {
602 ret = -EFAULT; 602 ret = -EFAULT;
603 goto out; 603 goto out;
604 } 604 }
605 if (len > MAX_RW_COUNT - tot_len) 605 if (len > MAX_RW_COUNT - tot_len)
606 len = MAX_RW_COUNT - tot_len; 606 len = MAX_RW_COUNT - tot_len;
607 tot_len += len; 607 tot_len += len;
608 iov->iov_base = compat_ptr(buf); 608 iov->iov_base = compat_ptr(buf);
609 iov->iov_len = (compat_size_t) len; 609 iov->iov_len = (compat_size_t) len;
610 uvector++; 610 uvector++;
611 iov++; 611 iov++;
612 } 612 }
613 ret = tot_len; 613 ret = tot_len;
614 614
615 out: 615 out:
616 return ret; 616 return ret;
617 } 617 }
618 618
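[Editorial aside, not part of the diff] The two comments inside compat_rw_copy_check_uvector() describe a small policy: zero segments return zero rather than an error, a negative length is -EINVAL, and the running total is clamped so it can never exceed MAX_RW_COUNT. A stand-alone model of that policy, with MAX_TOTAL and struct seg as arbitrary stand-ins:

    #include <stddef.h>
    #include <stdio.h>

    #define MAX_TOTAL (1L << 20)            /* stand-in for MAX_RW_COUNT */

    struct seg { const void *base; long len; };

    static long total_len(const struct seg *v, size_t n)
    {
        long tot = 0;

        if (n == 0)
            return 0;                        /* historical Linux behaviour */
        for (size_t i = 0; i < n; i++) {
            long len = v[i].len;

            if (len < 0)
                return -1;                   /* would be -EINVAL           */
            if (len > MAX_TOTAL - tot)
                len = MAX_TOTAL - tot;       /* clamp instead of overflowing */
            tot += len;
        }
        return tot;
    }

    int main(void)
    {
        struct seg v[2] = { { "abc", 3 }, { "defg", 4 } };

        printf("total = %ld\n", total_len(v, 2));
        return 0;
    }
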
619 static inline long 619 static inline long
620 copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64) 620 copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64)
621 { 621 {
622 compat_uptr_t uptr; 622 compat_uptr_t uptr;
623 int i; 623 int i;
624 624
625 for (i = 0; i < nr; ++i) { 625 for (i = 0; i < nr; ++i) {
626 if (get_user(uptr, ptr32 + i)) 626 if (get_user(uptr, ptr32 + i))
627 return -EFAULT; 627 return -EFAULT;
628 if (put_user(compat_ptr(uptr), ptr64 + i)) 628 if (put_user(compat_ptr(uptr), ptr64 + i))
629 return -EFAULT; 629 return -EFAULT;
630 } 630 }
631 return 0; 631 return 0;
632 } 632 }
633 633
634 #define MAX_AIO_SUBMITS (PAGE_SIZE/sizeof(struct iocb *)) 634 #define MAX_AIO_SUBMITS (PAGE_SIZE/sizeof(struct iocb *))
635 635
636 COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, 636 COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
637 int, nr, u32 __user *, iocb) 637 int, nr, u32 __user *, iocb)
638 { 638 {
639 struct iocb __user * __user *iocb64; 639 struct iocb __user * __user *iocb64;
640 long ret; 640 long ret;
641 641
642 if (unlikely(nr < 0)) 642 if (unlikely(nr < 0))
643 return -EINVAL; 643 return -EINVAL;
644 644
645 if (nr > MAX_AIO_SUBMITS) 645 if (nr > MAX_AIO_SUBMITS)
646 nr = MAX_AIO_SUBMITS; 646 nr = MAX_AIO_SUBMITS;
647 647
648 iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64)); 648 iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64));
649 ret = copy_iocb(nr, iocb, iocb64); 649 ret = copy_iocb(nr, iocb, iocb64);
650 if (!ret) 650 if (!ret)
651 ret = do_io_submit(ctx_id, nr, iocb64, 1); 651 ret = do_io_submit(ctx_id, nr, iocb64, 1);
652 return ret; 652 return ret;
653 } 653 }
654 654
655 struct compat_ncp_mount_data { 655 struct compat_ncp_mount_data {
656 compat_int_t version; 656 compat_int_t version;
657 compat_uint_t ncp_fd; 657 compat_uint_t ncp_fd;
658 __compat_uid_t mounted_uid; 658 __compat_uid_t mounted_uid;
659 compat_pid_t wdog_pid; 659 compat_pid_t wdog_pid;
660 unsigned char mounted_vol[NCP_VOLNAME_LEN + 1]; 660 unsigned char mounted_vol[NCP_VOLNAME_LEN + 1];
661 compat_uint_t time_out; 661 compat_uint_t time_out;
662 compat_uint_t retry_count; 662 compat_uint_t retry_count;
663 compat_uint_t flags; 663 compat_uint_t flags;
664 __compat_uid_t uid; 664 __compat_uid_t uid;
665 __compat_gid_t gid; 665 __compat_gid_t gid;
666 compat_mode_t file_mode; 666 compat_mode_t file_mode;
667 compat_mode_t dir_mode; 667 compat_mode_t dir_mode;
668 }; 668 };
669 669
670 struct compat_ncp_mount_data_v4 { 670 struct compat_ncp_mount_data_v4 {
671 compat_int_t version; 671 compat_int_t version;
672 compat_ulong_t flags; 672 compat_ulong_t flags;
673 compat_ulong_t mounted_uid; 673 compat_ulong_t mounted_uid;
674 compat_long_t wdog_pid; 674 compat_long_t wdog_pid;
675 compat_uint_t ncp_fd; 675 compat_uint_t ncp_fd;
676 compat_uint_t time_out; 676 compat_uint_t time_out;
677 compat_uint_t retry_count; 677 compat_uint_t retry_count;
678 compat_ulong_t uid; 678 compat_ulong_t uid;
679 compat_ulong_t gid; 679 compat_ulong_t gid;
680 compat_ulong_t file_mode; 680 compat_ulong_t file_mode;
681 compat_ulong_t dir_mode; 681 compat_ulong_t dir_mode;
682 }; 682 };
683 683
684 static void *do_ncp_super_data_conv(void *raw_data) 684 static void *do_ncp_super_data_conv(void *raw_data)
685 { 685 {
686 int version = *(unsigned int *)raw_data; 686 int version = *(unsigned int *)raw_data;
687 687
688 if (version == 3) { 688 if (version == 3) {
689 struct compat_ncp_mount_data *c_n = raw_data; 689 struct compat_ncp_mount_data *c_n = raw_data;
690 struct ncp_mount_data *n = raw_data; 690 struct ncp_mount_data *n = raw_data;
691 691
692 n->dir_mode = c_n->dir_mode; 692 n->dir_mode = c_n->dir_mode;
693 n->file_mode = c_n->file_mode; 693 n->file_mode = c_n->file_mode;
694 n->gid = c_n->gid; 694 n->gid = c_n->gid;
695 n->uid = c_n->uid; 695 n->uid = c_n->uid;
696 memmove (n->mounted_vol, c_n->mounted_vol, (sizeof (c_n->mounted_vol) + 3 * sizeof (unsigned int))); 696 memmove (n->mounted_vol, c_n->mounted_vol, (sizeof (c_n->mounted_vol) + 3 * sizeof (unsigned int)));
697 n->wdog_pid = c_n->wdog_pid; 697 n->wdog_pid = c_n->wdog_pid;
698 n->mounted_uid = c_n->mounted_uid; 698 n->mounted_uid = c_n->mounted_uid;
699 } else if (version == 4) { 699 } else if (version == 4) {
700 struct compat_ncp_mount_data_v4 *c_n = raw_data; 700 struct compat_ncp_mount_data_v4 *c_n = raw_data;
701 struct ncp_mount_data_v4 *n = raw_data; 701 struct ncp_mount_data_v4 *n = raw_data;
702 702
703 n->dir_mode = c_n->dir_mode; 703 n->dir_mode = c_n->dir_mode;
704 n->file_mode = c_n->file_mode; 704 n->file_mode = c_n->file_mode;
705 n->gid = c_n->gid; 705 n->gid = c_n->gid;
706 n->uid = c_n->uid; 706 n->uid = c_n->uid;
707 n->retry_count = c_n->retry_count; 707 n->retry_count = c_n->retry_count;
708 n->time_out = c_n->time_out; 708 n->time_out = c_n->time_out;
709 n->ncp_fd = c_n->ncp_fd; 709 n->ncp_fd = c_n->ncp_fd;
710 n->wdog_pid = c_n->wdog_pid; 710 n->wdog_pid = c_n->wdog_pid;
711 n->mounted_uid = c_n->mounted_uid; 711 n->mounted_uid = c_n->mounted_uid;
712 n->flags = c_n->flags; 712 n->flags = c_n->flags;
713 } else if (version != 5) { 713 } else if (version != 5) {
714 return NULL; 714 return NULL;
715 } 715 }
716 716
717 return raw_data; 717 return raw_data;
718 } 718 }
719 719
720 720
721 struct compat_nfs_string { 721 struct compat_nfs_string {
722 compat_uint_t len; 722 compat_uint_t len;
723 compat_uptr_t data; 723 compat_uptr_t data;
724 }; 724 };
725 725
726 static inline void compat_nfs_string(struct nfs_string *dst, 726 static inline void compat_nfs_string(struct nfs_string *dst,
727 struct compat_nfs_string *src) 727 struct compat_nfs_string *src)
728 { 728 {
729 dst->data = compat_ptr(src->data); 729 dst->data = compat_ptr(src->data);
730 dst->len = src->len; 730 dst->len = src->len;
731 } 731 }
732 732
733 struct compat_nfs4_mount_data_v1 { 733 struct compat_nfs4_mount_data_v1 {
734 compat_int_t version; 734 compat_int_t version;
735 compat_int_t flags; 735 compat_int_t flags;
736 compat_int_t rsize; 736 compat_int_t rsize;
737 compat_int_t wsize; 737 compat_int_t wsize;
738 compat_int_t timeo; 738 compat_int_t timeo;
739 compat_int_t retrans; 739 compat_int_t retrans;
740 compat_int_t acregmin; 740 compat_int_t acregmin;
741 compat_int_t acregmax; 741 compat_int_t acregmax;
742 compat_int_t acdirmin; 742 compat_int_t acdirmin;
743 compat_int_t acdirmax; 743 compat_int_t acdirmax;
744 struct compat_nfs_string client_addr; 744 struct compat_nfs_string client_addr;
745 struct compat_nfs_string mnt_path; 745 struct compat_nfs_string mnt_path;
746 struct compat_nfs_string hostname; 746 struct compat_nfs_string hostname;
747 compat_uint_t host_addrlen; 747 compat_uint_t host_addrlen;
748 compat_uptr_t host_addr; 748 compat_uptr_t host_addr;
749 compat_int_t proto; 749 compat_int_t proto;
750 compat_int_t auth_flavourlen; 750 compat_int_t auth_flavourlen;
751 compat_uptr_t auth_flavours; 751 compat_uptr_t auth_flavours;
752 }; 752 };
753 753
754 static int do_nfs4_super_data_conv(void *raw_data) 754 static int do_nfs4_super_data_conv(void *raw_data)
755 { 755 {
756 int version = *(compat_uint_t *) raw_data; 756 int version = *(compat_uint_t *) raw_data;
757 757
758 if (version == 1) { 758 if (version == 1) {
759 struct compat_nfs4_mount_data_v1 *raw = raw_data; 759 struct compat_nfs4_mount_data_v1 *raw = raw_data;
760 struct nfs4_mount_data *real = raw_data; 760 struct nfs4_mount_data *real = raw_data;
761 761
762 /* copy the fields backwards */ 762 /* copy the fields backwards */
763 real->auth_flavours = compat_ptr(raw->auth_flavours); 763 real->auth_flavours = compat_ptr(raw->auth_flavours);
764 real->auth_flavourlen = raw->auth_flavourlen; 764 real->auth_flavourlen = raw->auth_flavourlen;
765 real->proto = raw->proto; 765 real->proto = raw->proto;
766 real->host_addr = compat_ptr(raw->host_addr); 766 real->host_addr = compat_ptr(raw->host_addr);
767 real->host_addrlen = raw->host_addrlen; 767 real->host_addrlen = raw->host_addrlen;
768 compat_nfs_string(&real->hostname, &raw->hostname); 768 compat_nfs_string(&real->hostname, &raw->hostname);
769 compat_nfs_string(&real->mnt_path, &raw->mnt_path); 769 compat_nfs_string(&real->mnt_path, &raw->mnt_path);
770 compat_nfs_string(&real->client_addr, &raw->client_addr); 770 compat_nfs_string(&real->client_addr, &raw->client_addr);
771 real->acdirmax = raw->acdirmax; 771 real->acdirmax = raw->acdirmax;
772 real->acdirmin = raw->acdirmin; 772 real->acdirmin = raw->acdirmin;
773 real->acregmax = raw->acregmax; 773 real->acregmax = raw->acregmax;
774 real->acregmin = raw->acregmin; 774 real->acregmin = raw->acregmin;
775 real->retrans = raw->retrans; 775 real->retrans = raw->retrans;
776 real->timeo = raw->timeo; 776 real->timeo = raw->timeo;
777 real->wsize = raw->wsize; 777 real->wsize = raw->wsize;
778 real->rsize = raw->rsize; 778 real->rsize = raw->rsize;
779 real->flags = raw->flags; 779 real->flags = raw->flags;
780 real->version = raw->version; 780 real->version = raw->version;
781 } 781 }
782 782
783 return 0; 783 return 0;
784 } 784 }
785 785
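[Editorial aside, not part of the diff] Both mount-data converters above rewrite the caller's buffer in place, and do_nfs4_super_data_conv() spells the trick out in its "copy the fields backwards" comment: because the wide layout overlaps the narrow one in the same memory, writing the highest-offset field first guarantees that every narrow field is read before the wider field covering it is stored. A toy model of the same idea (the structs are illustrative, not the real mount-data layouts):

    #include <stdint.h>
    #include <stdio.h>

    struct narrow { int32_t a, b, c; };   /* the "compat" view  */
    struct wide   { int64_t a, b, c; };   /* the "native" view  */

    union mountdata { struct narrow n; struct wide w; };

    static void widen_in_place(union mountdata *u)
    {
        /* Last field first: each narrow field is read before the larger
         * wide field that overlaps it is written over the same bytes. */
        u->w.c = u->n.c;
        u->w.b = u->n.b;
        u->w.a = u->n.a;
    }

    int main(void)
    {
        union mountdata u = { .n = { 1, 2, 3 } };

        widen_in_place(&u);
        printf("%lld %lld %lld\n",
               (long long)u.w.a, (long long)u.w.b, (long long)u.w.c);
        return 0;
    }

Copying in the opposite order would overwrite the narrow b with the 64-bit a before b had been read, which is exactly what the backwards order avoids.
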
786 #define NCPFS_NAME "ncpfs" 786 #define NCPFS_NAME "ncpfs"
787 #define NFS4_NAME "nfs4" 787 #define NFS4_NAME "nfs4"
788 788
789 COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name, 789 COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name,
790 const char __user *, dir_name, 790 const char __user *, dir_name,
791 const char __user *, type, compat_ulong_t, flags, 791 const char __user *, type, compat_ulong_t, flags,
792 const void __user *, data) 792 const void __user *, data)
793 { 793 {
794 char *kernel_type; 794 char *kernel_type;
795 unsigned long data_page; 795 unsigned long data_page;
796 char *kernel_dev; 796 char *kernel_dev;
797 struct filename *dir; 797 struct filename *dir;
798 int retval; 798 int retval;
799 799
800 retval = copy_mount_string(type, &kernel_type); 800 retval = copy_mount_string(type, &kernel_type);
801 if (retval < 0) 801 if (retval < 0)
802 goto out; 802 goto out;
803 803
804 dir = getname(dir_name); 804 dir = getname(dir_name);
805 retval = PTR_ERR(dir); 805 retval = PTR_ERR(dir);
806 if (IS_ERR(dir)) 806 if (IS_ERR(dir))
807 goto out1; 807 goto out1;
808 808
809 retval = copy_mount_string(dev_name, &kernel_dev); 809 retval = copy_mount_string(dev_name, &kernel_dev);
810 if (retval < 0) 810 if (retval < 0)
811 goto out2; 811 goto out2;
812 812
813 retval = copy_mount_options(data, &data_page); 813 retval = copy_mount_options(data, &data_page);
814 if (retval < 0) 814 if (retval < 0)
815 goto out3; 815 goto out3;
816 816
817 retval = -EINVAL; 817 retval = -EINVAL;
818 818
819 if (kernel_type && data_page) { 819 if (kernel_type && data_page) {
820 if (!strcmp(kernel_type, NCPFS_NAME)) { 820 if (!strcmp(kernel_type, NCPFS_NAME)) {
821 do_ncp_super_data_conv((void *)data_page); 821 do_ncp_super_data_conv((void *)data_page);
822 } else if (!strcmp(kernel_type, NFS4_NAME)) { 822 } else if (!strcmp(kernel_type, NFS4_NAME)) {
823 if (do_nfs4_super_data_conv((void *) data_page)) 823 if (do_nfs4_super_data_conv((void *) data_page))
824 goto out4; 824 goto out4;
825 } 825 }
826 } 826 }
827 827
828 retval = do_mount(kernel_dev, dir->name, kernel_type, 828 retval = do_mount(kernel_dev, dir->name, kernel_type,
829 flags, (void*)data_page); 829 flags, (void*)data_page);
830 830
831 out4: 831 out4:
832 free_page(data_page); 832 free_page(data_page);
833 out3: 833 out3:
834 kfree(kernel_dev); 834 kfree(kernel_dev);
835 out2: 835 out2:
836 putname(dir); 836 putname(dir);
837 out1: 837 out1:
838 kfree(kernel_type); 838 kfree(kernel_type);
839 out: 839 out:
840 return retval; 840 return retval;
841 } 841 }
842 842
843 struct compat_old_linux_dirent { 843 struct compat_old_linux_dirent {
844 compat_ulong_t d_ino; 844 compat_ulong_t d_ino;
845 compat_ulong_t d_offset; 845 compat_ulong_t d_offset;
846 unsigned short d_namlen; 846 unsigned short d_namlen;
847 char d_name[1]; 847 char d_name[1];
848 }; 848 };
849 849
850 struct compat_readdir_callback { 850 struct compat_readdir_callback {
851 struct dir_context ctx; 851 struct dir_context ctx;
852 struct compat_old_linux_dirent __user *dirent; 852 struct compat_old_linux_dirent __user *dirent;
853 int result; 853 int result;
854 }; 854 };
855 855
856 static int compat_fillonedir(void *__buf, const char *name, int namlen, 856 static int compat_fillonedir(void *__buf, const char *name, int namlen,
857 loff_t offset, u64 ino, unsigned int d_type) 857 loff_t offset, u64 ino, unsigned int d_type)
858 { 858 {
859 struct compat_readdir_callback *buf = __buf; 859 struct compat_readdir_callback *buf = __buf;
860 struct compat_old_linux_dirent __user *dirent; 860 struct compat_old_linux_dirent __user *dirent;
861 compat_ulong_t d_ino; 861 compat_ulong_t d_ino;
862 862
863 if (buf->result) 863 if (buf->result)
864 return -EINVAL; 864 return -EINVAL;
865 d_ino = ino; 865 d_ino = ino;
866 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { 866 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
867 buf->result = -EOVERFLOW; 867 buf->result = -EOVERFLOW;
868 return -EOVERFLOW; 868 return -EOVERFLOW;
869 } 869 }
870 buf->result++; 870 buf->result++;
871 dirent = buf->dirent; 871 dirent = buf->dirent;
872 if (!access_ok(VERIFY_WRITE, dirent, 872 if (!access_ok(VERIFY_WRITE, dirent,
873 (unsigned long)(dirent->d_name + namlen + 1) - 873 (unsigned long)(dirent->d_name + namlen + 1) -
874 (unsigned long)dirent)) 874 (unsigned long)dirent))
875 goto efault; 875 goto efault;
876 if ( __put_user(d_ino, &dirent->d_ino) || 876 if ( __put_user(d_ino, &dirent->d_ino) ||
877 __put_user(offset, &dirent->d_offset) || 877 __put_user(offset, &dirent->d_offset) ||
878 __put_user(namlen, &dirent->d_namlen) || 878 __put_user(namlen, &dirent->d_namlen) ||
879 __copy_to_user(dirent->d_name, name, namlen) || 879 __copy_to_user(dirent->d_name, name, namlen) ||
880 __put_user(0, dirent->d_name + namlen)) 880 __put_user(0, dirent->d_name + namlen))
881 goto efault; 881 goto efault;
882 return 0; 882 return 0;
883 efault: 883 efault:
884 buf->result = -EFAULT; 884 buf->result = -EFAULT;
885 return -EFAULT; 885 return -EFAULT;
886 } 886 }
887 887
888 COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd, 888 COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd,
889 struct compat_old_linux_dirent __user *, dirent, unsigned int, count) 889 struct compat_old_linux_dirent __user *, dirent, unsigned int, count)
890 { 890 {
891 int error; 891 int error;
892 struct fd f = fdget(fd); 892 struct fd f = fdget(fd);
893 struct compat_readdir_callback buf = { 893 struct compat_readdir_callback buf = {
894 .ctx.actor = compat_fillonedir, 894 .ctx.actor = compat_fillonedir,
895 .dirent = dirent 895 .dirent = dirent
896 }; 896 };
897 897
898 if (!f.file) 898 if (!f.file)
899 return -EBADF; 899 return -EBADF;
900 900
901 error = iterate_dir(f.file, &buf.ctx); 901 error = iterate_dir(f.file, &buf.ctx);
902 if (buf.result) 902 if (buf.result)
903 error = buf.result; 903 error = buf.result;
904 904
905 fdput(f); 905 fdput(f);
906 return error; 906 return error;
907 } 907 }
908 908
909 struct compat_linux_dirent { 909 struct compat_linux_dirent {
910 compat_ulong_t d_ino; 910 compat_ulong_t d_ino;
911 compat_ulong_t d_off; 911 compat_ulong_t d_off;
912 unsigned short d_reclen; 912 unsigned short d_reclen;
913 char d_name[1]; 913 char d_name[1];
914 }; 914 };
915 915
916 struct compat_getdents_callback { 916 struct compat_getdents_callback {
917 struct dir_context ctx; 917 struct dir_context ctx;
918 struct compat_linux_dirent __user *current_dir; 918 struct compat_linux_dirent __user *current_dir;
919 struct compat_linux_dirent __user *previous; 919 struct compat_linux_dirent __user *previous;
920 int count; 920 int count;
921 int error; 921 int error;
922 }; 922 };
923 923
924 static int compat_filldir(void *__buf, const char *name, int namlen, 924 static int compat_filldir(void *__buf, const char *name, int namlen,
925 loff_t offset, u64 ino, unsigned int d_type) 925 loff_t offset, u64 ino, unsigned int d_type)
926 { 926 {
927 struct compat_linux_dirent __user * dirent; 927 struct compat_linux_dirent __user * dirent;
928 struct compat_getdents_callback *buf = __buf; 928 struct compat_getdents_callback *buf = __buf;
929 compat_ulong_t d_ino; 929 compat_ulong_t d_ino;
930 int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) + 930 int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) +
931 namlen + 2, sizeof(compat_long_t)); 931 namlen + 2, sizeof(compat_long_t));
932 932
933 buf->error = -EINVAL; /* only used if we fail.. */ 933 buf->error = -EINVAL; /* only used if we fail.. */
934 if (reclen > buf->count) 934 if (reclen > buf->count)
935 return -EINVAL; 935 return -EINVAL;
936 d_ino = ino; 936 d_ino = ino;
937 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { 937 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
938 buf->error = -EOVERFLOW; 938 buf->error = -EOVERFLOW;
939 return -EOVERFLOW; 939 return -EOVERFLOW;
940 } 940 }
941 dirent = buf->previous; 941 dirent = buf->previous;
942 if (dirent) { 942 if (dirent) {
943 if (__put_user(offset, &dirent->d_off)) 943 if (__put_user(offset, &dirent->d_off))
944 goto efault; 944 goto efault;
945 } 945 }
946 dirent = buf->current_dir; 946 dirent = buf->current_dir;
947 if (__put_user(d_ino, &dirent->d_ino)) 947 if (__put_user(d_ino, &dirent->d_ino))
948 goto efault; 948 goto efault;
949 if (__put_user(reclen, &dirent->d_reclen)) 949 if (__put_user(reclen, &dirent->d_reclen))
950 goto efault; 950 goto efault;
951 if (copy_to_user(dirent->d_name, name, namlen)) 951 if (copy_to_user(dirent->d_name, name, namlen))
952 goto efault; 952 goto efault;
953 if (__put_user(0, dirent->d_name + namlen)) 953 if (__put_user(0, dirent->d_name + namlen))
954 goto efault; 954 goto efault;
955 if (__put_user(d_type, (char __user *) dirent + reclen - 1)) 955 if (__put_user(d_type, (char __user *) dirent + reclen - 1))
956 goto efault; 956 goto efault;
957 buf->previous = dirent; 957 buf->previous = dirent;
958 dirent = (void __user *)dirent + reclen; 958 dirent = (void __user *)dirent + reclen;
959 buf->current_dir = dirent; 959 buf->current_dir = dirent;
960 buf->count -= reclen; 960 buf->count -= reclen;
961 return 0; 961 return 0;
962 efault: 962 efault:
963 buf->error = -EFAULT; 963 buf->error = -EFAULT;
964 return -EFAULT; 964 return -EFAULT;
965 } 965 }
966 966
967 COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd, 967 COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd,
968 struct compat_linux_dirent __user *, dirent, unsigned int, count) 968 struct compat_linux_dirent __user *, dirent, unsigned int, count)
969 { 969 {
970 struct fd f; 970 struct fd f;
971 struct compat_linux_dirent __user * lastdirent; 971 struct compat_linux_dirent __user * lastdirent;
972 struct compat_getdents_callback buf = { 972 struct compat_getdents_callback buf = {
973 .ctx.actor = compat_filldir, 973 .ctx.actor = compat_filldir,
974 .current_dir = dirent, 974 .current_dir = dirent,
975 .count = count 975 .count = count
976 }; 976 };
977 int error; 977 int error;
978 978
979 if (!access_ok(VERIFY_WRITE, dirent, count)) 979 if (!access_ok(VERIFY_WRITE, dirent, count))
980 return -EFAULT; 980 return -EFAULT;
981 981
982 f = fdget(fd); 982 f = fdget(fd);
983 if (!f.file) 983 if (!f.file)
984 return -EBADF; 984 return -EBADF;
985 985
986 error = iterate_dir(f.file, &buf.ctx); 986 error = iterate_dir(f.file, &buf.ctx);
987 if (error >= 0) 987 if (error >= 0)
988 error = buf.error; 988 error = buf.error;
989 lastdirent = buf.previous; 989 lastdirent = buf.previous;
990 if (lastdirent) { 990 if (lastdirent) {
991 if (put_user(buf.ctx.pos, &lastdirent->d_off)) 991 if (put_user(buf.ctx.pos, &lastdirent->d_off))
992 error = -EFAULT; 992 error = -EFAULT;
993 else 993 else
994 error = count - buf.count; 994 error = count - buf.count;
995 } 995 }
996 fdput(f); 996 fdput(f);
997 return error; 997 return error;
998 } 998 }
999 999
1000 #ifdef __ARCH_WANT_COMPAT_SYS_GETDENTS64 1000 #ifdef __ARCH_WANT_COMPAT_SYS_GETDENTS64
1001 1001
1002 struct compat_getdents_callback64 { 1002 struct compat_getdents_callback64 {
1003 struct dir_context ctx; 1003 struct dir_context ctx;
1004 struct linux_dirent64 __user *current_dir; 1004 struct linux_dirent64 __user *current_dir;
1005 struct linux_dirent64 __user *previous; 1005 struct linux_dirent64 __user *previous;
1006 int count; 1006 int count;
1007 int error; 1007 int error;
1008 }; 1008 };
1009 1009
1010 static int compat_filldir64(void * __buf, const char * name, int namlen, loff_t offset, 1010 static int compat_filldir64(void * __buf, const char * name, int namlen, loff_t offset,
1011 u64 ino, unsigned int d_type) 1011 u64 ino, unsigned int d_type)
1012 { 1012 {
1013 struct linux_dirent64 __user *dirent; 1013 struct linux_dirent64 __user *dirent;
1014 struct compat_getdents_callback64 *buf = __buf; 1014 struct compat_getdents_callback64 *buf = __buf;
1015 int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1, 1015 int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1,
1016 sizeof(u64)); 1016 sizeof(u64));
1017 u64 off; 1017 u64 off;
1018 1018
1019 buf->error = -EINVAL; /* only used if we fail.. */ 1019 buf->error = -EINVAL; /* only used if we fail.. */
1020 if (reclen > buf->count) 1020 if (reclen > buf->count)
1021 return -EINVAL; 1021 return -EINVAL;
1022 dirent = buf->previous; 1022 dirent = buf->previous;
1023 1023
1024 if (dirent) { 1024 if (dirent) {
1025 if (__put_user_unaligned(offset, &dirent->d_off)) 1025 if (__put_user_unaligned(offset, &dirent->d_off))
1026 goto efault; 1026 goto efault;
1027 } 1027 }
1028 dirent = buf->current_dir; 1028 dirent = buf->current_dir;
1029 if (__put_user_unaligned(ino, &dirent->d_ino)) 1029 if (__put_user_unaligned(ino, &dirent->d_ino))
1030 goto efault; 1030 goto efault;
1031 off = 0; 1031 off = 0;
1032 if (__put_user_unaligned(off, &dirent->d_off)) 1032 if (__put_user_unaligned(off, &dirent->d_off))
1033 goto efault; 1033 goto efault;
1034 if (__put_user(reclen, &dirent->d_reclen)) 1034 if (__put_user(reclen, &dirent->d_reclen))
1035 goto efault; 1035 goto efault;
1036 if (__put_user(d_type, &dirent->d_type)) 1036 if (__put_user(d_type, &dirent->d_type))
1037 goto efault; 1037 goto efault;
1038 if (copy_to_user(dirent->d_name, name, namlen)) 1038 if (copy_to_user(dirent->d_name, name, namlen))
1039 goto efault; 1039 goto efault;
1040 if (__put_user(0, dirent->d_name + namlen)) 1040 if (__put_user(0, dirent->d_name + namlen))
1041 goto efault; 1041 goto efault;
1042 buf->previous = dirent; 1042 buf->previous = dirent;
1043 dirent = (void __user *)dirent + reclen; 1043 dirent = (void __user *)dirent + reclen;
1044 buf->current_dir = dirent; 1044 buf->current_dir = dirent;
1045 buf->count -= reclen; 1045 buf->count -= reclen;
1046 return 0; 1046 return 0;
1047 efault: 1047 efault:
1048 buf->error = -EFAULT; 1048 buf->error = -EFAULT;
1049 return -EFAULT; 1049 return -EFAULT;
1050 } 1050 }
1051 1051
1052 COMPAT_SYSCALL_DEFINE3(getdents64, unsigned int, fd, 1052 COMPAT_SYSCALL_DEFINE3(getdents64, unsigned int, fd,
1053 struct linux_dirent64 __user *, dirent, unsigned int, count) 1053 struct linux_dirent64 __user *, dirent, unsigned int, count)
1054 { 1054 {
1055 struct fd f; 1055 struct fd f;
1056 struct linux_dirent64 __user * lastdirent; 1056 struct linux_dirent64 __user * lastdirent;
1057 struct compat_getdents_callback64 buf = { 1057 struct compat_getdents_callback64 buf = {
1058 .ctx.actor = compat_filldir64, 1058 .ctx.actor = compat_filldir64,
1059 .current_dir = dirent, 1059 .current_dir = dirent,
1060 .count = count 1060 .count = count
1061 }; 1061 };
1062 int error; 1062 int error;
1063 1063
1064 if (!access_ok(VERIFY_WRITE, dirent, count)) 1064 if (!access_ok(VERIFY_WRITE, dirent, count))
1065 return -EFAULT; 1065 return -EFAULT;
1066 1066
1067 f = fdget(fd); 1067 f = fdget(fd);
1068 if (!f.file) 1068 if (!f.file)
1069 return -EBADF; 1069 return -EBADF;
1070 1070
1071 error = iterate_dir(f.file, &buf.ctx); 1071 error = iterate_dir(f.file, &buf.ctx);
1072 if (error >= 0) 1072 if (error >= 0)
1073 error = buf.error; 1073 error = buf.error;
1074 lastdirent = buf.previous; 1074 lastdirent = buf.previous;
1075 if (lastdirent) { 1075 if (lastdirent) {
1076 typeof(lastdirent->d_off) d_off = buf.ctx.pos; 1076 typeof(lastdirent->d_off) d_off = buf.ctx.pos;
1077 if (__put_user_unaligned(d_off, &lastdirent->d_off)) 1077 if (__put_user_unaligned(d_off, &lastdirent->d_off))
1078 error = -EFAULT; 1078 error = -EFAULT;
1079 else 1079 else
1080 error = count - buf.count; 1080 error = count - buf.count;
1081 } 1081 }
1082 fdput(f); 1082 fdput(f);
1083 return error; 1083 return error;
1084 } 1084 }
1085 #endif /* __ARCH_WANT_COMPAT_SYS_GETDENTS64 */ 1085 #endif /* __ARCH_WANT_COMPAT_SYS_GETDENTS64 */
1086 1086
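[Editorial aside, not part of the diff] The filldir callbacks above emit variable-length records, each carrying its own d_reclen, and patch the previous record's d_off so the chain ends at the position to resume from. Userspace walks the resulting buffer by hopping d_reclen bytes at a time; the sketch below uses the raw getdents64 syscall and declares the record layout itself (glibc does not export it), with error handling trimmed.

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    struct linux_dirent64 {                  /* kernel record layout, not in glibc */
        unsigned long long d_ino;
        long long          d_off;
        unsigned short     d_reclen;
        unsigned char      d_type;
        char               d_name[];
    };

    int main(void)
    {
        char buf[4096];
        int fd = open(".", O_RDONLY | O_DIRECTORY);
        long n = syscall(SYS_getdents64, fd, buf, sizeof(buf));

        for (long off = 0; off < n; ) {
            struct linux_dirent64 *d = (struct linux_dirent64 *)(buf + off);

            printf("%-20s reclen=%u\n", d->d_name, (unsigned)d->d_reclen);
            off += d->d_reclen;              /* hop to the next variable-length record */
        }
        close(fd);
        return 0;
    }
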
1087 /* 1087 /*
1088 * Exactly like fs/open.c:sys_open(), except that it doesn't set the 1088 * Exactly like fs/open.c:sys_open(), except that it doesn't set the
1089 * O_LARGEFILE flag. 1089 * O_LARGEFILE flag.
1090 */ 1090 */
1091 COMPAT_SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode) 1091 COMPAT_SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
1092 { 1092 {
1093 return do_sys_open(AT_FDCWD, filename, flags, mode); 1093 return do_sys_open(AT_FDCWD, filename, flags, mode);
1094 } 1094 }
1095 1095
1096 /* 1096 /*
1097 * Exactly like fs/open.c:sys_openat(), except that it doesn't set the 1097 * Exactly like fs/open.c:sys_openat(), except that it doesn't set the
1098 * O_LARGEFILE flag. 1098 * O_LARGEFILE flag.
1099 */ 1099 */
1100 COMPAT_SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, umode_t, mode) 1100 COMPAT_SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, umode_t, mode)
1101 { 1101 {
1102 return do_sys_open(dfd, filename, flags, mode); 1102 return do_sys_open(dfd, filename, flags, mode);
1103 } 1103 }
1104 1104
1105 #define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t)) 1105 #define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t))
1106 1106
1107 static int poll_select_copy_remaining(struct timespec *end_time, void __user *p, 1107 static int poll_select_copy_remaining(struct timespec *end_time, void __user *p,
1108 int timeval, int ret) 1108 int timeval, int ret)
1109 { 1109 {
1110 struct timespec ts; 1110 struct timespec ts;
1111 1111
1112 if (!p) 1112 if (!p)
1113 return ret; 1113 return ret;
1114 1114
1115 if (current->personality & STICKY_TIMEOUTS) 1115 if (current->personality & STICKY_TIMEOUTS)
1116 goto sticky; 1116 goto sticky;
1117 1117
1118 /* No update for zero timeout */ 1118 /* No update for zero timeout */
1119 if (!end_time->tv_sec && !end_time->tv_nsec) 1119 if (!end_time->tv_sec && !end_time->tv_nsec)
1120 return ret; 1120 return ret;
1121 1121
1122 ktime_get_ts(&ts); 1122 ktime_get_ts(&ts);
1123 ts = timespec_sub(*end_time, ts); 1123 ts = timespec_sub(*end_time, ts);
1124 if (ts.tv_sec < 0) 1124 if (ts.tv_sec < 0)
1125 ts.tv_sec = ts.tv_nsec = 0; 1125 ts.tv_sec = ts.tv_nsec = 0;
1126 1126
1127 if (timeval) { 1127 if (timeval) {
1128 struct compat_timeval rtv; 1128 struct compat_timeval rtv;
1129 1129
1130 rtv.tv_sec = ts.tv_sec; 1130 rtv.tv_sec = ts.tv_sec;
1131 rtv.tv_usec = ts.tv_nsec / NSEC_PER_USEC; 1131 rtv.tv_usec = ts.tv_nsec / NSEC_PER_USEC;
1132 1132
1133 if (!copy_to_user(p, &rtv, sizeof(rtv))) 1133 if (!copy_to_user(p, &rtv, sizeof(rtv)))
1134 return ret; 1134 return ret;
1135 } else { 1135 } else {
1136 struct compat_timespec rts; 1136 struct compat_timespec rts;
1137 1137
1138 rts.tv_sec = ts.tv_sec; 1138 rts.tv_sec = ts.tv_sec;
1139 rts.tv_nsec = ts.tv_nsec; 1139 rts.tv_nsec = ts.tv_nsec;
1140 1140
1141 if (!copy_to_user(p, &rts, sizeof(rts))) 1141 if (!copy_to_user(p, &rts, sizeof(rts)))
1142 return ret; 1142 return ret;
1143 } 1143 }
1144 /* 1144 /*
1145 * If an application puts its timeval in read-only memory, we 1145 * If an application puts its timeval in read-only memory, we
1146 * don't want the Linux-specific update to the timeval to 1146 * don't want the Linux-specific update to the timeval to
1147 * cause a fault after the select has completed 1147 * cause a fault after the select has completed
1148 * successfully. However, because we're not updating the 1148 * successfully. However, because we're not updating the
1149 * timeval, we can't restart the system call. 1149 * timeval, we can't restart the system call.
1150 */ 1150 */
1151 1151
1152 sticky: 1152 sticky:
1153 if (ret == -ERESTARTNOHAND) 1153 if (ret == -ERESTARTNOHAND)
1154 ret = -EINTR; 1154 ret = -EINTR;
1155 return ret; 1155 return ret;
1156 } 1156 }
1157 1157
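[Editorial aside, not part of the diff] poll_select_copy_remaining() above is what gives 32-bit callers the Linux-specific behaviour of select() rewriting its timeout with the time that was left, unless the task runs with the STICKY_TIMEOUTS personality or the timeout sits in read-only memory. A tiny userspace illustration of that write-back:

    #include <stdio.h>
    #include <sys/select.h>

    int main(void)
    {
        struct timeval tv = { .tv_sec = 1, .tv_usec = 0 };

        /* Nothing to watch, so this just sleeps for up to one second;
         * on Linux, tv is updated with whatever time was left on return. */
        select(0, NULL, NULL, NULL, &tv);
        printf("remaining: %ld.%06ld s\n", (long)tv.tv_sec, (long)tv.tv_usec);
        return 0;
    }
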
1158 /* 1158 /*
1159 * Ooo, nasty. We need here to frob 32-bit unsigned longs to 1159 * Ooo, nasty. We need here to frob 32-bit unsigned longs to
1160 * 64-bit unsigned longs. 1160 * 64-bit unsigned longs.
1161 */ 1161 */
1162 static 1162 static
1163 int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, 1163 int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
1164 unsigned long *fdset) 1164 unsigned long *fdset)
1165 { 1165 {
1166 nr = DIV_ROUND_UP(nr, __COMPAT_NFDBITS); 1166 nr = DIV_ROUND_UP(nr, __COMPAT_NFDBITS);
1167 if (ufdset) { 1167 if (ufdset) {
1168 unsigned long odd; 1168 unsigned long odd;
1169 1169
1170 if (!access_ok(VERIFY_WRITE, ufdset, nr*sizeof(compat_ulong_t))) 1170 if (!access_ok(VERIFY_WRITE, ufdset, nr*sizeof(compat_ulong_t)))
1171 return -EFAULT; 1171 return -EFAULT;
1172 1172
1173 odd = nr & 1UL; 1173 odd = nr & 1UL;
1174 nr &= ~1UL; 1174 nr &= ~1UL;
1175 while (nr) { 1175 while (nr) {
1176 unsigned long h, l; 1176 unsigned long h, l;
1177 if (__get_user(l, ufdset) || __get_user(h, ufdset+1)) 1177 if (__get_user(l, ufdset) || __get_user(h, ufdset+1))
1178 return -EFAULT; 1178 return -EFAULT;
1179 ufdset += 2; 1179 ufdset += 2;
1180 *fdset++ = h << 32 | l; 1180 *fdset++ = h << 32 | l;
1181 nr -= 2; 1181 nr -= 2;
1182 } 1182 }
1183 if (odd && __get_user(*fdset, ufdset)) 1183 if (odd && __get_user(*fdset, ufdset))
1184 return -EFAULT; 1184 return -EFAULT;
1185 } else { 1185 } else {
1186 /* Tricky, must clear full unsigned long in the 1186 /* Tricky, must clear full unsigned long in the
1187 * kernel fdset at the end, this makes sure that 1187 * kernel fdset at the end, this makes sure that
1188 * actually happens. 1188 * actually happens.
1189 */ 1189 */
1190 memset(fdset, 0, ((nr + 1) & ~1)*sizeof(compat_ulong_t)); 1190 memset(fdset, 0, ((nr + 1) & ~1)*sizeof(compat_ulong_t));
1191 } 1191 }
1192 return 0; 1192 return 0;
1193 } 1193 }
1194 1194
1195 static 1195 static
1196 int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, 1196 int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
1197 unsigned long *fdset) 1197 unsigned long *fdset)
1198 { 1198 {
1199 unsigned long odd; 1199 unsigned long odd;
1200 nr = DIV_ROUND_UP(nr, __COMPAT_NFDBITS); 1200 nr = DIV_ROUND_UP(nr, __COMPAT_NFDBITS);
1201 1201
1202 if (!ufdset) 1202 if (!ufdset)
1203 return 0; 1203 return 0;
1204 1204
1205 odd = nr & 1UL; 1205 odd = nr & 1UL;
1206 nr &= ~1UL; 1206 nr &= ~1UL;
1207 while (nr) { 1207 while (nr) {
1208 unsigned long h, l; 1208 unsigned long h, l;
1209 l = *fdset++; 1209 l = *fdset++;
1210 h = l >> 32; 1210 h = l >> 32;
1211 if (__put_user(l, ufdset) || __put_user(h, ufdset+1)) 1211 if (__put_user(l, ufdset) || __put_user(h, ufdset+1))
1212 return -EFAULT; 1212 return -EFAULT;
1213 ufdset += 2; 1213 ufdset += 2;
1214 nr -= 2; 1214 nr -= 2;
1215 } 1215 }
1216 if (odd && __put_user(*fdset, ufdset)) 1216 if (odd && __put_user(*fdset, ufdset))
1217 return -EFAULT; 1217 return -EFAULT;
1218 return 0; 1218 return 0;
1219 } 1219 }
1220 1220
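[Editorial aside, not part of the diff] The "frobbing" the comment above refers to is purely value-level: each pair of consecutive 32-bit words from the compat fd_set is packed into one native 64-bit long on the way in (the first word becomes the low half, so fd 0 stays at bit 0) and split back apart on the way out. A stand-alone model:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t pack(uint32_t low, uint32_t high)
    {
        return (uint64_t)high << 32 | low;   /* mirrors "*fdset++ = h << 32 | l" */
    }

    static void unpack(uint64_t word, uint32_t *low, uint32_t *high)
    {
        *low  = (uint32_t)word;
        *high = (uint32_t)(word >> 32);      /* mirrors "h = l >> 32" on output  */
    }

    int main(void)
    {
        uint32_t l, h;
        uint64_t w = pack(0x4u, 0x80000000u);   /* fd 2 set, fd 63 set */

        unpack(w, &l, &h);
        printf("packed=%#llx low=%#x high=%#x\n", (unsigned long long)w, l, h);
        return 0;
    }
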
1221 1221
1222 /* 1222 /*
1223 * This is a virtual copy of sys_select from fs/select.c and probably 1223 * This is a virtual copy of sys_select from fs/select.c and probably
1224 * should be compared to it from time to time 1224 * should be compared to it from time to time
1225 */ 1225 */
1226 1226
1227 /* 1227 /*
1228 * We can actually return ERESTARTSYS instead of EINTR, but I'd 1228 * We can actually return ERESTARTSYS instead of EINTR, but I'd
1229 * like to be certain this leads to no problems. So I return 1229 * like to be certain this leads to no problems. So I return
1230 * EINTR just for safety. 1230 * EINTR just for safety.
1231 * 1231 *
1232 * Update: ERESTARTSYS breaks at least the xview clock binary, so 1232 * Update: ERESTARTSYS breaks at least the xview clock binary, so
1233 * I'm trying ERESTARTNOHAND which restarts only when you want to. 1233 * I'm trying ERESTARTNOHAND which restarts only when you want to.
1234 */ 1234 */
1235 int compat_core_sys_select(int n, compat_ulong_t __user *inp, 1235 int compat_core_sys_select(int n, compat_ulong_t __user *inp,
1236 compat_ulong_t __user *outp, compat_ulong_t __user *exp, 1236 compat_ulong_t __user *outp, compat_ulong_t __user *exp,
1237 struct timespec *end_time) 1237 struct timespec *end_time)
1238 { 1238 {
1239 fd_set_bits fds; 1239 fd_set_bits fds;
1240 void *bits; 1240 void *bits;
1241 int size, max_fds, ret = -EINVAL; 1241 int size, max_fds, ret = -EINVAL;
1242 struct fdtable *fdt; 1242 struct fdtable *fdt;
1243 long stack_fds[SELECT_STACK_ALLOC/sizeof(long)]; 1243 long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
1244 1244
1245 if (n < 0) 1245 if (n < 0)
1246 goto out_nofds; 1246 goto out_nofds;
1247 1247
1248 /* max_fds can increase, so grab it once to avoid race */ 1248 /* max_fds can increase, so grab it once to avoid race */
1249 rcu_read_lock(); 1249 rcu_read_lock();
1250 fdt = files_fdtable(current->files); 1250 fdt = files_fdtable(current->files);
1251 max_fds = fdt->max_fds; 1251 max_fds = fdt->max_fds;
1252 rcu_read_unlock(); 1252 rcu_read_unlock();
1253 if (n > max_fds) 1253 if (n > max_fds)
1254 n = max_fds; 1254 n = max_fds;
1255 1255
1256 /* 1256 /*
1257 * We need 6 bitmaps (in/out/ex for both incoming and outgoing), 1257 * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
1258 * since we used fdset we need to allocate memory in units of 1258 * since we used fdset we need to allocate memory in units of
1259 * long-words. 1259 * long-words.
1260 */ 1260 */
1261 size = FDS_BYTES(n); 1261 size = FDS_BYTES(n);
1262 bits = stack_fds; 1262 bits = stack_fds;
1263 if (size > sizeof(stack_fds) / 6) { 1263 if (size > sizeof(stack_fds) / 6) {
1264 bits = kmalloc(6 * size, GFP_KERNEL); 1264 bits = kmalloc(6 * size, GFP_KERNEL);
1265 ret = -ENOMEM; 1265 ret = -ENOMEM;
1266 if (!bits) 1266 if (!bits)
1267 goto out_nofds; 1267 goto out_nofds;
1268 } 1268 }
1269 fds.in = (unsigned long *) bits; 1269 fds.in = (unsigned long *) bits;
1270 fds.out = (unsigned long *) (bits + size); 1270 fds.out = (unsigned long *) (bits + size);
1271 fds.ex = (unsigned long *) (bits + 2*size); 1271 fds.ex = (unsigned long *) (bits + 2*size);
1272 fds.res_in = (unsigned long *) (bits + 3*size); 1272 fds.res_in = (unsigned long *) (bits + 3*size);
1273 fds.res_out = (unsigned long *) (bits + 4*size); 1273 fds.res_out = (unsigned long *) (bits + 4*size);
1274 fds.res_ex = (unsigned long *) (bits + 5*size); 1274 fds.res_ex = (unsigned long *) (bits + 5*size);
1275 1275
1276 if ((ret = compat_get_fd_set(n, inp, fds.in)) || 1276 if ((ret = compat_get_fd_set(n, inp, fds.in)) ||
1277 (ret = compat_get_fd_set(n, outp, fds.out)) || 1277 (ret = compat_get_fd_set(n, outp, fds.out)) ||
1278 (ret = compat_get_fd_set(n, exp, fds.ex))) 1278 (ret = compat_get_fd_set(n, exp, fds.ex)))
1279 goto out; 1279 goto out;
1280 zero_fd_set(n, fds.res_in); 1280 zero_fd_set(n, fds.res_in);
1281 zero_fd_set(n, fds.res_out); 1281 zero_fd_set(n, fds.res_out);
1282 zero_fd_set(n, fds.res_ex); 1282 zero_fd_set(n, fds.res_ex);
1283 1283
1284 ret = do_select(n, &fds, end_time); 1284 ret = do_select(n, &fds, end_time);
1285 1285
1286 if (ret < 0) 1286 if (ret < 0)
1287 goto out; 1287 goto out;
1288 if (!ret) { 1288 if (!ret) {
1289 ret = -ERESTARTNOHAND; 1289 ret = -ERESTARTNOHAND;
1290 if (signal_pending(current)) 1290 if (signal_pending(current))
1291 goto out; 1291 goto out;
1292 ret = 0; 1292 ret = 0;
1293 } 1293 }
1294 1294
1295 if (compat_set_fd_set(n, inp, fds.res_in) || 1295 if (compat_set_fd_set(n, inp, fds.res_in) ||
1296 compat_set_fd_set(n, outp, fds.res_out) || 1296 compat_set_fd_set(n, outp, fds.res_out) ||
1297 compat_set_fd_set(n, exp, fds.res_ex)) 1297 compat_set_fd_set(n, exp, fds.res_ex))
1298 ret = -EFAULT; 1298 ret = -EFAULT;
1299 out: 1299 out:
1300 if (bits != stack_fds) 1300 if (bits != stack_fds)
1301 kfree(bits); 1301 kfree(bits);
1302 out_nofds: 1302 out_nofds:
1303 return ret; 1303 return ret;
1304 } 1304 }
1305 1305
1306 COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp, 1306 COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp,
1307 compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, 1307 compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
1308 struct compat_timeval __user *, tvp) 1308 struct compat_timeval __user *, tvp)
1309 { 1309 {
1310 struct timespec end_time, *to = NULL; 1310 struct timespec end_time, *to = NULL;
1311 struct compat_timeval tv; 1311 struct compat_timeval tv;
1312 int ret; 1312 int ret;
1313 1313
1314 if (tvp) { 1314 if (tvp) {
1315 if (copy_from_user(&tv, tvp, sizeof(tv))) 1315 if (copy_from_user(&tv, tvp, sizeof(tv)))
1316 return -EFAULT; 1316 return -EFAULT;
1317 1317
1318 to = &end_time; 1318 to = &end_time;
1319 if (poll_select_set_timeout(to, 1319 if (poll_select_set_timeout(to,
1320 tv.tv_sec + (tv.tv_usec / USEC_PER_SEC), 1320 tv.tv_sec + (tv.tv_usec / USEC_PER_SEC),
1321 (tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC)) 1321 (tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC))
1322 return -EINVAL; 1322 return -EINVAL;
1323 } 1323 }
1324 1324
1325 ret = compat_core_sys_select(n, inp, outp, exp, to); 1325 ret = compat_core_sys_select(n, inp, outp, exp, to);
1326 ret = poll_select_copy_remaining(&end_time, tvp, 1, ret); 1326 ret = poll_select_copy_remaining(&end_time, tvp, 1, ret);
1327 1327
1328 return ret; 1328 return ret;
1329 } 1329 }
1330 1330
1331 struct compat_sel_arg_struct { 1331 struct compat_sel_arg_struct {
1332 compat_ulong_t n; 1332 compat_ulong_t n;
1333 compat_uptr_t inp; 1333 compat_uptr_t inp;
1334 compat_uptr_t outp; 1334 compat_uptr_t outp;
1335 compat_uptr_t exp; 1335 compat_uptr_t exp;
1336 compat_uptr_t tvp; 1336 compat_uptr_t tvp;
1337 }; 1337 };
1338 1338
1339 COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg) 1339 COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg)
1340 { 1340 {
1341 struct compat_sel_arg_struct a; 1341 struct compat_sel_arg_struct a;
1342 1342
1343 if (copy_from_user(&a, arg, sizeof(a))) 1343 if (copy_from_user(&a, arg, sizeof(a)))
1344 return -EFAULT; 1344 return -EFAULT;
1345 return compat_sys_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp), 1345 return compat_sys_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp),
1346 compat_ptr(a.exp), compat_ptr(a.tvp)); 1346 compat_ptr(a.exp), compat_ptr(a.tvp));
1347 } 1347 }
1348 1348
1349 static long do_compat_pselect(int n, compat_ulong_t __user *inp, 1349 static long do_compat_pselect(int n, compat_ulong_t __user *inp,
1350 compat_ulong_t __user *outp, compat_ulong_t __user *exp, 1350 compat_ulong_t __user *outp, compat_ulong_t __user *exp,
1351 struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask, 1351 struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask,
1352 compat_size_t sigsetsize) 1352 compat_size_t sigsetsize)
1353 { 1353 {
1354 compat_sigset_t ss32; 1354 compat_sigset_t ss32;
1355 sigset_t ksigmask, sigsaved; 1355 sigset_t ksigmask, sigsaved;
1356 struct compat_timespec ts; 1356 struct compat_timespec ts;
1357 struct timespec end_time, *to = NULL; 1357 struct timespec end_time, *to = NULL;
1358 int ret; 1358 int ret;
1359 1359
1360 if (tsp) { 1360 if (tsp) {
1361 if (copy_from_user(&ts, tsp, sizeof(ts))) 1361 if (copy_from_user(&ts, tsp, sizeof(ts)))
1362 return -EFAULT; 1362 return -EFAULT;
1363 1363
1364 to = &end_time; 1364 to = &end_time;
1365 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) 1365 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
1366 return -EINVAL; 1366 return -EINVAL;
1367 } 1367 }
1368 1368
1369 if (sigmask) { 1369 if (sigmask) {
1370 if (sigsetsize != sizeof(compat_sigset_t)) 1370 if (sigsetsize != sizeof(compat_sigset_t))
1371 return -EINVAL; 1371 return -EINVAL;
1372 if (copy_from_user(&ss32, sigmask, sizeof(ss32))) 1372 if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
1373 return -EFAULT; 1373 return -EFAULT;
1374 sigset_from_compat(&ksigmask, &ss32); 1374 sigset_from_compat(&ksigmask, &ss32);
1375 1375
1376 sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); 1376 sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
1377 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 1377 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
1378 } 1378 }
1379 1379
1380 ret = compat_core_sys_select(n, inp, outp, exp, to); 1380 ret = compat_core_sys_select(n, inp, outp, exp, to);
1381 ret = poll_select_copy_remaining(&end_time, tsp, 0, ret); 1381 ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
1382 1382
1383 if (ret == -ERESTARTNOHAND) { 1383 if (ret == -ERESTARTNOHAND) {
1384 /* 1384 /*
1385 * Don't restore the signal mask yet. Let do_signal() deliver 1385 * Don't restore the signal mask yet. Let do_signal() deliver
1386 * the signal on the way back to userspace, before the signal 1386 * the signal on the way back to userspace, before the signal
1387 * mask is restored. 1387 * mask is restored.
1388 */ 1388 */
1389 if (sigmask) { 1389 if (sigmask) {
1390 memcpy(&current->saved_sigmask, &sigsaved, 1390 memcpy(&current->saved_sigmask, &sigsaved,
1391 sizeof(sigsaved)); 1391 sizeof(sigsaved));
1392 set_restore_sigmask(); 1392 set_restore_sigmask();
1393 } 1393 }
1394 } else if (sigmask) 1394 } else if (sigmask)
1395 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1395 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1396 1396
1397 return ret; 1397 return ret;
1398 } 1398 }
1399 1399
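[Editorial aside, not part of the diff] The saved_sigmask / set_restore_sigmask() handling above is what lets pselect() swap the signal mask and sleep atomically, with the signal delivered on the way back to userspace before the old mask is restored. From userspace the point is that a signal arriving just before the wait cannot be lost, which a separate sigprocmask() plus select() pair cannot guarantee. A minimal illustration (send the process SIGUSR1 while it waits to see the handler run):

    #include <signal.h>
    #include <stdio.h>
    #include <sys/select.h>
    #include <time.h>

    static volatile sig_atomic_t got_usr1;
    static void on_usr1(int sig) { (void)sig; got_usr1 = 1; }

    int main(void)
    {
        sigset_t block, orig;
        struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };

        signal(SIGUSR1, on_usr1);
        sigemptyset(&block);
        sigaddset(&block, SIGUSR1);
        sigprocmask(SIG_BLOCK, &block, &orig);  /* SIGUSR1 stays pending for now... */

        /* ...and is only unblocked for the duration of the atomic wait below. */
        pselect(0, NULL, NULL, NULL, &ts, &orig);
        printf("got_usr1=%d\n", (int)got_usr1);
        return 0;
    }
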
1400 COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp, 1400 COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp,
1401 compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, 1401 compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
1402 struct compat_timespec __user *, tsp, void __user *, sig) 1402 struct compat_timespec __user *, tsp, void __user *, sig)
1403 { 1403 {
1404 compat_size_t sigsetsize = 0; 1404 compat_size_t sigsetsize = 0;
1405 compat_uptr_t up = 0; 1405 compat_uptr_t up = 0;
1406 1406
1407 if (sig) { 1407 if (sig) {
1408 if (!access_ok(VERIFY_READ, sig, 1408 if (!access_ok(VERIFY_READ, sig,
1409 sizeof(compat_uptr_t)+sizeof(compat_size_t)) || 1409 sizeof(compat_uptr_t)+sizeof(compat_size_t)) ||
1410 __get_user(up, (compat_uptr_t __user *)sig) || 1410 __get_user(up, (compat_uptr_t __user *)sig) ||
1411 __get_user(sigsetsize, 1411 __get_user(sigsetsize,
1412 (compat_size_t __user *)(sig+sizeof(up)))) 1412 (compat_size_t __user *)(sig+sizeof(up))))
1413 return -EFAULT; 1413 return -EFAULT;
1414 } 1414 }
1415 return do_compat_pselect(n, inp, outp, exp, tsp, compat_ptr(up), 1415 return do_compat_pselect(n, inp, outp, exp, tsp, compat_ptr(up),
1416 sigsetsize); 1416 sigsetsize);
1417 } 1417 }
1418 1418
1419 COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, 1419 COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds,
1420 unsigned int, nfds, struct compat_timespec __user *, tsp, 1420 unsigned int, nfds, struct compat_timespec __user *, tsp,
1421 const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) 1421 const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize)
1422 { 1422 {
1423 compat_sigset_t ss32; 1423 compat_sigset_t ss32;
1424 sigset_t ksigmask, sigsaved; 1424 sigset_t ksigmask, sigsaved;
1425 struct compat_timespec ts; 1425 struct compat_timespec ts;
1426 struct timespec end_time, *to = NULL; 1426 struct timespec end_time, *to = NULL;
1427 int ret; 1427 int ret;
1428 1428
1429 if (tsp) { 1429 if (tsp) {
1430 if (copy_from_user(&ts, tsp, sizeof(ts))) 1430 if (copy_from_user(&ts, tsp, sizeof(ts)))
1431 return -EFAULT; 1431 return -EFAULT;
1432 1432
1433 to = &end_time; 1433 to = &end_time;
1434 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) 1434 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
1435 return -EINVAL; 1435 return -EINVAL;
1436 } 1436 }
1437 1437
1438 if (sigmask) { 1438 if (sigmask) {
1439 if (sigsetsize != sizeof(compat_sigset_t)) 1439 if (sigsetsize != sizeof(compat_sigset_t))
1440 return -EINVAL; 1440 return -EINVAL;
1441 if (copy_from_user(&ss32, sigmask, sizeof(ss32))) 1441 if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
1442 return -EFAULT; 1442 return -EFAULT;
1443 sigset_from_compat(&ksigmask, &ss32); 1443 sigset_from_compat(&ksigmask, &ss32);
1444 1444
1445 sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); 1445 sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
1446 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 1446 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
1447 } 1447 }
1448 1448
1449 ret = do_sys_poll(ufds, nfds, to); 1449 ret = do_sys_poll(ufds, nfds, to);
1450 1450
1451 /* We can restart this syscall, usually */ 1451 /* We can restart this syscall, usually */
1452 if (ret == -EINTR) { 1452 if (ret == -EINTR) {
1453 /* 1453 /*
1454 * Don't restore the signal mask yet. Let do_signal() deliver 1454 * Don't restore the signal mask yet. Let do_signal() deliver
1455 * the signal on the way back to userspace, before the signal 1455 * the signal on the way back to userspace, before the signal
1456 * mask is restored. 1456 * mask is restored.
1457 */ 1457 */
1458 if (sigmask) { 1458 if (sigmask) {
1459 memcpy(&current->saved_sigmask, &sigsaved, 1459 memcpy(&current->saved_sigmask, &sigsaved,
1460 sizeof(sigsaved)); 1460 sizeof(sigsaved));
1461 set_restore_sigmask(); 1461 set_restore_sigmask();
1462 } 1462 }
1463 ret = -ERESTARTNOHAND; 1463 ret = -ERESTARTNOHAND;
1464 } else if (sigmask) 1464 } else if (sigmask)
1465 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1465 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1466 1466
1467 ret = poll_select_copy_remaining(&end_time, tsp, 0, ret); 1467 ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
1468 1468
1469 return ret; 1469 return ret;
1470 } 1470 }
1471 1471
1472 #ifdef CONFIG_FHANDLE 1472 #ifdef CONFIG_FHANDLE
1473 /* 1473 /*
1474 * Exactly like fs/open.c:sys_open_by_handle_at(), except that it 1474 * Exactly like fs/open.c:sys_open_by_handle_at(), except that it
1475 * doesn't set the O_LARGEFILE flag. 1475 * doesn't set the O_LARGEFILE flag.
1476 */ 1476 */
1477 COMPAT_SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd, 1477 COMPAT_SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd,
1478 struct file_handle __user *, handle, int, flags) 1478 struct file_handle __user *, handle, int, flags)
1479 { 1479 {
1480 return do_handle_open(mountdirfd, handle, flags); 1480 return do_handle_open(mountdirfd, handle, flags);
1481 } 1481 }
1482 #endif 1482 #endif
1483 1483
fs/fcntl.c
1 /* 1 /*
2 * linux/fs/fcntl.c 2 * linux/fs/fcntl.c
3 * 3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */ 5 */
6 6
7 #include <linux/syscalls.h> 7 #include <linux/syscalls.h>
8 #include <linux/init.h> 8 #include <linux/init.h>
9 #include <linux/mm.h> 9 #include <linux/mm.h>
10 #include <linux/fs.h> 10 #include <linux/fs.h>
11 #include <linux/file.h> 11 #include <linux/file.h>
12 #include <linux/fdtable.h> 12 #include <linux/fdtable.h>
13 #include <linux/capability.h> 13 #include <linux/capability.h>
14 #include <linux/dnotify.h> 14 #include <linux/dnotify.h>
15 #include <linux/slab.h> 15 #include <linux/slab.h>
16 #include <linux/module.h> 16 #include <linux/module.h>
17 #include <linux/pipe_fs_i.h> 17 #include <linux/pipe_fs_i.h>
18 #include <linux/security.h> 18 #include <linux/security.h>
19 #include <linux/ptrace.h> 19 #include <linux/ptrace.h>
20 #include <linux/signal.h> 20 #include <linux/signal.h>
21 #include <linux/rcupdate.h> 21 #include <linux/rcupdate.h>
22 #include <linux/pid_namespace.h> 22 #include <linux/pid_namespace.h>
23 #include <linux/user_namespace.h> 23 #include <linux/user_namespace.h>
24 24
25 #include <asm/poll.h> 25 #include <asm/poll.h>
26 #include <asm/siginfo.h> 26 #include <asm/siginfo.h>
27 #include <asm/uaccess.h> 27 #include <asm/uaccess.h>
28 28
29 #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME) 29 #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
30 30
31 static int setfl(int fd, struct file * filp, unsigned long arg) 31 static int setfl(int fd, struct file * filp, unsigned long arg)
32 { 32 {
33 struct inode * inode = file_inode(filp); 33 struct inode * inode = file_inode(filp);
34 int error = 0; 34 int error = 0;
35 35
36 /* 36 /*
37 * O_APPEND cannot be cleared if the file is marked as append-only 37 * O_APPEND cannot be cleared if the file is marked as append-only
38 * and the file is open for write. 38 * and the file is open for write.
39 */ 39 */
40 if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode)) 40 if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode))
41 return -EPERM; 41 return -EPERM;
42 42
43 /* O_NOATIME can only be set by the owner or superuser */ 43 /* O_NOATIME can only be set by the owner or superuser */
44 if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME)) 44 if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
45 if (!inode_owner_or_capable(inode)) 45 if (!inode_owner_or_capable(inode))
46 return -EPERM; 46 return -EPERM;
47 47
48 /* required for strict SunOS emulation */ 48 /* required for strict SunOS emulation */
49 if (O_NONBLOCK != O_NDELAY) 49 if (O_NONBLOCK != O_NDELAY)
50 if (arg & O_NDELAY) 50 if (arg & O_NDELAY)
51 arg |= O_NONBLOCK; 51 arg |= O_NONBLOCK;
52 52
53 if (arg & O_DIRECT) { 53 if (arg & O_DIRECT) {
54 if (!filp->f_mapping || !filp->f_mapping->a_ops || 54 if (!filp->f_mapping || !filp->f_mapping->a_ops ||
55 !filp->f_mapping->a_ops->direct_IO) 55 !filp->f_mapping->a_ops->direct_IO)
56 return -EINVAL; 56 return -EINVAL;
57 } 57 }
58 58
59 if (filp->f_op->check_flags) 59 if (filp->f_op->check_flags)
60 error = filp->f_op->check_flags(arg); 60 error = filp->f_op->check_flags(arg);
61 if (error) 61 if (error)
62 return error; 62 return error;
63 63
64 /* 64 /*
65 * ->fasync() is responsible for setting the FASYNC bit. 65 * ->fasync() is responsible for setting the FASYNC bit.
66 */ 66 */
67 if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) { 67 if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) {
68 error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); 68 error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
69 if (error < 0) 69 if (error < 0)
70 goto out; 70 goto out;
71 if (error > 0) 71 if (error > 0)
72 error = 0; 72 error = 0;
73 } 73 }
74 spin_lock(&filp->f_lock); 74 spin_lock(&filp->f_lock);
75 filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK); 75 filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
76 spin_unlock(&filp->f_lock); 76 spin_unlock(&filp->f_lock);
77 77
78 out: 78 out:
79 return error; 79 return error;
80 } 80 }
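
setfl() above only lets F_SETFL change the status flags in SETFL_MASK (append, nonblock, direct I/O, noatime); other bits in the argument are simply ignored. A minimal userspace sketch of the usual read-modify-write pattern for O_NONBLOCK — the helper name set_nonblock is hypothetical:

#include <fcntl.h>

/* Toggle O_NONBLOCK on fd; returns 0 on success, -1 with errno set. */
static int set_nonblock(int fd, int on)
{
	int flags = fcntl(fd, F_GETFL, 0);

	if (flags < 0)
		return -1;
	flags = on ? (flags | O_NONBLOCK) : (flags & ~O_NONBLOCK);
	return fcntl(fd, F_SETFL, flags) < 0 ? -1 : 0;
}
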
81 81
82 static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, 82 static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
83 int force) 83 int force)
84 { 84 {
85 write_lock_irq(&filp->f_owner.lock); 85 write_lock_irq(&filp->f_owner.lock);
86 if (force || !filp->f_owner.pid) { 86 if (force || !filp->f_owner.pid) {
87 put_pid(filp->f_owner.pid); 87 put_pid(filp->f_owner.pid);
88 filp->f_owner.pid = get_pid(pid); 88 filp->f_owner.pid = get_pid(pid);
89 filp->f_owner.pid_type = type; 89 filp->f_owner.pid_type = type;
90 90
91 if (pid) { 91 if (pid) {
92 const struct cred *cred = current_cred(); 92 const struct cred *cred = current_cred();
93 filp->f_owner.uid = cred->uid; 93 filp->f_owner.uid = cred->uid;
94 filp->f_owner.euid = cred->euid; 94 filp->f_owner.euid = cred->euid;
95 } 95 }
96 } 96 }
97 write_unlock_irq(&filp->f_owner.lock); 97 write_unlock_irq(&filp->f_owner.lock);
98 } 98 }
99 99
100 int __f_setown(struct file *filp, struct pid *pid, enum pid_type type, 100 int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
101 int force) 101 int force)
102 { 102 {
103 int err; 103 int err;
104 104
105 err = security_file_set_fowner(filp); 105 err = security_file_set_fowner(filp);
106 if (err) 106 if (err)
107 return err; 107 return err;
108 108
109 f_modown(filp, pid, type, force); 109 f_modown(filp, pid, type, force);
110 return 0; 110 return 0;
111 } 111 }
112 EXPORT_SYMBOL(__f_setown); 112 EXPORT_SYMBOL(__f_setown);
113 113
114 int f_setown(struct file *filp, unsigned long arg, int force) 114 int f_setown(struct file *filp, unsigned long arg, int force)
115 { 115 {
116 enum pid_type type; 116 enum pid_type type;
117 struct pid *pid; 117 struct pid *pid;
118 int who = arg; 118 int who = arg;
119 int result; 119 int result;
120 type = PIDTYPE_PID; 120 type = PIDTYPE_PID;
121 if (who < 0) { 121 if (who < 0) {
122 type = PIDTYPE_PGID; 122 type = PIDTYPE_PGID;
123 who = -who; 123 who = -who;
124 } 124 }
125 rcu_read_lock(); 125 rcu_read_lock();
126 pid = find_vpid(who); 126 pid = find_vpid(who);
127 result = __f_setown(filp, pid, type, force); 127 result = __f_setown(filp, pid, type, force);
128 rcu_read_unlock(); 128 rcu_read_unlock();
129 return result; 129 return result;
130 } 130 }
131 EXPORT_SYMBOL(f_setown); 131 EXPORT_SYMBOL(f_setown);
132 132
133 void f_delown(struct file *filp) 133 void f_delown(struct file *filp)
134 { 134 {
135 f_modown(filp, NULL, PIDTYPE_PID, 1); 135 f_modown(filp, NULL, PIDTYPE_PID, 1);
136 } 136 }
137 137
138 pid_t f_getown(struct file *filp) 138 pid_t f_getown(struct file *filp)
139 { 139 {
140 pid_t pid; 140 pid_t pid;
141 read_lock(&filp->f_owner.lock); 141 read_lock(&filp->f_owner.lock);
142 pid = pid_vnr(filp->f_owner.pid); 142 pid = pid_vnr(filp->f_owner.pid);
143 if (filp->f_owner.pid_type == PIDTYPE_PGID) 143 if (filp->f_owner.pid_type == PIDTYPE_PGID)
144 pid = -pid; 144 pid = -pid;
145 read_unlock(&filp->f_owner.lock); 145 read_unlock(&filp->f_owner.lock);
146 return pid; 146 return pid;
147 } 147 }
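
f_setown() treats a negative argument as a process-group id, and f_getown() hands it back negated; a small userspace sketch (hypothetical helper name) that directs SIGIO/SIGURG for a descriptor to the caller's whole process group:

#include <fcntl.h>
#include <unistd.h>

/* Deliver I/O signals for fd to every member of our process group. */
static int own_fd_in_pgrp(int fd)
{
	return fcntl(fd, F_SETOWN, -getpgrp());
}
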
148 148
149 static int f_setown_ex(struct file *filp, unsigned long arg) 149 static int f_setown_ex(struct file *filp, unsigned long arg)
150 { 150 {
151 struct f_owner_ex __user *owner_p = (void __user *)arg; 151 struct f_owner_ex __user *owner_p = (void __user *)arg;
152 struct f_owner_ex owner; 152 struct f_owner_ex owner;
153 struct pid *pid; 153 struct pid *pid;
154 int type; 154 int type;
155 int ret; 155 int ret;
156 156
157 ret = copy_from_user(&owner, owner_p, sizeof(owner)); 157 ret = copy_from_user(&owner, owner_p, sizeof(owner));
158 if (ret) 158 if (ret)
159 return -EFAULT; 159 return -EFAULT;
160 160
161 switch (owner.type) { 161 switch (owner.type) {
162 case F_OWNER_TID: 162 case F_OWNER_TID:
163 type = PIDTYPE_MAX; 163 type = PIDTYPE_MAX;
164 break; 164 break;
165 165
166 case F_OWNER_PID: 166 case F_OWNER_PID:
167 type = PIDTYPE_PID; 167 type = PIDTYPE_PID;
168 break; 168 break;
169 169
170 case F_OWNER_PGRP: 170 case F_OWNER_PGRP:
171 type = PIDTYPE_PGID; 171 type = PIDTYPE_PGID;
172 break; 172 break;
173 173
174 default: 174 default:
175 return -EINVAL; 175 return -EINVAL;
176 } 176 }
177 177
178 rcu_read_lock(); 178 rcu_read_lock();
179 pid = find_vpid(owner.pid); 179 pid = find_vpid(owner.pid);
180 if (owner.pid && !pid) 180 if (owner.pid && !pid)
181 ret = -ESRCH; 181 ret = -ESRCH;
182 else 182 else
183 ret = __f_setown(filp, pid, type, 1); 183 ret = __f_setown(filp, pid, type, 1);
184 rcu_read_unlock(); 184 rcu_read_unlock();
185 185
186 return ret; 186 return ret;
187 } 187 }
188 188
189 static int f_getown_ex(struct file *filp, unsigned long arg) 189 static int f_getown_ex(struct file *filp, unsigned long arg)
190 { 190 {
191 struct f_owner_ex __user *owner_p = (void __user *)arg; 191 struct f_owner_ex __user *owner_p = (void __user *)arg;
192 struct f_owner_ex owner; 192 struct f_owner_ex owner;
193 int ret = 0; 193 int ret = 0;
194 194
195 read_lock(&filp->f_owner.lock); 195 read_lock(&filp->f_owner.lock);
196 owner.pid = pid_vnr(filp->f_owner.pid); 196 owner.pid = pid_vnr(filp->f_owner.pid);
197 switch (filp->f_owner.pid_type) { 197 switch (filp->f_owner.pid_type) {
198 case PIDTYPE_MAX: 198 case PIDTYPE_MAX:
199 owner.type = F_OWNER_TID; 199 owner.type = F_OWNER_TID;
200 break; 200 break;
201 201
202 case PIDTYPE_PID: 202 case PIDTYPE_PID:
203 owner.type = F_OWNER_PID; 203 owner.type = F_OWNER_PID;
204 break; 204 break;
205 205
206 case PIDTYPE_PGID: 206 case PIDTYPE_PGID:
207 owner.type = F_OWNER_PGRP; 207 owner.type = F_OWNER_PGRP;
208 break; 208 break;
209 209
210 default: 210 default:
211 WARN_ON(1); 211 WARN_ON(1);
212 ret = -EINVAL; 212 ret = -EINVAL;
213 break; 213 break;
214 } 214 }
215 read_unlock(&filp->f_owner.lock); 215 read_unlock(&filp->f_owner.lock);
216 216
217 if (!ret) { 217 if (!ret) {
218 ret = copy_to_user(owner_p, &owner, sizeof(owner)); 218 ret = copy_to_user(owner_p, &owner, sizeof(owner));
219 if (ret) 219 if (ret)
220 ret = -EFAULT; 220 ret = -EFAULT;
221 } 221 }
222 return ret; 222 return ret;
223 } 223 }
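
F_SETOWN_EX, handled by f_setown_ex() above, can also name a single thread as the signal target via F_OWNER_TID (stored internally as PIDTYPE_MAX). A userspace sketch, assuming _GNU_SOURCE for struct f_owner_ex and the raw gettid syscall; the helper name is hypothetical:

#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>
#include <sys/syscall.h>

/* Route SIGIO for fd to the calling thread only, not the whole process. */
static int own_fd_in_this_thread(int fd)
{
	struct f_owner_ex owner = {
		.type = F_OWNER_TID,
		.pid  = (pid_t)syscall(SYS_gettid),
	};

	return fcntl(fd, F_SETOWN_EX, &owner);
}
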
224 224
225 #ifdef CONFIG_CHECKPOINT_RESTORE 225 #ifdef CONFIG_CHECKPOINT_RESTORE
226 static int f_getowner_uids(struct file *filp, unsigned long arg) 226 static int f_getowner_uids(struct file *filp, unsigned long arg)
227 { 227 {
228 struct user_namespace *user_ns = current_user_ns(); 228 struct user_namespace *user_ns = current_user_ns();
229 uid_t __user *dst = (void __user *)arg; 229 uid_t __user *dst = (void __user *)arg;
230 uid_t src[2]; 230 uid_t src[2];
231 int err; 231 int err;
232 232
233 read_lock(&filp->f_owner.lock); 233 read_lock(&filp->f_owner.lock);
234 src[0] = from_kuid(user_ns, filp->f_owner.uid); 234 src[0] = from_kuid(user_ns, filp->f_owner.uid);
235 src[1] = from_kuid(user_ns, filp->f_owner.euid); 235 src[1] = from_kuid(user_ns, filp->f_owner.euid);
236 read_unlock(&filp->f_owner.lock); 236 read_unlock(&filp->f_owner.lock);
237 237
238 err = put_user(src[0], &dst[0]); 238 err = put_user(src[0], &dst[0]);
239 err |= put_user(src[1], &dst[1]); 239 err |= put_user(src[1], &dst[1]);
240 240
241 return err; 241 return err;
242 } 242 }
243 #else 243 #else
244 static int f_getowner_uids(struct file *filp, unsigned long arg) 244 static int f_getowner_uids(struct file *filp, unsigned long arg)
245 { 245 {
246 return -EINVAL; 246 return -EINVAL;
247 } 247 }
248 #endif 248 #endif
249 249
250 static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, 250 static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
251 struct file *filp) 251 struct file *filp)
252 { 252 {
253 long err = -EINVAL; 253 long err = -EINVAL;
254 254
255 switch (cmd) { 255 switch (cmd) {
256 case F_DUPFD: 256 case F_DUPFD:
257 err = f_dupfd(arg, filp, 0); 257 err = f_dupfd(arg, filp, 0);
258 break; 258 break;
259 case F_DUPFD_CLOEXEC: 259 case F_DUPFD_CLOEXEC:
260 err = f_dupfd(arg, filp, O_CLOEXEC); 260 err = f_dupfd(arg, filp, O_CLOEXEC);
261 break; 261 break;
262 case F_GETFD: 262 case F_GETFD:
263 err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; 263 err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
264 break; 264 break;
265 case F_SETFD: 265 case F_SETFD:
266 err = 0; 266 err = 0;
267 set_close_on_exec(fd, arg & FD_CLOEXEC); 267 set_close_on_exec(fd, arg & FD_CLOEXEC);
268 break; 268 break;
269 case F_GETFL: 269 case F_GETFL:
270 err = filp->f_flags; 270 err = filp->f_flags;
271 break; 271 break;
272 case F_SETFL: 272 case F_SETFL:
273 err = setfl(fd, filp, arg); 273 err = setfl(fd, filp, arg);
274 break; 274 break;
275 #if BITS_PER_LONG != 32 275 #if BITS_PER_LONG != 32
276 /* 32-bit arches must use fcntl64() */ 276 /* 32-bit arches must use fcntl64() */
277 case F_GETLKP: 277 case F_OFD_GETLK:
278 #endif 278 #endif
279 case F_GETLK: 279 case F_GETLK:
280 err = fcntl_getlk(filp, cmd, (struct flock __user *) arg); 280 err = fcntl_getlk(filp, cmd, (struct flock __user *) arg);
281 break; 281 break;
282 #if BITS_PER_LONG != 32 282 #if BITS_PER_LONG != 32
283 /* 32-bit arches must use fcntl64() */ 283 /* 32-bit arches must use fcntl64() */
284 case F_SETLKP: 284 case F_OFD_SETLK:
285 case F_SETLKPW: 285 case F_OFD_SETLKW:
286 #endif 286 #endif
287 /* Fallthrough */ 287 /* Fallthrough */
288 case F_SETLK: 288 case F_SETLK:
289 case F_SETLKW: 289 case F_SETLKW:
290 err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg); 290 err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg);
291 break; 291 break;
292 case F_GETOWN: 292 case F_GETOWN:
293 /* 293 /*
294 * XXX If f_owner is a process group, the 294 * XXX If f_owner is a process group, the
295 * negative return value will get converted 295 * negative return value will get converted
296 * into an error. Oops. If we keep the 296 * into an error. Oops. If we keep the
297 * current syscall conventions, the only way 297 * current syscall conventions, the only way
298 * to fix this will be in libc. 298 * to fix this will be in libc.
299 */ 299 */
300 err = f_getown(filp); 300 err = f_getown(filp);
301 force_successful_syscall_return(); 301 force_successful_syscall_return();
302 break; 302 break;
303 case F_SETOWN: 303 case F_SETOWN:
304 err = f_setown(filp, arg, 1); 304 err = f_setown(filp, arg, 1);
305 break; 305 break;
306 case F_GETOWN_EX: 306 case F_GETOWN_EX:
307 err = f_getown_ex(filp, arg); 307 err = f_getown_ex(filp, arg);
308 break; 308 break;
309 case F_SETOWN_EX: 309 case F_SETOWN_EX:
310 err = f_setown_ex(filp, arg); 310 err = f_setown_ex(filp, arg);
311 break; 311 break;
312 case F_GETOWNER_UIDS: 312 case F_GETOWNER_UIDS:
313 err = f_getowner_uids(filp, arg); 313 err = f_getowner_uids(filp, arg);
314 break; 314 break;
315 case F_GETSIG: 315 case F_GETSIG:
316 err = filp->f_owner.signum; 316 err = filp->f_owner.signum;
317 break; 317 break;
318 case F_SETSIG: 318 case F_SETSIG:
319 /* arg == 0 restores default behaviour. */ 319 /* arg == 0 restores default behaviour. */
320 if (!valid_signal(arg)) { 320 if (!valid_signal(arg)) {
321 break; 321 break;
322 } 322 }
323 err = 0; 323 err = 0;
324 filp->f_owner.signum = arg; 324 filp->f_owner.signum = arg;
325 break; 325 break;
326 case F_GETLEASE: 326 case F_GETLEASE:
327 err = fcntl_getlease(filp); 327 err = fcntl_getlease(filp);
328 break; 328 break;
329 case F_SETLEASE: 329 case F_SETLEASE:
330 err = fcntl_setlease(fd, filp, arg); 330 err = fcntl_setlease(fd, filp, arg);
331 break; 331 break;
332 case F_NOTIFY: 332 case F_NOTIFY:
333 err = fcntl_dirnotify(fd, filp, arg); 333 err = fcntl_dirnotify(fd, filp, arg);
334 break; 334 break;
335 case F_SETPIPE_SZ: 335 case F_SETPIPE_SZ:
336 case F_GETPIPE_SZ: 336 case F_GETPIPE_SZ:
337 err = pipe_fcntl(filp, cmd, arg); 337 err = pipe_fcntl(filp, cmd, arg);
338 break; 338 break;
339 default: 339 default:
340 break; 340 break;
341 } 341 }
342 return err; 342 return err;
343 } 343 }
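
The F_OFD_* commands renamed by this series are reached through do_fcntl() on 64-bit, and only through fcntl64() on 32-bit arches (see below). A hedged userspace sketch of taking a whole-file open-file-description read lock, assuming a C library that already exposes the F_OFD_* names (e.g. with _GNU_SOURCE) on a v3.15+ kernel:

#define _GNU_SOURCE
#include <fcntl.h>

/* Take an OFD read lock on the whole file. The lock is owned by the open
 * file description rather than the process, and l_pid must be zero for
 * the OFD commands. */
static int ofd_read_lock(int fd)
{
	struct flock fl = {
		.l_type   = F_RDLCK,
		.l_whence = SEEK_SET,
		.l_start  = 0,
		.l_len    = 0,	/* 0 == to end of file */
		.l_pid    = 0,
	};

	return fcntl(fd, F_OFD_SETLK, &fl);
}
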
344 344
345 static int check_fcntl_cmd(unsigned cmd) 345 static int check_fcntl_cmd(unsigned cmd)
346 { 346 {
347 switch (cmd) { 347 switch (cmd) {
348 case F_DUPFD: 348 case F_DUPFD:
349 case F_DUPFD_CLOEXEC: 349 case F_DUPFD_CLOEXEC:
350 case F_GETFD: 350 case F_GETFD:
351 case F_SETFD: 351 case F_SETFD:
352 case F_GETFL: 352 case F_GETFL:
353 return 1; 353 return 1;
354 } 354 }
355 return 0; 355 return 0;
356 } 356 }
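
check_fcntl_cmd() whitelists the only commands that sys_fcntl() will run on an O_PATH descriptor (FMODE_PATH); everything else, including the locking commands, fails with EBADF. A small userspace sketch of that behaviour — the path used is just an example and assumed to exist:

#define _GNU_SOURCE
#include <fcntl.h>
#include <errno.h>
#include <stdio.h>

int main(void)
{
	struct flock fl = { .l_type = F_RDLCK, .l_whence = SEEK_SET };
	int fd = open("/etc/hostname", O_PATH | O_CLOEXEC);

	if (fd < 0)
		return 1;
	printf("F_GETFL -> %d\n", fcntl(fd, F_GETFL));	/* allowed */
	printf("F_GETLK -> %d (errno %d)\n",
	       fcntl(fd, F_GETLK, &fl), errno);		/* -1, EBADF */
	return 0;
}
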
357 357
358 SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) 358 SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
359 { 359 {
360 struct fd f = fdget_raw(fd); 360 struct fd f = fdget_raw(fd);
361 long err = -EBADF; 361 long err = -EBADF;
362 362
363 if (!f.file) 363 if (!f.file)
364 goto out; 364 goto out;
365 365
366 if (unlikely(f.file->f_mode & FMODE_PATH)) { 366 if (unlikely(f.file->f_mode & FMODE_PATH)) {
367 if (!check_fcntl_cmd(cmd)) 367 if (!check_fcntl_cmd(cmd))
368 goto out1; 368 goto out1;
369 } 369 }
370 370
371 err = security_file_fcntl(f.file, cmd, arg); 371 err = security_file_fcntl(f.file, cmd, arg);
372 if (!err) 372 if (!err)
373 err = do_fcntl(fd, cmd, arg, f.file); 373 err = do_fcntl(fd, cmd, arg, f.file);
374 374
375 out1: 375 out1:
376 fdput(f); 376 fdput(f);
377 out: 377 out:
378 return err; 378 return err;
379 } 379 }
380 380
381 #if BITS_PER_LONG == 32 381 #if BITS_PER_LONG == 32
382 SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, 382 SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
383 unsigned long, arg) 383 unsigned long, arg)
384 { 384 {
385 struct fd f = fdget_raw(fd); 385 struct fd f = fdget_raw(fd);
386 long err = -EBADF; 386 long err = -EBADF;
387 387
388 if (!f.file) 388 if (!f.file)
389 goto out; 389 goto out;
390 390
391 if (unlikely(f.file->f_mode & FMODE_PATH)) { 391 if (unlikely(f.file->f_mode & FMODE_PATH)) {
392 if (!check_fcntl_cmd(cmd)) 392 if (!check_fcntl_cmd(cmd))
393 goto out1; 393 goto out1;
394 } 394 }
395 395
396 err = security_file_fcntl(f.file, cmd, arg); 396 err = security_file_fcntl(f.file, cmd, arg);
397 if (err) 397 if (err)
398 goto out1; 398 goto out1;
399 399
400 switch (cmd) { 400 switch (cmd) {
401 case F_GETLK64: 401 case F_GETLK64:
402 case F_GETLKP: 402 case F_OFD_GETLK:
403 err = fcntl_getlk64(f.file, cmd, (struct flock64 __user *) arg); 403 err = fcntl_getlk64(f.file, cmd, (struct flock64 __user *) arg);
404 break; 404 break;
405 case F_SETLK64: 405 case F_SETLK64:
406 case F_SETLKW64: 406 case F_SETLKW64:
407 case F_SETLKP: 407 case F_OFD_SETLK:
408 case F_SETLKPW: 408 case F_OFD_SETLKW:
409 err = fcntl_setlk64(fd, f.file, cmd, 409 err = fcntl_setlk64(fd, f.file, cmd,
410 (struct flock64 __user *) arg); 410 (struct flock64 __user *) arg);
411 break; 411 break;
412 default: 412 default:
413 err = do_fcntl(fd, cmd, arg, f.file); 413 err = do_fcntl(fd, cmd, arg, f.file);
414 break; 414 break;
415 } 415 }
416 out1: 416 out1:
417 fdput(f); 417 fdput(f);
418 out: 418 out:
419 return err; 419 return err;
420 } 420 }
421 #endif 421 #endif
422 422
423 /* Table to convert sigio signal codes into poll band bitmaps */ 423 /* Table to convert sigio signal codes into poll band bitmaps */
424 424
425 static const long band_table[NSIGPOLL] = { 425 static const long band_table[NSIGPOLL] = {
426 POLLIN | POLLRDNORM, /* POLL_IN */ 426 POLLIN | POLLRDNORM, /* POLL_IN */
427 POLLOUT | POLLWRNORM | POLLWRBAND, /* POLL_OUT */ 427 POLLOUT | POLLWRNORM | POLLWRBAND, /* POLL_OUT */
428 POLLIN | POLLRDNORM | POLLMSG, /* POLL_MSG */ 428 POLLIN | POLLRDNORM | POLLMSG, /* POLL_MSG */
429 POLLERR, /* POLL_ERR */ 429 POLLERR, /* POLL_ERR */
430 POLLPRI | POLLRDBAND, /* POLL_PRI */ 430 POLLPRI | POLLRDBAND, /* POLL_PRI */
431 POLLHUP | POLLERR /* POLL_HUP */ 431 POLLHUP | POLLERR /* POLL_HUP */
432 }; 432 };
433 433
434 static inline int sigio_perm(struct task_struct *p, 434 static inline int sigio_perm(struct task_struct *p,
435 struct fown_struct *fown, int sig) 435 struct fown_struct *fown, int sig)
436 { 436 {
437 const struct cred *cred; 437 const struct cred *cred;
438 int ret; 438 int ret;
439 439
440 rcu_read_lock(); 440 rcu_read_lock();
441 cred = __task_cred(p); 441 cred = __task_cred(p);
442 ret = ((uid_eq(fown->euid, GLOBAL_ROOT_UID) || 442 ret = ((uid_eq(fown->euid, GLOBAL_ROOT_UID) ||
443 uid_eq(fown->euid, cred->suid) || uid_eq(fown->euid, cred->uid) || 443 uid_eq(fown->euid, cred->suid) || uid_eq(fown->euid, cred->uid) ||
444 uid_eq(fown->uid, cred->suid) || uid_eq(fown->uid, cred->uid)) && 444 uid_eq(fown->uid, cred->suid) || uid_eq(fown->uid, cred->uid)) &&
445 !security_file_send_sigiotask(p, fown, sig)); 445 !security_file_send_sigiotask(p, fown, sig));
446 rcu_read_unlock(); 446 rcu_read_unlock();
447 return ret; 447 return ret;
448 } 448 }
449 449
450 static void send_sigio_to_task(struct task_struct *p, 450 static void send_sigio_to_task(struct task_struct *p,
451 struct fown_struct *fown, 451 struct fown_struct *fown,
452 int fd, int reason, int group) 452 int fd, int reason, int group)
453 { 453 {
454 /* 454 /*
455 * F_SETSIG can change ->signum lockless in parallel, make 455 * F_SETSIG can change ->signum lockless in parallel, make
456 * sure we read it once and use the same value throughout. 456 * sure we read it once and use the same value throughout.
457 */ 457 */
458 int signum = ACCESS_ONCE(fown->signum); 458 int signum = ACCESS_ONCE(fown->signum);
459 459
460 if (!sigio_perm(p, fown, signum)) 460 if (!sigio_perm(p, fown, signum))
461 return; 461 return;
462 462
463 switch (signum) { 463 switch (signum) {
464 siginfo_t si; 464 siginfo_t si;
465 default: 465 default:
466 /* Queue a rt signal with the appropriate fd as its 466 /* Queue a rt signal with the appropriate fd as its
467 value. We use SI_SIGIO as the source, not 467 value. We use SI_SIGIO as the source, not
468 SI_KERNEL, since kernel signals always get 468 SI_KERNEL, since kernel signals always get
469 delivered even if we can't queue. Failure to 469 delivered even if we can't queue. Failure to
470 queue in this case _should_ be reported; we fall 470 queue in this case _should_ be reported; we fall
471 back to SIGIO in that case. --sct */ 471 back to SIGIO in that case. --sct */
472 si.si_signo = signum; 472 si.si_signo = signum;
473 si.si_errno = 0; 473 si.si_errno = 0;
474 si.si_code = reason; 474 si.si_code = reason;
475 /* Make sure we are called with one of the POLL_* 475 /* Make sure we are called with one of the POLL_*
476 reasons, otherwise we could leak kernel stack into 476 reasons, otherwise we could leak kernel stack into
477 userspace. */ 477 userspace. */
478 BUG_ON((reason & __SI_MASK) != __SI_POLL); 478 BUG_ON((reason & __SI_MASK) != __SI_POLL);
479 if (reason - POLL_IN >= NSIGPOLL) 479 if (reason - POLL_IN >= NSIGPOLL)
480 si.si_band = ~0L; 480 si.si_band = ~0L;
481 else 481 else
482 si.si_band = band_table[reason - POLL_IN]; 482 si.si_band = band_table[reason - POLL_IN];
483 si.si_fd = fd; 483 si.si_fd = fd;
484 if (!do_send_sig_info(signum, &si, p, group)) 484 if (!do_send_sig_info(signum, &si, p, group))
485 break; 485 break;
486 /* fall-through: fall back on the old plain SIGIO signal */ 486 /* fall-through: fall back on the old plain SIGIO signal */
487 case 0: 487 case 0:
488 do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, group); 488 do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, group);
489 } 489 }
490 } 490 }
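
send_sigio_to_task() queues the F_SETSIG-selected signal with si_fd and si_band filled in from band_table, so one handler can tell which descriptor became ready. A userspace sketch of signal-driven I/O built on that, using a queued realtime signal; the helper names are hypothetical and _GNU_SOURCE is assumed for F_SETSIG/O_ASYNC:

#define _GNU_SOURCE
#include <fcntl.h>
#include <signal.h>
#include <string.h>
#include <unistd.h>

static void on_io(int sig, siginfo_t *si, void *ctx)
{
	/* si->si_fd: which descriptor; si->si_band: POLL* bits from band_table */
	(void)sig; (void)si; (void)ctx;
}

static int arm_async_io(int fd, int signo)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = on_io;
	sa.sa_flags = SA_SIGINFO;
	sigemptyset(&sa.sa_mask);
	if (sigaction(signo, &sa, NULL) < 0)
		return -1;
	if (fcntl(fd, F_SETOWN, getpid()) < 0)
		return -1;
	if (fcntl(fd, F_SETSIG, signo) < 0)	/* e.g. SIGRTMIN */
		return -1;
	return fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_ASYNC);
}
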
491 491
492 void send_sigio(struct fown_struct *fown, int fd, int band) 492 void send_sigio(struct fown_struct *fown, int fd, int band)
493 { 493 {
494 struct task_struct *p; 494 struct task_struct *p;
495 enum pid_type type; 495 enum pid_type type;
496 struct pid *pid; 496 struct pid *pid;
497 int group = 1; 497 int group = 1;
498 498
499 read_lock(&fown->lock); 499 read_lock(&fown->lock);
500 500
501 type = fown->pid_type; 501 type = fown->pid_type;
502 if (type == PIDTYPE_MAX) { 502 if (type == PIDTYPE_MAX) {
503 group = 0; 503 group = 0;
504 type = PIDTYPE_PID; 504 type = PIDTYPE_PID;
505 } 505 }
506 506
507 pid = fown->pid; 507 pid = fown->pid;
508 if (!pid) 508 if (!pid)
509 goto out_unlock_fown; 509 goto out_unlock_fown;
510 510
511 read_lock(&tasklist_lock); 511 read_lock(&tasklist_lock);
512 do_each_pid_task(pid, type, p) { 512 do_each_pid_task(pid, type, p) {
513 send_sigio_to_task(p, fown, fd, band, group); 513 send_sigio_to_task(p, fown, fd, band, group);
514 } while_each_pid_task(pid, type, p); 514 } while_each_pid_task(pid, type, p);
515 read_unlock(&tasklist_lock); 515 read_unlock(&tasklist_lock);
516 out_unlock_fown: 516 out_unlock_fown:
517 read_unlock(&fown->lock); 517 read_unlock(&fown->lock);
518 } 518 }
519 519
520 static void send_sigurg_to_task(struct task_struct *p, 520 static void send_sigurg_to_task(struct task_struct *p,
521 struct fown_struct *fown, int group) 521 struct fown_struct *fown, int group)
522 { 522 {
523 if (sigio_perm(p, fown, SIGURG)) 523 if (sigio_perm(p, fown, SIGURG))
524 do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, group); 524 do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, group);
525 } 525 }
526 526
527 int send_sigurg(struct fown_struct *fown) 527 int send_sigurg(struct fown_struct *fown)
528 { 528 {
529 struct task_struct *p; 529 struct task_struct *p;
530 enum pid_type type; 530 enum pid_type type;
531 struct pid *pid; 531 struct pid *pid;
532 int group = 1; 532 int group = 1;
533 int ret = 0; 533 int ret = 0;
534 534
535 read_lock(&fown->lock); 535 read_lock(&fown->lock);
536 536
537 type = fown->pid_type; 537 type = fown->pid_type;
538 if (type == PIDTYPE_MAX) { 538 if (type == PIDTYPE_MAX) {
539 group = 0; 539 group = 0;
540 type = PIDTYPE_PID; 540 type = PIDTYPE_PID;
541 } 541 }
542 542
543 pid = fown->pid; 543 pid = fown->pid;
544 if (!pid) 544 if (!pid)
545 goto out_unlock_fown; 545 goto out_unlock_fown;
546 546
547 ret = 1; 547 ret = 1;
548 548
549 read_lock(&tasklist_lock); 549 read_lock(&tasklist_lock);
550 do_each_pid_task(pid, type, p) { 550 do_each_pid_task(pid, type, p) {
551 send_sigurg_to_task(p, fown, group); 551 send_sigurg_to_task(p, fown, group);
552 } while_each_pid_task(pid, type, p); 552 } while_each_pid_task(pid, type, p);
553 read_unlock(&tasklist_lock); 553 read_unlock(&tasklist_lock);
554 out_unlock_fown: 554 out_unlock_fown:
555 read_unlock(&fown->lock); 555 read_unlock(&fown->lock);
556 return ret; 556 return ret;
557 } 557 }
558 558
559 static DEFINE_SPINLOCK(fasync_lock); 559 static DEFINE_SPINLOCK(fasync_lock);
560 static struct kmem_cache *fasync_cache __read_mostly; 560 static struct kmem_cache *fasync_cache __read_mostly;
561 561
562 static void fasync_free_rcu(struct rcu_head *head) 562 static void fasync_free_rcu(struct rcu_head *head)
563 { 563 {
564 kmem_cache_free(fasync_cache, 564 kmem_cache_free(fasync_cache,
565 container_of(head, struct fasync_struct, fa_rcu)); 565 container_of(head, struct fasync_struct, fa_rcu));
566 } 566 }
567 567
568 /* 568 /*
569 * Remove a fasync entry. If successfully removed, return 569 * Remove a fasync entry. If successfully removed, return
570 * positive and clear the FASYNC flag. If no entry exists, 570 * positive and clear the FASYNC flag. If no entry exists,
571 * do nothing and return 0. 571 * do nothing and return 0.
572 * 572 *
573 * NOTE! It is very important that the FASYNC flag always 573 * NOTE! It is very important that the FASYNC flag always
574 * match the state "is the filp on a fasync list". 574 * match the state "is the filp on a fasync list".
575 * 575 *
576 */ 576 */
577 int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) 577 int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
578 { 578 {
579 struct fasync_struct *fa, **fp; 579 struct fasync_struct *fa, **fp;
580 int result = 0; 580 int result = 0;
581 581
582 spin_lock(&filp->f_lock); 582 spin_lock(&filp->f_lock);
583 spin_lock(&fasync_lock); 583 spin_lock(&fasync_lock);
584 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { 584 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
585 if (fa->fa_file != filp) 585 if (fa->fa_file != filp)
586 continue; 586 continue;
587 587
588 spin_lock_irq(&fa->fa_lock); 588 spin_lock_irq(&fa->fa_lock);
589 fa->fa_file = NULL; 589 fa->fa_file = NULL;
590 spin_unlock_irq(&fa->fa_lock); 590 spin_unlock_irq(&fa->fa_lock);
591 591
592 *fp = fa->fa_next; 592 *fp = fa->fa_next;
593 call_rcu(&fa->fa_rcu, fasync_free_rcu); 593 call_rcu(&fa->fa_rcu, fasync_free_rcu);
594 filp->f_flags &= ~FASYNC; 594 filp->f_flags &= ~FASYNC;
595 result = 1; 595 result = 1;
596 break; 596 break;
597 } 597 }
598 spin_unlock(&fasync_lock); 598 spin_unlock(&fasync_lock);
599 spin_unlock(&filp->f_lock); 599 spin_unlock(&filp->f_lock);
600 return result; 600 return result;
601 } 601 }
602 602
603 struct fasync_struct *fasync_alloc(void) 603 struct fasync_struct *fasync_alloc(void)
604 { 604 {
605 return kmem_cache_alloc(fasync_cache, GFP_KERNEL); 605 return kmem_cache_alloc(fasync_cache, GFP_KERNEL);
606 } 606 }
607 607
608 /* 608 /*
609 * NOTE! This can be used only for unused fasync entries: 609 * NOTE! This can be used only for unused fasync entries:
610 * entries that actually got inserted on the fasync list 610 * entries that actually got inserted on the fasync list
611 * need to be released by rcu - see fasync_remove_entry. 611 * need to be released by rcu - see fasync_remove_entry.
612 */ 612 */
613 void fasync_free(struct fasync_struct *new) 613 void fasync_free(struct fasync_struct *new)
614 { 614 {
615 kmem_cache_free(fasync_cache, new); 615 kmem_cache_free(fasync_cache, new);
616 } 616 }
617 617
618 /* 618 /*
619 * Insert a new entry into the fasync list. Return the pointer to the 619 * Insert a new entry into the fasync list. Return the pointer to the
620 * old one if we didn't use the new one. 620 * old one if we didn't use the new one.
621 * 621 *
622 * NOTE! It is very important that the FASYNC flag always 622 * NOTE! It is very important that the FASYNC flag always
623 * match the state "is the filp on a fasync list". 623 * match the state "is the filp on a fasync list".
624 */ 624 */
625 struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new) 625 struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new)
626 { 626 {
627 struct fasync_struct *fa, **fp; 627 struct fasync_struct *fa, **fp;
628 628
629 spin_lock(&filp->f_lock); 629 spin_lock(&filp->f_lock);
630 spin_lock(&fasync_lock); 630 spin_lock(&fasync_lock);
631 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { 631 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
632 if (fa->fa_file != filp) 632 if (fa->fa_file != filp)
633 continue; 633 continue;
634 634
635 spin_lock_irq(&fa->fa_lock); 635 spin_lock_irq(&fa->fa_lock);
636 fa->fa_fd = fd; 636 fa->fa_fd = fd;
637 spin_unlock_irq(&fa->fa_lock); 637 spin_unlock_irq(&fa->fa_lock);
638 goto out; 638 goto out;
639 } 639 }
640 640
641 spin_lock_init(&new->fa_lock); 641 spin_lock_init(&new->fa_lock);
642 new->magic = FASYNC_MAGIC; 642 new->magic = FASYNC_MAGIC;
643 new->fa_file = filp; 643 new->fa_file = filp;
644 new->fa_fd = fd; 644 new->fa_fd = fd;
645 new->fa_next = *fapp; 645 new->fa_next = *fapp;
646 rcu_assign_pointer(*fapp, new); 646 rcu_assign_pointer(*fapp, new);
647 filp->f_flags |= FASYNC; 647 filp->f_flags |= FASYNC;
648 648
649 out: 649 out:
650 spin_unlock(&fasync_lock); 650 spin_unlock(&fasync_lock);
651 spin_unlock(&filp->f_lock); 651 spin_unlock(&filp->f_lock);
652 return fa; 652 return fa;
653 } 653 }
654 654
655 /* 655 /*
656 * Add a fasync entry. Return negative on error, positive if 656 * Add a fasync entry. Return negative on error, positive if
657 * added, and zero if did nothing but change an existing one. 657 * added, and zero if did nothing but change an existing one.
658 */ 658 */
659 static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp) 659 static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
660 { 660 {
661 struct fasync_struct *new; 661 struct fasync_struct *new;
662 662
663 new = fasync_alloc(); 663 new = fasync_alloc();
664 if (!new) 664 if (!new)
665 return -ENOMEM; 665 return -ENOMEM;
666 666
667 /* 667 /*
668 * fasync_insert_entry() returns the old (updated) entry if 668 * fasync_insert_entry() returns the old (updated) entry if
669 * it existed. 669 * it existed.
670 * 670 *
671 * So free the (unused) new entry and return 0 to let the 671 * So free the (unused) new entry and return 0 to let the
672 * caller know that we didn't add any new fasync entries. 672 * caller know that we didn't add any new fasync entries.
673 */ 673 */
674 if (fasync_insert_entry(fd, filp, fapp, new)) { 674 if (fasync_insert_entry(fd, filp, fapp, new)) {
675 fasync_free(new); 675 fasync_free(new);
676 return 0; 676 return 0;
677 } 677 }
678 678
679 return 1; 679 return 1;
680 } 680 }
681 681
682 /* 682 /*
683 * fasync_helper() is used by almost all character device drivers 683 * fasync_helper() is used by almost all character device drivers
684 * to set up the fasync queue, and for regular files by the file 684 * to set up the fasync queue, and for regular files by the file
685 * lease code. It returns negative on error, 0 if it made no changes 685 * lease code. It returns negative on error, 0 if it made no changes
686 * and positive if it added/deleted the entry. 686 * and positive if it added/deleted the entry.
687 */ 687 */
688 int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp) 688 int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
689 { 689 {
690 if (!on) 690 if (!on)
691 return fasync_remove_entry(filp, fapp); 691 return fasync_remove_entry(filp, fapp);
692 return fasync_add_entry(fd, filp, fapp); 692 return fasync_add_entry(fd, filp, fapp);
693 } 693 }
694 694
695 EXPORT_SYMBOL(fasync_helper); 695 EXPORT_SYMBOL(fasync_helper);
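
A driver-side sketch of the pattern described above, for a hypothetical "mydev" character device (names and state are illustrative, not part of this diff): wire ->fasync to fasync_helper() and notify readers with kill_fasync() when data arrives:

#include <linux/fs.h>
#include <linux/module.h>
#include <linux/poll.h>

static struct fasync_struct *mydev_async_queue;	/* hypothetical driver state */

static int mydev_fasync(int fd, struct file *filp, int on)
{
	return fasync_helper(fd, filp, on, &mydev_async_queue);
}

/* Called from the driver when new data becomes readable. */
static void mydev_data_ready(void)
{
	kill_fasync(&mydev_async_queue, SIGIO, POLL_IN);
}

static const struct file_operations mydev_fops = {
	.owner  = THIS_MODULE,
	.fasync = mydev_fasync,
	/* .open, .read, .release, ... omitted */
};
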
696 696
697 /* 697 /*
698 * rcu_read_lock() is held 698 * rcu_read_lock() is held
699 */ 699 */
700 static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band) 700 static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
701 { 701 {
702 while (fa) { 702 while (fa) {
703 struct fown_struct *fown; 703 struct fown_struct *fown;
704 unsigned long flags; 704 unsigned long flags;
705 705
706 if (fa->magic != FASYNC_MAGIC) { 706 if (fa->magic != FASYNC_MAGIC) {
707 printk(KERN_ERR "kill_fasync: bad magic number in " 707 printk(KERN_ERR "kill_fasync: bad magic number in "
708 "fasync_struct!\n"); 708 "fasync_struct!\n");
709 return; 709 return;
710 } 710 }
711 spin_lock_irqsave(&fa->fa_lock, flags); 711 spin_lock_irqsave(&fa->fa_lock, flags);
712 if (fa->fa_file) { 712 if (fa->fa_file) {
713 fown = &fa->fa_file->f_owner; 713 fown = &fa->fa_file->f_owner;
714 /* Don't send SIGURG to processes which have not set a 714 /* Don't send SIGURG to processes which have not set a
715 queued signum: SIGURG has its own default signalling 715 queued signum: SIGURG has its own default signalling
716 mechanism. */ 716 mechanism. */
717 if (!(sig == SIGURG && fown->signum == 0)) 717 if (!(sig == SIGURG && fown->signum == 0))
718 send_sigio(fown, fa->fa_fd, band); 718 send_sigio(fown, fa->fa_fd, band);
719 } 719 }
720 spin_unlock_irqrestore(&fa->fa_lock, flags); 720 spin_unlock_irqrestore(&fa->fa_lock, flags);
721 fa = rcu_dereference(fa->fa_next); 721 fa = rcu_dereference(fa->fa_next);
722 } 722 }
723 } 723 }
724 724
725 void kill_fasync(struct fasync_struct **fp, int sig, int band) 725 void kill_fasync(struct fasync_struct **fp, int sig, int band)
726 { 726 {
727 /* First a quick test without locking: usually 727 /* First a quick test without locking: usually
728 * the list is empty. 728 * the list is empty.
729 */ 729 */
730 if (*fp) { 730 if (*fp) {
731 rcu_read_lock(); 731 rcu_read_lock();
732 kill_fasync_rcu(rcu_dereference(*fp), sig, band); 732 kill_fasync_rcu(rcu_dereference(*fp), sig, band);
733 rcu_read_unlock(); 733 rcu_read_unlock();
734 } 734 }
735 } 735 }
736 EXPORT_SYMBOL(kill_fasync); 736 EXPORT_SYMBOL(kill_fasync);
737 737
738 static int __init fcntl_init(void) 738 static int __init fcntl_init(void)
739 { 739 {
740 /* 740 /*
741 * Please add new bits here to ensure allocation uniqueness. 741 * Please add new bits here to ensure allocation uniqueness.
742 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY 742 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
743 * is defined as O_NONBLOCK on some platforms and not on others. 743 * is defined as O_NONBLOCK on some platforms and not on others.
744 */ 744 */
745 BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( 745 BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
746 O_RDONLY | O_WRONLY | O_RDWR | 746 O_RDONLY | O_WRONLY | O_RDWR |
747 O_CREAT | O_EXCL | O_NOCTTY | 747 O_CREAT | O_EXCL | O_NOCTTY |
748 O_TRUNC | O_APPEND | /* O_NONBLOCK | */ 748 O_TRUNC | O_APPEND | /* O_NONBLOCK | */
749 __O_SYNC | O_DSYNC | FASYNC | 749 __O_SYNC | O_DSYNC | FASYNC |
750 O_DIRECT | O_LARGEFILE | O_DIRECTORY | 750 O_DIRECT | O_LARGEFILE | O_DIRECTORY |
751 O_NOFOLLOW | O_NOATIME | O_CLOEXEC | 751 O_NOFOLLOW | O_NOATIME | O_CLOEXEC |
752 __FMODE_EXEC | O_PATH | __O_TMPFILE 752 __FMODE_EXEC | O_PATH | __O_TMPFILE
753 )); 753 ));
754 754
755 fasync_cache = kmem_cache_create("fasync_cache", 755 fasync_cache = kmem_cache_create("fasync_cache",
756 sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL); 756 sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL);
757 return 0; 757 return 0;
758 } 758 }
759 759
760 module_init(fcntl_init) 760 module_init(fcntl_init)
761 761
1 /* 1 /*
2 * linux/fs/locks.c 2 * linux/fs/locks.c
3 * 3 *
4 * Provide support for fcntl()'s F_GETLK, F_SETLK, and F_SETLKW calls. 4 * Provide support for fcntl()'s F_GETLK, F_SETLK, and F_SETLKW calls.
5 * Doug Evans (dje@spiff.uucp), August 07, 1992 5 * Doug Evans (dje@spiff.uucp), August 07, 1992
6 * 6 *
7 * Deadlock detection added. 7 * Deadlock detection added.
8 * FIXME: one thing isn't handled yet: 8 * FIXME: one thing isn't handled yet:
9 * - mandatory locks (requires lots of changes elsewhere) 9 * - mandatory locks (requires lots of changes elsewhere)
10 * Kelly Carmichael (kelly@[142.24.8.65]), September 17, 1994. 10 * Kelly Carmichael (kelly@[142.24.8.65]), September 17, 1994.
11 * 11 *
12 * Miscellaneous edits, and a total rewrite of posix_lock_file() code. 12 * Miscellaneous edits, and a total rewrite of posix_lock_file() code.
13 * Kai Petzke (wpp@marie.physik.tu-berlin.de), 1994 13 * Kai Petzke (wpp@marie.physik.tu-berlin.de), 1994
14 * 14 *
15 * Converted file_lock_table to a linked list from an array, which eliminates 15 * Converted file_lock_table to a linked list from an array, which eliminates
16 * the limits on how many active file locks are open. 16 * the limits on how many active file locks are open.
17 * Chad Page (pageone@netcom.com), November 27, 1994 17 * Chad Page (pageone@netcom.com), November 27, 1994
18 * 18 *
19 * Removed dependency on file descriptors. dup()'ed file descriptors now 19 * Removed dependency on file descriptors. dup()'ed file descriptors now
20 * get the same locks as the original file descriptors, and a close() on 20 * get the same locks as the original file descriptors, and a close() on
21 * any file descriptor removes ALL the locks on the file for the current 21 * any file descriptor removes ALL the locks on the file for the current
22 * process. Since locks still depend on the process id, locks are inherited 22 * process. Since locks still depend on the process id, locks are inherited
23 * after an exec() but not after a fork(). This agrees with POSIX, and both 23 * after an exec() but not after a fork(). This agrees with POSIX, and both
24 * BSD and SVR4 practice. 24 * BSD and SVR4 practice.
25 * Andy Walker (andy@lysaker.kvaerner.no), February 14, 1995 25 * Andy Walker (andy@lysaker.kvaerner.no), February 14, 1995
26 * 26 *
27 * Scrapped free list which is redundant now that we allocate locks 27 * Scrapped free list which is redundant now that we allocate locks
28 * dynamically with kmalloc()/kfree(). 28 * dynamically with kmalloc()/kfree().
29 * Andy Walker (andy@lysaker.kvaerner.no), February 21, 1995 29 * Andy Walker (andy@lysaker.kvaerner.no), February 21, 1995
30 * 30 *
31 * Implemented two lock personalities - FL_FLOCK and FL_POSIX. 31 * Implemented two lock personalities - FL_FLOCK and FL_POSIX.
32 * 32 *
33 * FL_POSIX locks are created with calls to fcntl() and lockf() through the 33 * FL_POSIX locks are created with calls to fcntl() and lockf() through the
34 * fcntl() system call. They have the semantics described above. 34 * fcntl() system call. They have the semantics described above.
35 * 35 *
36 * FL_FLOCK locks are created with calls to flock(), through the flock() 36 * FL_FLOCK locks are created with calls to flock(), through the flock()
37 * system call, which is new. Old C libraries implement flock() via fcntl() 37 * system call, which is new. Old C libraries implement flock() via fcntl()
38 * and will continue to use the old, broken implementation. 38 * and will continue to use the old, broken implementation.
39 * 39 *
40 * FL_FLOCK locks follow the 4.4 BSD flock() semantics. They are associated 40 * FL_FLOCK locks follow the 4.4 BSD flock() semantics. They are associated
41 * with a file pointer (filp). As a result they can be shared by a parent 41 * with a file pointer (filp). As a result they can be shared by a parent
42 * process and its children after a fork(). They are removed when the last 42 * process and its children after a fork(). They are removed when the last
43 * file descriptor referring to the file pointer is closed (unless explicitly 43 * file descriptor referring to the file pointer is closed (unless explicitly
44 * unlocked). 44 * unlocked).
45 * 45 *
46 * FL_FLOCK locks never deadlock, an existing lock is always removed before 46 * FL_FLOCK locks never deadlock, an existing lock is always removed before
47 * upgrading from shared to exclusive (or vice versa). When this happens 47 * upgrading from shared to exclusive (or vice versa). When this happens
48 * any processes blocked by the current lock are woken up and allowed to 48 * any processes blocked by the current lock are woken up and allowed to
49 * run before the new lock is applied. 49 * run before the new lock is applied.
50 * Andy Walker (andy@lysaker.kvaerner.no), June 09, 1995 50 * Andy Walker (andy@lysaker.kvaerner.no), June 09, 1995
51 * 51 *
52 * Removed some race conditions in flock_lock_file(), marked other possible 52 * Removed some race conditions in flock_lock_file(), marked other possible
53 * races. Just grep for FIXME to see them. 53 * races. Just grep for FIXME to see them.
54 * Dmitry Gorodchanin (pgmdsg@ibi.com), February 09, 1996. 54 * Dmitry Gorodchanin (pgmdsg@ibi.com), February 09, 1996.
55 * 55 *
56 * Addressed Dmitry's concerns. Deadlock checking no longer recursive. 56 * Addressed Dmitry's concerns. Deadlock checking no longer recursive.
57 * Lock allocation changed to GFP_ATOMIC as we can't afford to sleep 57 * Lock allocation changed to GFP_ATOMIC as we can't afford to sleep
58 * once we've checked for blocking and deadlocking. 58 * once we've checked for blocking and deadlocking.
59 * Andy Walker (andy@lysaker.kvaerner.no), April 03, 1996. 59 * Andy Walker (andy@lysaker.kvaerner.no), April 03, 1996.
60 * 60 *
61 * Initial implementation of mandatory locks. SunOS turned out to be 61 * Initial implementation of mandatory locks. SunOS turned out to be
62 * a rotten model, so I implemented the "obvious" semantics. 62 * a rotten model, so I implemented the "obvious" semantics.
63 * See 'Documentation/filesystems/mandatory-locking.txt' for details. 63 * See 'Documentation/filesystems/mandatory-locking.txt' for details.
64 * Andy Walker (andy@lysaker.kvaerner.no), April 06, 1996. 64 * Andy Walker (andy@lysaker.kvaerner.no), April 06, 1996.
65 * 65 *
66 * Don't allow mandatory locks on mmap()'ed files. Added simple functions to 66 * Don't allow mandatory locks on mmap()'ed files. Added simple functions to
67 * check if a file has mandatory locks, used by mmap(), open() and creat() to 67 * check if a file has mandatory locks, used by mmap(), open() and creat() to
68 * see if system call should be rejected. Ref. HP-UX/SunOS/Solaris Reference 68 * see if system call should be rejected. Ref. HP-UX/SunOS/Solaris Reference
69 * Manual, Section 2. 69 * Manual, Section 2.
70 * Andy Walker (andy@lysaker.kvaerner.no), April 09, 1996. 70 * Andy Walker (andy@lysaker.kvaerner.no), April 09, 1996.
71 * 71 *
72 * Tidied up block list handling. Added '/proc/locks' interface. 72 * Tidied up block list handling. Added '/proc/locks' interface.
73 * Andy Walker (andy@lysaker.kvaerner.no), April 24, 1996. 73 * Andy Walker (andy@lysaker.kvaerner.no), April 24, 1996.
74 * 74 *
75 * Fixed deadlock condition for pathological code that mixes calls to 75 * Fixed deadlock condition for pathological code that mixes calls to
76 * flock() and fcntl(). 76 * flock() and fcntl().
77 * Andy Walker (andy@lysaker.kvaerner.no), April 29, 1996. 77 * Andy Walker (andy@lysaker.kvaerner.no), April 29, 1996.
78 * 78 *
79 * Allow only one type of locking scheme (FL_POSIX or FL_FLOCK) to be in use 79 * Allow only one type of locking scheme (FL_POSIX or FL_FLOCK) to be in use
80 * for a given file at a time. Changed the CONFIG_LOCK_MANDATORY scheme to 80 * for a given file at a time. Changed the CONFIG_LOCK_MANDATORY scheme to
81 * guarantee sensible behaviour in the case where file system modules might 81 * guarantee sensible behaviour in the case where file system modules might
82 * be compiled with different options than the kernel itself. 82 * be compiled with different options than the kernel itself.
83 * Andy Walker (andy@lysaker.kvaerner.no), May 15, 1996. 83 * Andy Walker (andy@lysaker.kvaerner.no), May 15, 1996.
84 * 84 *
85 * Added a couple of missing wake_up() calls. Thanks to Thomas Meckel 85 * Added a couple of missing wake_up() calls. Thanks to Thomas Meckel
86 * (Thomas.Meckel@mni.fh-giessen.de) for spotting this. 86 * (Thomas.Meckel@mni.fh-giessen.de) for spotting this.
87 * Andy Walker (andy@lysaker.kvaerner.no), May 15, 1996. 87 * Andy Walker (andy@lysaker.kvaerner.no), May 15, 1996.
88 * 88 *
89 * Changed FL_POSIX locks to use the block list in the same way as FL_FLOCK 89 * Changed FL_POSIX locks to use the block list in the same way as FL_FLOCK
90 * locks. Changed process synchronisation to avoid dereferencing locks that 90 * locks. Changed process synchronisation to avoid dereferencing locks that
91 * have already been freed. 91 * have already been freed.
92 * Andy Walker (andy@lysaker.kvaerner.no), Sep 21, 1996. 92 * Andy Walker (andy@lysaker.kvaerner.no), Sep 21, 1996.
93 * 93 *
94 * Made the block list a circular list to minimise searching in the list. 94 * Made the block list a circular list to minimise searching in the list.
95 * Andy Walker (andy@lysaker.kvaerner.no), Sep 25, 1996. 95 * Andy Walker (andy@lysaker.kvaerner.no), Sep 25, 1996.
96 * 96 *
97 * Made mandatory locking a mount option. Default is not to allow mandatory 97 * Made mandatory locking a mount option. Default is not to allow mandatory
98 * locking. 98 * locking.
99 * Andy Walker (andy@lysaker.kvaerner.no), Oct 04, 1996. 99 * Andy Walker (andy@lysaker.kvaerner.no), Oct 04, 1996.
100 * 100 *
101 * Some adaptations for NFS support. 101 * Some adaptations for NFS support.
102 * Olaf Kirch (okir@monad.swb.de), Dec 1996, 102 * Olaf Kirch (okir@monad.swb.de), Dec 1996,
103 * 103 *
104 * Fixed /proc/locks interface so that we can't overrun the buffer we are handed. 104 * Fixed /proc/locks interface so that we can't overrun the buffer we are handed.
105 * Andy Walker (andy@lysaker.kvaerner.no), May 12, 1997. 105 * Andy Walker (andy@lysaker.kvaerner.no), May 12, 1997.
106 * 106 *
107 * Use slab allocator instead of kmalloc/kfree. 107 * Use slab allocator instead of kmalloc/kfree.
108 * Use generic list implementation from <linux/list.h>. 108 * Use generic list implementation from <linux/list.h>.
109 * Sped up posix_locks_deadlock by only considering blocked locks. 109 * Sped up posix_locks_deadlock by only considering blocked locks.
110 * Matthew Wilcox <willy@debian.org>, March, 2000. 110 * Matthew Wilcox <willy@debian.org>, March, 2000.
111 * 111 *
112 * Leases and LOCK_MAND 112 * Leases and LOCK_MAND
113 * Matthew Wilcox <willy@debian.org>, June, 2000. 113 * Matthew Wilcox <willy@debian.org>, June, 2000.
114 * Stephen Rothwell <sfr@canb.auug.org.au>, June, 2000. 114 * Stephen Rothwell <sfr@canb.auug.org.au>, June, 2000.
115 */ 115 */
116 116
117 #include <linux/capability.h> 117 #include <linux/capability.h>
118 #include <linux/file.h> 118 #include <linux/file.h>
119 #include <linux/fdtable.h> 119 #include <linux/fdtable.h>
120 #include <linux/fs.h> 120 #include <linux/fs.h>
121 #include <linux/init.h> 121 #include <linux/init.h>
122 #include <linux/module.h> 122 #include <linux/module.h>
123 #include <linux/security.h> 123 #include <linux/security.h>
124 #include <linux/slab.h> 124 #include <linux/slab.h>
125 #include <linux/syscalls.h> 125 #include <linux/syscalls.h>
126 #include <linux/time.h> 126 #include <linux/time.h>
127 #include <linux/rcupdate.h> 127 #include <linux/rcupdate.h>
128 #include <linux/pid_namespace.h> 128 #include <linux/pid_namespace.h>
129 #include <linux/hashtable.h> 129 #include <linux/hashtable.h>
130 #include <linux/percpu.h> 130 #include <linux/percpu.h>
131 #include <linux/lglock.h> 131 #include <linux/lglock.h>
132 132
133 #include <asm/uaccess.h> 133 #include <asm/uaccess.h>
134 134
135 #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) 135 #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX)
136 #define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) 136 #define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK)
137 #define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG)) 137 #define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG))
138 #define IS_FILE_PVT(fl) (fl->fl_flags & FL_FILE_PVT) 138 #define IS_OFDLCK(fl) (fl->fl_flags & FL_OFDLCK)
139 139
140 static bool lease_breaking(struct file_lock *fl) 140 static bool lease_breaking(struct file_lock *fl)
141 { 141 {
142 return fl->fl_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING); 142 return fl->fl_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING);
143 } 143 }
144 144
145 static int target_leasetype(struct file_lock *fl) 145 static int target_leasetype(struct file_lock *fl)
146 { 146 {
147 if (fl->fl_flags & FL_UNLOCK_PENDING) 147 if (fl->fl_flags & FL_UNLOCK_PENDING)
148 return F_UNLCK; 148 return F_UNLCK;
149 if (fl->fl_flags & FL_DOWNGRADE_PENDING) 149 if (fl->fl_flags & FL_DOWNGRADE_PENDING)
150 return F_RDLCK; 150 return F_RDLCK;
151 return fl->fl_type; 151 return fl->fl_type;
152 } 152 }
153 153
154 int leases_enable = 1; 154 int leases_enable = 1;
155 int lease_break_time = 45; 155 int lease_break_time = 45;
156 156
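
leases_enable and lease_break_time above are exported as the fs.leases-enable and fs.lease-break-time sysctls; the latter is how many seconds a lease holder gets to respond to a break before the kernel revokes the lease. A userspace sketch of taking a read lease (hypothetical helper; the caller must own the file or have CAP_LEASE, and _GNU_SOURCE is assumed for F_SETLEASE):

#define _GNU_SOURCE
#include <fcntl.h>

/* Take a read lease. On a conflicting open the holder receives SIGIO (or
 * the F_SETSIG signal) and then has lease_break_time seconds to call
 * fcntl(fd, F_SETLEASE, F_UNLCK). */
static int take_read_lease(int fd)
{
	return fcntl(fd, F_SETLEASE, F_RDLCK);
}
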
157 #define for_each_lock(inode, lockp) \ 157 #define for_each_lock(inode, lockp) \
158 for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next) 158 for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next)
159 159
160 /* 160 /*
161 * The global file_lock_list is only used for displaying /proc/locks, so we 161 * The global file_lock_list is only used for displaying /proc/locks, so we
162 * keep a list on each CPU, with each list protected by its own spinlock via 162 * keep a list on each CPU, with each list protected by its own spinlock via
163 * the file_lock_lglock. Note that alterations to the list also require that 163 * the file_lock_lglock. Note that alterations to the list also require that
164 * the relevant i_lock is held. 164 * the relevant i_lock is held.
165 */ 165 */
166 DEFINE_STATIC_LGLOCK(file_lock_lglock); 166 DEFINE_STATIC_LGLOCK(file_lock_lglock);
167 static DEFINE_PER_CPU(struct hlist_head, file_lock_list); 167 static DEFINE_PER_CPU(struct hlist_head, file_lock_list);
168 168
169 /* 169 /*
170 * The blocked_hash is used to find POSIX lock loops for deadlock detection. 170 * The blocked_hash is used to find POSIX lock loops for deadlock detection.
171 * It is protected by blocked_lock_lock. 171 * It is protected by blocked_lock_lock.
172 * 172 *
173 * We hash locks by lockowner in order to optimize searching for the lock a 173 * We hash locks by lockowner in order to optimize searching for the lock a
174 * particular lockowner is waiting on. 174 * particular lockowner is waiting on.
175 * 175 *
176 * FIXME: make this value scale via some heuristic? We generally will want more 176 * FIXME: make this value scale via some heuristic? We generally will want more
177 * buckets when we have more lockowners holding locks, but that's a little 177 * buckets when we have more lockowners holding locks, but that's a little
178 * difficult to determine without knowing what the workload will look like. 178 * difficult to determine without knowing what the workload will look like.
179 */ 179 */
180 #define BLOCKED_HASH_BITS 7 180 #define BLOCKED_HASH_BITS 7
181 static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS); 181 static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS);
182 182
183 /* 183 /*
184 * This lock protects the blocked_hash. Generally, if you're accessing it, you 184 * This lock protects the blocked_hash. Generally, if you're accessing it, you
185 * want to be holding this lock. 185 * want to be holding this lock.
186 * 186 *
187 * In addition, it also protects the fl->fl_block list, and the fl->fl_next 187 * In addition, it also protects the fl->fl_block list, and the fl->fl_next
188 * pointer for file_lock structures that are acting as lock requests (in 188 * pointer for file_lock structures that are acting as lock requests (in
189 * contrast to those that are acting as records of acquired locks). 189 * contrast to those that are acting as records of acquired locks).
190 * 190 *
191 * Note that when we acquire this lock in order to change the above fields, 191 * Note that when we acquire this lock in order to change the above fields,
192 * we often hold the i_lock as well. In certain cases, when reading the fields 192 * we often hold the i_lock as well. In certain cases, when reading the fields
193 * protected by this lock, we can skip acquiring it iff we already hold the 193 * protected by this lock, we can skip acquiring it iff we already hold the
194 * i_lock. 194 * i_lock.
195 * 195 *
196 * In particular, adding an entry to the fl_block list requires that you hold 196 * In particular, adding an entry to the fl_block list requires that you hold
197 * both the i_lock and the blocked_lock_lock (acquired in that order). Deleting 197 * both the i_lock and the blocked_lock_lock (acquired in that order). Deleting
198 * an entry from the list however only requires the blocked_lock_lock. 198 * an entry from the list however only requires the blocked_lock_lock.
199 */ 199 */
200 static DEFINE_SPINLOCK(blocked_lock_lock); 200 static DEFINE_SPINLOCK(blocked_lock_lock);
201 201
202 static struct kmem_cache *filelock_cache __read_mostly; 202 static struct kmem_cache *filelock_cache __read_mostly;
203 203
204 static void locks_init_lock_heads(struct file_lock *fl) 204 static void locks_init_lock_heads(struct file_lock *fl)
205 { 205 {
206 INIT_HLIST_NODE(&fl->fl_link); 206 INIT_HLIST_NODE(&fl->fl_link);
207 INIT_LIST_HEAD(&fl->fl_block); 207 INIT_LIST_HEAD(&fl->fl_block);
208 init_waitqueue_head(&fl->fl_wait); 208 init_waitqueue_head(&fl->fl_wait);
209 } 209 }
210 210
211 /* Allocate an empty lock structure. */ 211 /* Allocate an empty lock structure. */
212 struct file_lock *locks_alloc_lock(void) 212 struct file_lock *locks_alloc_lock(void)
213 { 213 {
214 struct file_lock *fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL); 214 struct file_lock *fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL);
215 215
216 if (fl) 216 if (fl)
217 locks_init_lock_heads(fl); 217 locks_init_lock_heads(fl);
218 218
219 return fl; 219 return fl;
220 } 220 }
221 EXPORT_SYMBOL_GPL(locks_alloc_lock); 221 EXPORT_SYMBOL_GPL(locks_alloc_lock);
222 222
223 void locks_release_private(struct file_lock *fl) 223 void locks_release_private(struct file_lock *fl)
224 { 224 {
225 if (fl->fl_ops) { 225 if (fl->fl_ops) {
226 if (fl->fl_ops->fl_release_private) 226 if (fl->fl_ops->fl_release_private)
227 fl->fl_ops->fl_release_private(fl); 227 fl->fl_ops->fl_release_private(fl);
228 fl->fl_ops = NULL; 228 fl->fl_ops = NULL;
229 } 229 }
230 fl->fl_lmops = NULL; 230 fl->fl_lmops = NULL;
231 231
232 } 232 }
233 EXPORT_SYMBOL_GPL(locks_release_private); 233 EXPORT_SYMBOL_GPL(locks_release_private);
234 234
235 /* Free a lock which is not in use. */ 235 /* Free a lock which is not in use. */
236 void locks_free_lock(struct file_lock *fl) 236 void locks_free_lock(struct file_lock *fl)
237 { 237 {
238 BUG_ON(waitqueue_active(&fl->fl_wait)); 238 BUG_ON(waitqueue_active(&fl->fl_wait));
239 BUG_ON(!list_empty(&fl->fl_block)); 239 BUG_ON(!list_empty(&fl->fl_block));
240 BUG_ON(!hlist_unhashed(&fl->fl_link)); 240 BUG_ON(!hlist_unhashed(&fl->fl_link));
241 241
242 locks_release_private(fl); 242 locks_release_private(fl);
243 kmem_cache_free(filelock_cache, fl); 243 kmem_cache_free(filelock_cache, fl);
244 } 244 }
245 EXPORT_SYMBOL(locks_free_lock); 245 EXPORT_SYMBOL(locks_free_lock);
246 246
247 void locks_init_lock(struct file_lock *fl) 247 void locks_init_lock(struct file_lock *fl)
248 { 248 {
249 memset(fl, 0, sizeof(struct file_lock)); 249 memset(fl, 0, sizeof(struct file_lock));
250 locks_init_lock_heads(fl); 250 locks_init_lock_heads(fl);
251 } 251 }
252 252
253 EXPORT_SYMBOL(locks_init_lock); 253 EXPORT_SYMBOL(locks_init_lock);
254 254
255 static void locks_copy_private(struct file_lock *new, struct file_lock *fl) 255 static void locks_copy_private(struct file_lock *new, struct file_lock *fl)
256 { 256 {
257 if (fl->fl_ops) { 257 if (fl->fl_ops) {
258 if (fl->fl_ops->fl_copy_lock) 258 if (fl->fl_ops->fl_copy_lock)
259 fl->fl_ops->fl_copy_lock(new, fl); 259 fl->fl_ops->fl_copy_lock(new, fl);
260 new->fl_ops = fl->fl_ops; 260 new->fl_ops = fl->fl_ops;
261 } 261 }
262 if (fl->fl_lmops) 262 if (fl->fl_lmops)
263 new->fl_lmops = fl->fl_lmops; 263 new->fl_lmops = fl->fl_lmops;
264 } 264 }
265 265
266 /* 266 /*
267 * Initialize a new lock from an existing file_lock structure. 267 * Initialize a new lock from an existing file_lock structure.
268 */ 268 */
269 void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl) 269 void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl)
270 { 270 {
271 new->fl_owner = fl->fl_owner; 271 new->fl_owner = fl->fl_owner;
272 new->fl_pid = fl->fl_pid; 272 new->fl_pid = fl->fl_pid;
273 new->fl_file = NULL; 273 new->fl_file = NULL;
274 new->fl_flags = fl->fl_flags; 274 new->fl_flags = fl->fl_flags;
275 new->fl_type = fl->fl_type; 275 new->fl_type = fl->fl_type;
276 new->fl_start = fl->fl_start; 276 new->fl_start = fl->fl_start;
277 new->fl_end = fl->fl_end; 277 new->fl_end = fl->fl_end;
278 new->fl_ops = NULL; 278 new->fl_ops = NULL;
279 new->fl_lmops = NULL; 279 new->fl_lmops = NULL;
280 } 280 }
281 EXPORT_SYMBOL(__locks_copy_lock); 281 EXPORT_SYMBOL(__locks_copy_lock);
282 282
283 void locks_copy_lock(struct file_lock *new, struct file_lock *fl) 283 void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
284 { 284 {
285 locks_release_private(new); 285 locks_release_private(new);
286 286
287 __locks_copy_lock(new, fl); 287 __locks_copy_lock(new, fl);
288 new->fl_file = fl->fl_file; 288 new->fl_file = fl->fl_file;
289 new->fl_ops = fl->fl_ops; 289 new->fl_ops = fl->fl_ops;
290 new->fl_lmops = fl->fl_lmops; 290 new->fl_lmops = fl->fl_lmops;
291 291
292 locks_copy_private(new, fl); 292 locks_copy_private(new, fl);
293 } 293 }
294 294
295 EXPORT_SYMBOL(locks_copy_lock); 295 EXPORT_SYMBOL(locks_copy_lock);
296 296
297 static inline int flock_translate_cmd(int cmd) { 297 static inline int flock_translate_cmd(int cmd) {
298 if (cmd & LOCK_MAND) 298 if (cmd & LOCK_MAND)
299 return cmd & (LOCK_MAND | LOCK_RW); 299 return cmd & (LOCK_MAND | LOCK_RW);
300 switch (cmd) { 300 switch (cmd) {
301 case LOCK_SH: 301 case LOCK_SH:
302 return F_RDLCK; 302 return F_RDLCK;
303 case LOCK_EX: 303 case LOCK_EX:
304 return F_WRLCK; 304 return F_WRLCK;
305 case LOCK_UN: 305 case LOCK_UN:
306 return F_UNLCK; 306 return F_UNLCK;
307 } 307 }
308 return -EINVAL; 308 return -EINVAL;
309 } 309 }
310 310
311 /* Fill in a file_lock structure with an appropriate FLOCK lock. */ 311 /* Fill in a file_lock structure with an appropriate FLOCK lock. */
312 static int flock_make_lock(struct file *filp, struct file_lock **lock, 312 static int flock_make_lock(struct file *filp, struct file_lock **lock,
313 unsigned int cmd) 313 unsigned int cmd)
314 { 314 {
315 struct file_lock *fl; 315 struct file_lock *fl;
316 int type = flock_translate_cmd(cmd); 316 int type = flock_translate_cmd(cmd);
317 if (type < 0) 317 if (type < 0)
318 return type; 318 return type;
319 319
320 fl = locks_alloc_lock(); 320 fl = locks_alloc_lock();
321 if (fl == NULL) 321 if (fl == NULL)
322 return -ENOMEM; 322 return -ENOMEM;
323 323
324 fl->fl_file = filp; 324 fl->fl_file = filp;
325 fl->fl_pid = current->tgid; 325 fl->fl_pid = current->tgid;
326 fl->fl_flags = FL_FLOCK; 326 fl->fl_flags = FL_FLOCK;
327 fl->fl_type = type; 327 fl->fl_type = type;
328 fl->fl_end = OFFSET_MAX; 328 fl->fl_end = OFFSET_MAX;
329 329
330 *lock = fl; 330 *lock = fl;
331 return 0; 331 return 0;
332 } 332 }
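
flock_make_lock() above sits on the flock(2) path: the LOCK_* command is translated to an internal F_RDLCK/F_WRLCK/F_UNLCK type and attached to the struct file. A minimal user-space sketch of the calls that end up here; the path name and error handling are illustrative only, not part of this diff:

    #include <fcntl.h>
    #include <sys/file.h>
    #include <unistd.h>

    int flock_demo(void)
    {
            int fd = open("/tmp/flock-demo", O_RDWR | O_CREAT, 0644);

            if (fd < 0)
                    return -1;
            if (flock(fd, LOCK_EX) == -1) {         /* becomes F_WRLCK, FL_FLOCK */
                    close(fd);
                    return -1;
            }
            /* ... exclusive access to the file ... */
            flock(fd, LOCK_UN);                     /* becomes an F_UNLCK request */
            close(fd);
            return 0;
    }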
333 333
334 static int assign_type(struct file_lock *fl, long type) 334 static int assign_type(struct file_lock *fl, long type)
335 { 335 {
336 switch (type) { 336 switch (type) {
337 case F_RDLCK: 337 case F_RDLCK:
338 case F_WRLCK: 338 case F_WRLCK:
339 case F_UNLCK: 339 case F_UNLCK:
340 fl->fl_type = type; 340 fl->fl_type = type;
341 break; 341 break;
342 default: 342 default:
343 return -EINVAL; 343 return -EINVAL;
344 } 344 }
345 return 0; 345 return 0;
346 } 346 }
347 347
348 static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl, 348 static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
349 struct flock64 *l) 349 struct flock64 *l)
350 { 350 {
351 switch (l->l_whence) { 351 switch (l->l_whence) {
352 case SEEK_SET: 352 case SEEK_SET:
353 fl->fl_start = 0; 353 fl->fl_start = 0;
354 break; 354 break;
355 case SEEK_CUR: 355 case SEEK_CUR:
356 fl->fl_start = filp->f_pos; 356 fl->fl_start = filp->f_pos;
357 break; 357 break;
358 case SEEK_END: 358 case SEEK_END:
359 fl->fl_start = i_size_read(file_inode(filp)); 359 fl->fl_start = i_size_read(file_inode(filp));
360 break; 360 break;
361 default: 361 default:
362 return -EINVAL; 362 return -EINVAL;
363 } 363 }
364 if (l->l_start > OFFSET_MAX - fl->fl_start) 364 if (l->l_start > OFFSET_MAX - fl->fl_start)
365 return -EOVERFLOW; 365 return -EOVERFLOW;
366 fl->fl_start += l->l_start; 366 fl->fl_start += l->l_start;
367 if (fl->fl_start < 0) 367 if (fl->fl_start < 0)
368 return -EINVAL; 368 return -EINVAL;
369 369
370 /* POSIX-1996 leaves the case l->l_len < 0 undefined; 370 /* POSIX-1996 leaves the case l->l_len < 0 undefined;
371 POSIX-2001 defines it. */ 371 POSIX-2001 defines it. */
372 if (l->l_len > 0) { 372 if (l->l_len > 0) {
373 if (l->l_len - 1 > OFFSET_MAX - fl->fl_start) 373 if (l->l_len - 1 > OFFSET_MAX - fl->fl_start)
374 return -EOVERFLOW; 374 return -EOVERFLOW;
375 fl->fl_end = fl->fl_start + l->l_len - 1; 375 fl->fl_end = fl->fl_start + l->l_len - 1;
376 376
377 } else if (l->l_len < 0) { 377 } else if (l->l_len < 0) {
378 if (fl->fl_start + l->l_len < 0) 378 if (fl->fl_start + l->l_len < 0)
379 return -EINVAL; 379 return -EINVAL;
380 fl->fl_end = fl->fl_start - 1; 380 fl->fl_end = fl->fl_start - 1;
381 fl->fl_start += l->l_len; 381 fl->fl_start += l->l_len;
382 } else 382 } else
383 fl->fl_end = OFFSET_MAX; 383 fl->fl_end = OFFSET_MAX;
384 384
385 fl->fl_owner = current->files; 385 fl->fl_owner = current->files;
386 fl->fl_pid = current->tgid; 386 fl->fl_pid = current->tgid;
387 fl->fl_file = filp; 387 fl->fl_file = filp;
388 fl->fl_flags = FL_POSIX; 388 fl->fl_flags = FL_POSIX;
389 fl->fl_ops = NULL; 389 fl->fl_ops = NULL;
390 fl->fl_lmops = NULL; 390 fl->fl_lmops = NULL;
391 391
392 /* Ensure that fl->fl_file has compatible f_mode */ 392 /* Ensure that fl->fl_file has compatible f_mode */
393 switch (l->l_type) { 393 switch (l->l_type) {
394 case F_RDLCK: 394 case F_RDLCK:
395 if (!(filp->f_mode & FMODE_READ)) 395 if (!(filp->f_mode & FMODE_READ))
396 return -EBADF; 396 return -EBADF;
397 break; 397 break;
398 case F_WRLCK: 398 case F_WRLCK:
399 if (!(filp->f_mode & FMODE_WRITE)) 399 if (!(filp->f_mode & FMODE_WRITE))
400 return -EBADF; 400 return -EBADF;
401 break; 401 break;
402 } 402 }
403 403
404 return assign_type(fl, l->l_type); 404 return assign_type(fl, l->l_type);
405 } 405 }
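
flock64_to_posix_lock() is where the user-visible (l_whence, l_start, l_len) triple becomes an inclusive [fl_start, fl_end] byte range, including the POSIX-2001 negative-l_len case handled above. A rough user-space illustration; the fd and offsets are made up for the example:

    #include <fcntl.h>
    #include <string.h>

    /* l_len > 0 locks [l_start, l_start + l_len - 1]: here bytes 100..199. */
    int lock_bytes_100_to_199(int fd)
    {
            struct flock fl;

            memset(&fl, 0, sizeof(fl));
            fl.l_type = F_WRLCK;
            fl.l_whence = SEEK_SET;
            fl.l_start = 100;
            fl.l_len = 100;
            return fcntl(fd, F_SETLK, &fl);
    }

    /*
     * With a negative length the range is [l_start + l_len, l_start - 1],
     * so l_start = 200, l_len = -100 describes the same bytes 100..199.
     */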
406 406
407 /* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX 407 /* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX
408 * style lock. 408 * style lock.
409 */ 409 */
410 static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, 410 static int flock_to_posix_lock(struct file *filp, struct file_lock *fl,
411 struct flock *l) 411 struct flock *l)
412 { 412 {
413 struct flock64 ll = { 413 struct flock64 ll = {
414 .l_type = l->l_type, 414 .l_type = l->l_type,
415 .l_whence = l->l_whence, 415 .l_whence = l->l_whence,
416 .l_start = l->l_start, 416 .l_start = l->l_start,
417 .l_len = l->l_len, 417 .l_len = l->l_len,
418 }; 418 };
419 419
420 return flock64_to_posix_lock(filp, fl, &ll); 420 return flock64_to_posix_lock(filp, fl, &ll);
421 } 421 }
422 422
423 /* default lease lock manager operations */ 423 /* default lease lock manager operations */
424 static void lease_break_callback(struct file_lock *fl) 424 static void lease_break_callback(struct file_lock *fl)
425 { 425 {
426 kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG); 426 kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG);
427 } 427 }
428 428
429 static const struct lock_manager_operations lease_manager_ops = { 429 static const struct lock_manager_operations lease_manager_ops = {
430 .lm_break = lease_break_callback, 430 .lm_break = lease_break_callback,
431 .lm_change = lease_modify, 431 .lm_change = lease_modify,
432 }; 432 };
433 433
434 /* 434 /*
435 * Initialize a lease, use the default lock manager operations 435 * Initialize a lease, use the default lock manager operations
436 */ 436 */
437 static int lease_init(struct file *filp, long type, struct file_lock *fl) 437 static int lease_init(struct file *filp, long type, struct file_lock *fl)
438 { 438 {
439 if (assign_type(fl, type) != 0) 439 if (assign_type(fl, type) != 0)
440 return -EINVAL; 440 return -EINVAL;
441 441
442 fl->fl_owner = current->files; 442 fl->fl_owner = current->files;
443 fl->fl_pid = current->tgid; 443 fl->fl_pid = current->tgid;
444 444
445 fl->fl_file = filp; 445 fl->fl_file = filp;
446 fl->fl_flags = FL_LEASE; 446 fl->fl_flags = FL_LEASE;
447 fl->fl_start = 0; 447 fl->fl_start = 0;
448 fl->fl_end = OFFSET_MAX; 448 fl->fl_end = OFFSET_MAX;
449 fl->fl_ops = NULL; 449 fl->fl_ops = NULL;
450 fl->fl_lmops = &lease_manager_ops; 450 fl->fl_lmops = &lease_manager_ops;
451 return 0; 451 return 0;
452 } 452 }
453 453
454 /* Allocate a file_lock initialised to this type of lease */ 454 /* Allocate a file_lock initialised to this type of lease */
455 static struct file_lock *lease_alloc(struct file *filp, long type) 455 static struct file_lock *lease_alloc(struct file *filp, long type)
456 { 456 {
457 struct file_lock *fl = locks_alloc_lock(); 457 struct file_lock *fl = locks_alloc_lock();
458 int error = -ENOMEM; 458 int error = -ENOMEM;
459 459
460 if (fl == NULL) 460 if (fl == NULL)
461 return ERR_PTR(error); 461 return ERR_PTR(error);
462 462
463 error = lease_init(filp, type, fl); 463 error = lease_init(filp, type, fl);
464 if (error) { 464 if (error) {
465 locks_free_lock(fl); 465 locks_free_lock(fl);
466 return ERR_PTR(error); 466 return ERR_PTR(error);
467 } 467 }
468 return fl; 468 return fl;
469 } 469 }
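
lease_break_callback() above is how a lease holder finds out about a conflicting open() or truncate(): kill_fasync() delivers SIGIO (or the signal chosen with F_SETSIG) to the owner. A hedged user-space sketch of taking and dropping a lease with the standard F_SETLEASE interface; note that a read lease generally requires a read-only descriptor and that the caller must own the file or have CAP_LEASE:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <signal.h>

    static volatile sig_atomic_t lease_broken;

    static void on_sigio(int sig)
    {
            (void)sig;
            lease_broken = 1;       /* release before lease-break-time expires */
    }

    int take_read_lease(int fd)
    {
            signal(SIGIO, on_sigio);                /* SIGIO is the default */
            return fcntl(fd, F_SETLEASE, F_RDLCK);
    }

    int drop_lease(int fd)
    {
            return fcntl(fd, F_SETLEASE, F_UNLCK);
    }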
470 470
471 /* Check if two locks overlap each other. 471 /* Check if two locks overlap each other.
472 */ 472 */
473 static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2) 473 static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2)
474 { 474 {
475 return ((fl1->fl_end >= fl2->fl_start) && 475 return ((fl1->fl_end >= fl2->fl_start) &&
476 (fl2->fl_end >= fl1->fl_start)); 476 (fl2->fl_end >= fl1->fl_start));
477 } 477 }
478 478
479 /* 479 /*
480 * Check whether two locks have the same owner. 480 * Check whether two locks have the same owner.
481 */ 481 */
482 static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2) 482 static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
483 { 483 {
484 if (fl1->fl_lmops && fl1->fl_lmops->lm_compare_owner) 484 if (fl1->fl_lmops && fl1->fl_lmops->lm_compare_owner)
485 return fl2->fl_lmops == fl1->fl_lmops && 485 return fl2->fl_lmops == fl1->fl_lmops &&
486 fl1->fl_lmops->lm_compare_owner(fl1, fl2); 486 fl1->fl_lmops->lm_compare_owner(fl1, fl2);
487 return fl1->fl_owner == fl2->fl_owner; 487 return fl1->fl_owner == fl2->fl_owner;
488 } 488 }
489 489
490 /* Must be called with the i_lock held! */ 490 /* Must be called with the i_lock held! */
491 static void locks_insert_global_locks(struct file_lock *fl) 491 static void locks_insert_global_locks(struct file_lock *fl)
492 { 492 {
493 lg_local_lock(&file_lock_lglock); 493 lg_local_lock(&file_lock_lglock);
494 fl->fl_link_cpu = smp_processor_id(); 494 fl->fl_link_cpu = smp_processor_id();
495 hlist_add_head(&fl->fl_link, this_cpu_ptr(&file_lock_list)); 495 hlist_add_head(&fl->fl_link, this_cpu_ptr(&file_lock_list));
496 lg_local_unlock(&file_lock_lglock); 496 lg_local_unlock(&file_lock_lglock);
497 } 497 }
498 498
499 /* Must be called with the i_lock held! */ 499 /* Must be called with the i_lock held! */
500 static void locks_delete_global_locks(struct file_lock *fl) 500 static void locks_delete_global_locks(struct file_lock *fl)
501 { 501 {
502 /* 502 /*
503 * Avoid taking lock if already unhashed. This is safe since this check 503 * Avoid taking lock if already unhashed. This is safe since this check
504 * is done while holding the i_lock, and new insertions into the list 504 * is done while holding the i_lock, and new insertions into the list
505 * also require that it be held. 505 * also require that it be held.
506 */ 506 */
507 if (hlist_unhashed(&fl->fl_link)) 507 if (hlist_unhashed(&fl->fl_link))
508 return; 508 return;
509 lg_local_lock_cpu(&file_lock_lglock, fl->fl_link_cpu); 509 lg_local_lock_cpu(&file_lock_lglock, fl->fl_link_cpu);
510 hlist_del_init(&fl->fl_link); 510 hlist_del_init(&fl->fl_link);
511 lg_local_unlock_cpu(&file_lock_lglock, fl->fl_link_cpu); 511 lg_local_unlock_cpu(&file_lock_lglock, fl->fl_link_cpu);
512 } 512 }
513 513
514 static unsigned long 514 static unsigned long
515 posix_owner_key(struct file_lock *fl) 515 posix_owner_key(struct file_lock *fl)
516 { 516 {
517 if (fl->fl_lmops && fl->fl_lmops->lm_owner_key) 517 if (fl->fl_lmops && fl->fl_lmops->lm_owner_key)
518 return fl->fl_lmops->lm_owner_key(fl); 518 return fl->fl_lmops->lm_owner_key(fl);
519 return (unsigned long)fl->fl_owner; 519 return (unsigned long)fl->fl_owner;
520 } 520 }
521 521
522 static void locks_insert_global_blocked(struct file_lock *waiter) 522 static void locks_insert_global_blocked(struct file_lock *waiter)
523 { 523 {
524 hash_add(blocked_hash, &waiter->fl_link, posix_owner_key(waiter)); 524 hash_add(blocked_hash, &waiter->fl_link, posix_owner_key(waiter));
525 } 525 }
526 526
527 static void locks_delete_global_blocked(struct file_lock *waiter) 527 static void locks_delete_global_blocked(struct file_lock *waiter)
528 { 528 {
529 hash_del(&waiter->fl_link); 529 hash_del(&waiter->fl_link);
530 } 530 }
531 531
532 /* Remove waiter from blocker's block list. 532 /* Remove waiter from blocker's block list.
533 * When blocker ends up pointing to itself then the list is empty. 533 * When blocker ends up pointing to itself then the list is empty.
534 * 534 *
535 * Must be called with blocked_lock_lock held. 535 * Must be called with blocked_lock_lock held.
536 */ 536 */
537 static void __locks_delete_block(struct file_lock *waiter) 537 static void __locks_delete_block(struct file_lock *waiter)
538 { 538 {
539 locks_delete_global_blocked(waiter); 539 locks_delete_global_blocked(waiter);
540 list_del_init(&waiter->fl_block); 540 list_del_init(&waiter->fl_block);
541 waiter->fl_next = NULL; 541 waiter->fl_next = NULL;
542 } 542 }
543 543
544 static void locks_delete_block(struct file_lock *waiter) 544 static void locks_delete_block(struct file_lock *waiter)
545 { 545 {
546 spin_lock(&blocked_lock_lock); 546 spin_lock(&blocked_lock_lock);
547 __locks_delete_block(waiter); 547 __locks_delete_block(waiter);
548 spin_unlock(&blocked_lock_lock); 548 spin_unlock(&blocked_lock_lock);
549 } 549 }
550 550
551 /* Insert waiter into blocker's block list. 551 /* Insert waiter into blocker's block list.
552 * We use a circular list so that processes can be easily woken up in 552 * We use a circular list so that processes can be easily woken up in
553 * the order they blocked. The documentation doesn't require this but 553 * the order they blocked. The documentation doesn't require this but
554 * it seems like the reasonable thing to do. 554 * it seems like the reasonable thing to do.
555 * 555 *
556 * Must be called with both the i_lock and blocked_lock_lock held. The fl_block 556 * Must be called with both the i_lock and blocked_lock_lock held. The fl_block
557 * list itself is protected by the blocked_lock_lock, but by ensuring that the 557 * list itself is protected by the blocked_lock_lock, but by ensuring that the
558 * i_lock is also held on insertions we can avoid taking the blocked_lock_lock 558 * i_lock is also held on insertions we can avoid taking the blocked_lock_lock
559 * in some cases when we see that the fl_block list is empty. 559 * in some cases when we see that the fl_block list is empty.
560 */ 560 */
561 static void __locks_insert_block(struct file_lock *blocker, 561 static void __locks_insert_block(struct file_lock *blocker,
562 struct file_lock *waiter) 562 struct file_lock *waiter)
563 { 563 {
564 BUG_ON(!list_empty(&waiter->fl_block)); 564 BUG_ON(!list_empty(&waiter->fl_block));
565 waiter->fl_next = blocker; 565 waiter->fl_next = blocker;
566 list_add_tail(&waiter->fl_block, &blocker->fl_block); 566 list_add_tail(&waiter->fl_block, &blocker->fl_block);
567 if (IS_POSIX(blocker) && !IS_FILE_PVT(blocker)) 567 if (IS_POSIX(blocker) && !IS_OFDLCK(blocker))
568 locks_insert_global_blocked(waiter); 568 locks_insert_global_blocked(waiter);
569 } 569 }
570 570
571 /* Must be called with i_lock held. */ 571 /* Must be called with i_lock held. */
572 static void locks_insert_block(struct file_lock *blocker, 572 static void locks_insert_block(struct file_lock *blocker,
573 struct file_lock *waiter) 573 struct file_lock *waiter)
574 { 574 {
575 spin_lock(&blocked_lock_lock); 575 spin_lock(&blocked_lock_lock);
576 __locks_insert_block(blocker, waiter); 576 __locks_insert_block(blocker, waiter);
577 spin_unlock(&blocked_lock_lock); 577 spin_unlock(&blocked_lock_lock);
578 } 578 }
579 579
580 /* 580 /*
581 * Wake up processes blocked waiting for blocker. 581 * Wake up processes blocked waiting for blocker.
582 * 582 *
583 * Must be called with the inode->i_lock held! 583 * Must be called with the inode->i_lock held!
584 */ 584 */
585 static void locks_wake_up_blocks(struct file_lock *blocker) 585 static void locks_wake_up_blocks(struct file_lock *blocker)
586 { 586 {
587 /* 587 /*
588 * Avoid taking global lock if list is empty. This is safe since new 588 * Avoid taking global lock if list is empty. This is safe since new
589 * blocked requests are only added to the list under the i_lock, and 589 * blocked requests are only added to the list under the i_lock, and
590 * the i_lock is always held here. Note that removal from the fl_block 590 * the i_lock is always held here. Note that removal from the fl_block
591 * list does not require the i_lock, so we must recheck list_empty() 591 * list does not require the i_lock, so we must recheck list_empty()
592 * after acquiring the blocked_lock_lock. 592 * after acquiring the blocked_lock_lock.
593 */ 593 */
594 if (list_empty(&blocker->fl_block)) 594 if (list_empty(&blocker->fl_block))
595 return; 595 return;
596 596
597 spin_lock(&blocked_lock_lock); 597 spin_lock(&blocked_lock_lock);
598 while (!list_empty(&blocker->fl_block)) { 598 while (!list_empty(&blocker->fl_block)) {
599 struct file_lock *waiter; 599 struct file_lock *waiter;
600 600
601 waiter = list_first_entry(&blocker->fl_block, 601 waiter = list_first_entry(&blocker->fl_block,
602 struct file_lock, fl_block); 602 struct file_lock, fl_block);
603 __locks_delete_block(waiter); 603 __locks_delete_block(waiter);
604 if (waiter->fl_lmops && waiter->fl_lmops->lm_notify) 604 if (waiter->fl_lmops && waiter->fl_lmops->lm_notify)
605 waiter->fl_lmops->lm_notify(waiter); 605 waiter->fl_lmops->lm_notify(waiter);
606 else 606 else
607 wake_up(&waiter->fl_wait); 607 wake_up(&waiter->fl_wait);
608 } 608 }
609 spin_unlock(&blocked_lock_lock); 609 spin_unlock(&blocked_lock_lock);
610 } 610 }
611 611
612 /* Insert file lock fl into an inode's lock list at the position indicated 612 /* Insert file lock fl into an inode's lock list at the position indicated
613 * by pos. At the same time add the lock to the global file lock list. 613 * by pos. At the same time add the lock to the global file lock list.
614 * 614 *
615 * Must be called with the i_lock held! 615 * Must be called with the i_lock held!
616 */ 616 */
617 static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) 617 static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl)
618 { 618 {
619 fl->fl_nspid = get_pid(task_tgid(current)); 619 fl->fl_nspid = get_pid(task_tgid(current));
620 620
621 /* insert into file's list */ 621 /* insert into file's list */
622 fl->fl_next = *pos; 622 fl->fl_next = *pos;
623 *pos = fl; 623 *pos = fl;
624 624
625 locks_insert_global_locks(fl); 625 locks_insert_global_locks(fl);
626 } 626 }
627 627
628 /** 628 /**
629 * locks_delete_lock - Delete a lock and then free it. 629 * locks_delete_lock - Delete a lock and then free it.
630 * @thisfl_p: pointer that points to the fl_next field of the previous 630 * @thisfl_p: pointer that points to the fl_next field of the previous
631 * inode->i_flock list entry 631 * inode->i_flock list entry
632 * 632 *
633 * Unlink a lock from all lists and free the namespace reference, but don't 633 * Unlink a lock from all lists and free the namespace reference, but don't
634 * free it yet. Wake up processes that are blocked waiting for this lock and 634 * free it yet. Wake up processes that are blocked waiting for this lock and
635 * notify the FS that the lock has been cleared. 635 * notify the FS that the lock has been cleared.
636 * 636 *
637 * Must be called with the i_lock held! 637 * Must be called with the i_lock held!
638 */ 638 */
639 static void locks_unlink_lock(struct file_lock **thisfl_p) 639 static void locks_unlink_lock(struct file_lock **thisfl_p)
640 { 640 {
641 struct file_lock *fl = *thisfl_p; 641 struct file_lock *fl = *thisfl_p;
642 642
643 locks_delete_global_locks(fl); 643 locks_delete_global_locks(fl);
644 644
645 *thisfl_p = fl->fl_next; 645 *thisfl_p = fl->fl_next;
646 fl->fl_next = NULL; 646 fl->fl_next = NULL;
647 647
648 if (fl->fl_nspid) { 648 if (fl->fl_nspid) {
649 put_pid(fl->fl_nspid); 649 put_pid(fl->fl_nspid);
650 fl->fl_nspid = NULL; 650 fl->fl_nspid = NULL;
651 } 651 }
652 652
653 locks_wake_up_blocks(fl); 653 locks_wake_up_blocks(fl);
654 } 654 }
655 655
656 /* 656 /*
657 * Unlink a lock from all lists and free it. 657 * Unlink a lock from all lists and free it.
658 * 658 *
659 * Must be called with i_lock held! 659 * Must be called with i_lock held!
660 */ 660 */
661 static void locks_delete_lock(struct file_lock **thisfl_p) 661 static void locks_delete_lock(struct file_lock **thisfl_p)
662 { 662 {
663 struct file_lock *fl = *thisfl_p; 663 struct file_lock *fl = *thisfl_p;
664 664
665 locks_unlink_lock(thisfl_p); 665 locks_unlink_lock(thisfl_p);
666 locks_free_lock(fl); 666 locks_free_lock(fl);
667 } 667 }
668 668
669 /* Determine if lock sys_fl blocks lock caller_fl. Common functionality 669 /* Determine if lock sys_fl blocks lock caller_fl. Common functionality
670 * checks for shared/exclusive status of overlapping locks. 670 * checks for shared/exclusive status of overlapping locks.
671 */ 671 */
672 static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) 672 static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
673 { 673 {
674 if (sys_fl->fl_type == F_WRLCK) 674 if (sys_fl->fl_type == F_WRLCK)
675 return 1; 675 return 1;
676 if (caller_fl->fl_type == F_WRLCK) 676 if (caller_fl->fl_type == F_WRLCK)
677 return 1; 677 return 1;
678 return 0; 678 return 0;
679 } 679 }
680 680
681 /* Determine if lock sys_fl blocks lock caller_fl. POSIX specific 681 /* Determine if lock sys_fl blocks lock caller_fl. POSIX specific
682 * checking before calling the locks_conflict(). 682 * checking before calling the locks_conflict().
683 */ 683 */
684 static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) 684 static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
685 { 685 {
686 /* POSIX locks owned by the same process do not conflict with 686 /* POSIX locks owned by the same process do not conflict with
687 * each other. 687 * each other.
688 */ 688 */
689 if (!IS_POSIX(sys_fl) || posix_same_owner(caller_fl, sys_fl)) 689 if (!IS_POSIX(sys_fl) || posix_same_owner(caller_fl, sys_fl))
690 return (0); 690 return (0);
691 691
692 /* Check whether they overlap */ 692 /* Check whether they overlap */
693 if (!locks_overlap(caller_fl, sys_fl)) 693 if (!locks_overlap(caller_fl, sys_fl))
694 return 0; 694 return 0;
695 695
696 return (locks_conflict(caller_fl, sys_fl)); 696 return (locks_conflict(caller_fl, sys_fl));
697 } 697 }
698 698
699 /* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific 699 /* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific
700 * checking before calling the locks_conflict(). 700 * checking before calling the locks_conflict().
701 */ 701 */
702 static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) 702 static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
703 { 703 {
704 /* FLOCK locks referring to the same filp do not conflict with 704 /* FLOCK locks referring to the same filp do not conflict with
705 * each other. 705 * each other.
706 */ 706 */
707 if (!IS_FLOCK(sys_fl) || (caller_fl->fl_file == sys_fl->fl_file)) 707 if (!IS_FLOCK(sys_fl) || (caller_fl->fl_file == sys_fl->fl_file))
708 return (0); 708 return (0);
709 if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND)) 709 if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND))
710 return 0; 710 return 0;
711 711
712 return (locks_conflict(caller_fl, sys_fl)); 712 return (locks_conflict(caller_fl, sys_fl));
713 } 713 }
714 714
715 void 715 void
716 posix_test_lock(struct file *filp, struct file_lock *fl) 716 posix_test_lock(struct file *filp, struct file_lock *fl)
717 { 717 {
718 struct file_lock *cfl; 718 struct file_lock *cfl;
719 struct inode *inode = file_inode(filp); 719 struct inode *inode = file_inode(filp);
720 720
721 spin_lock(&inode->i_lock); 721 spin_lock(&inode->i_lock);
722 for (cfl = file_inode(filp)->i_flock; cfl; cfl = cfl->fl_next) { 722 for (cfl = file_inode(filp)->i_flock; cfl; cfl = cfl->fl_next) {
723 if (!IS_POSIX(cfl)) 723 if (!IS_POSIX(cfl))
724 continue; 724 continue;
725 if (posix_locks_conflict(fl, cfl)) 725 if (posix_locks_conflict(fl, cfl))
726 break; 726 break;
727 } 727 }
728 if (cfl) { 728 if (cfl) {
729 __locks_copy_lock(fl, cfl); 729 __locks_copy_lock(fl, cfl);
730 if (cfl->fl_nspid) 730 if (cfl->fl_nspid)
731 fl->fl_pid = pid_vnr(cfl->fl_nspid); 731 fl->fl_pid = pid_vnr(cfl->fl_nspid);
732 } else 732 } else
733 fl->fl_type = F_UNLCK; 733 fl->fl_type = F_UNLCK;
734 spin_unlock(&inode->i_lock); 734 spin_unlock(&inode->i_lock);
735 return; 735 return;
736 } 736 }
737 EXPORT_SYMBOL(posix_test_lock); 737 EXPORT_SYMBOL(posix_test_lock);
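
posix_test_lock() backs the F_GETLK query: the caller's lock description is either overwritten with the first conflicting lock (with the holder's pid translated into the caller's pid namespace, as above) or its type is set to F_UNLCK. A minimal user-space query, purely illustrative:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>

    /* Would a whole-file write lock conflict with someone else's lock? */
    void report_conflict(int fd)
    {
            struct flock fl;

            memset(&fl, 0, sizeof(fl));
            fl.l_type = F_WRLCK;
            fl.l_whence = SEEK_SET;         /* l_start = 0, l_len = 0: whole file */

            if (fcntl(fd, F_GETLK, &fl) == 0 && fl.l_type != F_UNLCK)
                    printf("blocked by pid %ld\n", (long)fl.l_pid);
    }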
738 738
739 /* 739 /*
740 * Deadlock detection: 740 * Deadlock detection:
741 * 741 *
742 * We attempt to detect deadlocks that are due purely to posix file 742 * We attempt to detect deadlocks that are due purely to posix file
743 * locks. 743 * locks.
744 * 744 *
745 * We assume that a task can be waiting for at most one lock at a time. 745 * We assume that a task can be waiting for at most one lock at a time.
746 * So for any acquired lock, the process holding that lock may be 746 * So for any acquired lock, the process holding that lock may be
747 * waiting on at most one other lock. That lock in turn may be held by 747 * waiting on at most one other lock. That lock in turn may be held by
748 * someone waiting for at most one other lock. Given a requested lock 748 * someone waiting for at most one other lock. Given a requested lock
749 * caller_fl which is about to wait for a conflicting lock block_fl, we 749 * caller_fl which is about to wait for a conflicting lock block_fl, we
750 * follow this chain of waiters to ensure we are not about to create a 750 * follow this chain of waiters to ensure we are not about to create a
751 * cycle. 751 * cycle.
752 * 752 *
753 * Since we do this before we ever put a process to sleep on a lock, we 753 * Since we do this before we ever put a process to sleep on a lock, we
754 * are ensured that there is never a cycle; that is what guarantees that 754 * are ensured that there is never a cycle; that is what guarantees that
755 * the while() loop in posix_locks_deadlock() eventually completes. 755 * the while() loop in posix_locks_deadlock() eventually completes.
756 * 756 *
757 * Note: the above assumption may not be true when handling lock 757 * Note: the above assumption may not be true when handling lock
758 * requests from a broken NFS client. It may also fail in the presence 758 * requests from a broken NFS client. It may also fail in the presence
759 * of tasks (such as posix threads) sharing the same open file table. 759 * of tasks (such as posix threads) sharing the same open file table.
760 * To handle those cases, we just bail out after a few iterations. 760 * To handle those cases, we just bail out after a few iterations.
761 * 761 *
762 * For FL_FILE_PVT locks, the owner is the filp, not the files_struct. 762 * For FL_OFDLCK locks, the owner is the filp, not the files_struct.
763 * Because the owner is not even nominally tied to a thread of 763 * Because the owner is not even nominally tied to a thread of
764 * execution, the deadlock detection below can't reasonably work well. Just 764 * execution, the deadlock detection below can't reasonably work well. Just
765 * skip it for those. 765 * skip it for those.
766 * 766 *
767 * In principle, we could do a more limited deadlock detection on FL_FILE_PVT 767 * In principle, we could do a more limited deadlock detection on FL_OFDLCK
768 * locks that just checks for the case where two tasks are attempting to 768 * locks that just checks for the case where two tasks are attempting to
769 * upgrade from read to write locks on the same inode. 769 * upgrade from read to write locks on the same inode.
770 */ 770 */
771 771
772 #define MAX_DEADLK_ITERATIONS 10 772 #define MAX_DEADLK_ITERATIONS 10
773 773
774 /* Find a lock that the owner of the given block_fl is blocking on. */ 774 /* Find a lock that the owner of the given block_fl is blocking on. */
775 static struct file_lock *what_owner_is_waiting_for(struct file_lock *block_fl) 775 static struct file_lock *what_owner_is_waiting_for(struct file_lock *block_fl)
776 { 776 {
777 struct file_lock *fl; 777 struct file_lock *fl;
778 778
779 hash_for_each_possible(blocked_hash, fl, fl_link, posix_owner_key(block_fl)) { 779 hash_for_each_possible(blocked_hash, fl, fl_link, posix_owner_key(block_fl)) {
780 if (posix_same_owner(fl, block_fl)) 780 if (posix_same_owner(fl, block_fl))
781 return fl->fl_next; 781 return fl->fl_next;
782 } 782 }
783 return NULL; 783 return NULL;
784 } 784 }
785 785
786 /* Must be called with the blocked_lock_lock held! */ 786 /* Must be called with the blocked_lock_lock held! */
787 static int posix_locks_deadlock(struct file_lock *caller_fl, 787 static int posix_locks_deadlock(struct file_lock *caller_fl,
788 struct file_lock *block_fl) 788 struct file_lock *block_fl)
789 { 789 {
790 int i = 0; 790 int i = 0;
791 791
792 /* 792 /*
793 * This deadlock detector can't reasonably detect deadlocks with 793 * This deadlock detector can't reasonably detect deadlocks with
794 * FL_FILE_PVT locks, since they aren't owned by a process, per se. 794 * FL_OFDLCK locks, since they aren't owned by a process, per se.
795 */ 795 */
796 if (IS_FILE_PVT(caller_fl)) 796 if (IS_OFDLCK(caller_fl))
797 return 0; 797 return 0;
798 798
799 while ((block_fl = what_owner_is_waiting_for(block_fl))) { 799 while ((block_fl = what_owner_is_waiting_for(block_fl))) {
800 if (i++ > MAX_DEADLK_ITERATIONS) 800 if (i++ > MAX_DEADLK_ITERATIONS)
801 return 0; 801 return 0;
802 if (posix_same_owner(caller_fl, block_fl)) 802 if (posix_same_owner(caller_fl, block_fl))
803 return 1; 803 return 1;
804 } 804 }
805 return 0; 805 return 0;
806 } 806 }
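
The IS_OFDLCK() bail-out above is the practical consequence of the rename in this series: "open file description" locks are owned by the struct file rather than a process, so chasing an owner chain is meaningless for them. A hedged sketch of the user-space side using the F_OFD_* fcntl commands added for these locks; the fallback value below is an assumption taken from the generic uapi header, only needed where libc headers have not caught up:

    #include <fcntl.h>
    #include <string.h>

    #ifndef F_OFD_SETLKW
    #define F_OFD_SETLKW 38                 /* not yet in all libc headers */
    #endif

    /*
     * The lock belongs to this open file description: it is shared by any
     * descriptors referring to the same description (after dup() or fork())
     * and is only dropped when the last of them is closed.  l_pid must be 0.
     */
    int ofd_lock_whole_file(int fd)
    {
            struct flock fl;

            memset(&fl, 0, sizeof(fl));
            fl.l_type = F_WRLCK;
            fl.l_whence = SEEK_SET;         /* l_start = 0, l_len = 0: whole file */
            fl.l_pid = 0;
            return fcntl(fd, F_OFD_SETLKW, &fl);    /* blocks; no EDEADLK check */
    }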
807 807
808 /* Try to create a FLOCK lock on filp. We always insert new FLOCK locks 808 /* Try to create a FLOCK lock on filp. We always insert new FLOCK locks
809 * after any leases, but before any posix locks. 809 * after any leases, but before any posix locks.
810 * 810 *
811 * Note that if called with an FL_EXISTS argument, the caller may determine 811 * Note that if called with an FL_EXISTS argument, the caller may determine
812 * whether or not a lock was successfully freed by testing the return 812 * whether or not a lock was successfully freed by testing the return
813 * value for -ENOENT. 813 * value for -ENOENT.
814 */ 814 */
815 static int flock_lock_file(struct file *filp, struct file_lock *request) 815 static int flock_lock_file(struct file *filp, struct file_lock *request)
816 { 816 {
817 struct file_lock *new_fl = NULL; 817 struct file_lock *new_fl = NULL;
818 struct file_lock **before; 818 struct file_lock **before;
819 struct inode * inode = file_inode(filp); 819 struct inode * inode = file_inode(filp);
820 int error = 0; 820 int error = 0;
821 int found = 0; 821 int found = 0;
822 822
823 if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) { 823 if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) {
824 new_fl = locks_alloc_lock(); 824 new_fl = locks_alloc_lock();
825 if (!new_fl) 825 if (!new_fl)
826 return -ENOMEM; 826 return -ENOMEM;
827 } 827 }
828 828
829 spin_lock(&inode->i_lock); 829 spin_lock(&inode->i_lock);
830 if (request->fl_flags & FL_ACCESS) 830 if (request->fl_flags & FL_ACCESS)
831 goto find_conflict; 831 goto find_conflict;
832 832
833 for_each_lock(inode, before) { 833 for_each_lock(inode, before) {
834 struct file_lock *fl = *before; 834 struct file_lock *fl = *before;
835 if (IS_POSIX(fl)) 835 if (IS_POSIX(fl))
836 break; 836 break;
837 if (IS_LEASE(fl)) 837 if (IS_LEASE(fl))
838 continue; 838 continue;
839 if (filp != fl->fl_file) 839 if (filp != fl->fl_file)
840 continue; 840 continue;
841 if (request->fl_type == fl->fl_type) 841 if (request->fl_type == fl->fl_type)
842 goto out; 842 goto out;
843 found = 1; 843 found = 1;
844 locks_delete_lock(before); 844 locks_delete_lock(before);
845 break; 845 break;
846 } 846 }
847 847
848 if (request->fl_type == F_UNLCK) { 848 if (request->fl_type == F_UNLCK) {
849 if ((request->fl_flags & FL_EXISTS) && !found) 849 if ((request->fl_flags & FL_EXISTS) && !found)
850 error = -ENOENT; 850 error = -ENOENT;
851 goto out; 851 goto out;
852 } 852 }
853 853
854 /* 854 /*
855 * If a higher-priority process was blocked on the old file lock, 855 * If a higher-priority process was blocked on the old file lock,
856 * give it the opportunity to lock the file. 856 * give it the opportunity to lock the file.
857 */ 857 */
858 if (found) { 858 if (found) {
859 spin_unlock(&inode->i_lock); 859 spin_unlock(&inode->i_lock);
860 cond_resched(); 860 cond_resched();
861 spin_lock(&inode->i_lock); 861 spin_lock(&inode->i_lock);
862 } 862 }
863 863
864 find_conflict: 864 find_conflict:
865 for_each_lock(inode, before) { 865 for_each_lock(inode, before) {
866 struct file_lock *fl = *before; 866 struct file_lock *fl = *before;
867 if (IS_POSIX(fl)) 867 if (IS_POSIX(fl))
868 break; 868 break;
869 if (IS_LEASE(fl)) 869 if (IS_LEASE(fl))
870 continue; 870 continue;
871 if (!flock_locks_conflict(request, fl)) 871 if (!flock_locks_conflict(request, fl))
872 continue; 872 continue;
873 error = -EAGAIN; 873 error = -EAGAIN;
874 if (!(request->fl_flags & FL_SLEEP)) 874 if (!(request->fl_flags & FL_SLEEP))
875 goto out; 875 goto out;
876 error = FILE_LOCK_DEFERRED; 876 error = FILE_LOCK_DEFERRED;
877 locks_insert_block(fl, request); 877 locks_insert_block(fl, request);
878 goto out; 878 goto out;
879 } 879 }
880 if (request->fl_flags & FL_ACCESS) 880 if (request->fl_flags & FL_ACCESS)
881 goto out; 881 goto out;
882 locks_copy_lock(new_fl, request); 882 locks_copy_lock(new_fl, request);
883 locks_insert_lock(before, new_fl); 883 locks_insert_lock(before, new_fl);
884 new_fl = NULL; 884 new_fl = NULL;
885 error = 0; 885 error = 0;
886 886
887 out: 887 out:
888 spin_unlock(&inode->i_lock); 888 spin_unlock(&inode->i_lock);
889 if (new_fl) 889 if (new_fl)
890 locks_free_lock(new_fl); 890 locks_free_lock(new_fl);
891 return error; 891 return error;
892 } 892 }
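
One user-visible consequence of the loop above: converting an existing flock() lock deletes the old lock first (and may reschedule), so the conversion is not atomic and a competing process can take the lock in between. Roughly, from user space:

    #include <sys/file.h>

    /*
     * Upgrade LOCK_SH -> LOCK_EX.  The kernel drops the shared lock before
     * queueing for the exclusive one, so another holder may slip in during
     * the window in between.
     */
    int upgrade_to_exclusive(int fd)
    {
            return flock(fd, LOCK_EX);
    }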
893 893
894 static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock) 894 static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
895 { 895 {
896 struct file_lock *fl; 896 struct file_lock *fl;
897 struct file_lock *new_fl = NULL; 897 struct file_lock *new_fl = NULL;
898 struct file_lock *new_fl2 = NULL; 898 struct file_lock *new_fl2 = NULL;
899 struct file_lock *left = NULL; 899 struct file_lock *left = NULL;
900 struct file_lock *right = NULL; 900 struct file_lock *right = NULL;
901 struct file_lock **before; 901 struct file_lock **before;
902 int error; 902 int error;
903 bool added = false; 903 bool added = false;
904 904
905 /* 905 /*
906 * We may need two file_lock structures for this operation, 906 * We may need two file_lock structures for this operation,
907 * so we get them in advance to avoid races. 907 * so we get them in advance to avoid races.
908 * 908 *
909 * In some cases we can be sure that no new locks will be needed 909 * In some cases we can be sure that no new locks will be needed
910 */ 910 */
911 if (!(request->fl_flags & FL_ACCESS) && 911 if (!(request->fl_flags & FL_ACCESS) &&
912 (request->fl_type != F_UNLCK || 912 (request->fl_type != F_UNLCK ||
913 request->fl_start != 0 || request->fl_end != OFFSET_MAX)) { 913 request->fl_start != 0 || request->fl_end != OFFSET_MAX)) {
914 new_fl = locks_alloc_lock(); 914 new_fl = locks_alloc_lock();
915 new_fl2 = locks_alloc_lock(); 915 new_fl2 = locks_alloc_lock();
916 } 916 }
917 917
918 spin_lock(&inode->i_lock); 918 spin_lock(&inode->i_lock);
919 /* 919 /*
920 * New lock request. Walk all POSIX locks and look for conflicts. If 920 * New lock request. Walk all POSIX locks and look for conflicts. If
921 * there are any, either return error or put the request on the 921 * there are any, either return error or put the request on the
922 * blocker's list of waiters and the global blocked_hash. 922 * blocker's list of waiters and the global blocked_hash.
923 */ 923 */
924 if (request->fl_type != F_UNLCK) { 924 if (request->fl_type != F_UNLCK) {
925 for_each_lock(inode, before) { 925 for_each_lock(inode, before) {
926 fl = *before; 926 fl = *before;
927 if (!IS_POSIX(fl)) 927 if (!IS_POSIX(fl))
928 continue; 928 continue;
929 if (!posix_locks_conflict(request, fl)) 929 if (!posix_locks_conflict(request, fl))
930 continue; 930 continue;
931 if (conflock) 931 if (conflock)
932 __locks_copy_lock(conflock, fl); 932 __locks_copy_lock(conflock, fl);
933 error = -EAGAIN; 933 error = -EAGAIN;
934 if (!(request->fl_flags & FL_SLEEP)) 934 if (!(request->fl_flags & FL_SLEEP))
935 goto out; 935 goto out;
936 /* 936 /*
937 * Deadlock detection and insertion into the blocked 937 * Deadlock detection and insertion into the blocked
938 * locks list must be done while holding the same lock! 938 * locks list must be done while holding the same lock!
939 */ 939 */
940 error = -EDEADLK; 940 error = -EDEADLK;
941 spin_lock(&blocked_lock_lock); 941 spin_lock(&blocked_lock_lock);
942 if (likely(!posix_locks_deadlock(request, fl))) { 942 if (likely(!posix_locks_deadlock(request, fl))) {
943 error = FILE_LOCK_DEFERRED; 943 error = FILE_LOCK_DEFERRED;
944 __locks_insert_block(fl, request); 944 __locks_insert_block(fl, request);
945 } 945 }
946 spin_unlock(&blocked_lock_lock); 946 spin_unlock(&blocked_lock_lock);
947 goto out; 947 goto out;
948 } 948 }
949 } 949 }
950 950
951 /* If we're just looking for a conflict, we're done. */ 951 /* If we're just looking for a conflict, we're done. */
952 error = 0; 952 error = 0;
953 if (request->fl_flags & FL_ACCESS) 953 if (request->fl_flags & FL_ACCESS)
954 goto out; 954 goto out;
955 955
956 /* 956 /*
957 * Find the first old lock with the same owner as the new lock. 957 * Find the first old lock with the same owner as the new lock.
958 */ 958 */
959 959
960 before = &inode->i_flock; 960 before = &inode->i_flock;
961 961
962 /* First skip locks owned by other processes. */ 962 /* First skip locks owned by other processes. */
963 while ((fl = *before) && (!IS_POSIX(fl) || 963 while ((fl = *before) && (!IS_POSIX(fl) ||
964 !posix_same_owner(request, fl))) { 964 !posix_same_owner(request, fl))) {
965 before = &fl->fl_next; 965 before = &fl->fl_next;
966 } 966 }
967 967
968 /* Process locks with this owner. */ 968 /* Process locks with this owner. */
969 while ((fl = *before) && posix_same_owner(request, fl)) { 969 while ((fl = *before) && posix_same_owner(request, fl)) {
970 /* Detect adjacent or overlapping regions (if same lock type) 970 /* Detect adjacent or overlapping regions (if same lock type)
971 */ 971 */
972 if (request->fl_type == fl->fl_type) { 972 if (request->fl_type == fl->fl_type) {
973 /* In all comparisons of start vs end, use 973 /* In all comparisons of start vs end, use
974 * "start - 1" rather than "end + 1". If end 974 * "start - 1" rather than "end + 1". If end
975 * is OFFSET_MAX, end + 1 will become negative. 975 * is OFFSET_MAX, end + 1 will become negative.
976 */ 976 */
977 if (fl->fl_end < request->fl_start - 1) 977 if (fl->fl_end < request->fl_start - 1)
978 goto next_lock; 978 goto next_lock;
979 /* If the next lock in the list has entirely bigger 979 /* If the next lock in the list has entirely bigger
980 * addresses than the new one, insert the lock here. 980 * addresses than the new one, insert the lock here.
981 */ 981 */
982 if (fl->fl_start - 1 > request->fl_end) 982 if (fl->fl_start - 1 > request->fl_end)
983 break; 983 break;
984 984
985 /* If we come here, the new and old lock are of the 985 /* If we come here, the new and old lock are of the
986 * same type and adjacent or overlapping. Make one 986 * same type and adjacent or overlapping. Make one
987 * lock yielding from the lower start address of both 987 * lock yielding from the lower start address of both
988 * locks to the higher end address. 988 * locks to the higher end address.
989 */ 989 */
990 if (fl->fl_start > request->fl_start) 990 if (fl->fl_start > request->fl_start)
991 fl->fl_start = request->fl_start; 991 fl->fl_start = request->fl_start;
992 else 992 else
993 request->fl_start = fl->fl_start; 993 request->fl_start = fl->fl_start;
994 if (fl->fl_end < request->fl_end) 994 if (fl->fl_end < request->fl_end)
995 fl->fl_end = request->fl_end; 995 fl->fl_end = request->fl_end;
996 else 996 else
997 request->fl_end = fl->fl_end; 997 request->fl_end = fl->fl_end;
998 if (added) { 998 if (added) {
999 locks_delete_lock(before); 999 locks_delete_lock(before);
1000 continue; 1000 continue;
1001 } 1001 }
1002 request = fl; 1002 request = fl;
1003 added = true; 1003 added = true;
1004 } 1004 }
1005 else { 1005 else {
1006 /* Processing for different lock types is a bit 1006 /* Processing for different lock types is a bit
1007 * more complex. 1007 * more complex.
1008 */ 1008 */
1009 if (fl->fl_end < request->fl_start) 1009 if (fl->fl_end < request->fl_start)
1010 goto next_lock; 1010 goto next_lock;
1011 if (fl->fl_start > request->fl_end) 1011 if (fl->fl_start > request->fl_end)
1012 break; 1012 break;
1013 if (request->fl_type == F_UNLCK) 1013 if (request->fl_type == F_UNLCK)
1014 added = true; 1014 added = true;
1015 if (fl->fl_start < request->fl_start) 1015 if (fl->fl_start < request->fl_start)
1016 left = fl; 1016 left = fl;
1017 /* If the next lock in the list has a higher end 1017 /* If the next lock in the list has a higher end
1018 * address than the new one, insert the new one here. 1018 * address than the new one, insert the new one here.
1019 */ 1019 */
1020 if (fl->fl_end > request->fl_end) { 1020 if (fl->fl_end > request->fl_end) {
1021 right = fl; 1021 right = fl;
1022 break; 1022 break;
1023 } 1023 }
1024 if (fl->fl_start >= request->fl_start) { 1024 if (fl->fl_start >= request->fl_start) {
1025 /* The new lock completely replaces an old 1025 /* The new lock completely replaces an old
1026 * one (This may happen several times). 1026 * one (This may happen several times).
1027 */ 1027 */
1028 if (added) { 1028 if (added) {
1029 locks_delete_lock(before); 1029 locks_delete_lock(before);
1030 continue; 1030 continue;
1031 } 1031 }
1032 /* Replace the old lock with the new one. 1032 /* Replace the old lock with the new one.
1033 * Wake up anybody waiting for the old one, 1033 * Wake up anybody waiting for the old one,
1034 * as the change in lock type might satisfy 1034 * as the change in lock type might satisfy
1035 * their needs. 1035 * their needs.
1036 */ 1036 */
1037 locks_wake_up_blocks(fl); 1037 locks_wake_up_blocks(fl);
1038 fl->fl_start = request->fl_start; 1038 fl->fl_start = request->fl_start;
1039 fl->fl_end = request->fl_end; 1039 fl->fl_end = request->fl_end;
1040 fl->fl_type = request->fl_type; 1040 fl->fl_type = request->fl_type;
1041 locks_release_private(fl); 1041 locks_release_private(fl);
1042 locks_copy_private(fl, request); 1042 locks_copy_private(fl, request);
1043 request = fl; 1043 request = fl;
1044 added = true; 1044 added = true;
1045 } 1045 }
1046 } 1046 }
1047 /* Go on to next lock. 1047 /* Go on to next lock.
1048 */ 1048 */
1049 next_lock: 1049 next_lock:
1050 before = &fl->fl_next; 1050 before = &fl->fl_next;
1051 } 1051 }
1052 1052
1053 /* 1053 /*
1054 * The above code only modifies existing locks in case of merging or 1054 * The above code only modifies existing locks in case of merging or
1055 * replacing. If new lock(s) need to be inserted all modifications are 1055 * replacing. If new lock(s) need to be inserted all modifications are
1056 * done below this, so it's still safe to bail out. 1056 * done below this, so it's still safe to bail out.
1057 */ 1057 */
1058 error = -ENOLCK; /* "no luck" */ 1058 error = -ENOLCK; /* "no luck" */
1059 if (right && left == right && !new_fl2) 1059 if (right && left == right && !new_fl2)
1060 goto out; 1060 goto out;
1061 1061
1062 error = 0; 1062 error = 0;
1063 if (!added) { 1063 if (!added) {
1064 if (request->fl_type == F_UNLCK) { 1064 if (request->fl_type == F_UNLCK) {
1065 if (request->fl_flags & FL_EXISTS) 1065 if (request->fl_flags & FL_EXISTS)
1066 error = -ENOENT; 1066 error = -ENOENT;
1067 goto out; 1067 goto out;
1068 } 1068 }
1069 1069
1070 if (!new_fl) { 1070 if (!new_fl) {
1071 error = -ENOLCK; 1071 error = -ENOLCK;
1072 goto out; 1072 goto out;
1073 } 1073 }
1074 locks_copy_lock(new_fl, request); 1074 locks_copy_lock(new_fl, request);
1075 locks_insert_lock(before, new_fl); 1075 locks_insert_lock(before, new_fl);
1076 new_fl = NULL; 1076 new_fl = NULL;
1077 } 1077 }
1078 if (right) { 1078 if (right) {
1079 if (left == right) { 1079 if (left == right) {
1080 /* The new lock breaks the old one in two pieces, 1080 /* The new lock breaks the old one in two pieces,
1081 * so we have to use the second new lock. 1081 * so we have to use the second new lock.
1082 */ 1082 */
1083 left = new_fl2; 1083 left = new_fl2;
1084 new_fl2 = NULL; 1084 new_fl2 = NULL;
1085 locks_copy_lock(left, right); 1085 locks_copy_lock(left, right);
1086 locks_insert_lock(before, left); 1086 locks_insert_lock(before, left);
1087 } 1087 }
1088 right->fl_start = request->fl_end + 1; 1088 right->fl_start = request->fl_end + 1;
1089 locks_wake_up_blocks(right); 1089 locks_wake_up_blocks(right);
1090 } 1090 }
1091 if (left) { 1091 if (left) {
1092 left->fl_end = request->fl_start - 1; 1092 left->fl_end = request->fl_start - 1;
1093 locks_wake_up_blocks(left); 1093 locks_wake_up_blocks(left);
1094 } 1094 }
1095 out: 1095 out:
1096 spin_unlock(&inode->i_lock); 1096 spin_unlock(&inode->i_lock);
1097 /* 1097 /*
1098 * Free any unused locks. 1098 * Free any unused locks.
1099 */ 1099 */
1100 if (new_fl) 1100 if (new_fl)
1101 locks_free_lock(new_fl); 1101 locks_free_lock(new_fl);
1102 if (new_fl2) 1102 if (new_fl2)
1103 locks_free_lock(new_fl2); 1103 locks_free_lock(new_fl2);
1104 return error; 1104 return error;
1105 } 1105 }
1106 1106
1107 /** 1107 /**
1108 * posix_lock_file - Apply a POSIX-style lock to a file 1108 * posix_lock_file - Apply a POSIX-style lock to a file
1109 * @filp: The file to apply the lock to 1109 * @filp: The file to apply the lock to
1110 * @fl: The lock to be applied 1110 * @fl: The lock to be applied
1111 * @conflock: Place to return a copy of the conflicting lock, if found. 1111 * @conflock: Place to return a copy of the conflicting lock, if found.
1112 * 1112 *
1113 * Add a POSIX style lock to a file. 1113 * Add a POSIX style lock to a file.
1114 * We merge adjacent & overlapping locks whenever possible. 1114 * We merge adjacent & overlapping locks whenever possible.
1115 * POSIX locks are sorted by owner task, then by starting address 1115 * POSIX locks are sorted by owner task, then by starting address
1116 * 1116 *
1117 * Note that if called with an FL_EXISTS argument, the caller may determine 1117 * Note that if called with an FL_EXISTS argument, the caller may determine
1118 * whether or not a lock was successfully freed by testing the return 1118 * whether or not a lock was successfully freed by testing the return
1119 * value for -ENOENT. 1119 * value for -ENOENT.
1120 */ 1120 */
1121 int posix_lock_file(struct file *filp, struct file_lock *fl, 1121 int posix_lock_file(struct file *filp, struct file_lock *fl,
1122 struct file_lock *conflock) 1122 struct file_lock *conflock)
1123 { 1123 {
1124 return __posix_lock_file(file_inode(filp), fl, conflock); 1124 return __posix_lock_file(file_inode(filp), fl, conflock);
1125 } 1125 }
1126 EXPORT_SYMBOL(posix_lock_file); 1126 EXPORT_SYMBOL(posix_lock_file);
1127 1127
1128 /** 1128 /**
1129 * posix_lock_file_wait - Apply a POSIX-style lock to a file 1129 * posix_lock_file_wait - Apply a POSIX-style lock to a file
1130 * @filp: The file to apply the lock to 1130 * @filp: The file to apply the lock to
1131 * @fl: The lock to be applied 1131 * @fl: The lock to be applied
1132 * 1132 *
1133 * Add a POSIX style lock to a file. 1133 * Add a POSIX style lock to a file.
1134 * We merge adjacent & overlapping locks whenever possible. 1134 * We merge adjacent & overlapping locks whenever possible.
1135 * POSIX locks are sorted by owner task, then by starting address 1135 * POSIX locks are sorted by owner task, then by starting address
1136 */ 1136 */
1137 int posix_lock_file_wait(struct file *filp, struct file_lock *fl) 1137 int posix_lock_file_wait(struct file *filp, struct file_lock *fl)
1138 { 1138 {
1139 int error; 1139 int error;
1140 might_sleep (); 1140 might_sleep ();
1141 for (;;) { 1141 for (;;) {
1142 error = posix_lock_file(filp, fl, NULL); 1142 error = posix_lock_file(filp, fl, NULL);
1143 if (error != FILE_LOCK_DEFERRED) 1143 if (error != FILE_LOCK_DEFERRED)
1144 break; 1144 break;
1145 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); 1145 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
1146 if (!error) 1146 if (!error)
1147 continue; 1147 continue;
1148 1148
1149 locks_delete_block(fl); 1149 locks_delete_block(fl);
1150 break; 1150 break;
1151 } 1151 }
1152 return error; 1152 return error;
1153 } 1153 }
1154 EXPORT_SYMBOL(posix_lock_file_wait); 1154 EXPORT_SYMBOL(posix_lock_file_wait);
1155 1155
1156 /** 1156 /**
1157 * locks_mandatory_locked - Check for an active lock 1157 * locks_mandatory_locked - Check for an active lock
1158 * @file: the file to check 1158 * @file: the file to check
1159 * 1159 *
1160 * Searches the inode's list of locks to find any POSIX locks which conflict. 1160 * Searches the inode's list of locks to find any POSIX locks which conflict.
1161 * This function is called from locks_verify_locked() only. 1161 * This function is called from locks_verify_locked() only.
1162 */ 1162 */
1163 int locks_mandatory_locked(struct file *file) 1163 int locks_mandatory_locked(struct file *file)
1164 { 1164 {
1165 struct inode *inode = file_inode(file); 1165 struct inode *inode = file_inode(file);
1166 fl_owner_t owner = current->files; 1166 fl_owner_t owner = current->files;
1167 struct file_lock *fl; 1167 struct file_lock *fl;
1168 1168
1169 /* 1169 /*
1170 * Search the lock list for this inode for any POSIX locks. 1170 * Search the lock list for this inode for any POSIX locks.
1171 */ 1171 */
1172 spin_lock(&inode->i_lock); 1172 spin_lock(&inode->i_lock);
1173 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { 1173 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
1174 if (!IS_POSIX(fl)) 1174 if (!IS_POSIX(fl))
1175 continue; 1175 continue;
1176 if (fl->fl_owner != owner && fl->fl_owner != (fl_owner_t)file) 1176 if (fl->fl_owner != owner && fl->fl_owner != (fl_owner_t)file)
1177 break; 1177 break;
1178 } 1178 }
1179 spin_unlock(&inode->i_lock); 1179 spin_unlock(&inode->i_lock);
1180 return fl ? -EAGAIN : 0; 1180 return fl ? -EAGAIN : 0;
1181 } 1181 }
1182 1182
1183 /** 1183 /**
1184 * locks_mandatory_area - Check for a conflicting lock 1184 * locks_mandatory_area - Check for a conflicting lock
1185 * @read_write: %FLOCK_VERIFY_WRITE for exclusive access, %FLOCK_VERIFY_READ 1185 * @read_write: %FLOCK_VERIFY_WRITE for exclusive access, %FLOCK_VERIFY_READ
1186 * for shared 1186 * for shared
1187 * @inode: the file to check 1187 * @inode: the file to check
1188 * @filp: how the file was opened (if it was) 1188 * @filp: how the file was opened (if it was)
1189 * @offset: start of area to check 1189 * @offset: start of area to check
1190 * @count: length of area to check 1190 * @count: length of area to check
1191 * 1191 *
1192 * Searches the inode's list of locks to find any POSIX locks which conflict. 1192 * Searches the inode's list of locks to find any POSIX locks which conflict.
1193 * This function is called from rw_verify_area() and 1193 * This function is called from rw_verify_area() and
1194 * locks_verify_truncate(). 1194 * locks_verify_truncate().
1195 */ 1195 */
1196 int locks_mandatory_area(int read_write, struct inode *inode, 1196 int locks_mandatory_area(int read_write, struct inode *inode,
1197 struct file *filp, loff_t offset, 1197 struct file *filp, loff_t offset,
1198 size_t count) 1198 size_t count)
1199 { 1199 {
1200 struct file_lock fl; 1200 struct file_lock fl;
1201 int error; 1201 int error;
1202 bool sleep = false; 1202 bool sleep = false;
1203 1203
1204 locks_init_lock(&fl); 1204 locks_init_lock(&fl);
1205 fl.fl_pid = current->tgid; 1205 fl.fl_pid = current->tgid;
1206 fl.fl_file = filp; 1206 fl.fl_file = filp;
1207 fl.fl_flags = FL_POSIX | FL_ACCESS; 1207 fl.fl_flags = FL_POSIX | FL_ACCESS;
1208 if (filp && !(filp->f_flags & O_NONBLOCK)) 1208 if (filp && !(filp->f_flags & O_NONBLOCK))
1209 sleep = true; 1209 sleep = true;
1210 fl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK; 1210 fl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK;
1211 fl.fl_start = offset; 1211 fl.fl_start = offset;
1212 fl.fl_end = offset + count - 1; 1212 fl.fl_end = offset + count - 1;
1213 1213
1214 for (;;) { 1214 for (;;) {
1215 if (filp) { 1215 if (filp) {
1216 fl.fl_owner = (fl_owner_t)filp; 1216 fl.fl_owner = (fl_owner_t)filp;
1217 fl.fl_flags &= ~FL_SLEEP; 1217 fl.fl_flags &= ~FL_SLEEP;
1218 error = __posix_lock_file(inode, &fl, NULL); 1218 error = __posix_lock_file(inode, &fl, NULL);
1219 if (!error) 1219 if (!error)
1220 break; 1220 break;
1221 } 1221 }
1222 1222
1223 if (sleep) 1223 if (sleep)
1224 fl.fl_flags |= FL_SLEEP; 1224 fl.fl_flags |= FL_SLEEP;
1225 fl.fl_owner = current->files; 1225 fl.fl_owner = current->files;
1226 error = __posix_lock_file(inode, &fl, NULL); 1226 error = __posix_lock_file(inode, &fl, NULL);
1227 if (error != FILE_LOCK_DEFERRED) 1227 if (error != FILE_LOCK_DEFERRED)
1228 break; 1228 break;
1229 error = wait_event_interruptible(fl.fl_wait, !fl.fl_next); 1229 error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
1230 if (!error) { 1230 if (!error) {
1231 /* 1231 /*
1232 * If we've been sleeping someone might have 1232 * If we've been sleeping someone might have
1233 * changed the permissions behind our back. 1233 * changed the permissions behind our back.
1234 */ 1234 */
1235 if (__mandatory_lock(inode)) 1235 if (__mandatory_lock(inode))
1236 continue; 1236 continue;
1237 } 1237 }
1238 1238
1239 locks_delete_block(&fl); 1239 locks_delete_block(&fl);
1240 break; 1240 break;
1241 } 1241 }
1242 1242
1243 return error; 1243 return error;
1244 } 1244 }
1245 1245
1246 EXPORT_SYMBOL(locks_mandatory_area); 1246 EXPORT_SYMBOL(locks_mandatory_area);
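
locks_mandatory_locked() and locks_mandatory_area() only come into play when mandatory locking is actually enabled for the inode; otherwise POSIX locks remain advisory. A sketch of how that mode is normally switched on; the device, mount point, file name and filesystem type are placeholders for the example:

    #include <sys/mount.h>
    #include <sys/stat.h>

    /*
     * Two conditions gate mandatory locking:
     *   1. the filesystem is mounted with MS_MANDLOCK ("-o mand"), and
     *   2. the file has the set-group-ID bit set with group-execute clear.
     */
    int make_locks_mandatory(const char *dev, const char *mnt, const char *file)
    {
            if (mount(dev, mnt, "ext4", MS_REMOUNT | MS_MANDLOCK, NULL) == -1)
                    return -1;
            return chmod(file, S_ISGID | 0640);     /* g+s, g-x */
    }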
1247 1247
1248 static void lease_clear_pending(struct file_lock *fl, int arg) 1248 static void lease_clear_pending(struct file_lock *fl, int arg)
1249 { 1249 {
1250 switch (arg) { 1250 switch (arg) {
1251 case F_UNLCK: 1251 case F_UNLCK:
1252 fl->fl_flags &= ~FL_UNLOCK_PENDING; 1252 fl->fl_flags &= ~FL_UNLOCK_PENDING;
1253 /* fall through: */ 1253 /* fall through: */
1254 case F_RDLCK: 1254 case F_RDLCK:
1255 fl->fl_flags &= ~FL_DOWNGRADE_PENDING; 1255 fl->fl_flags &= ~FL_DOWNGRADE_PENDING;
1256 } 1256 }
1257 } 1257 }
1258 1258
1259 /* We already had a lease on this file; just change its type */ 1259 /* We already had a lease on this file; just change its type */
1260 int lease_modify(struct file_lock **before, int arg) 1260 int lease_modify(struct file_lock **before, int arg)
1261 { 1261 {
1262 struct file_lock *fl = *before; 1262 struct file_lock *fl = *before;
1263 int error = assign_type(fl, arg); 1263 int error = assign_type(fl, arg);
1264 1264
1265 if (error) 1265 if (error)
1266 return error; 1266 return error;
1267 lease_clear_pending(fl, arg); 1267 lease_clear_pending(fl, arg);
1268 locks_wake_up_blocks(fl); 1268 locks_wake_up_blocks(fl);
1269 if (arg == F_UNLCK) { 1269 if (arg == F_UNLCK) {
1270 struct file *filp = fl->fl_file; 1270 struct file *filp = fl->fl_file;
1271 1271
1272 f_delown(filp); 1272 f_delown(filp);
1273 filp->f_owner.signum = 0; 1273 filp->f_owner.signum = 0;
1274 fasync_helper(0, fl->fl_file, 0, &fl->fl_fasync); 1274 fasync_helper(0, fl->fl_file, 0, &fl->fl_fasync);
1275 if (fl->fl_fasync != NULL) { 1275 if (fl->fl_fasync != NULL) {
1276 printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync); 1276 printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync);
1277 fl->fl_fasync = NULL; 1277 fl->fl_fasync = NULL;
1278 } 1278 }
1279 locks_delete_lock(before); 1279 locks_delete_lock(before);
1280 } 1280 }
1281 return 0; 1281 return 0;
1282 } 1282 }
1283 1283
1284 EXPORT_SYMBOL(lease_modify); 1284 EXPORT_SYMBOL(lease_modify);
1285 1285
1286 static bool past_time(unsigned long then) 1286 static bool past_time(unsigned long then)
1287 { 1287 {
1288 if (!then) 1288 if (!then)
1289 /* 0 is a special value meaning "this never expires": */ 1289 /* 0 is a special value meaning "this never expires": */
1290 return false; 1290 return false;
1291 return time_after(jiffies, then); 1291 return time_after(jiffies, then);
1292 } 1292 }
1293 1293
1294 static void time_out_leases(struct inode *inode) 1294 static void time_out_leases(struct inode *inode)
1295 { 1295 {
1296 struct file_lock **before; 1296 struct file_lock **before;
1297 struct file_lock *fl; 1297 struct file_lock *fl;
1298 1298
1299 before = &inode->i_flock; 1299 before = &inode->i_flock;
1300 while ((fl = *before) && IS_LEASE(fl) && lease_breaking(fl)) { 1300 while ((fl = *before) && IS_LEASE(fl) && lease_breaking(fl)) {
1301 if (past_time(fl->fl_downgrade_time)) 1301 if (past_time(fl->fl_downgrade_time))
1302 lease_modify(before, F_RDLCK); 1302 lease_modify(before, F_RDLCK);
1303 if (past_time(fl->fl_break_time)) 1303 if (past_time(fl->fl_break_time))
1304 lease_modify(before, F_UNLCK); 1304 lease_modify(before, F_UNLCK);
1305 if (fl == *before) /* lease_modify may have freed fl */ 1305 if (fl == *before) /* lease_modify may have freed fl */
1306 before = &fl->fl_next; 1306 before = &fl->fl_next;
1307 } 1307 }
1308 } 1308 }
1309 1309
1310 static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker) 1310 static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker)
1311 { 1311 {
1312 if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) 1312 if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE))
1313 return false; 1313 return false;
1314 return locks_conflict(breaker, lease); 1314 return locks_conflict(breaker, lease);
1315 } 1315 }
1316 1316
1317 /** 1317 /**
1318 * __break_lease - revoke all outstanding leases on file 1318 * __break_lease - revoke all outstanding leases on file
1319 * @inode: the inode of the file to return 1319 * @inode: the inode of the file to return
1320 * @mode: O_RDONLY: break only write leases; O_WRONLY or O_RDWR: 1320 * @mode: O_RDONLY: break only write leases; O_WRONLY or O_RDWR:
1321 * break all leases 1321 * break all leases
1322 * @type: FL_LEASE: break leases and delegations; FL_DELEG: break 1322 * @type: FL_LEASE: break leases and delegations; FL_DELEG: break
1323 * only delegations 1323 * only delegations
1324 * 1324 *
1325 * break_lease (inlined for speed) has checked there already is at least 1325 * break_lease (inlined for speed) has checked there already is at least
1326 * some kind of lock (maybe a lease) on this file. Leases are broken on 1326 * some kind of lock (maybe a lease) on this file. Leases are broken on
1327 * a call to open() or truncate(). This function can sleep unless you 1327 * a call to open() or truncate(). This function can sleep unless you
1328 * specified %O_NONBLOCK to your open(). 1328 * specified %O_NONBLOCK to your open().
1329 */ 1329 */
1330 int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) 1330 int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
1331 { 1331 {
1332 int error = 0; 1332 int error = 0;
1333 struct file_lock *new_fl, *flock; 1333 struct file_lock *new_fl, *flock;
1334 struct file_lock *fl; 1334 struct file_lock *fl;
1335 unsigned long break_time; 1335 unsigned long break_time;
1336 int i_have_this_lease = 0; 1336 int i_have_this_lease = 0;
1337 bool lease_conflict = false; 1337 bool lease_conflict = false;
1338 int want_write = (mode & O_ACCMODE) != O_RDONLY; 1338 int want_write = (mode & O_ACCMODE) != O_RDONLY;
1339 1339
1340 new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK); 1340 new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK);
1341 if (IS_ERR(new_fl)) 1341 if (IS_ERR(new_fl))
1342 return PTR_ERR(new_fl); 1342 return PTR_ERR(new_fl);
1343 new_fl->fl_flags = type; 1343 new_fl->fl_flags = type;
1344 1344
1345 spin_lock(&inode->i_lock); 1345 spin_lock(&inode->i_lock);
1346 1346
1347 time_out_leases(inode); 1347 time_out_leases(inode);
1348 1348
1349 flock = inode->i_flock; 1349 flock = inode->i_flock;
1350 if ((flock == NULL) || !IS_LEASE(flock)) 1350 if ((flock == NULL) || !IS_LEASE(flock))
1351 goto out; 1351 goto out;
1352 1352
1353 for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) { 1353 for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) {
1354 if (leases_conflict(fl, new_fl)) { 1354 if (leases_conflict(fl, new_fl)) {
1355 lease_conflict = true; 1355 lease_conflict = true;
1356 if (fl->fl_owner == current->files) 1356 if (fl->fl_owner == current->files)
1357 i_have_this_lease = 1; 1357 i_have_this_lease = 1;
1358 } 1358 }
1359 } 1359 }
1360 if (!lease_conflict) 1360 if (!lease_conflict)
1361 goto out; 1361 goto out;
1362 1362
1363 break_time = 0; 1363 break_time = 0;
1364 if (lease_break_time > 0) { 1364 if (lease_break_time > 0) {
1365 break_time = jiffies + lease_break_time * HZ; 1365 break_time = jiffies + lease_break_time * HZ;
1366 if (break_time == 0) 1366 if (break_time == 0)
1367 break_time++; /* so that 0 means no break time */ 1367 break_time++; /* so that 0 means no break time */
1368 } 1368 }
1369 1369
1370 for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) { 1370 for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) {
1371 if (!leases_conflict(fl, new_fl)) 1371 if (!leases_conflict(fl, new_fl))
1372 continue; 1372 continue;
1373 if (want_write) { 1373 if (want_write) {
1374 if (fl->fl_flags & FL_UNLOCK_PENDING) 1374 if (fl->fl_flags & FL_UNLOCK_PENDING)
1375 continue; 1375 continue;
1376 fl->fl_flags |= FL_UNLOCK_PENDING; 1376 fl->fl_flags |= FL_UNLOCK_PENDING;
1377 fl->fl_break_time = break_time; 1377 fl->fl_break_time = break_time;
1378 } else { 1378 } else {
1379 if (lease_breaking(flock)) 1379 if (lease_breaking(flock))
1380 continue; 1380 continue;
1381 fl->fl_flags |= FL_DOWNGRADE_PENDING; 1381 fl->fl_flags |= FL_DOWNGRADE_PENDING;
1382 fl->fl_downgrade_time = break_time; 1382 fl->fl_downgrade_time = break_time;
1383 } 1383 }
1384 fl->fl_lmops->lm_break(fl); 1384 fl->fl_lmops->lm_break(fl);
1385 } 1385 }
1386 1386
1387 if (i_have_this_lease || (mode & O_NONBLOCK)) { 1387 if (i_have_this_lease || (mode & O_NONBLOCK)) {
1388 error = -EWOULDBLOCK; 1388 error = -EWOULDBLOCK;
1389 goto out; 1389 goto out;
1390 } 1390 }
1391 1391
1392 restart: 1392 restart:
1393 break_time = flock->fl_break_time; 1393 break_time = flock->fl_break_time;
1394 if (break_time != 0) 1394 if (break_time != 0)
1395 break_time -= jiffies; 1395 break_time -= jiffies;
1396 if (break_time == 0) 1396 if (break_time == 0)
1397 break_time++; 1397 break_time++;
1398 locks_insert_block(flock, new_fl); 1398 locks_insert_block(flock, new_fl);
1399 spin_unlock(&inode->i_lock); 1399 spin_unlock(&inode->i_lock);
1400 error = wait_event_interruptible_timeout(new_fl->fl_wait, 1400 error = wait_event_interruptible_timeout(new_fl->fl_wait,
1401 !new_fl->fl_next, break_time); 1401 !new_fl->fl_next, break_time);
1402 spin_lock(&inode->i_lock); 1402 spin_lock(&inode->i_lock);
1403 locks_delete_block(new_fl); 1403 locks_delete_block(new_fl);
1404 if (error >= 0) { 1404 if (error >= 0) {
1405 if (error == 0) 1405 if (error == 0)
1406 time_out_leases(inode); 1406 time_out_leases(inode);
1407 /* 1407 /*
1408 * Wait for the next conflicting lease that has not been 1408 * Wait for the next conflicting lease that has not been
1409 * broken yet 1409 * broken yet
1410 */ 1410 */
1411 for (flock = inode->i_flock; flock && IS_LEASE(flock); 1411 for (flock = inode->i_flock; flock && IS_LEASE(flock);
1412 flock = flock->fl_next) { 1412 flock = flock->fl_next) {
1413 if (leases_conflict(new_fl, flock)) 1413 if (leases_conflict(new_fl, flock))
1414 goto restart; 1414 goto restart;
1415 } 1415 }
1416 error = 0; 1416 error = 0;
1417 } 1417 }
1418 1418
1419 out: 1419 out:
1420 spin_unlock(&inode->i_lock); 1420 spin_unlock(&inode->i_lock);
1421 locks_free_lock(new_fl); 1421 locks_free_lock(new_fl);
1422 return error; 1422 return error;
1423 } 1423 }
1424 1424
1425 EXPORT_SYMBOL(__break_lease); 1425 EXPORT_SYMBOL(__break_lease);
1426 1426
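For orientation, the -EWOULDBLOCK path in __break_lease() above is what a conflicting opener sees when it passes O_NONBLOCK; without that flag the opener sleeps until the lease holder gives the lease up or lease-break-time expires. A minimal userspace sketch of that interaction, assuming "testfile" is a placeholder path on a kernel with this series:

/*
 * Illustrative sketch only (not part of the kernel source): what a second
 * process observes while another process holds a write lease on the file.
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        /* Non-blocking attempt: the lease break is started, but open()
         * fails immediately instead of waiting for it to complete. */
        int fd = open("testfile", O_RDWR | O_NONBLOCK);
        if (fd < 0 && (errno == EWOULDBLOCK || errno == EAGAIN))
                printf("lease held elsewhere; break initiated, not waiting\n");

        /* Blocking attempt: sleeps in __break_lease() until the holder
         * unlocks/downgrades or the break times out. */
        fd = open("testfile", O_RDWR);
        if (fd >= 0) {
                printf("lease broken, open completed\n");
                close(fd);
        }
        return 0;
}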
1427 /** 1427 /**
1428 * lease_get_mtime - get the last modified time of an inode 1428 * lease_get_mtime - get the last modified time of an inode
1429 * @inode: the inode 1429 * @inode: the inode
1430 * @time: pointer to a timespec which will contain the last modified time 1430 * @time: pointer to a timespec which will contain the last modified time
1431 * 1431 *
1432 * This is to force NFS clients to flush their caches for files with 1432 * This is to force NFS clients to flush their caches for files with
1433 * exclusive leases. The justification is that if someone has an 1433 * exclusive leases. The justification is that if someone has an
1434 * exclusive lease, then they could be modifying it. 1434 * exclusive lease, then they could be modifying it.
1435 */ 1435 */
1436 void lease_get_mtime(struct inode *inode, struct timespec *time) 1436 void lease_get_mtime(struct inode *inode, struct timespec *time)
1437 { 1437 {
1438 struct file_lock *flock = inode->i_flock; 1438 struct file_lock *flock = inode->i_flock;
1439 if (flock && IS_LEASE(flock) && (flock->fl_type == F_WRLCK)) 1439 if (flock && IS_LEASE(flock) && (flock->fl_type == F_WRLCK))
1440 *time = current_fs_time(inode->i_sb); 1440 *time = current_fs_time(inode->i_sb);
1441 else 1441 else
1442 *time = inode->i_mtime; 1442 *time = inode->i_mtime;
1443 } 1443 }
1444 1444
1445 EXPORT_SYMBOL(lease_get_mtime); 1445 EXPORT_SYMBOL(lease_get_mtime);
1446 1446
1447 /** 1447 /**
1448 * fcntl_getlease - Enquire what lease is currently active 1448 * fcntl_getlease - Enquire what lease is currently active
1449 * @filp: the file 1449 * @filp: the file
1450 * 1450 *
1451 * The value returned by this function will be one of 1451 * The value returned by this function will be one of
1452 * (if no lease break is pending): 1452 * (if no lease break is pending):
1453 * 1453 *
1454 * %F_RDLCK to indicate a shared lease is held. 1454 * %F_RDLCK to indicate a shared lease is held.
1455 * 1455 *
1456 * %F_WRLCK to indicate an exclusive lease is held. 1456 * %F_WRLCK to indicate an exclusive lease is held.
1457 * 1457 *
1458 * %F_UNLCK to indicate no lease is held. 1458 * %F_UNLCK to indicate no lease is held.
1459 * 1459 *
1460 * (if a lease break is pending): 1460 * (if a lease break is pending):
1461 * 1461 *
1462 * %F_RDLCK to indicate an exclusive lease needs to be 1462 * %F_RDLCK to indicate an exclusive lease needs to be
1463 * changed to a shared lease (or removed). 1463 * changed to a shared lease (or removed).
1464 * 1464 *
1465 * %F_UNLCK to indicate the lease needs to be removed. 1465 * %F_UNLCK to indicate the lease needs to be removed.
1466 * 1466 *
1467 * XXX: sfr & willy disagree over whether F_INPROGRESS 1467 * XXX: sfr & willy disagree over whether F_INPROGRESS
1468 * should be returned to userspace. 1468 * should be returned to userspace.
1469 */ 1469 */
1470 int fcntl_getlease(struct file *filp) 1470 int fcntl_getlease(struct file *filp)
1471 { 1471 {
1472 struct file_lock *fl; 1472 struct file_lock *fl;
1473 struct inode *inode = file_inode(filp); 1473 struct inode *inode = file_inode(filp);
1474 int type = F_UNLCK; 1474 int type = F_UNLCK;
1475 1475
1476 spin_lock(&inode->i_lock); 1476 spin_lock(&inode->i_lock);
1477 time_out_leases(file_inode(filp)); 1477 time_out_leases(file_inode(filp));
1478 for (fl = file_inode(filp)->i_flock; fl && IS_LEASE(fl); 1478 for (fl = file_inode(filp)->i_flock; fl && IS_LEASE(fl);
1479 fl = fl->fl_next) { 1479 fl = fl->fl_next) {
1480 if (fl->fl_file == filp) { 1480 if (fl->fl_file == filp) {
1481 type = target_leasetype(fl); 1481 type = target_leasetype(fl);
1482 break; 1482 break;
1483 } 1483 }
1484 } 1484 }
1485 spin_unlock(&inode->i_lock); 1485 spin_unlock(&inode->i_lock);
1486 return type; 1486 return type;
1487 } 1487 }
1488 1488
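The userspace counterpart of fcntl_getlease() is the F_GETLEASE command; a hedged sketch of querying it ("testfile" is a placeholder, and glibc exposes F_GETLEASE under _GNU_SOURCE):

/* Illustrative only: report the lease currently held on an fd. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static const char *lease_name(int type)
{
        switch (type) {
        case F_RDLCK: return "read lease (or downgrade pending)";
        case F_WRLCK: return "write lease";
        case F_UNLCK: return "no lease (or unlock pending)";
        default:      return "error/unknown";
        }
}

int main(void)
{
        int fd = open("testfile", O_RDONLY);

        if (fd < 0)
                return 1;
        printf("F_GETLEASE: %s\n", lease_name(fcntl(fd, F_GETLEASE)));
        close(fd);
        return 0;
}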
1489 /** 1489 /**
1490 * check_conflicting_open - see if the given dentry points to a file that has 1490 * check_conflicting_open - see if the given dentry points to a file that has
1491 * an existing open that would conflict with the 1491 * an existing open that would conflict with the
1492 * desired lease. 1492 * desired lease.
1493 * @dentry: dentry to check 1493 * @dentry: dentry to check
1494 * @arg: type of lease that we're trying to acquire 1494 * @arg: type of lease that we're trying to acquire
1495 * 1495 *
1496 * Check to see if there's an existing open fd on this file that would 1496 * Check to see if there's an existing open fd on this file that would
1497 * conflict with the lease we're trying to set. 1497 * conflict with the lease we're trying to set.
1498 */ 1498 */
1499 static int 1499 static int
1500 check_conflicting_open(const struct dentry *dentry, const long arg) 1500 check_conflicting_open(const struct dentry *dentry, const long arg)
1501 { 1501 {
1502 int ret = 0; 1502 int ret = 0;
1503 struct inode *inode = dentry->d_inode; 1503 struct inode *inode = dentry->d_inode;
1504 1504
1505 if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) 1505 if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
1506 return -EAGAIN; 1506 return -EAGAIN;
1507 1507
1508 if ((arg == F_WRLCK) && ((d_count(dentry) > 1) || 1508 if ((arg == F_WRLCK) && ((d_count(dentry) > 1) ||
1509 (atomic_read(&inode->i_count) > 1))) 1509 (atomic_read(&inode->i_count) > 1)))
1510 ret = -EAGAIN; 1510 ret = -EAGAIN;
1511 1511
1512 return ret; 1512 return ret;
1513 } 1513 }
1514 1514
1515 static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp) 1515 static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp)
1516 { 1516 {
1517 struct file_lock *fl, **before, **my_before = NULL, *lease; 1517 struct file_lock *fl, **before, **my_before = NULL, *lease;
1518 struct dentry *dentry = filp->f_path.dentry; 1518 struct dentry *dentry = filp->f_path.dentry;
1519 struct inode *inode = dentry->d_inode; 1519 struct inode *inode = dentry->d_inode;
1520 bool is_deleg = (*flp)->fl_flags & FL_DELEG; 1520 bool is_deleg = (*flp)->fl_flags & FL_DELEG;
1521 int error; 1521 int error;
1522 1522
1523 lease = *flp; 1523 lease = *flp;
1524 /* 1524 /*
1525 * In the delegation case we need mutual exclusion with 1525 * In the delegation case we need mutual exclusion with
1526 * a number of operations that take the i_mutex. We trylock 1526 * a number of operations that take the i_mutex. We trylock
1527 * because delegations are an optional optimization, and if 1527 * because delegations are an optional optimization, and if
1528 * there's some chance of a conflict--we'd rather not 1528 * there's some chance of a conflict--we'd rather not
1529 * bother, maybe that's a sign this just isn't a good file to 1529 * bother, maybe that's a sign this just isn't a good file to
1530 * hand out a delegation on. 1530 * hand out a delegation on.
1531 */ 1531 */
1532 if (is_deleg && !mutex_trylock(&inode->i_mutex)) 1532 if (is_deleg && !mutex_trylock(&inode->i_mutex))
1533 return -EAGAIN; 1533 return -EAGAIN;
1534 1534
1535 if (is_deleg && arg == F_WRLCK) { 1535 if (is_deleg && arg == F_WRLCK) {
1536 /* Write delegations are not currently supported: */ 1536 /* Write delegations are not currently supported: */
1537 mutex_unlock(&inode->i_mutex); 1537 mutex_unlock(&inode->i_mutex);
1538 WARN_ON_ONCE(1); 1538 WARN_ON_ONCE(1);
1539 return -EINVAL; 1539 return -EINVAL;
1540 } 1540 }
1541 1541
1542 error = check_conflicting_open(dentry, arg); 1542 error = check_conflicting_open(dentry, arg);
1543 if (error) 1543 if (error)
1544 goto out; 1544 goto out;
1545 1545
1546 /* 1546 /*
1547 * At this point, we know that if there is an exclusive 1547 * At this point, we know that if there is an exclusive
1548 * lease on this file, then we hold it on this filp 1548 * lease on this file, then we hold it on this filp
1549 * (otherwise our open of this file would have blocked). 1549 * (otherwise our open of this file would have blocked).
1550 * And if we are trying to acquire an exclusive lease, 1550 * And if we are trying to acquire an exclusive lease,
1551 * then the file is not open by anyone (including us) 1551 * then the file is not open by anyone (including us)
1552 * except for this filp. 1552 * except for this filp.
1553 */ 1553 */
1554 error = -EAGAIN; 1554 error = -EAGAIN;
1555 for (before = &inode->i_flock; 1555 for (before = &inode->i_flock;
1556 ((fl = *before) != NULL) && IS_LEASE(fl); 1556 ((fl = *before) != NULL) && IS_LEASE(fl);
1557 before = &fl->fl_next) { 1557 before = &fl->fl_next) {
1558 if (fl->fl_file == filp) { 1558 if (fl->fl_file == filp) {
1559 my_before = before; 1559 my_before = before;
1560 continue; 1560 continue;
1561 } 1561 }
1562 /* 1562 /*
1563 * No exclusive leases if someone else has a lease on 1563 * No exclusive leases if someone else has a lease on
1564 * this file: 1564 * this file:
1565 */ 1565 */
1566 if (arg == F_WRLCK) 1566 if (arg == F_WRLCK)
1567 goto out; 1567 goto out;
1568 /* 1568 /*
1569 * Modifying our existing lease is OK, but no getting a 1569 * Modifying our existing lease is OK, but no getting a
1570 * new lease if someone else is opening for write: 1570 * new lease if someone else is opening for write:
1571 */ 1571 */
1572 if (fl->fl_flags & FL_UNLOCK_PENDING) 1572 if (fl->fl_flags & FL_UNLOCK_PENDING)
1573 goto out; 1573 goto out;
1574 } 1574 }
1575 1575
1576 if (my_before != NULL) { 1576 if (my_before != NULL) {
1577 error = lease->fl_lmops->lm_change(my_before, arg); 1577 error = lease->fl_lmops->lm_change(my_before, arg);
1578 if (!error) 1578 if (!error)
1579 *flp = *my_before; 1579 *flp = *my_before;
1580 goto out; 1580 goto out;
1581 } 1581 }
1582 1582
1583 error = -EINVAL; 1583 error = -EINVAL;
1584 if (!leases_enable) 1584 if (!leases_enable)
1585 goto out; 1585 goto out;
1586 1586
1587 locks_insert_lock(before, lease); 1587 locks_insert_lock(before, lease);
1588 /* 1588 /*
1589 * The check in break_lease() is lockless. It's possible for another 1589 * The check in break_lease() is lockless. It's possible for another
1590 * open to race in after we did the earlier check for a conflicting 1590 * open to race in after we did the earlier check for a conflicting
1591 * open but before the lease was inserted. Check again for a 1591 * open but before the lease was inserted. Check again for a
1592 * conflicting open and cancel the lease if there is one. 1592 * conflicting open and cancel the lease if there is one.
1593 * 1593 *
1594 * We also add a barrier here to ensure that the insertion of the lock 1594 * We also add a barrier here to ensure that the insertion of the lock
1595 * precedes these checks. 1595 * precedes these checks.
1596 */ 1596 */
1597 smp_mb(); 1597 smp_mb();
1598 error = check_conflicting_open(dentry, arg); 1598 error = check_conflicting_open(dentry, arg);
1599 if (error) 1599 if (error)
1600 locks_unlink_lock(flp); 1600 locks_unlink_lock(flp);
1601 out: 1601 out:
1602 if (is_deleg) 1602 if (is_deleg)
1603 mutex_unlock(&inode->i_mutex); 1603 mutex_unlock(&inode->i_mutex);
1604 return error; 1604 return error;
1605 } 1605 }
1606 1606
1607 static int generic_delete_lease(struct file *filp, struct file_lock **flp) 1607 static int generic_delete_lease(struct file *filp, struct file_lock **flp)
1608 { 1608 {
1609 struct file_lock *fl, **before; 1609 struct file_lock *fl, **before;
1610 struct dentry *dentry = filp->f_path.dentry; 1610 struct dentry *dentry = filp->f_path.dentry;
1611 struct inode *inode = dentry->d_inode; 1611 struct inode *inode = dentry->d_inode;
1612 1612
1613 for (before = &inode->i_flock; 1613 for (before = &inode->i_flock;
1614 ((fl = *before) != NULL) && IS_LEASE(fl); 1614 ((fl = *before) != NULL) && IS_LEASE(fl);
1615 before = &fl->fl_next) { 1615 before = &fl->fl_next) {
1616 if (fl->fl_file != filp) 1616 if (fl->fl_file != filp)
1617 continue; 1617 continue;
1618 return (*flp)->fl_lmops->lm_change(before, F_UNLCK); 1618 return (*flp)->fl_lmops->lm_change(before, F_UNLCK);
1619 } 1619 }
1620 return -EAGAIN; 1620 return -EAGAIN;
1621 } 1621 }
1622 1622
1623 /** 1623 /**
1624 * generic_setlease - sets a lease on an open file 1624 * generic_setlease - sets a lease on an open file
1625 * @filp: file pointer 1625 * @filp: file pointer
1626 * @arg: type of lease to obtain 1626 * @arg: type of lease to obtain
1627 * @flp: input - file_lock to use, output - file_lock inserted 1627 * @flp: input - file_lock to use, output - file_lock inserted
1628 * 1628 *
1629 * The (input) flp->fl_lmops->lm_break function is required 1629 * The (input) flp->fl_lmops->lm_break function is required
1630 * by break_lease(). 1630 * by break_lease().
1631 * 1631 *
1632 * Called with inode->i_lock held. 1632 * Called with inode->i_lock held.
1633 */ 1633 */
1634 int generic_setlease(struct file *filp, long arg, struct file_lock **flp) 1634 int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
1635 { 1635 {
1636 struct dentry *dentry = filp->f_path.dentry; 1636 struct dentry *dentry = filp->f_path.dentry;
1637 struct inode *inode = dentry->d_inode; 1637 struct inode *inode = dentry->d_inode;
1638 int error; 1638 int error;
1639 1639
1640 if ((!uid_eq(current_fsuid(), inode->i_uid)) && !capable(CAP_LEASE)) 1640 if ((!uid_eq(current_fsuid(), inode->i_uid)) && !capable(CAP_LEASE))
1641 return -EACCES; 1641 return -EACCES;
1642 if (!S_ISREG(inode->i_mode)) 1642 if (!S_ISREG(inode->i_mode))
1643 return -EINVAL; 1643 return -EINVAL;
1644 error = security_file_lock(filp, arg); 1644 error = security_file_lock(filp, arg);
1645 if (error) 1645 if (error)
1646 return error; 1646 return error;
1647 1647
1648 time_out_leases(inode); 1648 time_out_leases(inode);
1649 1649
1650 BUG_ON(!(*flp)->fl_lmops->lm_break); 1650 BUG_ON(!(*flp)->fl_lmops->lm_break);
1651 1651
1652 switch (arg) { 1652 switch (arg) {
1653 case F_UNLCK: 1653 case F_UNLCK:
1654 return generic_delete_lease(filp, flp); 1654 return generic_delete_lease(filp, flp);
1655 case F_RDLCK: 1655 case F_RDLCK:
1656 case F_WRLCK: 1656 case F_WRLCK:
1657 return generic_add_lease(filp, arg, flp); 1657 return generic_add_lease(filp, arg, flp);
1658 default: 1658 default:
1659 return -EINVAL; 1659 return -EINVAL;
1660 } 1660 }
1661 } 1661 }
1662 EXPORT_SYMBOL(generic_setlease); 1662 EXPORT_SYMBOL(generic_setlease);
1663 1663
1664 static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease) 1664 static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease)
1665 { 1665 {
1666 if (filp->f_op->setlease) 1666 if (filp->f_op->setlease)
1667 return filp->f_op->setlease(filp, arg, lease); 1667 return filp->f_op->setlease(filp, arg, lease);
1668 else 1668 else
1669 return generic_setlease(filp, arg, lease); 1669 return generic_setlease(filp, arg, lease);
1670 } 1670 }
1671 1671
1672 /** 1672 /**
1673 * vfs_setlease - sets a lease on an open file 1673 * vfs_setlease - sets a lease on an open file
1674 * @filp: file pointer 1674 * @filp: file pointer
1675 * @arg: type of lease to obtain 1675 * @arg: type of lease to obtain
1676 * @lease: file_lock to use 1676 * @lease: file_lock to use
1677 * 1677 *
1678 * Call this to establish a lease on the file. 1678 * Call this to establish a lease on the file.
1679 * The (*lease)->fl_lmops->lm_break operation must be set; if not, 1679 * The (*lease)->fl_lmops->lm_break operation must be set; if not,
1680 * break_lease will oops! 1680 * break_lease will oops!
1681 * 1681 *
1682 * This will call the filesystem's setlease file method, if 1682 * This will call the filesystem's setlease file method, if
1683 * defined. Note that there is no getlease method; instead, the 1683 * defined. Note that there is no getlease method; instead, the
1684 * filesystem setlease method should call back to setlease() to 1684 * filesystem setlease method should call back to setlease() to
1685 * add a lease to the inode's lease list, where fcntl_getlease() can 1685 * add a lease to the inode's lease list, where fcntl_getlease() can
1686 * find it. Since fcntl_getlease() only reports whether the current 1686 * find it. Since fcntl_getlease() only reports whether the current
1687 * task holds a lease, a cluster filesystem need only do this for 1687 * task holds a lease, a cluster filesystem need only do this for
1688 * leases held by processes on this node. 1688 * leases held by processes on this node.
1689 * 1689 *
1690 * There is also no break_lease method; filesystems that 1690 * There is also no break_lease method; filesystems that
1691 * handle their own leases should break leases themselves from the 1691 * handle their own leases should break leases themselves from the
1692 * filesystem's open, create, and (on truncate) setattr methods. 1692 * filesystem's open, create, and (on truncate) setattr methods.
1693 * 1693 *
1694 * Warning: the only current setlease methods exist only to disable 1694 * Warning: the only current setlease methods exist only to disable
1695 * leases in certain cases. More vfs changes may be required to 1695 * leases in certain cases. More vfs changes may be required to
1696 * allow a full filesystem lease implementation. 1696 * allow a full filesystem lease implementation.
1697 */ 1697 */
1698 1698
1699 int vfs_setlease(struct file *filp, long arg, struct file_lock **lease) 1699 int vfs_setlease(struct file *filp, long arg, struct file_lock **lease)
1700 { 1700 {
1701 struct inode *inode = file_inode(filp); 1701 struct inode *inode = file_inode(filp);
1702 int error; 1702 int error;
1703 1703
1704 spin_lock(&inode->i_lock); 1704 spin_lock(&inode->i_lock);
1705 error = __vfs_setlease(filp, arg, lease); 1705 error = __vfs_setlease(filp, arg, lease);
1706 spin_unlock(&inode->i_lock); 1706 spin_unlock(&inode->i_lock);
1707 1707
1708 return error; 1708 return error;
1709 } 1709 }
1710 EXPORT_SYMBOL_GPL(vfs_setlease); 1710 EXPORT_SYMBOL_GPL(vfs_setlease);
1711 1711
1712 static int do_fcntl_delete_lease(struct file *filp) 1712 static int do_fcntl_delete_lease(struct file *filp)
1713 { 1713 {
1714 struct file_lock fl, *flp = &fl; 1714 struct file_lock fl, *flp = &fl;
1715 1715
1716 lease_init(filp, F_UNLCK, flp); 1716 lease_init(filp, F_UNLCK, flp);
1717 1717
1718 return vfs_setlease(filp, F_UNLCK, &flp); 1718 return vfs_setlease(filp, F_UNLCK, &flp);
1719 } 1719 }
1720 1720
1721 static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg) 1721 static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
1722 { 1722 {
1723 struct file_lock *fl, *ret; 1723 struct file_lock *fl, *ret;
1724 struct inode *inode = file_inode(filp); 1724 struct inode *inode = file_inode(filp);
1725 struct fasync_struct *new; 1725 struct fasync_struct *new;
1726 int error; 1726 int error;
1727 1727
1728 fl = lease_alloc(filp, arg); 1728 fl = lease_alloc(filp, arg);
1729 if (IS_ERR(fl)) 1729 if (IS_ERR(fl))
1730 return PTR_ERR(fl); 1730 return PTR_ERR(fl);
1731 1731
1732 new = fasync_alloc(); 1732 new = fasync_alloc();
1733 if (!new) { 1733 if (!new) {
1734 locks_free_lock(fl); 1734 locks_free_lock(fl);
1735 return -ENOMEM; 1735 return -ENOMEM;
1736 } 1736 }
1737 ret = fl; 1737 ret = fl;
1738 spin_lock(&inode->i_lock); 1738 spin_lock(&inode->i_lock);
1739 error = __vfs_setlease(filp, arg, &ret); 1739 error = __vfs_setlease(filp, arg, &ret);
1740 if (error) { 1740 if (error) {
1741 spin_unlock(&inode->i_lock); 1741 spin_unlock(&inode->i_lock);
1742 locks_free_lock(fl); 1742 locks_free_lock(fl);
1743 goto out_free_fasync; 1743 goto out_free_fasync;
1744 } 1744 }
1745 if (ret != fl) 1745 if (ret != fl)
1746 locks_free_lock(fl); 1746 locks_free_lock(fl);
1747 1747
1748 /* 1748 /*
1749 * fasync_insert_entry() returns the old entry if any. 1749 * fasync_insert_entry() returns the old entry if any.
1750 * If there was no old entry, then it used 'new' and 1750 * If there was no old entry, then it used 'new' and
1751 * inserted it into the fasync list. Clear new so that 1751 * inserted it into the fasync list. Clear new so that
1752 * we don't release it here. 1752 * we don't release it here.
1753 */ 1753 */
1754 if (!fasync_insert_entry(fd, filp, &ret->fl_fasync, new)) 1754 if (!fasync_insert_entry(fd, filp, &ret->fl_fasync, new))
1755 new = NULL; 1755 new = NULL;
1756 1756
1757 error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); 1757 error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
1758 spin_unlock(&inode->i_lock); 1758 spin_unlock(&inode->i_lock);
1759 1759
1760 out_free_fasync: 1760 out_free_fasync:
1761 if (new) 1761 if (new)
1762 fasync_free(new); 1762 fasync_free(new);
1763 return error; 1763 return error;
1764 } 1764 }
1765 1765
1766 /** 1766 /**
1767 * fcntl_setlease - sets a lease on an open file 1767 * fcntl_setlease - sets a lease on an open file
1768 * @fd: open file descriptor 1768 * @fd: open file descriptor
1769 * @filp: file pointer 1769 * @filp: file pointer
1770 * @arg: type of lease to obtain 1770 * @arg: type of lease to obtain
1771 * 1771 *
1772 * Call this fcntl to establish a lease on the file. 1772 * Call this fcntl to establish a lease on the file.
1773 * Note that you also need to call %F_SETSIG to 1773 * Note that you also need to call %F_SETSIG to
1774 * receive a signal when the lease is broken. 1774 * receive a signal when the lease is broken.
1775 */ 1775 */
1776 int fcntl_setlease(unsigned int fd, struct file *filp, long arg) 1776 int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
1777 { 1777 {
1778 if (arg == F_UNLCK) 1778 if (arg == F_UNLCK)
1779 return do_fcntl_delete_lease(filp); 1779 return do_fcntl_delete_lease(filp);
1780 return do_fcntl_add_lease(fd, filp, arg); 1780 return do_fcntl_add_lease(fd, filp, arg);
1781 } 1781 }
1782 1782
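A hedged sketch of the F_SETLEASE/F_SETSIG pairing mentioned in the comment above: taking a write lease and arranging for a real-time signal whose siginfo carries the descriptor being broken. Path, signal choice and error handling are illustrative assumptions, not part of the commit.

/* Illustrative only: hold a write lease and surrender it on a break signal. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <signal.h>
#include <string.h>
#include <unistd.h>

static volatile sig_atomic_t broken_fd = -1;

static void on_break(int sig, siginfo_t *si, void *ctx)
{
        (void)sig; (void)ctx;
        broken_fd = si->si_fd;          /* descriptor whose lease is breaking */
}

int main(void)
{
        struct sigaction sa;
        int fd = open("testfile", O_RDONLY);

        if (fd < 0)
                return 1;
        memset(&sa, 0, sizeof(sa));
        sa.sa_sigaction = on_break;
        sa.sa_flags = SA_SIGINFO;
        sigaction(SIGRTMIN, &sa, NULL);

        fcntl(fd, F_SETSIG, SIGRTMIN);          /* deliver SIGRTMIN instead of SIGIO */
        if (fcntl(fd, F_SETLEASE, F_WRLCK))     /* needs file ownership or CAP_LEASE,
                                                   and no other opens of the file */
                return 1;

        pause();                                /* wait for a lease break */
        if (broken_fd == fd)
                fcntl(fd, F_SETLEASE, F_UNLCK); /* give it up within lease-break-time */
        close(fd);
        return 0;
}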
1783 /** 1783 /**
1784 * flock_lock_file_wait - Apply a FLOCK-style lock to a file 1784 * flock_lock_file_wait - Apply a FLOCK-style lock to a file
1785 * @filp: The file to apply the lock to 1785 * @filp: The file to apply the lock to
1786 * @fl: The lock to be applied 1786 * @fl: The lock to be applied
1787 * 1787 *
1788 * Add a FLOCK style lock to a file. 1788 * Add a FLOCK style lock to a file.
1789 */ 1789 */
1790 int flock_lock_file_wait(struct file *filp, struct file_lock *fl) 1790 int flock_lock_file_wait(struct file *filp, struct file_lock *fl)
1791 { 1791 {
1792 int error; 1792 int error;
1793 might_sleep(); 1793 might_sleep();
1794 for (;;) { 1794 for (;;) {
1795 error = flock_lock_file(filp, fl); 1795 error = flock_lock_file(filp, fl);
1796 if (error != FILE_LOCK_DEFERRED) 1796 if (error != FILE_LOCK_DEFERRED)
1797 break; 1797 break;
1798 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); 1798 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
1799 if (!error) 1799 if (!error)
1800 continue; 1800 continue;
1801 1801
1802 locks_delete_block(fl); 1802 locks_delete_block(fl);
1803 break; 1803 break;
1804 } 1804 }
1805 return error; 1805 return error;
1806 } 1806 }
1807 1807
1808 EXPORT_SYMBOL(flock_lock_file_wait); 1808 EXPORT_SYMBOL(flock_lock_file_wait);
1809 1809
1810 /** 1810 /**
1811 * sys_flock: - flock() system call. 1811 * sys_flock: - flock() system call.
1812 * @fd: the file descriptor to lock. 1812 * @fd: the file descriptor to lock.
1813 * @cmd: the type of lock to apply. 1813 * @cmd: the type of lock to apply.
1814 * 1814 *
1815 * Apply a %FL_FLOCK style lock to an open file descriptor. 1815 * Apply a %FL_FLOCK style lock to an open file descriptor.
1816 * The @cmd can be one of 1816 * The @cmd can be one of
1817 * 1817 *
1818 * %LOCK_SH -- a shared lock. 1818 * %LOCK_SH -- a shared lock.
1819 * 1819 *
1820 * %LOCK_EX -- an exclusive lock. 1820 * %LOCK_EX -- an exclusive lock.
1821 * 1821 *
1822 * %LOCK_UN -- remove an existing lock. 1822 * %LOCK_UN -- remove an existing lock.
1823 * 1823 *
1824 * %LOCK_MAND -- a `mandatory' flock. This exists to emulate Windows Share Modes. 1824 * %LOCK_MAND -- a `mandatory' flock. This exists to emulate Windows Share Modes.
1825 * 1825 *
1826 * %LOCK_MAND can be combined with %LOCK_READ or %LOCK_WRITE to allow other 1826 * %LOCK_MAND can be combined with %LOCK_READ or %LOCK_WRITE to allow other
1827 * processes read and write access respectively. 1827 * processes read and write access respectively.
1828 */ 1828 */
1829 SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) 1829 SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
1830 { 1830 {
1831 struct fd f = fdget(fd); 1831 struct fd f = fdget(fd);
1832 struct file_lock *lock; 1832 struct file_lock *lock;
1833 int can_sleep, unlock; 1833 int can_sleep, unlock;
1834 int error; 1834 int error;
1835 1835
1836 error = -EBADF; 1836 error = -EBADF;
1837 if (!f.file) 1837 if (!f.file)
1838 goto out; 1838 goto out;
1839 1839
1840 can_sleep = !(cmd & LOCK_NB); 1840 can_sleep = !(cmd & LOCK_NB);
1841 cmd &= ~LOCK_NB; 1841 cmd &= ~LOCK_NB;
1842 unlock = (cmd == LOCK_UN); 1842 unlock = (cmd == LOCK_UN);
1843 1843
1844 if (!unlock && !(cmd & LOCK_MAND) && 1844 if (!unlock && !(cmd & LOCK_MAND) &&
1845 !(f.file->f_mode & (FMODE_READ|FMODE_WRITE))) 1845 !(f.file->f_mode & (FMODE_READ|FMODE_WRITE)))
1846 goto out_putf; 1846 goto out_putf;
1847 1847
1848 error = flock_make_lock(f.file, &lock, cmd); 1848 error = flock_make_lock(f.file, &lock, cmd);
1849 if (error) 1849 if (error)
1850 goto out_putf; 1850 goto out_putf;
1851 if (can_sleep) 1851 if (can_sleep)
1852 lock->fl_flags |= FL_SLEEP; 1852 lock->fl_flags |= FL_SLEEP;
1853 1853
1854 error = security_file_lock(f.file, lock->fl_type); 1854 error = security_file_lock(f.file, lock->fl_type);
1855 if (error) 1855 if (error)
1856 goto out_free; 1856 goto out_free;
1857 1857
1858 if (f.file->f_op->flock) 1858 if (f.file->f_op->flock)
1859 error = f.file->f_op->flock(f.file, 1859 error = f.file->f_op->flock(f.file,
1860 (can_sleep) ? F_SETLKW : F_SETLK, 1860 (can_sleep) ? F_SETLKW : F_SETLK,
1861 lock); 1861 lock);
1862 else 1862 else
1863 error = flock_lock_file_wait(f.file, lock); 1863 error = flock_lock_file_wait(f.file, lock);
1864 1864
1865 out_free: 1865 out_free:
1866 locks_free_lock(lock); 1866 locks_free_lock(lock);
1867 1867
1868 out_putf: 1868 out_putf:
1869 fdput(f); 1869 fdput(f);
1870 out: 1870 out:
1871 return error; 1871 return error;
1872 } 1872 }
1873 1873
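sys_flock() above backs the whole-file flock(2) call; a minimal usage sketch, with the lock-file path as a placeholder:

/* Illustrative only: take an exclusive flock() lock without blocking. */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/file.h>
#include <unistd.h>

int main(void)
{
        int fd = open("testfile.lock", O_RDONLY | O_CREAT, 0644);

        if (fd < 0)
                return 1;
        if (flock(fd, LOCK_EX | LOCK_NB) == -1) {
                if (errno == EWOULDBLOCK)
                        printf("already locked by another open file description\n");
                return 1;
        }
        /* ... critical section ... */
        flock(fd, LOCK_UN);     /* also dropped automatically on last close */
        close(fd);
        return 0;
}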
1874 /** 1874 /**
1875 * vfs_test_lock - test file byte range lock 1875 * vfs_test_lock - test file byte range lock
1876 * @filp: The file to test lock for 1876 * @filp: The file to test lock for
1877 * @fl: The lock to test; also used to hold result 1877 * @fl: The lock to test; also used to hold result
1878 * 1878 *
1879 * Returns -ERRNO on failure. Indicates presence of conflicting lock by 1879 * Returns -ERRNO on failure. Indicates presence of conflicting lock by
1880 * setting conf->fl_type to something other than F_UNLCK. 1880 * setting conf->fl_type to something other than F_UNLCK.
1881 */ 1881 */
1882 int vfs_test_lock(struct file *filp, struct file_lock *fl) 1882 int vfs_test_lock(struct file *filp, struct file_lock *fl)
1883 { 1883 {
1884 if (filp->f_op->lock) 1884 if (filp->f_op->lock)
1885 return filp->f_op->lock(filp, F_GETLK, fl); 1885 return filp->f_op->lock(filp, F_GETLK, fl);
1886 posix_test_lock(filp, fl); 1886 posix_test_lock(filp, fl);
1887 return 0; 1887 return 0;
1888 } 1888 }
1889 EXPORT_SYMBOL_GPL(vfs_test_lock); 1889 EXPORT_SYMBOL_GPL(vfs_test_lock);
1890 1890
1891 static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl) 1891 static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)
1892 { 1892 {
1893 flock->l_pid = IS_FILE_PVT(fl) ? -1 : fl->fl_pid; 1893 flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid;
1894 #if BITS_PER_LONG == 32 1894 #if BITS_PER_LONG == 32
1895 /* 1895 /*
1896 * Make sure we can represent the posix lock via 1896 * Make sure we can represent the posix lock via
1897 * legacy 32bit flock. 1897 * legacy 32bit flock.
1898 */ 1898 */
1899 if (fl->fl_start > OFFT_OFFSET_MAX) 1899 if (fl->fl_start > OFFT_OFFSET_MAX)
1900 return -EOVERFLOW; 1900 return -EOVERFLOW;
1901 if (fl->fl_end != OFFSET_MAX && fl->fl_end > OFFT_OFFSET_MAX) 1901 if (fl->fl_end != OFFSET_MAX && fl->fl_end > OFFT_OFFSET_MAX)
1902 return -EOVERFLOW; 1902 return -EOVERFLOW;
1903 #endif 1903 #endif
1904 flock->l_start = fl->fl_start; 1904 flock->l_start = fl->fl_start;
1905 flock->l_len = fl->fl_end == OFFSET_MAX ? 0 : 1905 flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
1906 fl->fl_end - fl->fl_start + 1; 1906 fl->fl_end - fl->fl_start + 1;
1907 flock->l_whence = 0; 1907 flock->l_whence = 0;
1908 flock->l_type = fl->fl_type; 1908 flock->l_type = fl->fl_type;
1909 return 0; 1909 return 0;
1910 } 1910 }
1911 1911
1912 #if BITS_PER_LONG == 32 1912 #if BITS_PER_LONG == 32
1913 static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl) 1913 static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl)
1914 { 1914 {
1915 flock->l_pid = IS_FILE_PVT(fl) ? -1 : fl->fl_pid; 1915 flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid;
1916 flock->l_start = fl->fl_start; 1916 flock->l_start = fl->fl_start;
1917 flock->l_len = fl->fl_end == OFFSET_MAX ? 0 : 1917 flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
1918 fl->fl_end - fl->fl_start + 1; 1918 fl->fl_end - fl->fl_start + 1;
1919 flock->l_whence = 0; 1919 flock->l_whence = 0;
1920 flock->l_type = fl->fl_type; 1920 flock->l_type = fl->fl_type;
1921 } 1921 }
1922 #endif 1922 #endif
1923 1923
1924 /* Report the first existing lock that would conflict with l. 1924 /* Report the first existing lock that would conflict with l.
1925 * This implements the F_GETLK command of fcntl(). 1925 * This implements the F_GETLK command of fcntl().
1926 */ 1926 */
1927 int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l) 1927 int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l)
1928 { 1928 {
1929 struct file_lock file_lock; 1929 struct file_lock file_lock;
1930 struct flock flock; 1930 struct flock flock;
1931 int error; 1931 int error;
1932 1932
1933 error = -EFAULT; 1933 error = -EFAULT;
1934 if (copy_from_user(&flock, l, sizeof(flock))) 1934 if (copy_from_user(&flock, l, sizeof(flock)))
1935 goto out; 1935 goto out;
1936 error = -EINVAL; 1936 error = -EINVAL;
1937 if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK)) 1937 if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK))
1938 goto out; 1938 goto out;
1939 1939
1940 error = flock_to_posix_lock(filp, &file_lock, &flock); 1940 error = flock_to_posix_lock(filp, &file_lock, &flock);
1941 if (error) 1941 if (error)
1942 goto out; 1942 goto out;
1943 1943
1944 if (cmd == F_GETLKP) { 1944 if (cmd == F_OFD_GETLK) {
1945 error = -EINVAL; 1945 error = -EINVAL;
1946 if (flock.l_pid != 0) 1946 if (flock.l_pid != 0)
1947 goto out; 1947 goto out;
1948 1948
1949 cmd = F_GETLK; 1949 cmd = F_GETLK;
1950 file_lock.fl_flags |= FL_FILE_PVT; 1950 file_lock.fl_flags |= FL_OFDLCK;
1951 file_lock.fl_owner = (fl_owner_t)filp; 1951 file_lock.fl_owner = (fl_owner_t)filp;
1952 } 1952 }
1953 1953
1954 error = vfs_test_lock(filp, &file_lock); 1954 error = vfs_test_lock(filp, &file_lock);
1955 if (error) 1955 if (error)
1956 goto out; 1956 goto out;
1957 1957
1958 flock.l_type = file_lock.fl_type; 1958 flock.l_type = file_lock.fl_type;
1959 if (file_lock.fl_type != F_UNLCK) { 1959 if (file_lock.fl_type != F_UNLCK) {
1960 error = posix_lock_to_flock(&flock, &file_lock); 1960 error = posix_lock_to_flock(&flock, &file_lock);
1961 if (error) 1961 if (error)
1962 goto out; 1962 goto out;
1963 } 1963 }
1964 error = -EFAULT; 1964 error = -EFAULT;
1965 if (!copy_to_user(l, &flock, sizeof(flock))) 1965 if (!copy_to_user(l, &flock, sizeof(flock)))
1966 error = 0; 1966 error = 0;
1967 out: 1967 out:
1968 return error; 1968 return error;
1969 } 1969 }
1970 1970
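fcntl_getlk() is also the entry point for the F_OFD_GETLK command renamed by this series: l_pid must be zero on input, and a conflicting open-file-description lock is reported back with l_pid set to -1 (see posix_lock_to_flock() above). A hedged userspace sketch; the fallback #define mirrors the value in include/uapi/linux/fcntl.h, since contemporary glibc may not define it yet:

/* Illustrative only: test for a conflicting lock with F_OFD_GETLK. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#ifndef F_OFD_GETLK
#define F_OFD_GETLK     36      /* value from include/uapi/linux/fcntl.h */
#endif

int main(void)
{
        struct flock fl;
        int fd = open("testfile", O_RDWR);

        if (fd < 0)
                return 1;
        memset(&fl, 0, sizeof(fl));
        fl.l_type = F_WRLCK;            /* could we write-lock ...          */
        fl.l_whence = SEEK_SET;
        fl.l_start = 0;
        fl.l_len = 0;                   /* ... the whole file?              */
        fl.l_pid = 0;                   /* must be zero for F_OFD_GETLK     */

        if (fcntl(fd, F_OFD_GETLK, &fl) == -1)
                return 1;
        if (fl.l_type == F_UNLCK)
                printf("no conflicting lock\n");
        else                            /* l_pid is -1 for an OFD conflict  */
                printf("conflict: type %d, pid %d\n", fl.l_type, (int)fl.l_pid);
        close(fd);
        return 0;
}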
1971 /** 1971 /**
1972 * vfs_lock_file - file byte range lock 1972 * vfs_lock_file - file byte range lock
1973 * @filp: The file to apply the lock to 1973 * @filp: The file to apply the lock to
1974 * @cmd: type of locking operation (F_SETLK, F_GETLK, etc.) 1974 * @cmd: type of locking operation (F_SETLK, F_GETLK, etc.)
1975 * @fl: The lock to be applied 1975 * @fl: The lock to be applied
1976 * @conf: Place to return a copy of the conflicting lock, if found. 1976 * @conf: Place to return a copy of the conflicting lock, if found.
1977 * 1977 *
1978 * A caller that doesn't care about the conflicting lock may pass NULL 1978 * A caller that doesn't care about the conflicting lock may pass NULL
1979 * as the final argument. 1979 * as the final argument.
1980 * 1980 *
1981 * If the filesystem defines a private ->lock() method, then @conf will 1981 * If the filesystem defines a private ->lock() method, then @conf will
1982 * be left unchanged; so a caller that cares should initialize it to 1982 * be left unchanged; so a caller that cares should initialize it to
1983 * some acceptable default. 1983 * some acceptable default.
1984 * 1984 *
1985 * To avoid blocking kernel daemons, such as lockd, that need to acquire POSIX 1985 * To avoid blocking kernel daemons, such as lockd, that need to acquire POSIX
1986 * locks, the ->lock() interface may return asynchronously, before the lock has 1986 * locks, the ->lock() interface may return asynchronously, before the lock has
1987 * been granted or denied by the underlying filesystem, if (and only if) 1987 * been granted or denied by the underlying filesystem, if (and only if)
1988 * lm_grant is set. Callers expecting ->lock() to return asynchronously 1988 * lm_grant is set. Callers expecting ->lock() to return asynchronously
1989 * will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if) 1989 * will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if)
1990 * the request is for a blocking lock. When ->lock() does return asynchronously, 1990 * the request is for a blocking lock. When ->lock() does return asynchronously,
1991 * it must return FILE_LOCK_DEFERRED, and call ->lm_grant() when the lock 1991 * it must return FILE_LOCK_DEFERRED, and call ->lm_grant() when the lock
1992 * request completes. 1992 * request completes.
1993 * If the request is for non-blocking lock the file system should return 1993 * If the request is for non-blocking lock the file system should return
1994 * FILE_LOCK_DEFERRED then try to get the lock and call the callback routine 1994 * FILE_LOCK_DEFERRED then try to get the lock and call the callback routine
1995 * with the result. If the request timed out the callback routine will return a 1995 * with the result. If the request timed out the callback routine will return a
1996 * nonzero return code and the file system should release the lock. The file 1996 * nonzero return code and the file system should release the lock. The file
1997 * system is also responsible to keep a corresponding posix lock when it 1997 * system is also responsible to keep a corresponding posix lock when it
1998 * grants a lock so the VFS can find out which locks are locally held and do 1998 * grants a lock so the VFS can find out which locks are locally held and do
1999 * the correct lock cleanup when required. 1999 * the correct lock cleanup when required.
2000 * The underlying filesystem must not drop the kernel lock or call 2000 * The underlying filesystem must not drop the kernel lock or call
2001 * ->lm_grant() before returning to the caller with a FILE_LOCK_DEFERRED 2001 * ->lm_grant() before returning to the caller with a FILE_LOCK_DEFERRED
2002 * return code. 2002 * return code.
2003 */ 2003 */
2004 int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf) 2004 int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
2005 { 2005 {
2006 if (filp->f_op->lock) 2006 if (filp->f_op->lock)
2007 return filp->f_op->lock(filp, cmd, fl); 2007 return filp->f_op->lock(filp, cmd, fl);
2008 else 2008 else
2009 return posix_lock_file(filp, fl, conf); 2009 return posix_lock_file(filp, fl, conf);
2010 } 2010 }
2011 EXPORT_SYMBOL_GPL(vfs_lock_file); 2011 EXPORT_SYMBOL_GPL(vfs_lock_file);
2012 2012
2013 static int do_lock_file_wait(struct file *filp, unsigned int cmd, 2013 static int do_lock_file_wait(struct file *filp, unsigned int cmd,
2014 struct file_lock *fl) 2014 struct file_lock *fl)
2015 { 2015 {
2016 int error; 2016 int error;
2017 2017
2018 error = security_file_lock(filp, fl->fl_type); 2018 error = security_file_lock(filp, fl->fl_type);
2019 if (error) 2019 if (error)
2020 return error; 2020 return error;
2021 2021
2022 for (;;) { 2022 for (;;) {
2023 error = vfs_lock_file(filp, cmd, fl, NULL); 2023 error = vfs_lock_file(filp, cmd, fl, NULL);
2024 if (error != FILE_LOCK_DEFERRED) 2024 if (error != FILE_LOCK_DEFERRED)
2025 break; 2025 break;
2026 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); 2026 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
2027 if (!error) 2027 if (!error)
2028 continue; 2028 continue;
2029 2029
2030 locks_delete_block(fl); 2030 locks_delete_block(fl);
2031 break; 2031 break;
2032 } 2032 }
2033 2033
2034 return error; 2034 return error;
2035 } 2035 }
2036 2036
2037 /* Apply the lock described by l to an open file descriptor. 2037 /* Apply the lock described by l to an open file descriptor.
2038 * This implements both the F_SETLK and F_SETLKW commands of fcntl(). 2038 * This implements both the F_SETLK and F_SETLKW commands of fcntl().
2039 */ 2039 */
2040 int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, 2040 int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
2041 struct flock __user *l) 2041 struct flock __user *l)
2042 { 2042 {
2043 struct file_lock *file_lock = locks_alloc_lock(); 2043 struct file_lock *file_lock = locks_alloc_lock();
2044 struct flock flock; 2044 struct flock flock;
2045 struct inode *inode; 2045 struct inode *inode;
2046 struct file *f; 2046 struct file *f;
2047 int error; 2047 int error;
2048 2048
2049 if (file_lock == NULL) 2049 if (file_lock == NULL)
2050 return -ENOLCK; 2050 return -ENOLCK;
2051 2051
2052 /* 2052 /*
2053 * This might block, so we do it before checking the inode. 2053 * This might block, so we do it before checking the inode.
2054 */ 2054 */
2055 error = -EFAULT; 2055 error = -EFAULT;
2056 if (copy_from_user(&flock, l, sizeof(flock))) 2056 if (copy_from_user(&flock, l, sizeof(flock)))
2057 goto out; 2057 goto out;
2058 2058
2059 inode = file_inode(filp); 2059 inode = file_inode(filp);
2060 2060
2061 /* Don't allow mandatory locks on files that may be memory mapped 2061 /* Don't allow mandatory locks on files that may be memory mapped
2062 * and shared. 2062 * and shared.
2063 */ 2063 */
2064 if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) { 2064 if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) {
2065 error = -EAGAIN; 2065 error = -EAGAIN;
2066 goto out; 2066 goto out;
2067 } 2067 }
2068 2068
2069 again: 2069 again:
2070 error = flock_to_posix_lock(filp, file_lock, &flock); 2070 error = flock_to_posix_lock(filp, file_lock, &flock);
2071 if (error) 2071 if (error)
2072 goto out; 2072 goto out;
2073 2073
2074 /* 2074 /*
2075 * If the cmd is requesting file-private locks, then set the 2075 * If the cmd is requesting file-private locks, then set the
2076 * FL_FILE_PVT flag and override the owner. 2076 * FL_OFDLCK flag and override the owner.
2077 */ 2077 */
2078 switch (cmd) { 2078 switch (cmd) {
2079 case F_SETLKP: 2079 case F_OFD_SETLK:
2080 error = -EINVAL; 2080 error = -EINVAL;
2081 if (flock.l_pid != 0) 2081 if (flock.l_pid != 0)
2082 goto out; 2082 goto out;
2083 2083
2084 cmd = F_SETLK; 2084 cmd = F_SETLK;
2085 file_lock->fl_flags |= FL_FILE_PVT; 2085 file_lock->fl_flags |= FL_OFDLCK;
2086 file_lock->fl_owner = (fl_owner_t)filp; 2086 file_lock->fl_owner = (fl_owner_t)filp;
2087 break; 2087 break;
2088 case F_SETLKPW: 2088 case F_OFD_SETLKW:
2089 error = -EINVAL; 2089 error = -EINVAL;
2090 if (flock.l_pid != 0) 2090 if (flock.l_pid != 0)
2091 goto out; 2091 goto out;
2092 2092
2093 cmd = F_SETLKW; 2093 cmd = F_SETLKW;
2094 file_lock->fl_flags |= FL_FILE_PVT; 2094 file_lock->fl_flags |= FL_OFDLCK;
2095 file_lock->fl_owner = (fl_owner_t)filp; 2095 file_lock->fl_owner = (fl_owner_t)filp;
2096 /* Fallthrough */ 2096 /* Fallthrough */
2097 case F_SETLKW: 2097 case F_SETLKW:
2098 file_lock->fl_flags |= FL_SLEEP; 2098 file_lock->fl_flags |= FL_SLEEP;
2099 } 2099 }
2100 2100
2101 error = do_lock_file_wait(filp, cmd, file_lock); 2101 error = do_lock_file_wait(filp, cmd, file_lock);
2102 2102
2103 /* 2103 /*
2104 * Attempt to detect a close/fcntl race and recover by 2104 * Attempt to detect a close/fcntl race and recover by
2105 * releasing the lock that was just acquired. 2105 * releasing the lock that was just acquired.
2106 */ 2106 */
2107 /* 2107 /*
2108 * we need that spin_lock here - it prevents reordering between 2108 * we need that spin_lock here - it prevents reordering between
2109 * update of inode->i_flock and check for it done in close(). 2109 * update of inode->i_flock and check for it done in close().
2110 * rcu_read_lock() wouldn't do. 2110 * rcu_read_lock() wouldn't do.
2111 */ 2111 */
2112 spin_lock(&current->files->file_lock); 2112 spin_lock(&current->files->file_lock);
2113 f = fcheck(fd); 2113 f = fcheck(fd);
2114 spin_unlock(&current->files->file_lock); 2114 spin_unlock(&current->files->file_lock);
2115 if (!error && f != filp && flock.l_type != F_UNLCK) { 2115 if (!error && f != filp && flock.l_type != F_UNLCK) {
2116 flock.l_type = F_UNLCK; 2116 flock.l_type = F_UNLCK;
2117 goto again; 2117 goto again;
2118 } 2118 }
2119 2119
2120 out: 2120 out:
2121 locks_free_lock(file_lock); 2121 locks_free_lock(file_lock);
2122 return error; 2122 return error;
2123 } 2123 }
2124 2124
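The same pattern appears in fcntl_setlk(): the OFD commands force fl_owner to the struct file, so the lock belongs to the open file description rather than to the process. A hedged sketch of why that matters; paths and the fallback values are assumptions mirroring include/uapi/linux/fcntl.h:

/*
 * Illustrative only: two independent open file descriptions in the same
 * process can now conflict with each other, which classic F_SETLK never
 * allowed. Assumes a kernel carrying this series.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#ifndef F_OFD_SETLK
#define F_OFD_SETLK     37
#define F_OFD_SETLKW    38
#endif

static int lock_whole_file(int fd, int cmd)
{
        struct flock fl;

        memset(&fl, 0, sizeof(fl));
        fl.l_type = F_WRLCK;
        fl.l_whence = SEEK_SET;         /* whole file: start 0, len 0 */
        fl.l_pid = 0;                   /* required for the OFD commands */
        return fcntl(fd, cmd, &fl);
}

int main(void)
{
        int fd1 = open("testfile", O_RDWR);
        int fd2 = open("testfile", O_RDWR);     /* a separate description */

        if (fd1 < 0 || fd2 < 0)
                return 1;
        if (lock_whole_file(fd1, F_OFD_SETLK) == 0 &&
            lock_whole_file(fd2, F_OFD_SETLK) == -1)
                printf("second description blocked, as expected\n");
        close(fd2);     /* releases nothing: the lock lives on fd1's description */
        close(fd1);     /* this close drops the OFD lock */
        return 0;
}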
2125 #if BITS_PER_LONG == 32 2125 #if BITS_PER_LONG == 32
2126 /* Report the first existing lock that would conflict with l. 2126 /* Report the first existing lock that would conflict with l.
2127 * This implements the F_GETLK command of fcntl(). 2127 * This implements the F_GETLK command of fcntl().
2128 */ 2128 */
2129 int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l) 2129 int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
2130 { 2130 {
2131 struct file_lock file_lock; 2131 struct file_lock file_lock;
2132 struct flock64 flock; 2132 struct flock64 flock;
2133 int error; 2133 int error;
2134 2134
2135 error = -EFAULT; 2135 error = -EFAULT;
2136 if (copy_from_user(&flock, l, sizeof(flock))) 2136 if (copy_from_user(&flock, l, sizeof(flock)))
2137 goto out; 2137 goto out;
2138 error = -EINVAL; 2138 error = -EINVAL;
2139 if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK)) 2139 if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK))
2140 goto out; 2140 goto out;
2141 2141
2142 error = flock64_to_posix_lock(filp, &file_lock, &flock); 2142 error = flock64_to_posix_lock(filp, &file_lock, &flock);
2143 if (error) 2143 if (error)
2144 goto out; 2144 goto out;
2145 2145
2146 if (cmd == F_GETLKP) { 2146 if (cmd == F_OFD_GETLK) {
2147 error = -EINVAL; 2147 error = -EINVAL;
2148 if (flock.l_pid != 0) 2148 if (flock.l_pid != 0)
2149 goto out; 2149 goto out;
2150 2150
2151 cmd = F_GETLK64; 2151 cmd = F_GETLK64;
2152 file_lock.fl_flags |= FL_FILE_PVT; 2152 file_lock.fl_flags |= FL_OFDLCK;
2153 file_lock.fl_owner = (fl_owner_t)filp; 2153 file_lock.fl_owner = (fl_owner_t)filp;
2154 } 2154 }
2155 2155
2156 error = vfs_test_lock(filp, &file_lock); 2156 error = vfs_test_lock(filp, &file_lock);
2157 if (error) 2157 if (error)
2158 goto out; 2158 goto out;
2159 2159
2160 flock.l_type = file_lock.fl_type; 2160 flock.l_type = file_lock.fl_type;
2161 if (file_lock.fl_type != F_UNLCK) 2161 if (file_lock.fl_type != F_UNLCK)
2162 posix_lock_to_flock64(&flock, &file_lock); 2162 posix_lock_to_flock64(&flock, &file_lock);
2163 2163
2164 error = -EFAULT; 2164 error = -EFAULT;
2165 if (!copy_to_user(l, &flock, sizeof(flock))) 2165 if (!copy_to_user(l, &flock, sizeof(flock)))
2166 error = 0; 2166 error = 0;
2167 2167
2168 out: 2168 out:
2169 return error; 2169 return error;
2170 } 2170 }
2171 2171
2172 /* Apply the lock described by l to an open file descriptor. 2172 /* Apply the lock described by l to an open file descriptor.
2173 * This implements both the F_SETLK and F_SETLKW commands of fcntl(). 2173 * This implements both the F_SETLK and F_SETLKW commands of fcntl().
2174 */ 2174 */
2175 int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, 2175 int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
2176 struct flock64 __user *l) 2176 struct flock64 __user *l)
2177 { 2177 {
2178 struct file_lock *file_lock = locks_alloc_lock(); 2178 struct file_lock *file_lock = locks_alloc_lock();
2179 struct flock64 flock; 2179 struct flock64 flock;
2180 struct inode *inode; 2180 struct inode *inode;
2181 struct file *f; 2181 struct file *f;
2182 int error; 2182 int error;
2183 2183
2184 if (file_lock == NULL) 2184 if (file_lock == NULL)
2185 return -ENOLCK; 2185 return -ENOLCK;
2186 2186
2187 /* 2187 /*
2188 * This might block, so we do it before checking the inode. 2188 * This might block, so we do it before checking the inode.
2189 */ 2189 */
2190 error = -EFAULT; 2190 error = -EFAULT;
2191 if (copy_from_user(&flock, l, sizeof(flock))) 2191 if (copy_from_user(&flock, l, sizeof(flock)))
2192 goto out; 2192 goto out;
2193 2193
2194 inode = file_inode(filp); 2194 inode = file_inode(filp);
2195 2195
2196 /* Don't allow mandatory locks on files that may be memory mapped 2196 /* Don't allow mandatory locks on files that may be memory mapped
2197 * and shared. 2197 * and shared.
2198 */ 2198 */
2199 if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) { 2199 if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) {
2200 error = -EAGAIN; 2200 error = -EAGAIN;
2201 goto out; 2201 goto out;
2202 } 2202 }
2203 2203
2204 again: 2204 again:
2205 error = flock64_to_posix_lock(filp, file_lock, &flock); 2205 error = flock64_to_posix_lock(filp, file_lock, &flock);
2206 if (error) 2206 if (error)
2207 goto out; 2207 goto out;
2208 2208
2209 /* 2209 /*
2210 * If the cmd is requesting file-private locks, then set the 2210 * If the cmd is requesting file-private locks, then set the
2211 * FL_FILE_PVT flag and override the owner. 2211 * FL_OFDLCK flag and override the owner.
2212 */ 2212 */
2213 switch (cmd) { 2213 switch (cmd) {
2214 case F_SETLKP: 2214 case F_OFD_SETLK:
2215 error = -EINVAL; 2215 error = -EINVAL;
2216 if (flock.l_pid != 0) 2216 if (flock.l_pid != 0)
2217 goto out; 2217 goto out;
2218 2218
2219 cmd = F_SETLK64; 2219 cmd = F_SETLK64;
2220 file_lock->fl_flags |= FL_FILE_PVT; 2220 file_lock->fl_flags |= FL_OFDLCK;
2221 file_lock->fl_owner = (fl_owner_t)filp; 2221 file_lock->fl_owner = (fl_owner_t)filp;
2222 break; 2222 break;
2223 case F_SETLKPW: 2223 case F_OFD_SETLKW:
2224 error = -EINVAL; 2224 error = -EINVAL;
2225 if (flock.l_pid != 0) 2225 if (flock.l_pid != 0)
2226 goto out; 2226 goto out;
2227 2227
2228 cmd = F_SETLKW64; 2228 cmd = F_SETLKW64;
2229 file_lock->fl_flags |= FL_FILE_PVT; 2229 file_lock->fl_flags |= FL_OFDLCK;
2230 file_lock->fl_owner = (fl_owner_t)filp; 2230 file_lock->fl_owner = (fl_owner_t)filp;
2231 /* Fallthrough */ 2231 /* Fallthrough */
2232 case F_SETLKW64: 2232 case F_SETLKW64:
2233 file_lock->fl_flags |= FL_SLEEP; 2233 file_lock->fl_flags |= FL_SLEEP;
2234 } 2234 }
2235 2235
2236 error = do_lock_file_wait(filp, cmd, file_lock); 2236 error = do_lock_file_wait(filp, cmd, file_lock);
2237 2237
2238 /* 2238 /*
2239 * Attempt to detect a close/fcntl race and recover by 2239 * Attempt to detect a close/fcntl race and recover by
2240 * releasing the lock that was just acquired. 2240 * releasing the lock that was just acquired.
2241 */ 2241 */
2242 spin_lock(&current->files->file_lock); 2242 spin_lock(&current->files->file_lock);
2243 f = fcheck(fd); 2243 f = fcheck(fd);
2244 spin_unlock(&current->files->file_lock); 2244 spin_unlock(&current->files->file_lock);
2245 if (!error && f != filp && flock.l_type != F_UNLCK) { 2245 if (!error && f != filp && flock.l_type != F_UNLCK) {
2246 flock.l_type = F_UNLCK; 2246 flock.l_type = F_UNLCK;
2247 goto again; 2247 goto again;
2248 } 2248 }
2249 2249
2250 out: 2250 out:
2251 locks_free_lock(file_lock); 2251 locks_free_lock(file_lock);
2252 return error; 2252 return error;
2253 } 2253 }
2254 #endif /* BITS_PER_LONG == 32 */ 2254 #endif /* BITS_PER_LONG == 32 */
2255 2255
2256 /* 2256 /*
2257 * This function is called when the file is being removed 2257 * This function is called when the file is being removed
2258 * from the task's fd array. POSIX locks belonging to this task 2258 * from the task's fd array. POSIX locks belonging to this task
2259 * are deleted at this time. 2259 * are deleted at this time.
2260 */ 2260 */
2261 void locks_remove_posix(struct file *filp, fl_owner_t owner) 2261 void locks_remove_posix(struct file *filp, fl_owner_t owner)
2262 { 2262 {
2263 struct file_lock lock; 2263 struct file_lock lock;
2264 2264
2265 /* 2265 /*
2266 * If there are no locks held on this file, we don't need to call 2266 * If there are no locks held on this file, we don't need to call
2267 * posix_lock_file(). Another process could be setting a lock on this 2267 * posix_lock_file(). Another process could be setting a lock on this
2268 * file at the same time, but we wouldn't remove that lock anyway. 2268 * file at the same time, but we wouldn't remove that lock anyway.
2269 */ 2269 */
2270 if (!file_inode(filp)->i_flock) 2270 if (!file_inode(filp)->i_flock)
2271 return; 2271 return;
2272 2272
2273 lock.fl_type = F_UNLCK; 2273 lock.fl_type = F_UNLCK;
2274 lock.fl_flags = FL_POSIX | FL_CLOSE; 2274 lock.fl_flags = FL_POSIX | FL_CLOSE;
2275 lock.fl_start = 0; 2275 lock.fl_start = 0;
2276 lock.fl_end = OFFSET_MAX; 2276 lock.fl_end = OFFSET_MAX;
2277 lock.fl_owner = owner; 2277 lock.fl_owner = owner;
2278 lock.fl_pid = current->tgid; 2278 lock.fl_pid = current->tgid;
2279 lock.fl_file = filp; 2279 lock.fl_file = filp;
2280 lock.fl_ops = NULL; 2280 lock.fl_ops = NULL;
2281 lock.fl_lmops = NULL; 2281 lock.fl_lmops = NULL;
2282 2282
2283 vfs_lock_file(filp, F_SETLK, &lock, NULL); 2283 vfs_lock_file(filp, F_SETLK, &lock, NULL);
2284 2284
2285 if (lock.fl_ops && lock.fl_ops->fl_release_private) 2285 if (lock.fl_ops && lock.fl_ops->fl_release_private)
2286 lock.fl_ops->fl_release_private(&lock); 2286 lock.fl_ops->fl_release_private(&lock);
2287 } 2287 }
2288 2288
2289 EXPORT_SYMBOL(locks_remove_posix); 2289 EXPORT_SYMBOL(locks_remove_posix);
2290 2290
2291 /* 2291 /*
2292 * This function is called on the last close of an open file. 2292 * This function is called on the last close of an open file.
2293 */ 2293 */
2294 void locks_remove_file(struct file *filp) 2294 void locks_remove_file(struct file *filp)
2295 { 2295 {
2296 struct inode * inode = file_inode(filp); 2296 struct inode * inode = file_inode(filp);
2297 struct file_lock *fl; 2297 struct file_lock *fl;
2298 struct file_lock **before; 2298 struct file_lock **before;
2299 2299
2300 if (!inode->i_flock) 2300 if (!inode->i_flock)
2301 return; 2301 return;
2302 2302
2303 locks_remove_posix(filp, (fl_owner_t)filp); 2303 locks_remove_posix(filp, (fl_owner_t)filp);
2304 2304
2305 if (filp->f_op->flock) { 2305 if (filp->f_op->flock) {
2306 struct file_lock fl = { 2306 struct file_lock fl = {
2307 .fl_pid = current->tgid, 2307 .fl_pid = current->tgid,
2308 .fl_file = filp, 2308 .fl_file = filp,
2309 .fl_flags = FL_FLOCK, 2309 .fl_flags = FL_FLOCK,
2310 .fl_type = F_UNLCK, 2310 .fl_type = F_UNLCK,
2311 .fl_end = OFFSET_MAX, 2311 .fl_end = OFFSET_MAX,
2312 }; 2312 };
2313 filp->f_op->flock(filp, F_SETLKW, &fl); 2313 filp->f_op->flock(filp, F_SETLKW, &fl);
2314 if (fl.fl_ops && fl.fl_ops->fl_release_private) 2314 if (fl.fl_ops && fl.fl_ops->fl_release_private)
2315 fl.fl_ops->fl_release_private(&fl); 2315 fl.fl_ops->fl_release_private(&fl);
2316 } 2316 }
2317 2317
2318 spin_lock(&inode->i_lock); 2318 spin_lock(&inode->i_lock);
2319 before = &inode->i_flock; 2319 before = &inode->i_flock;
2320 2320
2321 while ((fl = *before) != NULL) { 2321 while ((fl = *before) != NULL) {
2322 if (fl->fl_file == filp) { 2322 if (fl->fl_file == filp) {
2323 if (IS_LEASE(fl)) { 2323 if (IS_LEASE(fl)) {
2324 lease_modify(before, F_UNLCK); 2324 lease_modify(before, F_UNLCK);
2325 continue; 2325 continue;
2326 } 2326 }
2327 2327
2328 /* 2328 /*
2329 * There's a leftover lock on the list of a type that 2329 * There's a leftover lock on the list of a type that
2330 * we didn't expect to see. Most likely a classic 2330 * we didn't expect to see. Most likely a classic
2331 * POSIX lock that ended up not getting released 2331 * POSIX lock that ended up not getting released
2332 * properly, or that raced onto the list somehow. Log 2332 * properly, or that raced onto the list somehow. Log
2333 * some info about it and then just remove it from 2333 * some info about it and then just remove it from
2334 * the list. 2334 * the list.
2335 */ 2335 */
2336 WARN(!IS_FLOCK(fl), 2336 WARN(!IS_FLOCK(fl),
2337 "leftover lock: dev=%u:%u ino=%lu type=%hhd flags=0x%x start=%lld end=%lld\n", 2337 "leftover lock: dev=%u:%u ino=%lu type=%hhd flags=0x%x start=%lld end=%lld\n",
2338 MAJOR(inode->i_sb->s_dev), 2338 MAJOR(inode->i_sb->s_dev),
2339 MINOR(inode->i_sb->s_dev), inode->i_ino, 2339 MINOR(inode->i_sb->s_dev), inode->i_ino,
2340 fl->fl_type, fl->fl_flags, 2340 fl->fl_type, fl->fl_flags,
2341 fl->fl_start, fl->fl_end); 2341 fl->fl_start, fl->fl_end);
2342 2342
2343 locks_delete_lock(before); 2343 locks_delete_lock(before);
2344 continue; 2344 continue;
2345 } 2345 }
2346 before = &fl->fl_next; 2346 before = &fl->fl_next;
2347 } 2347 }
2348 spin_unlock(&inode->i_lock); 2348 spin_unlock(&inode->i_lock);
2349 } 2349 }
2350 2350
2351 /** 2351 /**
2352 * posix_unblock_lock - stop waiting for a file lock 2352 * posix_unblock_lock - stop waiting for a file lock
2353 * @waiter: the lock which was waiting 2353 * @waiter: the lock which was waiting
2354 * 2354 *
2355 * lockd needs to block waiting for locks. 2355 * lockd needs to block waiting for locks.
2356 */ 2356 */
2357 int 2357 int
2358 posix_unblock_lock(struct file_lock *waiter) 2358 posix_unblock_lock(struct file_lock *waiter)
2359 { 2359 {
2360 int status = 0; 2360 int status = 0;
2361 2361
2362 spin_lock(&blocked_lock_lock); 2362 spin_lock(&blocked_lock_lock);
2363 if (waiter->fl_next) 2363 if (waiter->fl_next)
2364 __locks_delete_block(waiter); 2364 __locks_delete_block(waiter);
2365 else 2365 else
2366 status = -ENOENT; 2366 status = -ENOENT;
2367 spin_unlock(&blocked_lock_lock); 2367 spin_unlock(&blocked_lock_lock);
2368 return status; 2368 return status;
2369 } 2369 }
2370 EXPORT_SYMBOL(posix_unblock_lock); 2370 EXPORT_SYMBOL(posix_unblock_lock);
2371 2371
2372 /** 2372 /**
2373 * vfs_cancel_lock - file byte range unblock lock 2373 * vfs_cancel_lock - file byte range unblock lock
2374 * @filp: The file to apply the unblock to 2374 * @filp: The file to apply the unblock to
2375 * @fl: The lock to be unblocked 2375 * @fl: The lock to be unblocked
2376 * 2376 *
2377 * Used by lock managers to cancel blocked requests 2377 * Used by lock managers to cancel blocked requests
2378 */ 2378 */
2379 int vfs_cancel_lock(struct file *filp, struct file_lock *fl) 2379 int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
2380 { 2380 {
2381 if (filp->f_op->lock) 2381 if (filp->f_op->lock)
2382 return filp->f_op->lock(filp, F_CANCELLK, fl); 2382 return filp->f_op->lock(filp, F_CANCELLK, fl);
2383 return 0; 2383 return 0;
2384 } 2384 }
2385 2385
2386 EXPORT_SYMBOL_GPL(vfs_cancel_lock); 2386 EXPORT_SYMBOL_GPL(vfs_cancel_lock);
2387 2387
2388 #ifdef CONFIG_PROC_FS 2388 #ifdef CONFIG_PROC_FS
2389 #include <linux/proc_fs.h> 2389 #include <linux/proc_fs.h>
2390 #include <linux/seq_file.h> 2390 #include <linux/seq_file.h>
2391 2391
2392 struct locks_iterator { 2392 struct locks_iterator {
2393 int li_cpu; 2393 int li_cpu;
2394 loff_t li_pos; 2394 loff_t li_pos;
2395 }; 2395 };
2396 2396
2397 static void lock_get_status(struct seq_file *f, struct file_lock *fl, 2397 static void lock_get_status(struct seq_file *f, struct file_lock *fl,
2398 loff_t id, char *pfx) 2398 loff_t id, char *pfx)
2399 { 2399 {
2400 struct inode *inode = NULL; 2400 struct inode *inode = NULL;
2401 unsigned int fl_pid; 2401 unsigned int fl_pid;
2402 2402
2403 if (fl->fl_nspid) 2403 if (fl->fl_nspid)
2404 fl_pid = pid_vnr(fl->fl_nspid); 2404 fl_pid = pid_vnr(fl->fl_nspid);
2405 else 2405 else
2406 fl_pid = fl->fl_pid; 2406 fl_pid = fl->fl_pid;
2407 2407
2408 if (fl->fl_file != NULL) 2408 if (fl->fl_file != NULL)
2409 inode = file_inode(fl->fl_file); 2409 inode = file_inode(fl->fl_file);
2410 2410
2411 seq_printf(f, "%lld:%s ", id, pfx); 2411 seq_printf(f, "%lld:%s ", id, pfx);
2412 if (IS_POSIX(fl)) { 2412 if (IS_POSIX(fl)) {
2413 if (fl->fl_flags & FL_ACCESS) 2413 if (fl->fl_flags & FL_ACCESS)
2414 seq_printf(f, "ACCESS"); 2414 seq_printf(f, "ACCESS");
2415 else if (IS_FILE_PVT(fl)) 2415 else if (IS_OFDLCK(fl))
2416 seq_printf(f, "FLPVT "); 2416 seq_printf(f, "OFDLCK");
2417 else 2417 else
2418 seq_printf(f, "POSIX "); 2418 seq_printf(f, "POSIX ");
2419 2419
2420 seq_printf(f, " %s ", 2420 seq_printf(f, " %s ",
2421 (inode == NULL) ? "*NOINODE*" : 2421 (inode == NULL) ? "*NOINODE*" :
2422 mandatory_lock(inode) ? "MANDATORY" : "ADVISORY "); 2422 mandatory_lock(inode) ? "MANDATORY" : "ADVISORY ");
2423 } else if (IS_FLOCK(fl)) { 2423 } else if (IS_FLOCK(fl)) {
2424 if (fl->fl_type & LOCK_MAND) { 2424 if (fl->fl_type & LOCK_MAND) {
2425 seq_printf(f, "FLOCK MSNFS "); 2425 seq_printf(f, "FLOCK MSNFS ");
2426 } else { 2426 } else {
2427 seq_printf(f, "FLOCK ADVISORY "); 2427 seq_printf(f, "FLOCK ADVISORY ");
2428 } 2428 }
2429 } else if (IS_LEASE(fl)) { 2429 } else if (IS_LEASE(fl)) {
2430 seq_printf(f, "LEASE "); 2430 seq_printf(f, "LEASE ");
2431 if (lease_breaking(fl)) 2431 if (lease_breaking(fl))
2432 seq_printf(f, "BREAKING "); 2432 seq_printf(f, "BREAKING ");
2433 else if (fl->fl_file) 2433 else if (fl->fl_file)
2434 seq_printf(f, "ACTIVE "); 2434 seq_printf(f, "ACTIVE ");
2435 else 2435 else
2436 seq_printf(f, "BREAKER "); 2436 seq_printf(f, "BREAKER ");
2437 } else { 2437 } else {
2438 seq_printf(f, "UNKNOWN UNKNOWN "); 2438 seq_printf(f, "UNKNOWN UNKNOWN ");
2439 } 2439 }
2440 if (fl->fl_type & LOCK_MAND) { 2440 if (fl->fl_type & LOCK_MAND) {
2441 seq_printf(f, "%s ", 2441 seq_printf(f, "%s ",
2442 (fl->fl_type & LOCK_READ) 2442 (fl->fl_type & LOCK_READ)
2443 ? (fl->fl_type & LOCK_WRITE) ? "RW " : "READ " 2443 ? (fl->fl_type & LOCK_WRITE) ? "RW " : "READ "
2444 : (fl->fl_type & LOCK_WRITE) ? "WRITE" : "NONE "); 2444 : (fl->fl_type & LOCK_WRITE) ? "WRITE" : "NONE ");
2445 } else { 2445 } else {
2446 seq_printf(f, "%s ", 2446 seq_printf(f, "%s ",
2447 (lease_breaking(fl)) 2447 (lease_breaking(fl))
2448 ? (fl->fl_type == F_UNLCK) ? "UNLCK" : "READ " 2448 ? (fl->fl_type == F_UNLCK) ? "UNLCK" : "READ "
2449 : (fl->fl_type == F_WRLCK) ? "WRITE" : "READ "); 2449 : (fl->fl_type == F_WRLCK) ? "WRITE" : "READ ");
2450 } 2450 }
2451 if (inode) { 2451 if (inode) {
2452 #ifdef WE_CAN_BREAK_LSLK_NOW 2452 #ifdef WE_CAN_BREAK_LSLK_NOW
2453 seq_printf(f, "%d %s:%ld ", fl_pid, 2453 seq_printf(f, "%d %s:%ld ", fl_pid,
2454 inode->i_sb->s_id, inode->i_ino); 2454 inode->i_sb->s_id, inode->i_ino);
2455 #else 2455 #else
2456 /* userspace relies on this representation of dev_t ;-( */ 2456 /* userspace relies on this representation of dev_t ;-( */
2457 seq_printf(f, "%d %02x:%02x:%ld ", fl_pid, 2457 seq_printf(f, "%d %02x:%02x:%ld ", fl_pid,
2458 MAJOR(inode->i_sb->s_dev), 2458 MAJOR(inode->i_sb->s_dev),
2459 MINOR(inode->i_sb->s_dev), inode->i_ino); 2459 MINOR(inode->i_sb->s_dev), inode->i_ino);
2460 #endif 2460 #endif
2461 } else { 2461 } else {
2462 seq_printf(f, "%d <none>:0 ", fl_pid); 2462 seq_printf(f, "%d <none>:0 ", fl_pid);
2463 } 2463 }
2464 if (IS_POSIX(fl)) { 2464 if (IS_POSIX(fl)) {
2465 if (fl->fl_end == OFFSET_MAX) 2465 if (fl->fl_end == OFFSET_MAX)
2466 seq_printf(f, "%Ld EOF\n", fl->fl_start); 2466 seq_printf(f, "%Ld EOF\n", fl->fl_start);
2467 else 2467 else
2468 seq_printf(f, "%Ld %Ld\n", fl->fl_start, fl->fl_end); 2468 seq_printf(f, "%Ld %Ld\n", fl->fl_start, fl->fl_end);
2469 } else { 2469 } else {
2470 seq_printf(f, "0 EOF\n"); 2470 seq_printf(f, "0 EOF\n");
2471 } 2471 }
2472 } 2472 }
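For reference, lock_get_status() emits one /proc/locks line per lock in the layout built above. The samples below are invented for illustration (the values are not taken from this commit) and show the POSIX, new OFDLCK, and FLOCK labels:

	1: OFDLCK ADVISORY  WRITE 1193 08:02:1048580 0 EOF
	2: POSIX  ADVISORY  READ  1193 08:02:131082 128 255
	3: FLOCK  ADVISORY  WRITE 1200 08:02:131084 0 EOF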
2473 2473
2474 static int locks_show(struct seq_file *f, void *v) 2474 static int locks_show(struct seq_file *f, void *v)
2475 { 2475 {
2476 struct locks_iterator *iter = f->private; 2476 struct locks_iterator *iter = f->private;
2477 struct file_lock *fl, *bfl; 2477 struct file_lock *fl, *bfl;
2478 2478
2479 fl = hlist_entry(v, struct file_lock, fl_link); 2479 fl = hlist_entry(v, struct file_lock, fl_link);
2480 2480
2481 lock_get_status(f, fl, iter->li_pos, ""); 2481 lock_get_status(f, fl, iter->li_pos, "");
2482 2482
2483 list_for_each_entry(bfl, &fl->fl_block, fl_block) 2483 list_for_each_entry(bfl, &fl->fl_block, fl_block)
2484 lock_get_status(f, bfl, iter->li_pos, " ->"); 2484 lock_get_status(f, bfl, iter->li_pos, " ->");
2485 2485
2486 return 0; 2486 return 0;
2487 } 2487 }
2488 2488
2489 static void *locks_start(struct seq_file *f, loff_t *pos) 2489 static void *locks_start(struct seq_file *f, loff_t *pos)
2490 __acquires(&blocked_lock_lock) 2490 __acquires(&blocked_lock_lock)
2491 { 2491 {
2492 struct locks_iterator *iter = f->private; 2492 struct locks_iterator *iter = f->private;
2493 2493
2494 iter->li_pos = *pos + 1; 2494 iter->li_pos = *pos + 1;
2495 lg_global_lock(&file_lock_lglock); 2495 lg_global_lock(&file_lock_lglock);
2496 spin_lock(&blocked_lock_lock); 2496 spin_lock(&blocked_lock_lock);
2497 return seq_hlist_start_percpu(&file_lock_list, &iter->li_cpu, *pos); 2497 return seq_hlist_start_percpu(&file_lock_list, &iter->li_cpu, *pos);
2498 } 2498 }
2499 2499
2500 static void *locks_next(struct seq_file *f, void *v, loff_t *pos) 2500 static void *locks_next(struct seq_file *f, void *v, loff_t *pos)
2501 { 2501 {
2502 struct locks_iterator *iter = f->private; 2502 struct locks_iterator *iter = f->private;
2503 2503
2504 ++iter->li_pos; 2504 ++iter->li_pos;
2505 return seq_hlist_next_percpu(v, &file_lock_list, &iter->li_cpu, pos); 2505 return seq_hlist_next_percpu(v, &file_lock_list, &iter->li_cpu, pos);
2506 } 2506 }
2507 2507
2508 static void locks_stop(struct seq_file *f, void *v) 2508 static void locks_stop(struct seq_file *f, void *v)
2509 __releases(&blocked_lock_lock) 2509 __releases(&blocked_lock_lock)
2510 { 2510 {
2511 spin_unlock(&blocked_lock_lock); 2511 spin_unlock(&blocked_lock_lock);
2512 lg_global_unlock(&file_lock_lglock); 2512 lg_global_unlock(&file_lock_lglock);
2513 } 2513 }
2514 2514
2515 static const struct seq_operations locks_seq_operations = { 2515 static const struct seq_operations locks_seq_operations = {
2516 .start = locks_start, 2516 .start = locks_start,
2517 .next = locks_next, 2517 .next = locks_next,
2518 .stop = locks_stop, 2518 .stop = locks_stop,
2519 .show = locks_show, 2519 .show = locks_show,
2520 }; 2520 };
2521 2521
2522 static int locks_open(struct inode *inode, struct file *filp) 2522 static int locks_open(struct inode *inode, struct file *filp)
2523 { 2523 {
2524 return seq_open_private(filp, &locks_seq_operations, 2524 return seq_open_private(filp, &locks_seq_operations,
2525 sizeof(struct locks_iterator)); 2525 sizeof(struct locks_iterator));
2526 } 2526 }
2527 2527
2528 static const struct file_operations proc_locks_operations = { 2528 static const struct file_operations proc_locks_operations = {
2529 .open = locks_open, 2529 .open = locks_open,
2530 .read = seq_read, 2530 .read = seq_read,
2531 .llseek = seq_lseek, 2531 .llseek = seq_lseek,
2532 .release = seq_release_private, 2532 .release = seq_release_private,
2533 }; 2533 };
2534 2534
2535 static int __init proc_locks_init(void) 2535 static int __init proc_locks_init(void)
2536 { 2536 {
2537 proc_create("locks", 0, NULL, &proc_locks_operations); 2537 proc_create("locks", 0, NULL, &proc_locks_operations);
2538 return 0; 2538 return 0;
2539 } 2539 }
2540 module_init(proc_locks_init); 2540 module_init(proc_locks_init);
2541 #endif 2541 #endif
2542 2542
2543 /** 2543 /**
2544 * lock_may_read - checks that the region is free of locks 2544 * lock_may_read - checks that the region is free of locks
2545 * @inode: the inode that is being read 2545 * @inode: the inode that is being read
2546 * @start: the first byte to read 2546 * @start: the first byte to read
2547 * @len: the number of bytes to read 2547 * @len: the number of bytes to read
2548 * 2548 *
2549 * Emulates Windows locking requirements. Whole-file 2549 * Emulates Windows locking requirements. Whole-file
2550 * mandatory locks (share modes) can prohibit a read and 2550 * mandatory locks (share modes) can prohibit a read and
2551 * byte-range POSIX locks can prohibit a read if they overlap. 2551 * byte-range POSIX locks can prohibit a read if they overlap.
2552 * 2552 *
2553 * N.B. this function is only ever called 2553 * N.B. this function is only ever called
2554 * from knfsd and ownership of locks is never checked. 2554 * from knfsd and ownership of locks is never checked.
2555 */ 2555 */
2556 int lock_may_read(struct inode *inode, loff_t start, unsigned long len) 2556 int lock_may_read(struct inode *inode, loff_t start, unsigned long len)
2557 { 2557 {
2558 struct file_lock *fl; 2558 struct file_lock *fl;
2559 int result = 1; 2559 int result = 1;
2560 2560
2561 spin_lock(&inode->i_lock); 2561 spin_lock(&inode->i_lock);
2562 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { 2562 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
2563 if (IS_POSIX(fl)) { 2563 if (IS_POSIX(fl)) {
2564 if (fl->fl_type == F_RDLCK) 2564 if (fl->fl_type == F_RDLCK)
2565 continue; 2565 continue;
2566 if ((fl->fl_end < start) || (fl->fl_start > (start + len))) 2566 if ((fl->fl_end < start) || (fl->fl_start > (start + len)))
2567 continue; 2567 continue;
2568 } else if (IS_FLOCK(fl)) { 2568 } else if (IS_FLOCK(fl)) {
2569 if (!(fl->fl_type & LOCK_MAND)) 2569 if (!(fl->fl_type & LOCK_MAND))
2570 continue; 2570 continue;
2571 if (fl->fl_type & LOCK_READ) 2571 if (fl->fl_type & LOCK_READ)
2572 continue; 2572 continue;
2573 } else 2573 } else
2574 continue; 2574 continue;
2575 result = 0; 2575 result = 0;
2576 break; 2576 break;
2577 } 2577 }
2578 spin_unlock(&inode->i_lock); 2578 spin_unlock(&inode->i_lock);
2579 return result; 2579 return result;
2580 } 2580 }
2581 2581
2582 EXPORT_SYMBOL(lock_may_read); 2582 EXPORT_SYMBOL(lock_may_read);
2583 2583
2584 /** 2584 /**
2585 * lock_may_write - checks that the region is free of locks 2585 * lock_may_write - checks that the region is free of locks
2586 * @inode: the inode that is being written 2586 * @inode: the inode that is being written
2587 * @start: the first byte to write 2587 * @start: the first byte to write
2588 * @len: the number of bytes to write 2588 * @len: the number of bytes to write
2589 * 2589 *
2590 * Emulates Windows locking requirements. Whole-file 2590 * Emulates Windows locking requirements. Whole-file
2591 * mandatory locks (share modes) can prohibit a write and 2591 * mandatory locks (share modes) can prohibit a write and
2592 * byte-range POSIX locks can prohibit a write if they overlap. 2592 * byte-range POSIX locks can prohibit a write if they overlap.
2593 * 2593 *
2594 * N.B. this function is only ever called 2594 * N.B. this function is only ever called
2595 * from knfsd and ownership of locks is never checked. 2595 * from knfsd and ownership of locks is never checked.
2596 */ 2596 */
2597 int lock_may_write(struct inode *inode, loff_t start, unsigned long len) 2597 int lock_may_write(struct inode *inode, loff_t start, unsigned long len)
2598 { 2598 {
2599 struct file_lock *fl; 2599 struct file_lock *fl;
2600 int result = 1; 2600 int result = 1;
2601 2601
2602 spin_lock(&inode->i_lock); 2602 spin_lock(&inode->i_lock);
2603 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { 2603 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
2604 if (IS_POSIX(fl)) { 2604 if (IS_POSIX(fl)) {
2605 if ((fl->fl_end < start) || (fl->fl_start > (start + len))) 2605 if ((fl->fl_end < start) || (fl->fl_start > (start + len)))
2606 continue; 2606 continue;
2607 } else if (IS_FLOCK(fl)) { 2607 } else if (IS_FLOCK(fl)) {
2608 if (!(fl->fl_type & LOCK_MAND)) 2608 if (!(fl->fl_type & LOCK_MAND))
2609 continue; 2609 continue;
2610 if (fl->fl_type & LOCK_WRITE) 2610 if (fl->fl_type & LOCK_WRITE)
2611 continue; 2611 continue;
2612 } else 2612 } else
2613 continue; 2613 continue;
2614 result = 0; 2614 result = 0;
2615 break; 2615 break;
2616 } 2616 }
2617 spin_unlock(&inode->i_lock); 2617 spin_unlock(&inode->i_lock);
2618 return result; 2618 return result;
2619 } 2619 }
2620 2620
2621 EXPORT_SYMBOL(lock_may_write); 2621 EXPORT_SYMBOL(lock_may_write);
2622 2622
2623 static int __init filelock_init(void) 2623 static int __init filelock_init(void)
2624 { 2624 {
2625 int i; 2625 int i;
2626 2626
2627 filelock_cache = kmem_cache_create("file_lock_cache", 2627 filelock_cache = kmem_cache_create("file_lock_cache",
2628 sizeof(struct file_lock), 0, SLAB_PANIC, NULL); 2628 sizeof(struct file_lock), 0, SLAB_PANIC, NULL);
2629 2629
2630 lg_lock_init(&file_lock_lglock, "file_lock_lglock"); 2630 lg_lock_init(&file_lock_lglock, "file_lock_lglock");
2631 2631
2632 for_each_possible_cpu(i) 2632 for_each_possible_cpu(i)
2633 INIT_HLIST_HEAD(per_cpu_ptr(&file_lock_list, i)); 2633 INIT_HLIST_HEAD(per_cpu_ptr(&file_lock_list, i));
2634 2634
2635 return 0; 2635 return 0;
2636 } 2636 }
2637 2637
2638 core_initcall(filelock_init); 2638 core_initcall(filelock_init);
2639 2639
1 #ifndef _LINUX_FS_H 1 #ifndef _LINUX_FS_H
include/linux/fs.h
2 #define _LINUX_FS_H 2 #define _LINUX_FS_H
3 3
4 4
5 #include <linux/linkage.h> 5 #include <linux/linkage.h>
6 #include <linux/wait.h> 6 #include <linux/wait.h>
7 #include <linux/kdev_t.h> 7 #include <linux/kdev_t.h>
8 #include <linux/dcache.h> 8 #include <linux/dcache.h>
9 #include <linux/path.h> 9 #include <linux/path.h>
10 #include <linux/stat.h> 10 #include <linux/stat.h>
11 #include <linux/cache.h> 11 #include <linux/cache.h>
12 #include <linux/list.h> 12 #include <linux/list.h>
13 #include <linux/list_lru.h> 13 #include <linux/list_lru.h>
14 #include <linux/llist.h> 14 #include <linux/llist.h>
15 #include <linux/radix-tree.h> 15 #include <linux/radix-tree.h>
16 #include <linux/rbtree.h> 16 #include <linux/rbtree.h>
17 #include <linux/init.h> 17 #include <linux/init.h>
18 #include <linux/pid.h> 18 #include <linux/pid.h>
19 #include <linux/bug.h> 19 #include <linux/bug.h>
20 #include <linux/mutex.h> 20 #include <linux/mutex.h>
21 #include <linux/capability.h> 21 #include <linux/capability.h>
22 #include <linux/semaphore.h> 22 #include <linux/semaphore.h>
23 #include <linux/fiemap.h> 23 #include <linux/fiemap.h>
24 #include <linux/rculist_bl.h> 24 #include <linux/rculist_bl.h>
25 #include <linux/atomic.h> 25 #include <linux/atomic.h>
26 #include <linux/shrinker.h> 26 #include <linux/shrinker.h>
27 #include <linux/migrate_mode.h> 27 #include <linux/migrate_mode.h>
28 #include <linux/uidgid.h> 28 #include <linux/uidgid.h>
29 #include <linux/lockdep.h> 29 #include <linux/lockdep.h>
30 #include <linux/percpu-rwsem.h> 30 #include <linux/percpu-rwsem.h>
31 #include <linux/blk_types.h> 31 #include <linux/blk_types.h>
32 32
33 #include <asm/byteorder.h> 33 #include <asm/byteorder.h>
34 #include <uapi/linux/fs.h> 34 #include <uapi/linux/fs.h>
35 35
36 struct export_operations; 36 struct export_operations;
37 struct hd_geometry; 37 struct hd_geometry;
38 struct iovec; 38 struct iovec;
39 struct nameidata; 39 struct nameidata;
40 struct kiocb; 40 struct kiocb;
41 struct kobject; 41 struct kobject;
42 struct pipe_inode_info; 42 struct pipe_inode_info;
43 struct poll_table_struct; 43 struct poll_table_struct;
44 struct kstatfs; 44 struct kstatfs;
45 struct vm_area_struct; 45 struct vm_area_struct;
46 struct vfsmount; 46 struct vfsmount;
47 struct cred; 47 struct cred;
48 struct swap_info_struct; 48 struct swap_info_struct;
49 struct seq_file; 49 struct seq_file;
50 struct workqueue_struct; 50 struct workqueue_struct;
51 struct iov_iter; 51 struct iov_iter;
52 52
53 extern void __init inode_init(void); 53 extern void __init inode_init(void);
54 extern void __init inode_init_early(void); 54 extern void __init inode_init_early(void);
55 extern void __init files_init(unsigned long); 55 extern void __init files_init(unsigned long);
56 56
57 extern struct files_stat_struct files_stat; 57 extern struct files_stat_struct files_stat;
58 extern unsigned long get_max_files(void); 58 extern unsigned long get_max_files(void);
59 extern int sysctl_nr_open; 59 extern int sysctl_nr_open;
60 extern struct inodes_stat_t inodes_stat; 60 extern struct inodes_stat_t inodes_stat;
61 extern int leases_enable, lease_break_time; 61 extern int leases_enable, lease_break_time;
62 extern int sysctl_protected_symlinks; 62 extern int sysctl_protected_symlinks;
63 extern int sysctl_protected_hardlinks; 63 extern int sysctl_protected_hardlinks;
64 64
65 struct buffer_head; 65 struct buffer_head;
66 typedef int (get_block_t)(struct inode *inode, sector_t iblock, 66 typedef int (get_block_t)(struct inode *inode, sector_t iblock,
67 struct buffer_head *bh_result, int create); 67 struct buffer_head *bh_result, int create);
68 typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, 68 typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
69 ssize_t bytes, void *private); 69 ssize_t bytes, void *private);
70 70
71 #define MAY_EXEC 0x00000001 71 #define MAY_EXEC 0x00000001
72 #define MAY_WRITE 0x00000002 72 #define MAY_WRITE 0x00000002
73 #define MAY_READ 0x00000004 73 #define MAY_READ 0x00000004
74 #define MAY_APPEND 0x00000008 74 #define MAY_APPEND 0x00000008
75 #define MAY_ACCESS 0x00000010 75 #define MAY_ACCESS 0x00000010
76 #define MAY_OPEN 0x00000020 76 #define MAY_OPEN 0x00000020
77 #define MAY_CHDIR 0x00000040 77 #define MAY_CHDIR 0x00000040
78 /* called from RCU mode, don't block */ 78 /* called from RCU mode, don't block */
79 #define MAY_NOT_BLOCK 0x00000080 79 #define MAY_NOT_BLOCK 0x00000080
80 80
81 /* 81 /*
82 * flags in file.f_mode. Note that FMODE_READ and FMODE_WRITE must correspond 82 * flags in file.f_mode. Note that FMODE_READ and FMODE_WRITE must correspond
83 * to O_WRONLY and O_RDWR via the strange trick in __dentry_open() 83 * to O_WRONLY and O_RDWR via the strange trick in __dentry_open()
84 */ 84 */
85 85
86 /* file is open for reading */ 86 /* file is open for reading */
87 #define FMODE_READ ((__force fmode_t)0x1) 87 #define FMODE_READ ((__force fmode_t)0x1)
88 /* file is open for writing */ 88 /* file is open for writing */
89 #define FMODE_WRITE ((__force fmode_t)0x2) 89 #define FMODE_WRITE ((__force fmode_t)0x2)
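The correspondence matters because the open path converts the open(2) access mode to f_mode by simply adding one: O_RDONLY/O_WRONLY/O_RDWR are 0/1/2, so the result lands exactly on these two bits. A worked example (illustrative only; the in-tree conversion is the OPEN_FMODE() macro):

	(O_RDONLY + 1) & O_ACCMODE == 1  ->  FMODE_READ
	(O_WRONLY + 1) & O_ACCMODE == 2  ->               FMODE_WRITE
	(O_RDWR   + 1) & O_ACCMODE == 3  ->  FMODE_READ | FMODE_WRITE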
90 /* file is seekable */ 90 /* file is seekable */
91 #define FMODE_LSEEK ((__force fmode_t)0x4) 91 #define FMODE_LSEEK ((__force fmode_t)0x4)
92 /* file can be accessed using pread */ 92 /* file can be accessed using pread */
93 #define FMODE_PREAD ((__force fmode_t)0x8) 93 #define FMODE_PREAD ((__force fmode_t)0x8)
94 /* file can be accessed using pwrite */ 94 /* file can be accessed using pwrite */
95 #define FMODE_PWRITE ((__force fmode_t)0x10) 95 #define FMODE_PWRITE ((__force fmode_t)0x10)
96 /* File is opened for execution with sys_execve / sys_uselib */ 96 /* File is opened for execution with sys_execve / sys_uselib */
97 #define FMODE_EXEC ((__force fmode_t)0x20) 97 #define FMODE_EXEC ((__force fmode_t)0x20)
98 /* File is opened with O_NDELAY (only set for block devices) */ 98 /* File is opened with O_NDELAY (only set for block devices) */
99 #define FMODE_NDELAY ((__force fmode_t)0x40) 99 #define FMODE_NDELAY ((__force fmode_t)0x40)
100 /* File is opened with O_EXCL (only set for block devices) */ 100 /* File is opened with O_EXCL (only set for block devices) */
101 #define FMODE_EXCL ((__force fmode_t)0x80) 101 #define FMODE_EXCL ((__force fmode_t)0x80)
102 /* File is opened using open(.., 3, ..) and is writeable only for ioctls 102 /* File is opened using open(.., 3, ..) and is writeable only for ioctls
103 (special hack for floppy.c) */ 103 (special hack for floppy.c) */
104 #define FMODE_WRITE_IOCTL ((__force fmode_t)0x100) 104 #define FMODE_WRITE_IOCTL ((__force fmode_t)0x100)
105 /* 32bit hashes as llseek() offset (for directories) */ 105 /* 32bit hashes as llseek() offset (for directories) */
106 #define FMODE_32BITHASH ((__force fmode_t)0x200) 106 #define FMODE_32BITHASH ((__force fmode_t)0x200)
107 /* 64bit hashes as llseek() offset (for directories) */ 107 /* 64bit hashes as llseek() offset (for directories) */
108 #define FMODE_64BITHASH ((__force fmode_t)0x400) 108 #define FMODE_64BITHASH ((__force fmode_t)0x400)
109 109
110 /* 110 /*
111 * Don't update ctime and mtime. 111 * Don't update ctime and mtime.
112 * 112 *
113 * Currently a special hack for the XFS open_by_handle ioctl, but we'll 113 * Currently a special hack for the XFS open_by_handle ioctl, but we'll
114 * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon. 114 * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon.
115 */ 115 */
116 #define FMODE_NOCMTIME ((__force fmode_t)0x800) 116 #define FMODE_NOCMTIME ((__force fmode_t)0x800)
117 117
118 /* Expect random access pattern */ 118 /* Expect random access pattern */
119 #define FMODE_RANDOM ((__force fmode_t)0x1000) 119 #define FMODE_RANDOM ((__force fmode_t)0x1000)
120 120
121 /* File is huge (eg. /dev/kmem): treat loff_t as unsigned */ 121 /* File is huge (eg. /dev/kmem): treat loff_t as unsigned */
122 #define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000) 122 #define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000)
123 123
124 /* File is opened with O_PATH; almost nothing can be done with it */ 124 /* File is opened with O_PATH; almost nothing can be done with it */
125 #define FMODE_PATH ((__force fmode_t)0x4000) 125 #define FMODE_PATH ((__force fmode_t)0x4000)
126 126
127 /* File needs atomic accesses to f_pos */ 127 /* File needs atomic accesses to f_pos */
128 #define FMODE_ATOMIC_POS ((__force fmode_t)0x8000) 128 #define FMODE_ATOMIC_POS ((__force fmode_t)0x8000)
129 /* Write access to underlying fs */ 129 /* Write access to underlying fs */
130 #define FMODE_WRITER ((__force fmode_t)0x10000) 130 #define FMODE_WRITER ((__force fmode_t)0x10000)
131 131
132 /* File was opened by fanotify and shouldn't generate fanotify events */ 132 /* File was opened by fanotify and shouldn't generate fanotify events */
133 #define FMODE_NONOTIFY ((__force fmode_t)0x1000000) 133 #define FMODE_NONOTIFY ((__force fmode_t)0x1000000)
134 134
135 /* 135 /*
136 * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector 136 * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
137 * that indicates that they should check the contents of the iovec are 137 * that indicates that they should check the contents of the iovec are
138 * valid, but not check the memory that the iovec elements 138 * valid, but not check the memory that the iovec elements
139 * point to. 139 * point to.
140 */ 140 */
141 #define CHECK_IOVEC_ONLY -1 141 #define CHECK_IOVEC_ONLY -1
142 142
143 /* 143 /*
144 * The below are the various read and write types that we support. Some of 144 * The below are the various read and write types that we support. Some of
145 * them include behavioral modifiers that send information down to the 145 * them include behavioral modifiers that send information down to the
146 * block layer and IO scheduler. Terminology: 146 * block layer and IO scheduler. Terminology:
147 * 147 *
148 * The block layer uses device plugging to defer IO a little bit, in 148 * The block layer uses device plugging to defer IO a little bit, in
149 * the hope that we will see more IO very shortly. This increases 149 * the hope that we will see more IO very shortly. This increases
150 * coalescing of adjacent IO and thus reduces the number of IOs we 150 * coalescing of adjacent IO and thus reduces the number of IOs we
151 * have to send to the device. It also allows for better queuing, 151 * have to send to the device. It also allows for better queuing,
152 * if the IO isn't mergeable. If the caller is going to be waiting 152 * if the IO isn't mergeable. If the caller is going to be waiting
153 * for the IO, then he must ensure that the device is unplugged so 153 * for the IO, then he must ensure that the device is unplugged so
154 * that the IO is dispatched to the driver. 154 * that the IO is dispatched to the driver.
155 * 155 *
156 * All IO is handled async in Linux. This is fine for background 156 * All IO is handled async in Linux. This is fine for background
157 * writes, but for reads or writes that someone waits for completion 157 * writes, but for reads or writes that someone waits for completion
158 * on, we want to notify the block layer and IO scheduler so that they 158 * on, we want to notify the block layer and IO scheduler so that they
159 * know about it. That allows them to make better scheduling 159 * know about it. That allows them to make better scheduling
160 * decisions. So when the below references 'sync' and 'async', it 160 * decisions. So when the below references 'sync' and 'async', it
161 * is referencing this priority hint. 161 * is referencing this priority hint.
162 * 162 *
163 * With that in mind, the available types are: 163 * With that in mind, the available types are:
164 * 164 *
165 * READ A normal read operation. Device will be plugged. 165 * READ A normal read operation. Device will be plugged.
166 * READ_SYNC A synchronous read. Device is not plugged, caller can 166 * READ_SYNC A synchronous read. Device is not plugged, caller can
167 * immediately wait on this read without caring about 167 * immediately wait on this read without caring about
168 * unplugging. 168 * unplugging.
169 * READA Used for read-ahead operations. Lower priority, and the 169 * READA Used for read-ahead operations. Lower priority, and the
170 * block layer could (in theory) choose to ignore this 170 * block layer could (in theory) choose to ignore this
171 * request if it runs into resource problems. 171 * request if it runs into resource problems.
172 * WRITE A normal async write. Device will be plugged. 172 * WRITE A normal async write. Device will be plugged.
173 * WRITE_SYNC Synchronous write. Identical to WRITE, but passes down 173 * WRITE_SYNC Synchronous write. Identical to WRITE, but passes down
174 * the hint that someone will be waiting on this IO 174 * the hint that someone will be waiting on this IO
175 * shortly. The write equivalent of READ_SYNC. 175 * shortly. The write equivalent of READ_SYNC.
176 * WRITE_ODIRECT Special case write for O_DIRECT only. 176 * WRITE_ODIRECT Special case write for O_DIRECT only.
177 * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush. 177 * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush.
178 * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on 178 * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on
179 * non-volatile media on completion. 179 * non-volatile media on completion.
180 * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded 180 * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded
181 * by a cache flush and data is guaranteed to be on 181 * by a cache flush and data is guaranteed to be on
182 * non-volatile media on completion. 182 * non-volatile media on completion.
183 * 183 *
184 */ 184 */
185 #define RW_MASK REQ_WRITE 185 #define RW_MASK REQ_WRITE
186 #define RWA_MASK REQ_RAHEAD 186 #define RWA_MASK REQ_RAHEAD
187 187
188 #define READ 0 188 #define READ 0
189 #define WRITE RW_MASK 189 #define WRITE RW_MASK
190 #define READA RWA_MASK 190 #define READA RWA_MASK
191 #define KERNEL_READ (READ|REQ_KERNEL) 191 #define KERNEL_READ (READ|REQ_KERNEL)
192 #define KERNEL_WRITE (WRITE|REQ_KERNEL) 192 #define KERNEL_WRITE (WRITE|REQ_KERNEL)
193 193
194 #define READ_SYNC (READ | REQ_SYNC) 194 #define READ_SYNC (READ | REQ_SYNC)
195 #define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE) 195 #define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE)
196 #define WRITE_ODIRECT (WRITE | REQ_SYNC) 196 #define WRITE_ODIRECT (WRITE | REQ_SYNC)
197 #define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH) 197 #define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH)
198 #define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FUA) 198 #define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FUA)
199 #define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) 199 #define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | REQ_FUA)
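One common consumer of these hints is filesystem writeback, which only asks for the 'sync' variants when someone is actually waiting on the I/O. An illustrative pattern (a sketch, not an interface defined by this header):

	int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE;
	submit_bio(rw, bio);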
200 200
201 /* 201 /*
202 * Attribute flags. These should be or-ed together to figure out what 202 * Attribute flags. These should be or-ed together to figure out what
203 * has been changed! 203 * has been changed!
204 */ 204 */
205 #define ATTR_MODE (1 << 0) 205 #define ATTR_MODE (1 << 0)
206 #define ATTR_UID (1 << 1) 206 #define ATTR_UID (1 << 1)
207 #define ATTR_GID (1 << 2) 207 #define ATTR_GID (1 << 2)
208 #define ATTR_SIZE (1 << 3) 208 #define ATTR_SIZE (1 << 3)
209 #define ATTR_ATIME (1 << 4) 209 #define ATTR_ATIME (1 << 4)
210 #define ATTR_MTIME (1 << 5) 210 #define ATTR_MTIME (1 << 5)
211 #define ATTR_CTIME (1 << 6) 211 #define ATTR_CTIME (1 << 6)
212 #define ATTR_ATIME_SET (1 << 7) 212 #define ATTR_ATIME_SET (1 << 7)
213 #define ATTR_MTIME_SET (1 << 8) 213 #define ATTR_MTIME_SET (1 << 8)
214 #define ATTR_FORCE (1 << 9) /* Not a change, but a change it */ 214 #define ATTR_FORCE (1 << 9) /* Not a change, but a change it */
215 #define ATTR_ATTR_FLAG (1 << 10) 215 #define ATTR_ATTR_FLAG (1 << 10)
216 #define ATTR_KILL_SUID (1 << 11) 216 #define ATTR_KILL_SUID (1 << 11)
217 #define ATTR_KILL_SGID (1 << 12) 217 #define ATTR_KILL_SGID (1 << 12)
218 #define ATTR_FILE (1 << 13) 218 #define ATTR_FILE (1 << 13)
219 #define ATTR_KILL_PRIV (1 << 14) 219 #define ATTR_KILL_PRIV (1 << 14)
220 #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */ 220 #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */
221 #define ATTR_TIMES_SET (1 << 16) 221 #define ATTR_TIMES_SET (1 << 16)
222 222
223 /* 223 /*
224 * This is the Inode Attributes structure, used for notify_change(). It 224 * This is the Inode Attributes structure, used for notify_change(). It
225 * uses the above definitions as flags, to know which values have changed. 225 * uses the above definitions as flags, to know which values have changed.
226 * Also, in this manner, a Filesystem can look at only the values it cares 226 * Also, in this manner, a Filesystem can look at only the values it cares
227 * about. Basically, these are the attributes that the VFS layer can 227 * about. Basically, these are the attributes that the VFS layer can
228 * request to change from the FS layer. 228 * request to change from the FS layer.
229 * 229 *
230 * Derek Atkins <warlord@MIT.EDU> 94-10-20 230 * Derek Atkins <warlord@MIT.EDU> 94-10-20
231 */ 231 */
232 struct iattr { 232 struct iattr {
233 unsigned int ia_valid; 233 unsigned int ia_valid;
234 umode_t ia_mode; 234 umode_t ia_mode;
235 kuid_t ia_uid; 235 kuid_t ia_uid;
236 kgid_t ia_gid; 236 kgid_t ia_gid;
237 loff_t ia_size; 237 loff_t ia_size;
238 struct timespec ia_atime; 238 struct timespec ia_atime;
239 struct timespec ia_mtime; 239 struct timespec ia_mtime;
240 struct timespec ia_ctime; 240 struct timespec ia_ctime;
241 241
242 /* 242 /*
243 * Not an attribute, but an auxiliary info for filesystems wanting to 243 * Not an attribute, but an auxiliary info for filesystems wanting to
244 * implement an ftruncate() like method. NOTE: filesystem should 244 * implement an ftruncate() like method. NOTE: filesystem should
245 * check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL). 245 * check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL).
246 */ 246 */
247 struct file *ia_file; 247 struct file *ia_file;
248 }; 248 };
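A hedged sketch of how a filesystem's ->setattr() typically consumes this structure: every field is guarded by its ia_valid bit rather than by a NULL or zero check. The helpers below are the generic in-tree ones, shown purely as an example of the pattern:

	if (attr->ia_valid & ATTR_SIZE)
		truncate_setsize(inode, attr->ia_size);
	setattr_copy(inode, attr);	/* copies mode/uid/gid/times according to ia_valid */
	mark_inode_dirty(inode);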
249 249
250 /* 250 /*
251 * Includes for diskquotas. 251 * Includes for diskquotas.
252 */ 252 */
253 #include <linux/quota.h> 253 #include <linux/quota.h>
254 254
255 /** 255 /**
256 * enum positive_aop_returns - aop return codes with specific semantics 256 * enum positive_aop_returns - aop return codes with specific semantics
257 * 257 *
258 * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has 258 * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has
259 * completed, that the page is still locked, and 259 * completed, that the page is still locked, and
260 * should be considered active. The VM uses this hint 260 * should be considered active. The VM uses this hint
261 * to return the page to the active list -- it won't 261 * to return the page to the active list -- it won't
262 * be a candidate for writeback again in the near 262 * be a candidate for writeback again in the near
263 * future. Other callers must be careful to unlock 263 * future. Other callers must be careful to unlock
264 * the page if they get this return. Returned by 264 * the page if they get this return. Returned by
265 * writepage(); 265 * writepage();
266 * 266 *
267 * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has 267 * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has
268 * unlocked it and the page might have been truncated. 268 * unlocked it and the page might have been truncated.
269 * The caller should back up to acquiring a new page and 269 * The caller should back up to acquiring a new page and
270 * trying again. The aop will be taking reasonable 270 * trying again. The aop will be taking reasonable
271 * precautions not to livelock. If the caller held a page 271 * precautions not to livelock. If the caller held a page
272 * reference, it should drop it before retrying. Returned 272 * reference, it should drop it before retrying. Returned
273 * by readpage(). 273 * by readpage().
274 * 274 *
275 * address_space_operation functions return these large constants to indicate 275 * address_space_operation functions return these large constants to indicate
276 * special semantics to the caller. These are much larger than the bytes in a 276 * special semantics to the caller. These are much larger than the bytes in a
277 * page to allow for functions that return the number of bytes operated on in a 277 * page to allow for functions that return the number of bytes operated on in a
278 * given page. 278 * given page.
279 */ 279 */
280 280
281 enum positive_aop_returns { 281 enum positive_aop_returns {
282 AOP_WRITEPAGE_ACTIVATE = 0x80000, 282 AOP_WRITEPAGE_ACTIVATE = 0x80000,
283 AOP_TRUNCATED_PAGE = 0x80001, 283 AOP_TRUNCATED_PAGE = 0x80001,
284 }; 284 };
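An illustrative caller-side pattern for AOP_TRUNCATED_PAGE, sketching what generic read code does after ->readpage() (not a verbatim excerpt):

	error = mapping->a_ops->readpage(filp, page);
	if (error == AOP_TRUNCATED_PAGE) {
		/* the aop already unlocked the page; drop our reference and retry */
		page_cache_release(page);
		goto find_page;
	}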
285 285
286 #define AOP_FLAG_UNINTERRUPTIBLE 0x0001 /* will not do a short write */ 286 #define AOP_FLAG_UNINTERRUPTIBLE 0x0001 /* will not do a short write */
287 #define AOP_FLAG_CONT_EXPAND 0x0002 /* called from cont_expand */ 287 #define AOP_FLAG_CONT_EXPAND 0x0002 /* called from cont_expand */
288 #define AOP_FLAG_NOFS 0x0004 /* used by filesystem to direct 288 #define AOP_FLAG_NOFS 0x0004 /* used by filesystem to direct
289 * helper code (eg buffer layer) 289 * helper code (eg buffer layer)
290 * to clear GFP_FS from alloc */ 290 * to clear GFP_FS from alloc */
291 291
292 /* 292 /*
293 * oh the beauties of C type declarations. 293 * oh the beauties of C type declarations.
294 */ 294 */
295 struct page; 295 struct page;
296 struct address_space; 296 struct address_space;
297 struct writeback_control; 297 struct writeback_control;
298 298
299 /* 299 /*
300 * "descriptor" for what we're up to with a read. 300 * "descriptor" for what we're up to with a read.
301 * This allows us to use the same read code yet 301 * This allows us to use the same read code yet
302 * have multiple different users of the data that 302 * have multiple different users of the data that
303 * we read from a file. 303 * we read from a file.
304 * 304 *
305 * The simplest case just copies the data to user 305 * The simplest case just copies the data to user
306 * mode. 306 * mode.
307 */ 307 */
308 typedef struct { 308 typedef struct {
309 size_t written; 309 size_t written;
310 size_t count; 310 size_t count;
311 union { 311 union {
312 char __user *buf; 312 char __user *buf;
313 void *data; 313 void *data;
314 } arg; 314 } arg;
315 int error; 315 int error;
316 } read_descriptor_t; 316 } read_descriptor_t;
317 317
318 typedef int (*read_actor_t)(read_descriptor_t *, struct page *, 318 typedef int (*read_actor_t)(read_descriptor_t *, struct page *,
319 unsigned long, unsigned long); 319 unsigned long, unsigned long);
320 320
321 struct address_space_operations { 321 struct address_space_operations {
322 int (*writepage)(struct page *page, struct writeback_control *wbc); 322 int (*writepage)(struct page *page, struct writeback_control *wbc);
323 int (*readpage)(struct file *, struct page *); 323 int (*readpage)(struct file *, struct page *);
324 324
325 /* Write back some dirty pages from this mapping. */ 325 /* Write back some dirty pages from this mapping. */
326 int (*writepages)(struct address_space *, struct writeback_control *); 326 int (*writepages)(struct address_space *, struct writeback_control *);
327 327
328 /* Set a page dirty. Return true if this dirtied it */ 328 /* Set a page dirty. Return true if this dirtied it */
329 int (*set_page_dirty)(struct page *page); 329 int (*set_page_dirty)(struct page *page);
330 330
331 int (*readpages)(struct file *filp, struct address_space *mapping, 331 int (*readpages)(struct file *filp, struct address_space *mapping,
332 struct list_head *pages, unsigned nr_pages); 332 struct list_head *pages, unsigned nr_pages);
333 333
334 int (*write_begin)(struct file *, struct address_space *mapping, 334 int (*write_begin)(struct file *, struct address_space *mapping,
335 loff_t pos, unsigned len, unsigned flags, 335 loff_t pos, unsigned len, unsigned flags,
336 struct page **pagep, void **fsdata); 336 struct page **pagep, void **fsdata);
337 int (*write_end)(struct file *, struct address_space *mapping, 337 int (*write_end)(struct file *, struct address_space *mapping,
338 loff_t pos, unsigned len, unsigned copied, 338 loff_t pos, unsigned len, unsigned copied,
339 struct page *page, void *fsdata); 339 struct page *page, void *fsdata);
340 340
341 /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ 341 /* Unfortunately this kludge is needed for FIBMAP. Don't use it */
342 sector_t (*bmap)(struct address_space *, sector_t); 342 sector_t (*bmap)(struct address_space *, sector_t);
343 void (*invalidatepage) (struct page *, unsigned int, unsigned int); 343 void (*invalidatepage) (struct page *, unsigned int, unsigned int);
344 int (*releasepage) (struct page *, gfp_t); 344 int (*releasepage) (struct page *, gfp_t);
345 void (*freepage)(struct page *); 345 void (*freepage)(struct page *);
346 ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, 346 ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
347 loff_t offset, unsigned long nr_segs); 347 loff_t offset, unsigned long nr_segs);
348 int (*get_xip_mem)(struct address_space *, pgoff_t, int, 348 int (*get_xip_mem)(struct address_space *, pgoff_t, int,
349 void **, unsigned long *); 349 void **, unsigned long *);
350 /* 350 /*
351 * migrate the contents of a page to the specified target. If 351 * migrate the contents of a page to the specified target. If
352 * migrate_mode is MIGRATE_ASYNC, it must not block. 352 * migrate_mode is MIGRATE_ASYNC, it must not block.
353 */ 353 */
354 int (*migratepage) (struct address_space *, 354 int (*migratepage) (struct address_space *,
355 struct page *, struct page *, enum migrate_mode); 355 struct page *, struct page *, enum migrate_mode);
356 int (*launder_page) (struct page *); 356 int (*launder_page) (struct page *);
357 int (*is_partially_uptodate) (struct page *, unsigned long, 357 int (*is_partially_uptodate) (struct page *, unsigned long,
358 unsigned long); 358 unsigned long);
359 void (*is_dirty_writeback) (struct page *, bool *, bool *); 359 void (*is_dirty_writeback) (struct page *, bool *, bool *);
360 int (*error_remove_page)(struct address_space *, struct page *); 360 int (*error_remove_page)(struct address_space *, struct page *);
361 361
362 /* swapfile support */ 362 /* swapfile support */
363 int (*swap_activate)(struct swap_info_struct *sis, struct file *file, 363 int (*swap_activate)(struct swap_info_struct *sis, struct file *file,
364 sector_t *span); 364 sector_t *span);
365 void (*swap_deactivate)(struct file *file); 365 void (*swap_deactivate)(struct file *file);
366 }; 366 };
367 367
368 extern const struct address_space_operations empty_aops; 368 extern const struct address_space_operations empty_aops;
369 369
370 /* 370 /*
371 * pagecache_write_begin/pagecache_write_end must be used by general code 371 * pagecache_write_begin/pagecache_write_end must be used by general code
372 * to write into the pagecache. 372 * to write into the pagecache.
373 */ 373 */
374 int pagecache_write_begin(struct file *, struct address_space *mapping, 374 int pagecache_write_begin(struct file *, struct address_space *mapping,
375 loff_t pos, unsigned len, unsigned flags, 375 loff_t pos, unsigned len, unsigned flags,
376 struct page **pagep, void **fsdata); 376 struct page **pagep, void **fsdata);
377 377
378 int pagecache_write_end(struct file *, struct address_space *mapping, 378 int pagecache_write_end(struct file *, struct address_space *mapping,
379 loff_t pos, unsigned len, unsigned copied, 379 loff_t pos, unsigned len, unsigned copied,
380 struct page *page, void *fsdata); 380 struct page *page, void *fsdata);
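A minimal sketch of the pairing these two enforce, with declarations and error handling trimmed; 'copied' is however many bytes the caller actually managed to place in the page:

	status = pagecache_write_begin(file, mapping, pos, bytes, flags,
				       &page, &fsdata);
	if (status == 0) {
		/* copy up to 'bytes' of new data into 'page' here */
		status = pagecache_write_end(file, mapping, pos, bytes, copied,
					     page, fsdata);
	}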
381 381
382 struct backing_dev_info; 382 struct backing_dev_info;
383 struct address_space { 383 struct address_space {
384 struct inode *host; /* owner: inode, block_device */ 384 struct inode *host; /* owner: inode, block_device */
385 struct radix_tree_root page_tree; /* radix tree of all pages */ 385 struct radix_tree_root page_tree; /* radix tree of all pages */
386 spinlock_t tree_lock; /* and lock protecting it */ 386 spinlock_t tree_lock; /* and lock protecting it */
387 unsigned int i_mmap_writable;/* count VM_SHARED mappings */ 387 unsigned int i_mmap_writable;/* count VM_SHARED mappings */
388 struct rb_root i_mmap; /* tree of private and shared mappings */ 388 struct rb_root i_mmap; /* tree of private and shared mappings */
389 struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ 389 struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
390 struct mutex i_mmap_mutex; /* protect tree, count, list */ 390 struct mutex i_mmap_mutex; /* protect tree, count, list */
391 /* Protected by tree_lock together with the radix tree */ 391 /* Protected by tree_lock together with the radix tree */
392 unsigned long nrpages; /* number of total pages */ 392 unsigned long nrpages; /* number of total pages */
393 unsigned long nrshadows; /* number of shadow entries */ 393 unsigned long nrshadows; /* number of shadow entries */
394 pgoff_t writeback_index;/* writeback starts here */ 394 pgoff_t writeback_index;/* writeback starts here */
395 const struct address_space_operations *a_ops; /* methods */ 395 const struct address_space_operations *a_ops; /* methods */
396 unsigned long flags; /* error bits/gfp mask */ 396 unsigned long flags; /* error bits/gfp mask */
397 struct backing_dev_info *backing_dev_info; /* device readahead, etc */ 397 struct backing_dev_info *backing_dev_info; /* device readahead, etc */
398 spinlock_t private_lock; /* for use by the address_space */ 398 spinlock_t private_lock; /* for use by the address_space */
399 struct list_head private_list; /* ditto */ 399 struct list_head private_list; /* ditto */
400 void *private_data; /* ditto */ 400 void *private_data; /* ditto */
401 } __attribute__((aligned(sizeof(long)))); 401 } __attribute__((aligned(sizeof(long))));
402 /* 402 /*
403 * On most architectures that alignment is already the case; but 403 * On most architectures that alignment is already the case; but
404 * must be enforced here for CRIS, to let the least significant bit 404 * must be enforced here for CRIS, to let the least significant bit
405 * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON. 405 * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON.
406 */ 406 */
407 struct request_queue; 407 struct request_queue;
408 408
409 struct block_device { 409 struct block_device {
410 dev_t bd_dev; /* not a kdev_t - it's a search key */ 410 dev_t bd_dev; /* not a kdev_t - it's a search key */
411 int bd_openers; 411 int bd_openers;
412 struct inode * bd_inode; /* will die */ 412 struct inode * bd_inode; /* will die */
413 struct super_block * bd_super; 413 struct super_block * bd_super;
414 struct mutex bd_mutex; /* open/close mutex */ 414 struct mutex bd_mutex; /* open/close mutex */
415 struct list_head bd_inodes; 415 struct list_head bd_inodes;
416 void * bd_claiming; 416 void * bd_claiming;
417 void * bd_holder; 417 void * bd_holder;
418 int bd_holders; 418 int bd_holders;
419 bool bd_write_holder; 419 bool bd_write_holder;
420 #ifdef CONFIG_SYSFS 420 #ifdef CONFIG_SYSFS
421 struct list_head bd_holder_disks; 421 struct list_head bd_holder_disks;
422 #endif 422 #endif
423 struct block_device * bd_contains; 423 struct block_device * bd_contains;
424 unsigned bd_block_size; 424 unsigned bd_block_size;
425 struct hd_struct * bd_part; 425 struct hd_struct * bd_part;
426 /* number of times partitions within this device have been opened. */ 426 /* number of times partitions within this device have been opened. */
427 unsigned bd_part_count; 427 unsigned bd_part_count;
428 int bd_invalidated; 428 int bd_invalidated;
429 struct gendisk * bd_disk; 429 struct gendisk * bd_disk;
430 struct request_queue * bd_queue; 430 struct request_queue * bd_queue;
431 struct list_head bd_list; 431 struct list_head bd_list;
432 /* 432 /*
433 * Private data. You must have bd_claim'ed the block_device 433 * Private data. You must have bd_claim'ed the block_device
434 * to use this. NOTE: bd_claim allows an owner to claim 434 * to use this. NOTE: bd_claim allows an owner to claim
435 * the same device multiple times, the owner must take special 435 * the same device multiple times, the owner must take special
436 * care to not mess up bd_private for that case. 436 * care to not mess up bd_private for that case.
437 */ 437 */
438 unsigned long bd_private; 438 unsigned long bd_private;
439 439
440 /* The counter of freeze processes */ 440 /* The counter of freeze processes */
441 int bd_fsfreeze_count; 441 int bd_fsfreeze_count;
442 /* Mutex for freeze */ 442 /* Mutex for freeze */
443 struct mutex bd_fsfreeze_mutex; 443 struct mutex bd_fsfreeze_mutex;
444 }; 444 };
445 445
446 /* 446 /*
447 * Radix-tree tags, for tagging dirty and writeback pages within the pagecache 447 * Radix-tree tags, for tagging dirty and writeback pages within the pagecache
448 * radix trees 448 * radix trees
449 */ 449 */
450 #define PAGECACHE_TAG_DIRTY 0 450 #define PAGECACHE_TAG_DIRTY 0
451 #define PAGECACHE_TAG_WRITEBACK 1 451 #define PAGECACHE_TAG_WRITEBACK 1
452 #define PAGECACHE_TAG_TOWRITE 2 452 #define PAGECACHE_TAG_TOWRITE 2
453 453
454 int mapping_tagged(struct address_space *mapping, int tag); 454 int mapping_tagged(struct address_space *mapping, int tag);
455 455
456 /* 456 /*
457 * Might pages of this file be mapped into userspace? 457 * Might pages of this file be mapped into userspace?
458 */ 458 */
459 static inline int mapping_mapped(struct address_space *mapping) 459 static inline int mapping_mapped(struct address_space *mapping)
460 { 460 {
461 return !RB_EMPTY_ROOT(&mapping->i_mmap) || 461 return !RB_EMPTY_ROOT(&mapping->i_mmap) ||
462 !list_empty(&mapping->i_mmap_nonlinear); 462 !list_empty(&mapping->i_mmap_nonlinear);
463 } 463 }
464 464
465 /* 465 /*
466 * Might pages of this file have been modified in userspace? 466 * Might pages of this file have been modified in userspace?
467 * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff 467 * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff
468 * marks vma as VM_SHARED if it is shared, and the file was opened for 468 * marks vma as VM_SHARED if it is shared, and the file was opened for
469 * writing i.e. vma may be mprotected writable even if now readonly. 469 * writing i.e. vma may be mprotected writable even if now readonly.
470 */ 470 */
471 static inline int mapping_writably_mapped(struct address_space *mapping) 471 static inline int mapping_writably_mapped(struct address_space *mapping)
472 { 472 {
473 return mapping->i_mmap_writable != 0; 473 return mapping->i_mmap_writable != 0;
474 } 474 }
475 475
476 /* 476 /*
477 * Use sequence counter to get consistent i_size on 32-bit processors. 477 * Use sequence counter to get consistent i_size on 32-bit processors.
478 */ 478 */
479 #if BITS_PER_LONG==32 && defined(CONFIG_SMP) 479 #if BITS_PER_LONG==32 && defined(CONFIG_SMP)
480 #include <linux/seqlock.h> 480 #include <linux/seqlock.h>
481 #define __NEED_I_SIZE_ORDERED 481 #define __NEED_I_SIZE_ORDERED
482 #define i_size_ordered_init(inode) seqcount_init(&inode->i_size_seqcount) 482 #define i_size_ordered_init(inode) seqcount_init(&inode->i_size_seqcount)
483 #else 483 #else
484 #define i_size_ordered_init(inode) do { } while (0) 484 #define i_size_ordered_init(inode) do { } while (0)
485 #endif 485 #endif
486 486
487 struct posix_acl; 487 struct posix_acl;
488 #define ACL_NOT_CACHED ((void *)(-1)) 488 #define ACL_NOT_CACHED ((void *)(-1))
489 489
490 #define IOP_FASTPERM 0x0001 490 #define IOP_FASTPERM 0x0001
491 #define IOP_LOOKUP 0x0002 491 #define IOP_LOOKUP 0x0002
492 #define IOP_NOFOLLOW 0x0004 492 #define IOP_NOFOLLOW 0x0004
493 493
494 /* 494 /*
495 * Keep mostly read-only and often accessed (especially for 495 * Keep mostly read-only and often accessed (especially for
496 * the RCU path lookup and 'stat' data) fields at the beginning 496 * the RCU path lookup and 'stat' data) fields at the beginning
497 * of the 'struct inode' 497 * of the 'struct inode'
498 */ 498 */
499 struct inode { 499 struct inode {
500 umode_t i_mode; 500 umode_t i_mode;
501 unsigned short i_opflags; 501 unsigned short i_opflags;
502 kuid_t i_uid; 502 kuid_t i_uid;
503 kgid_t i_gid; 503 kgid_t i_gid;
504 unsigned int i_flags; 504 unsigned int i_flags;
505 505
506 #ifdef CONFIG_FS_POSIX_ACL 506 #ifdef CONFIG_FS_POSIX_ACL
507 struct posix_acl *i_acl; 507 struct posix_acl *i_acl;
508 struct posix_acl *i_default_acl; 508 struct posix_acl *i_default_acl;
509 #endif 509 #endif
510 510
511 const struct inode_operations *i_op; 511 const struct inode_operations *i_op;
512 struct super_block *i_sb; 512 struct super_block *i_sb;
513 struct address_space *i_mapping; 513 struct address_space *i_mapping;
514 514
515 #ifdef CONFIG_SECURITY 515 #ifdef CONFIG_SECURITY
516 void *i_security; 516 void *i_security;
517 #endif 517 #endif
518 518
519 /* Stat data, not accessed from path walking */ 519 /* Stat data, not accessed from path walking */
520 unsigned long i_ino; 520 unsigned long i_ino;
521 /* 521 /*
522 * Filesystems may only read i_nlink directly. They shall use the 522 * Filesystems may only read i_nlink directly. They shall use the
523 * following functions for modification: 523 * following functions for modification:
524 * 524 *
525 * (set|clear|inc|drop)_nlink 525 * (set|clear|inc|drop)_nlink
526 * inode_(inc|dec)_link_count 526 * inode_(inc|dec)_link_count
527 */ 527 */
528 union { 528 union {
529 const unsigned int i_nlink; 529 const unsigned int i_nlink;
530 unsigned int __i_nlink; 530 unsigned int __i_nlink;
531 }; 531 };
532 dev_t i_rdev; 532 dev_t i_rdev;
533 loff_t i_size; 533 loff_t i_size;
534 struct timespec i_atime; 534 struct timespec i_atime;
535 struct timespec i_mtime; 535 struct timespec i_mtime;
536 struct timespec i_ctime; 536 struct timespec i_ctime;
537 spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ 537 spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
538 unsigned short i_bytes; 538 unsigned short i_bytes;
539 unsigned int i_blkbits; 539 unsigned int i_blkbits;
540 blkcnt_t i_blocks; 540 blkcnt_t i_blocks;
541 541
542 #ifdef __NEED_I_SIZE_ORDERED 542 #ifdef __NEED_I_SIZE_ORDERED
543 seqcount_t i_size_seqcount; 543 seqcount_t i_size_seqcount;
544 #endif 544 #endif
545 545
546 /* Misc */ 546 /* Misc */
547 unsigned long i_state; 547 unsigned long i_state;
548 struct mutex i_mutex; 548 struct mutex i_mutex;
549 549
550 unsigned long dirtied_when; /* jiffies of first dirtying */ 550 unsigned long dirtied_when; /* jiffies of first dirtying */
551 551
552 struct hlist_node i_hash; 552 struct hlist_node i_hash;
553 struct list_head i_wb_list; /* backing dev IO list */ 553 struct list_head i_wb_list; /* backing dev IO list */
554 struct list_head i_lru; /* inode LRU list */ 554 struct list_head i_lru; /* inode LRU list */
555 struct list_head i_sb_list; 555 struct list_head i_sb_list;
556 union { 556 union {
557 struct hlist_head i_dentry; 557 struct hlist_head i_dentry;
558 struct rcu_head i_rcu; 558 struct rcu_head i_rcu;
559 }; 559 };
560 u64 i_version; 560 u64 i_version;
561 atomic_t i_count; 561 atomic_t i_count;
562 atomic_t i_dio_count; 562 atomic_t i_dio_count;
563 atomic_t i_writecount; 563 atomic_t i_writecount;
564 #ifdef CONFIG_IMA 564 #ifdef CONFIG_IMA
565 atomic_t i_readcount; /* struct files open RO */ 565 atomic_t i_readcount; /* struct files open RO */
566 #endif 566 #endif
567 const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ 567 const struct file_operations *i_fop; /* former ->i_op->default_file_ops */
568 struct file_lock *i_flock; 568 struct file_lock *i_flock;
569 struct address_space i_data; 569 struct address_space i_data;
570 #ifdef CONFIG_QUOTA 570 #ifdef CONFIG_QUOTA
571 struct dquot *i_dquot[MAXQUOTAS]; 571 struct dquot *i_dquot[MAXQUOTAS];
572 #endif 572 #endif
573 struct list_head i_devices; 573 struct list_head i_devices;
574 union { 574 union {
575 struct pipe_inode_info *i_pipe; 575 struct pipe_inode_info *i_pipe;
576 struct block_device *i_bdev; 576 struct block_device *i_bdev;
577 struct cdev *i_cdev; 577 struct cdev *i_cdev;
578 }; 578 };
579 579
580 __u32 i_generation; 580 __u32 i_generation;
581 581
582 #ifdef CONFIG_FSNOTIFY 582 #ifdef CONFIG_FSNOTIFY
583 __u32 i_fsnotify_mask; /* all events this inode cares about */ 583 __u32 i_fsnotify_mask; /* all events this inode cares about */
584 struct hlist_head i_fsnotify_marks; 584 struct hlist_head i_fsnotify_marks;
585 #endif 585 #endif
586 586
587 void *i_private; /* fs or device private pointer */ 587 void *i_private; /* fs or device private pointer */
588 }; 588 };
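/*
 * Illustrative sketch only: per the i_nlink comment inside struct inode
 * above, filesystems never write __i_nlink directly but go through the
 * helpers.  A mkdir-style link-count update would look roughly like this;
 * the function and argument names are made up for the example.
 */
static void example_mkdir_link_counts(struct inode *dir, struct inode *new_dir)
{
	set_nlink(new_dir, 2);		/* "." plus the entry in the parent */
	inc_nlink(dir);			/* ".." inside the new directory */
	mark_inode_dirty(dir);
}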
589 589
590 static inline int inode_unhashed(struct inode *inode) 590 static inline int inode_unhashed(struct inode *inode)
591 { 591 {
592 return hlist_unhashed(&inode->i_hash); 592 return hlist_unhashed(&inode->i_hash);
593 } 593 }
594 594
595 /* 595 /*
596 * inode->i_mutex nesting subclasses for the lock validator: 596 * inode->i_mutex nesting subclasses for the lock validator:
597 * 597 *
598 * 0: the object of the current VFS operation 598 * 0: the object of the current VFS operation
599 * 1: parent 599 * 1: parent
600 * 2: child/target 600 * 2: child/target
601 * 3: xattr 601 * 3: xattr
602 * 4: second non-directory 602 * 4: second non-directory
603 * The last is for certain operations (such as rename) which lock two 603 * The last is for certain operations (such as rename) which lock two
604 * non-directories at once. 604 * non-directories at once.
605 * 605 *
606 * The locking order between these classes is 606 * The locking order between these classes is
607 * parent -> child -> normal -> xattr -> second non-directory 607 * parent -> child -> normal -> xattr -> second non-directory
608 */ 608 */
609 enum inode_i_mutex_lock_class 609 enum inode_i_mutex_lock_class
610 { 610 {
611 I_MUTEX_NORMAL, 611 I_MUTEX_NORMAL,
612 I_MUTEX_PARENT, 612 I_MUTEX_PARENT,
613 I_MUTEX_CHILD, 613 I_MUTEX_CHILD,
614 I_MUTEX_XATTR, 614 I_MUTEX_XATTR,
615 I_MUTEX_NONDIR2 615 I_MUTEX_NONDIR2
616 }; 616 };
617 617
618 void lock_two_nondirectories(struct inode *, struct inode*); 618 void lock_two_nondirectories(struct inode *, struct inode*);
619 void unlock_two_nondirectories(struct inode *, struct inode*); 619 void unlock_two_nondirectories(struct inode *, struct inode*);
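/*
 * Illustrative sketch only: the subclasses above are fed to lockdep via
 * mutex_lock_nested() when more than one i_mutex is held, e.g. a parent
 * directory and a child, following the documented ordering.  Real callers
 * use helpers such as lock_rename(); these function names are made up.
 */
static void example_lock_parent_child(struct inode *dir, struct inode *child)
{
	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
	mutex_lock_nested(&child->i_mutex, I_MUTEX_CHILD);
}

static void example_unlock_parent_child(struct inode *dir, struct inode *child)
{
	mutex_unlock(&child->i_mutex);
	mutex_unlock(&dir->i_mutex);
}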
620 620
621 /* 621 /*
622 * NOTE: on a 32-bit arch with a preemptible kernel and 622 * NOTE: on a 32-bit arch with a preemptible kernel and
623 * a UP compile, i_size_read/write must be atomic 623 * a UP compile, i_size_read/write must be atomic
624 * with respect to the local cpu (unlike with preempt disabled), 624 * with respect to the local cpu (unlike with preempt disabled),
625 * but they don't need to be atomic with respect to other cpus as in 625 * but they don't need to be atomic with respect to other cpus as in
626 * true SMP (so they either need to locally disable irqs around 626 * true SMP (so they either need to locally disable irqs around
627 * the read, or, on x86 for example, they can still be implemented as a 627 * the read, or, on x86 for example, they can still be implemented as a
628 * cmpxchg8b without the need for the lock prefix). For SMP compiles 628 * cmpxchg8b without the need for the lock prefix). For SMP compiles
629 * and 64-bit archs it makes no difference whether preempt is enabled or not. 629 * and 64-bit archs it makes no difference whether preempt is enabled or not.
630 */ 630 */
631 static inline loff_t i_size_read(const struct inode *inode) 631 static inline loff_t i_size_read(const struct inode *inode)
632 { 632 {
633 #if BITS_PER_LONG==32 && defined(CONFIG_SMP) 633 #if BITS_PER_LONG==32 && defined(CONFIG_SMP)
634 loff_t i_size; 634 loff_t i_size;
635 unsigned int seq; 635 unsigned int seq;
636 636
637 do { 637 do {
638 seq = read_seqcount_begin(&inode->i_size_seqcount); 638 seq = read_seqcount_begin(&inode->i_size_seqcount);
639 i_size = inode->i_size; 639 i_size = inode->i_size;
640 } while (read_seqcount_retry(&inode->i_size_seqcount, seq)); 640 } while (read_seqcount_retry(&inode->i_size_seqcount, seq));
641 return i_size; 641 return i_size;
642 #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) 642 #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
643 loff_t i_size; 643 loff_t i_size;
644 644
645 preempt_disable(); 645 preempt_disable();
646 i_size = inode->i_size; 646 i_size = inode->i_size;
647 preempt_enable(); 647 preempt_enable();
648 return i_size; 648 return i_size;
649 #else 649 #else
650 return inode->i_size; 650 return inode->i_size;
651 #endif 651 #endif
652 } 652 }
653 653
654 /* 654 /*
655 * NOTE: unlike i_size_read(), i_size_write() does need locking around it 655 * NOTE: unlike i_size_read(), i_size_write() does need locking around it
656 * (normally i_mutex), otherwise on 32bit/SMP an update of i_size_seqcount 656 * (normally i_mutex), otherwise on 32bit/SMP an update of i_size_seqcount
657 * can be lost, resulting in subsequent i_size_read() calls spinning forever. 657 * can be lost, resulting in subsequent i_size_read() calls spinning forever.
658 */ 658 */
659 static inline void i_size_write(struct inode *inode, loff_t i_size) 659 static inline void i_size_write(struct inode *inode, loff_t i_size)
660 { 660 {
661 #if BITS_PER_LONG==32 && defined(CONFIG_SMP) 661 #if BITS_PER_LONG==32 && defined(CONFIG_SMP)
662 preempt_disable(); 662 preempt_disable();
663 write_seqcount_begin(&inode->i_size_seqcount); 663 write_seqcount_begin(&inode->i_size_seqcount);
664 inode->i_size = i_size; 664 inode->i_size = i_size;
665 write_seqcount_end(&inode->i_size_seqcount); 665 write_seqcount_end(&inode->i_size_seqcount);
666 preempt_enable(); 666 preempt_enable();
667 #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) 667 #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
668 preempt_disable(); 668 preempt_disable();
669 inode->i_size = i_size; 669 inode->i_size = i_size;
670 preempt_enable(); 670 preempt_enable();
671 #else 671 #else
672 inode->i_size = i_size; 672 inode->i_size = i_size;
673 #endif 673 #endif
674 } 674 }
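/*
 * Illustrative sketch only: a writer extends the size under i_mutex with
 * i_size_write(), while lock-free readers use i_size_read(), matching the
 * locking note above.  example_extend_isize() is made up for this example.
 */
static void example_extend_isize(struct inode *inode, loff_t new_size)
{
	mutex_lock(&inode->i_mutex);
	if (new_size > i_size_read(inode))
		i_size_write(inode, new_size);
	mutex_unlock(&inode->i_mutex);
}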
675 675
676 /* Helper functions so that in most cases filesystems will 676 /* Helper functions so that in most cases filesystems will
677 * not need to deal directly with kuid_t and kgid_t and can 677 * not need to deal directly with kuid_t and kgid_t and can
678 * instead deal with the raw numeric values that are stored 678 * instead deal with the raw numeric values that are stored
679 * in the filesystem. 679 * in the filesystem.
680 */ 680 */
681 static inline uid_t i_uid_read(const struct inode *inode) 681 static inline uid_t i_uid_read(const struct inode *inode)
682 { 682 {
683 return from_kuid(&init_user_ns, inode->i_uid); 683 return from_kuid(&init_user_ns, inode->i_uid);
684 } 684 }
685 685
686 static inline gid_t i_gid_read(const struct inode *inode) 686 static inline gid_t i_gid_read(const struct inode *inode)
687 { 687 {
688 return from_kgid(&init_user_ns, inode->i_gid); 688 return from_kgid(&init_user_ns, inode->i_gid);
689 } 689 }
690 690
691 static inline void i_uid_write(struct inode *inode, uid_t uid) 691 static inline void i_uid_write(struct inode *inode, uid_t uid)
692 { 692 {
693 inode->i_uid = make_kuid(&init_user_ns, uid); 693 inode->i_uid = make_kuid(&init_user_ns, uid);
694 } 694 }
695 695
696 static inline void i_gid_write(struct inode *inode, gid_t gid) 696 static inline void i_gid_write(struct inode *inode, gid_t gid)
697 { 697 {
698 inode->i_gid = make_kgid(&init_user_ns, gid); 698 inode->i_gid = make_kgid(&init_user_ns, gid);
699 } 699 }
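/*
 * Illustrative sketch only: a filesystem loading or storing its on-disk
 * inode goes through the helpers above instead of touching kuid_t/kgid_t
 * directly.  "raw_uid"/"raw_gid" stand in for whatever the on-disk format
 * actually uses; the function names are made up.
 */
static void example_load_ids(struct inode *inode, u32 raw_uid, u32 raw_gid)
{
	i_uid_write(inode, raw_uid);
	i_gid_write(inode, raw_gid);
}

static void example_store_ids(const struct inode *inode, u32 *raw_uid, u32 *raw_gid)
{
	*raw_uid = i_uid_read(inode);
	*raw_gid = i_gid_read(inode);
}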
700 700
701 static inline unsigned iminor(const struct inode *inode) 701 static inline unsigned iminor(const struct inode *inode)
702 { 702 {
703 return MINOR(inode->i_rdev); 703 return MINOR(inode->i_rdev);
704 } 704 }
705 705
706 static inline unsigned imajor(const struct inode *inode) 706 static inline unsigned imajor(const struct inode *inode)
707 { 707 {
708 return MAJOR(inode->i_rdev); 708 return MAJOR(inode->i_rdev);
709 } 709 }
710 710
711 extern struct block_device *I_BDEV(struct inode *inode); 711 extern struct block_device *I_BDEV(struct inode *inode);
712 712
713 struct fown_struct { 713 struct fown_struct {
714 rwlock_t lock; /* protects pid, uid, euid fields */ 714 rwlock_t lock; /* protects pid, uid, euid fields */
715 struct pid *pid; /* pid or -pgrp where SIGIO should be sent */ 715 struct pid *pid; /* pid or -pgrp where SIGIO should be sent */
716 enum pid_type pid_type; /* Kind of process group SIGIO should be sent to */ 716 enum pid_type pid_type; /* Kind of process group SIGIO should be sent to */
717 kuid_t uid, euid; /* uid/euid of process setting the owner */ 717 kuid_t uid, euid; /* uid/euid of process setting the owner */
718 int signum; /* posix.1b rt signal to be delivered on IO */ 718 int signum; /* posix.1b rt signal to be delivered on IO */
719 }; 719 };
720 720
721 /* 721 /*
722 * Track a single file's readahead state 722 * Track a single file's readahead state
723 */ 723 */
724 struct file_ra_state { 724 struct file_ra_state {
725 pgoff_t start; /* where readahead started */ 725 pgoff_t start; /* where readahead started */
726 unsigned int size; /* # of readahead pages */ 726 unsigned int size; /* # of readahead pages */
727 unsigned int async_size; /* do asynchronous readahead when 727 unsigned int async_size; /* do asynchronous readahead when
728 there are only # of pages ahead */ 728 there are only # of pages ahead */
729 729
730 unsigned int ra_pages; /* Maximum readahead window */ 730 unsigned int ra_pages; /* Maximum readahead window */
731 unsigned int mmap_miss; /* Cache miss stat for mmap accesses */ 731 unsigned int mmap_miss; /* Cache miss stat for mmap accesses */
732 loff_t prev_pos; /* Cache last read() position */ 732 loff_t prev_pos; /* Cache last read() position */
733 }; 733 };
734 734
735 /* 735 /*
736 * Check if @index falls in the readahead window. 736 * Check if @index falls in the readahead window.
737 */ 737 */
738 static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) 738 static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
739 { 739 {
740 return (index >= ra->start && 740 return (index >= ra->start &&
741 index < ra->start + ra->size); 741 index < ra->start + ra->size);
742 } 742 }
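/*
 * Illustrative sketch only: a readahead heuristic might combine
 * ra_has_index() with the async_size margin to decide whether a new access
 * at @index is about to run off the end of the current window.  This helper
 * is made up for the example and is not the real readahead logic.
 */
static bool example_near_window_end(struct file_ra_state *ra, pgoff_t index)
{
	return ra_has_index(ra, index) &&
	       index >= ra->start + ra->size - ra->async_size;
}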
743 743
744 struct file { 744 struct file {
745 union { 745 union {
746 struct llist_node fu_llist; 746 struct llist_node fu_llist;
747 struct rcu_head fu_rcuhead; 747 struct rcu_head fu_rcuhead;
748 } f_u; 748 } f_u;
749 struct path f_path; 749 struct path f_path;
750 #define f_dentry f_path.dentry 750 #define f_dentry f_path.dentry
751 struct inode *f_inode; /* cached value */ 751 struct inode *f_inode; /* cached value */
752 const struct file_operations *f_op; 752 const struct file_operations *f_op;
753 753
754 /* 754 /*
755 * Protects f_ep_links, f_flags. 755 * Protects f_ep_links, f_flags.
756 * Must not be taken from IRQ context. 756 * Must not be taken from IRQ context.
757 */ 757 */
758 spinlock_t f_lock; 758 spinlock_t f_lock;
759 atomic_long_t f_count; 759 atomic_long_t f_count;
760 unsigned int f_flags; 760 unsigned int f_flags;
761 fmode_t f_mode; 761 fmode_t f_mode;
762 struct mutex f_pos_lock; 762 struct mutex f_pos_lock;
763 loff_t f_pos; 763 loff_t f_pos;
764 struct fown_struct f_owner; 764 struct fown_struct f_owner;
765 const struct cred *f_cred; 765 const struct cred *f_cred;
766 struct file_ra_state f_ra; 766 struct file_ra_state f_ra;
767 767
768 u64 f_version; 768 u64 f_version;
769 #ifdef CONFIG_SECURITY 769 #ifdef CONFIG_SECURITY
770 void *f_security; 770 void *f_security;
771 #endif 771 #endif
772 /* needed for tty driver, and maybe others */ 772 /* needed for tty driver, and maybe others */
773 void *private_data; 773 void *private_data;
774 774
775 #ifdef CONFIG_EPOLL 775 #ifdef CONFIG_EPOLL
776 /* Used by fs/eventpoll.c to link all the hooks to this file */ 776 /* Used by fs/eventpoll.c to link all the hooks to this file */
777 struct list_head f_ep_links; 777 struct list_head f_ep_links;
778 struct list_head f_tfile_llink; 778 struct list_head f_tfile_llink;
779 #endif /* #ifdef CONFIG_EPOLL */ 779 #endif /* #ifdef CONFIG_EPOLL */
780 struct address_space *f_mapping; 780 struct address_space *f_mapping;
781 } __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ 781 } __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */
782 782
783 struct file_handle { 783 struct file_handle {
784 __u32 handle_bytes; 784 __u32 handle_bytes;
785 int handle_type; 785 int handle_type;
786 /* file identifier */ 786 /* file identifier */
787 unsigned char f_handle[0]; 787 unsigned char f_handle[0];
788 }; 788 };
789 789
790 static inline struct file *get_file(struct file *f) 790 static inline struct file *get_file(struct file *f)
791 { 791 {
792 atomic_long_inc(&f->f_count); 792 atomic_long_inc(&f->f_count);
793 return f; 793 return f;
794 } 794 }
795 #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) 795 #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1)
796 #define file_count(x) atomic_long_read(&(x)->f_count) 796 #define file_count(x) atomic_long_read(&(x)->f_count)
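/*
 * Illustrative sketch only: code that stashes a struct file pointer for
 * later use pins it with get_file() and drops the reference with fput()
 * (declared further down in this header).  The context structure and
 * function names are made up.
 */
struct example_ctx {
	struct file *filp;
};

static void example_ctx_set_file(struct example_ctx *ctx, struct file *f)
{
	ctx->filp = get_file(f);	/* take our own reference */
}

static void example_ctx_put_file(struct example_ctx *ctx)
{
	fput(ctx->filp);		/* drop it when done */
	ctx->filp = NULL;
}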
797 797
798 #define MAX_NON_LFS ((1UL<<31) - 1) 798 #define MAX_NON_LFS ((1UL<<31) - 1)
799 799
800 /* Page cache limit. Filesystems should put that into their s_maxbytes 800 /* Page cache limit. Filesystems should put that into their s_maxbytes
801 limits, otherwise bad things can happen in the VM. */ 801 limits, otherwise bad things can happen in the VM. */
802 #if BITS_PER_LONG==32 802 #if BITS_PER_LONG==32
803 #define MAX_LFS_FILESIZE (((loff_t)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) 803 #define MAX_LFS_FILESIZE (((loff_t)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
804 #elif BITS_PER_LONG==64 804 #elif BITS_PER_LONG==64
805 #define MAX_LFS_FILESIZE ((loff_t)0x7fffffffffffffffLL) 805 #define MAX_LFS_FILESIZE ((loff_t)0x7fffffffffffffffLL)
806 #endif 806 #endif
807 807
808 #define FL_POSIX 1 808 #define FL_POSIX 1
809 #define FL_FLOCK 2 809 #define FL_FLOCK 2
810 #define FL_DELEG 4 /* NFSv4 delegation */ 810 #define FL_DELEG 4 /* NFSv4 delegation */
811 #define FL_ACCESS 8 /* not trying to lock, just looking */ 811 #define FL_ACCESS 8 /* not trying to lock, just looking */
812 #define FL_EXISTS 16 /* when unlocking, test for existence */ 812 #define FL_EXISTS 16 /* when unlocking, test for existence */
813 #define FL_LEASE 32 /* lease held on this file */ 813 #define FL_LEASE 32 /* lease held on this file */
814 #define FL_CLOSE 64 /* unlock on close */ 814 #define FL_CLOSE 64 /* unlock on close */
815 #define FL_SLEEP 128 /* A blocking lock */ 815 #define FL_SLEEP 128 /* A blocking lock */
816 #define FL_DOWNGRADE_PENDING 256 /* Lease is being downgraded */ 816 #define FL_DOWNGRADE_PENDING 256 /* Lease is being downgraded */
817 #define FL_UNLOCK_PENDING 512 /* Lease is being broken */ 817 #define FL_UNLOCK_PENDING 512 /* Lease is being broken */
818 #define FL_FILE_PVT 1024 /* lock is private to the file */ 818 #define FL_OFDLCK 1024 /* lock is "owned" by struct file */
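/*
 * Userspace illustration (not part of this header) of what FL_OFDLCK backs:
 * "open file description" locks are requested with the F_OFD_* fcntl
 * commands renamed by this series, and l_pid must be 0 for them.  Assumes a
 * libc that exposes F_OFD_SETLKW when _GNU_SOURCE is defined.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int example_lock_whole_file_ofd(int fd)
{
	struct flock fl;

	memset(&fl, 0, sizeof(fl));
	fl.l_type = F_WRLCK;
	fl.l_whence = SEEK_SET;		/* l_start = l_len = 0: whole file */
	fl.l_pid = 0;			/* must be 0 for OFD locks */

	return fcntl(fd, F_OFD_SETLKW, &fl);	/* blocking acquire */
}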
819 819
820 /* 820 /*
821 * Special return value from posix_lock_file() and vfs_lock_file() for 821 * Special return value from posix_lock_file() and vfs_lock_file() for
822 * asynchronous locking. 822 * asynchronous locking.
823 */ 823 */
824 #define FILE_LOCK_DEFERRED 1 824 #define FILE_LOCK_DEFERRED 1
825 825
826 /* 826 /*
827 * The POSIX file lock owner is determined by 827 * The POSIX file lock owner is determined by
828 * the "struct files_struct" in the thread group 828 * the "struct files_struct" in the thread group
829 * (or NULL for no owner - BSD locks). 829 * (or NULL for no owner - BSD locks).
830 * 830 *
831 * Lockd stuffs a "host" pointer into this. 831 * Lockd stuffs a "host" pointer into this.
832 */ 832 */
833 typedef struct files_struct *fl_owner_t; 833 typedef struct files_struct *fl_owner_t;
834 834
835 struct file_lock_operations { 835 struct file_lock_operations {
836 void (*fl_copy_lock)(struct file_lock *, struct file_lock *); 836 void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
837 void (*fl_release_private)(struct file_lock *); 837 void (*fl_release_private)(struct file_lock *);
838 }; 838 };
839 839
840 struct lock_manager_operations { 840 struct lock_manager_operations {
841 int (*lm_compare_owner)(struct file_lock *, struct file_lock *); 841 int (*lm_compare_owner)(struct file_lock *, struct file_lock *);
842 unsigned long (*lm_owner_key)(struct file_lock *); 842 unsigned long (*lm_owner_key)(struct file_lock *);
843 void (*lm_notify)(struct file_lock *); /* unblock callback */ 843 void (*lm_notify)(struct file_lock *); /* unblock callback */
844 int (*lm_grant)(struct file_lock *, struct file_lock *, int); 844 int (*lm_grant)(struct file_lock *, struct file_lock *, int);
845 void (*lm_break)(struct file_lock *); 845 void (*lm_break)(struct file_lock *);
846 int (*lm_change)(struct file_lock **, int); 846 int (*lm_change)(struct file_lock **, int);
847 }; 847 };
848 848
849 struct lock_manager { 849 struct lock_manager {
850 struct list_head list; 850 struct list_head list;
851 }; 851 };
852 852
853 struct net; 853 struct net;
854 void locks_start_grace(struct net *, struct lock_manager *); 854 void locks_start_grace(struct net *, struct lock_manager *);
855 void locks_end_grace(struct lock_manager *); 855 void locks_end_grace(struct lock_manager *);
856 int locks_in_grace(struct net *); 856 int locks_in_grace(struct net *);
857 857
858 /* that will die - we need it for nfs_lock_info */ 858 /* that will die - we need it for nfs_lock_info */
859 #include <linux/nfs_fs_i.h> 859 #include <linux/nfs_fs_i.h>
860 860
861 /* 861 /*
862 * struct file_lock represents a generic "file lock". It's used to represent 862 * struct file_lock represents a generic "file lock". It's used to represent
863 * POSIX byte range locks, BSD (flock) locks, and leases. It's important to 863 * POSIX byte range locks, BSD (flock) locks, and leases. It's important to
864 * note that the same struct is used to represent both a request for a lock and 864 * note that the same struct is used to represent both a request for a lock and
865 * the lock itself, but the same object is never used for both. 865 * the lock itself, but the same object is never used for both.
866 * 866 *
867 * FIXME: should we create a separate "struct lock_request" to help distinguish 867 * FIXME: should we create a separate "struct lock_request" to help distinguish
868 * these two uses? 868 * these two uses?
869 * 869 *
870 * The i_flock list is ordered by: 870 * The i_flock list is ordered by:
871 * 871 *
872 * 1) lock type -- FL_LEASEs first, then FL_FLOCK, and finally FL_POSIX 872 * 1) lock type -- FL_LEASEs first, then FL_FLOCK, and finally FL_POSIX
873 * 2) lock owner 873 * 2) lock owner
874 * 3) lock range start 874 * 3) lock range start
875 * 4) lock range end 875 * 4) lock range end
876 * 876 *
877 * Obviously, the last two criteria only matter for POSIX locks. 877 * Obviously, the last two criteria only matter for POSIX locks.
878 */ 878 */
879 struct file_lock { 879 struct file_lock {
880 struct file_lock *fl_next; /* singly linked list for this inode */ 880 struct file_lock *fl_next; /* singly linked list for this inode */
881 struct hlist_node fl_link; /* node in global lists */ 881 struct hlist_node fl_link; /* node in global lists */
882 struct list_head fl_block; /* circular list of blocked processes */ 882 struct list_head fl_block; /* circular list of blocked processes */
883 fl_owner_t fl_owner; 883 fl_owner_t fl_owner;
884 unsigned int fl_flags; 884 unsigned int fl_flags;
885 unsigned char fl_type; 885 unsigned char fl_type;
886 unsigned int fl_pid; 886 unsigned int fl_pid;
887 int fl_link_cpu; /* what cpu's list is this on? */ 887 int fl_link_cpu; /* what cpu's list is this on? */
888 struct pid *fl_nspid; 888 struct pid *fl_nspid;
889 wait_queue_head_t fl_wait; 889 wait_queue_head_t fl_wait;
890 struct file *fl_file; 890 struct file *fl_file;
891 loff_t fl_start; 891 loff_t fl_start;
892 loff_t fl_end; 892 loff_t fl_end;
893 893
894 struct fasync_struct * fl_fasync; /* for lease break notifications */ 894 struct fasync_struct * fl_fasync; /* for lease break notifications */
895 /* for lease breaks: */ 895 /* for lease breaks: */
896 unsigned long fl_break_time; 896 unsigned long fl_break_time;
897 unsigned long fl_downgrade_time; 897 unsigned long fl_downgrade_time;
898 898
899 const struct file_lock_operations *fl_ops; /* Callbacks for filesystems */ 899 const struct file_lock_operations *fl_ops; /* Callbacks for filesystems */
900 const struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */ 900 const struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */
901 union { 901 union {
902 struct nfs_lock_info nfs_fl; 902 struct nfs_lock_info nfs_fl;
903 struct nfs4_lock_info nfs4_fl; 903 struct nfs4_lock_info nfs4_fl;
904 struct { 904 struct {
905 struct list_head link; /* link in AFS vnode's pending_locks list */ 905 struct list_head link; /* link in AFS vnode's pending_locks list */
906 int state; /* state of grant or error if -ve */ 906 int state; /* state of grant or error if -ve */
907 } afs; 907 } afs;
908 } fl_u; 908 } fl_u;
909 }; 909 };
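/*
 * Illustrative sketch only: i_flock is a singly linked list chained through
 * fl_next in the order documented above; fs/locks.c walks it under the
 * inode's i_lock.  The helper name is made up for this example.
 */
static bool example_inode_has_posix_locks(struct inode *inode)
{
	struct file_lock *fl;
	bool found = false;

	spin_lock(&inode->i_lock);
	for (fl = inode->i_flock; fl; fl = fl->fl_next) {
		if (fl->fl_flags & FL_POSIX) {
			found = true;
			break;
		}
	}
	spin_unlock(&inode->i_lock);
	return found;
}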
910 910
911 /* The following constant reflects the upper bound of the file/locking space */ 911 /* The following constant reflects the upper bound of the file/locking space */
912 #ifndef OFFSET_MAX 912 #ifndef OFFSET_MAX
913 #define INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1))) 913 #define INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1)))
914 #define OFFSET_MAX INT_LIMIT(loff_t) 914 #define OFFSET_MAX INT_LIMIT(loff_t)
915 #define OFFT_OFFSET_MAX INT_LIMIT(off_t) 915 #define OFFT_OFFSET_MAX INT_LIMIT(off_t)
916 #endif 916 #endif
917 917
918 #include <linux/fcntl.h> 918 #include <linux/fcntl.h>
919 919
920 extern void send_sigio(struct fown_struct *fown, int fd, int band); 920 extern void send_sigio(struct fown_struct *fown, int fd, int band);
921 921
922 #ifdef CONFIG_FILE_LOCKING 922 #ifdef CONFIG_FILE_LOCKING
923 extern int fcntl_getlk(struct file *, unsigned int, struct flock __user *); 923 extern int fcntl_getlk(struct file *, unsigned int, struct flock __user *);
924 extern int fcntl_setlk(unsigned int, struct file *, unsigned int, 924 extern int fcntl_setlk(unsigned int, struct file *, unsigned int,
925 struct flock __user *); 925 struct flock __user *);
926 926
927 #if BITS_PER_LONG == 32 927 #if BITS_PER_LONG == 32
928 extern int fcntl_getlk64(struct file *, unsigned int, struct flock64 __user *); 928 extern int fcntl_getlk64(struct file *, unsigned int, struct flock64 __user *);
929 extern int fcntl_setlk64(unsigned int, struct file *, unsigned int, 929 extern int fcntl_setlk64(unsigned int, struct file *, unsigned int,
930 struct flock64 __user *); 930 struct flock64 __user *);
931 #endif 931 #endif
932 932
933 extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg); 933 extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
934 extern int fcntl_getlease(struct file *filp); 934 extern int fcntl_getlease(struct file *filp);
935 935
936 /* fs/locks.c */ 936 /* fs/locks.c */
937 void locks_free_lock(struct file_lock *fl); 937 void locks_free_lock(struct file_lock *fl);
938 extern void locks_init_lock(struct file_lock *); 938 extern void locks_init_lock(struct file_lock *);
939 extern struct file_lock * locks_alloc_lock(void); 939 extern struct file_lock * locks_alloc_lock(void);
940 extern void locks_copy_lock(struct file_lock *, struct file_lock *); 940 extern void locks_copy_lock(struct file_lock *, struct file_lock *);
941 extern void __locks_copy_lock(struct file_lock *, const struct file_lock *); 941 extern void __locks_copy_lock(struct file_lock *, const struct file_lock *);
942 extern void locks_remove_posix(struct file *, fl_owner_t); 942 extern void locks_remove_posix(struct file *, fl_owner_t);
943 extern void locks_remove_file(struct file *); 943 extern void locks_remove_file(struct file *);
944 extern void locks_release_private(struct file_lock *); 944 extern void locks_release_private(struct file_lock *);
945 extern void posix_test_lock(struct file *, struct file_lock *); 945 extern void posix_test_lock(struct file *, struct file_lock *);
946 extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); 946 extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *);
947 extern int posix_lock_file_wait(struct file *, struct file_lock *); 947 extern int posix_lock_file_wait(struct file *, struct file_lock *);
948 extern int posix_unblock_lock(struct file_lock *); 948 extern int posix_unblock_lock(struct file_lock *);
949 extern int vfs_test_lock(struct file *, struct file_lock *); 949 extern int vfs_test_lock(struct file *, struct file_lock *);
950 extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); 950 extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *);
951 extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); 951 extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl);
952 extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); 952 extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl);
953 extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); 953 extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type);
954 extern void lease_get_mtime(struct inode *, struct timespec *time); 954 extern void lease_get_mtime(struct inode *, struct timespec *time);
955 extern int generic_setlease(struct file *, long, struct file_lock **); 955 extern int generic_setlease(struct file *, long, struct file_lock **);
956 extern int vfs_setlease(struct file *, long, struct file_lock **); 956 extern int vfs_setlease(struct file *, long, struct file_lock **);
957 extern int lease_modify(struct file_lock **, int); 957 extern int lease_modify(struct file_lock **, int);
958 extern int lock_may_read(struct inode *, loff_t start, unsigned long count); 958 extern int lock_may_read(struct inode *, loff_t start, unsigned long count);
959 extern int lock_may_write(struct inode *, loff_t start, unsigned long count); 959 extern int lock_may_write(struct inode *, loff_t start, unsigned long count);
960 #else /* !CONFIG_FILE_LOCKING */ 960 #else /* !CONFIG_FILE_LOCKING */
961 static inline int fcntl_getlk(struct file *file, unsigned int cmd, 961 static inline int fcntl_getlk(struct file *file, unsigned int cmd,
962 struct flock __user *user) 962 struct flock __user *user)
963 { 963 {
964 return -EINVAL; 964 return -EINVAL;
965 } 965 }
966 966
967 static inline int fcntl_setlk(unsigned int fd, struct file *file, 967 static inline int fcntl_setlk(unsigned int fd, struct file *file,
968 unsigned int cmd, struct flock __user *user) 968 unsigned int cmd, struct flock __user *user)
969 { 969 {
970 return -EACCES; 970 return -EACCES;
971 } 971 }
972 972
973 #if BITS_PER_LONG == 32 973 #if BITS_PER_LONG == 32
974 static inline int fcntl_getlk64(struct file *file, unsigned int cmd, 974 static inline int fcntl_getlk64(struct file *file, unsigned int cmd,
975 struct flock64 __user *user) 975 struct flock64 __user *user)
976 { 976 {
977 return -EINVAL; 977 return -EINVAL;
978 } 978 }
979 979
980 static inline int fcntl_setlk64(unsigned int fd, struct file *file, 980 static inline int fcntl_setlk64(unsigned int fd, struct file *file,
981 unsigned int cmd, struct flock64 __user *user) 981 unsigned int cmd, struct flock64 __user *user)
982 { 982 {
983 return -EACCES; 983 return -EACCES;
984 } 984 }
985 #endif 985 #endif
986 static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg) 986 static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
987 { 987 {
988 return 0; 988 return 0;
989 } 989 }
990 990
991 static inline int fcntl_getlease(struct file *filp) 991 static inline int fcntl_getlease(struct file *filp)
992 { 992 {
993 return 0; 993 return 0;
994 } 994 }
995 995
996 static inline void locks_init_lock(struct file_lock *fl) 996 static inline void locks_init_lock(struct file_lock *fl)
997 { 997 {
998 return; 998 return;
999 } 999 }
1000 1000
1001 static inline void __locks_copy_lock(struct file_lock *new, struct file_lock *fl) 1001 static inline void __locks_copy_lock(struct file_lock *new, struct file_lock *fl)
1002 { 1002 {
1003 return; 1003 return;
1004 } 1004 }
1005 1005
1006 static inline void locks_copy_lock(struct file_lock *new, struct file_lock *fl) 1006 static inline void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
1007 { 1007 {
1008 return; 1008 return;
1009 } 1009 }
1010 1010
1011 static inline void locks_remove_posix(struct file *filp, fl_owner_t owner) 1011 static inline void locks_remove_posix(struct file *filp, fl_owner_t owner)
1012 { 1012 {
1013 return; 1013 return;
1014 } 1014 }
1015 1015
1016 static inline void locks_remove_file(struct file *filp) 1016 static inline void locks_remove_file(struct file *filp)
1017 { 1017 {
1018 return; 1018 return;
1019 } 1019 }
1020 1020
1021 static inline void posix_test_lock(struct file *filp, struct file_lock *fl) 1021 static inline void posix_test_lock(struct file *filp, struct file_lock *fl)
1022 { 1022 {
1023 return; 1023 return;
1024 } 1024 }
1025 1025
1026 static inline int posix_lock_file(struct file *filp, struct file_lock *fl, 1026 static inline int posix_lock_file(struct file *filp, struct file_lock *fl,
1027 struct file_lock *conflock) 1027 struct file_lock *conflock)
1028 { 1028 {
1029 return -ENOLCK; 1029 return -ENOLCK;
1030 } 1030 }
1031 1031
1032 static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) 1032 static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl)
1033 { 1033 {
1034 return -ENOLCK; 1034 return -ENOLCK;
1035 } 1035 }
1036 1036
1037 static inline int posix_unblock_lock(struct file_lock *waiter) 1037 static inline int posix_unblock_lock(struct file_lock *waiter)
1038 { 1038 {
1039 return -ENOENT; 1039 return -ENOENT;
1040 } 1040 }
1041 1041
1042 static inline int vfs_test_lock(struct file *filp, struct file_lock *fl) 1042 static inline int vfs_test_lock(struct file *filp, struct file_lock *fl)
1043 { 1043 {
1044 return 0; 1044 return 0;
1045 } 1045 }
1046 1046
1047 static inline int vfs_lock_file(struct file *filp, unsigned int cmd, 1047 static inline int vfs_lock_file(struct file *filp, unsigned int cmd,
1048 struct file_lock *fl, struct file_lock *conf) 1048 struct file_lock *fl, struct file_lock *conf)
1049 { 1049 {
1050 return -ENOLCK; 1050 return -ENOLCK;
1051 } 1051 }
1052 1052
1053 static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl) 1053 static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
1054 { 1054 {
1055 return 0; 1055 return 0;
1056 } 1056 }
1057 1057
1058 static inline int flock_lock_file_wait(struct file *filp, 1058 static inline int flock_lock_file_wait(struct file *filp,
1059 struct file_lock *request) 1059 struct file_lock *request)
1060 { 1060 {
1061 return -ENOLCK; 1061 return -ENOLCK;
1062 } 1062 }
1063 1063
1064 static inline int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) 1064 static inline int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
1065 { 1065 {
1066 return 0; 1066 return 0;
1067 } 1067 }
1068 1068
1069 static inline void lease_get_mtime(struct inode *inode, struct timespec *time) 1069 static inline void lease_get_mtime(struct inode *inode, struct timespec *time)
1070 { 1070 {
1071 return; 1071 return;
1072 } 1072 }
1073 1073
1074 static inline int generic_setlease(struct file *filp, long arg, 1074 static inline int generic_setlease(struct file *filp, long arg,
1075 struct file_lock **flp) 1075 struct file_lock **flp)
1076 { 1076 {
1077 return -EINVAL; 1077 return -EINVAL;
1078 } 1078 }
1079 1079
1080 static inline int vfs_setlease(struct file *filp, long arg, 1080 static inline int vfs_setlease(struct file *filp, long arg,
1081 struct file_lock **lease) 1081 struct file_lock **lease)
1082 { 1082 {
1083 return -EINVAL; 1083 return -EINVAL;
1084 } 1084 }
1085 1085
1086 static inline int lease_modify(struct file_lock **before, int arg) 1086 static inline int lease_modify(struct file_lock **before, int arg)
1087 { 1087 {
1088 return -EINVAL; 1088 return -EINVAL;
1089 } 1089 }
1090 1090
1091 static inline int lock_may_read(struct inode *inode, loff_t start, 1091 static inline int lock_may_read(struct inode *inode, loff_t start,
1092 unsigned long len) 1092 unsigned long len)
1093 { 1093 {
1094 return 1; 1094 return 1;
1095 } 1095 }
1096 1096
1097 static inline int lock_may_write(struct inode *inode, loff_t start, 1097 static inline int lock_may_write(struct inode *inode, loff_t start,
1098 unsigned long len) 1098 unsigned long len)
1099 { 1099 {
1100 return 1; 1100 return 1;
1101 } 1101 }
1102 #endif /* !CONFIG_FILE_LOCKING */ 1102 #endif /* !CONFIG_FILE_LOCKING */
1103 1103
1104 1104
1105 struct fasync_struct { 1105 struct fasync_struct {
1106 spinlock_t fa_lock; 1106 spinlock_t fa_lock;
1107 int magic; 1107 int magic;
1108 int fa_fd; 1108 int fa_fd;
1109 struct fasync_struct *fa_next; /* singly linked list */ 1109 struct fasync_struct *fa_next; /* singly linked list */
1110 struct file *fa_file; 1110 struct file *fa_file;
1111 struct rcu_head fa_rcu; 1111 struct rcu_head fa_rcu;
1112 }; 1112 };
1113 1113
1114 #define FASYNC_MAGIC 0x4601 1114 #define FASYNC_MAGIC 0x4601
1115 1115
1116 /* SMP safe fasync helpers: */ 1116 /* SMP safe fasync helpers: */
1117 extern int fasync_helper(int, struct file *, int, struct fasync_struct **); 1117 extern int fasync_helper(int, struct file *, int, struct fasync_struct **);
1118 extern struct fasync_struct *fasync_insert_entry(int, struct file *, struct fasync_struct **, struct fasync_struct *); 1118 extern struct fasync_struct *fasync_insert_entry(int, struct file *, struct fasync_struct **, struct fasync_struct *);
1119 extern int fasync_remove_entry(struct file *, struct fasync_struct **); 1119 extern int fasync_remove_entry(struct file *, struct fasync_struct **);
1120 extern struct fasync_struct *fasync_alloc(void); 1120 extern struct fasync_struct *fasync_alloc(void);
1121 extern void fasync_free(struct fasync_struct *); 1121 extern void fasync_free(struct fasync_struct *);
1122 1122
1123 /* can be called from interrupts */ 1123 /* can be called from interrupts */
1124 extern void kill_fasync(struct fasync_struct **, int, int); 1124 extern void kill_fasync(struct fasync_struct **, int, int);
1125 1125
1126 extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force); 1126 extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
1127 extern int f_setown(struct file *filp, unsigned long arg, int force); 1127 extern int f_setown(struct file *filp, unsigned long arg, int force);
1128 extern void f_delown(struct file *filp); 1128 extern void f_delown(struct file *filp);
1129 extern pid_t f_getown(struct file *filp); 1129 extern pid_t f_getown(struct file *filp);
1130 extern int send_sigurg(struct fown_struct *fown); 1130 extern int send_sigurg(struct fown_struct *fown);
1131 1131
1132 struct mm_struct; 1132 struct mm_struct;
1133 1133
1134 /* 1134 /*
1135 * Umount options 1135 * Umount options
1136 */ 1136 */
1137 1137
1138 #define MNT_FORCE 0x00000001 /* Attempt to forcibly umount */ 1138 #define MNT_FORCE 0x00000001 /* Attempt to forcibly umount */
1139 #define MNT_DETACH 0x00000002 /* Just detach from the tree */ 1139 #define MNT_DETACH 0x00000002 /* Just detach from the tree */
1140 #define MNT_EXPIRE 0x00000004 /* Mark for expiry */ 1140 #define MNT_EXPIRE 0x00000004 /* Mark for expiry */
1141 #define UMOUNT_NOFOLLOW 0x00000008 /* Don't follow symlink on umount */ 1141 #define UMOUNT_NOFOLLOW 0x00000008 /* Don't follow symlink on umount */
1142 #define UMOUNT_UNUSED 0x80000000 /* Flag guaranteed to be unused */ 1142 #define UMOUNT_UNUSED 0x80000000 /* Flag guaranteed to be unused */
1143 1143
1144 extern struct list_head super_blocks; 1144 extern struct list_head super_blocks;
1145 extern spinlock_t sb_lock; 1145 extern spinlock_t sb_lock;
1146 1146
1147 /* Possible states of 'frozen' field */ 1147 /* Possible states of 'frozen' field */
1148 enum { 1148 enum {
1149 SB_UNFROZEN = 0, /* FS is unfrozen */ 1149 SB_UNFROZEN = 0, /* FS is unfrozen */
1150 SB_FREEZE_WRITE = 1, /* Writes, dir ops, ioctls frozen */ 1150 SB_FREEZE_WRITE = 1, /* Writes, dir ops, ioctls frozen */
1151 SB_FREEZE_PAGEFAULT = 2, /* Page faults stopped as well */ 1151 SB_FREEZE_PAGEFAULT = 2, /* Page faults stopped as well */
1152 SB_FREEZE_FS = 3, /* For internal FS use (e.g. to stop 1152 SB_FREEZE_FS = 3, /* For internal FS use (e.g. to stop
1153 * internal threads if needed) */ 1153 * internal threads if needed) */
1154 SB_FREEZE_COMPLETE = 4, /* ->freeze_fs finished successfully */ 1154 SB_FREEZE_COMPLETE = 4, /* ->freeze_fs finished successfully */
1155 }; 1155 };
1156 1156
1157 #define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1) 1157 #define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1)
1158 1158
1159 struct sb_writers { 1159 struct sb_writers {
1160 /* Counters for counting writers at each level */ 1160 /* Counters for counting writers at each level */
1161 struct percpu_counter counter[SB_FREEZE_LEVELS]; 1161 struct percpu_counter counter[SB_FREEZE_LEVELS];
1162 wait_queue_head_t wait; /* queue for waiting for 1162 wait_queue_head_t wait; /* queue for waiting for
1163 writers / faults to finish */ 1163 writers / faults to finish */
1164 int frozen; /* Is sb frozen? */ 1164 int frozen; /* Is sb frozen? */
1165 wait_queue_head_t wait_unfrozen; /* queue for waiting for 1165 wait_queue_head_t wait_unfrozen; /* queue for waiting for
1166 sb to be thawed */ 1166 sb to be thawed */
1167 #ifdef CONFIG_DEBUG_LOCK_ALLOC 1167 #ifdef CONFIG_DEBUG_LOCK_ALLOC
1168 struct lockdep_map lock_map[SB_FREEZE_LEVELS]; 1168 struct lockdep_map lock_map[SB_FREEZE_LEVELS];
1169 #endif 1169 #endif
1170 }; 1170 };
1171 1171
1172 struct super_block { 1172 struct super_block {
1173 struct list_head s_list; /* Keep this first */ 1173 struct list_head s_list; /* Keep this first */
1174 dev_t s_dev; /* search index; _not_ kdev_t */ 1174 dev_t s_dev; /* search index; _not_ kdev_t */
1175 unsigned char s_blocksize_bits; 1175 unsigned char s_blocksize_bits;
1176 unsigned long s_blocksize; 1176 unsigned long s_blocksize;
1177 loff_t s_maxbytes; /* Max file size */ 1177 loff_t s_maxbytes; /* Max file size */
1178 struct file_system_type *s_type; 1178 struct file_system_type *s_type;
1179 const struct super_operations *s_op; 1179 const struct super_operations *s_op;
1180 const struct dquot_operations *dq_op; 1180 const struct dquot_operations *dq_op;
1181 const struct quotactl_ops *s_qcop; 1181 const struct quotactl_ops *s_qcop;
1182 const struct export_operations *s_export_op; 1182 const struct export_operations *s_export_op;
1183 unsigned long s_flags; 1183 unsigned long s_flags;
1184 unsigned long s_magic; 1184 unsigned long s_magic;
1185 struct dentry *s_root; 1185 struct dentry *s_root;
1186 struct rw_semaphore s_umount; 1186 struct rw_semaphore s_umount;
1187 int s_count; 1187 int s_count;
1188 atomic_t s_active; 1188 atomic_t s_active;
1189 #ifdef CONFIG_SECURITY 1189 #ifdef CONFIG_SECURITY
1190 void *s_security; 1190 void *s_security;
1191 #endif 1191 #endif
1192 const struct xattr_handler **s_xattr; 1192 const struct xattr_handler **s_xattr;
1193 1193
1194 struct list_head s_inodes; /* all inodes */ 1194 struct list_head s_inodes; /* all inodes */
1195 struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */ 1195 struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */
1196 struct list_head s_mounts; /* list of mounts; _not_ for fs use */ 1196 struct list_head s_mounts; /* list of mounts; _not_ for fs use */
1197 struct block_device *s_bdev; 1197 struct block_device *s_bdev;
1198 struct backing_dev_info *s_bdi; 1198 struct backing_dev_info *s_bdi;
1199 struct mtd_info *s_mtd; 1199 struct mtd_info *s_mtd;
1200 struct hlist_node s_instances; 1200 struct hlist_node s_instances;
1201 struct quota_info s_dquot; /* Diskquota specific options */ 1201 struct quota_info s_dquot; /* Diskquota specific options */
1202 1202
1203 struct sb_writers s_writers; 1203 struct sb_writers s_writers;
1204 1204
1205 char s_id[32]; /* Informational name */ 1205 char s_id[32]; /* Informational name */
1206 u8 s_uuid[16]; /* UUID */ 1206 u8 s_uuid[16]; /* UUID */
1207 1207
1208 void *s_fs_info; /* Filesystem private info */ 1208 void *s_fs_info; /* Filesystem private info */
1209 unsigned int s_max_links; 1209 unsigned int s_max_links;
1210 fmode_t s_mode; 1210 fmode_t s_mode;
1211 1211
1212 /* Granularity of c/m/atime in ns. 1212 /* Granularity of c/m/atime in ns.
1213 Cannot be worse than a second */ 1213 Cannot be worse than a second */
1214 u32 s_time_gran; 1214 u32 s_time_gran;
1215 1215
1216 /* 1216 /*
1217 * The next field is for VFS *only*. No filesystems have any business 1217 * The next field is for VFS *only*. No filesystems have any business
1218 * even looking at it. You have been warned. 1218 * even looking at it. You have been warned.
1219 */ 1219 */
1220 struct mutex s_vfs_rename_mutex; /* Kludge */ 1220 struct mutex s_vfs_rename_mutex; /* Kludge */
1221 1221
1222 /* 1222 /*
1223 * Filesystem subtype. If non-empty the filesystem type field 1223 * Filesystem subtype. If non-empty the filesystem type field
1224 * in /proc/mounts will be "type.subtype" 1224 * in /proc/mounts will be "type.subtype"
1225 */ 1225 */
1226 char *s_subtype; 1226 char *s_subtype;
1227 1227
1228 /* 1228 /*
1229 * Saved mount options for lazy filesystems using 1229 * Saved mount options for lazy filesystems using
1230 * generic_show_options() 1230 * generic_show_options()
1231 */ 1231 */
1232 char __rcu *s_options; 1232 char __rcu *s_options;
1233 const struct dentry_operations *s_d_op; /* default d_op for dentries */ 1233 const struct dentry_operations *s_d_op; /* default d_op for dentries */
1234 1234
1235 /* 1235 /*
1236 * Saved pool identifier for cleancache (-1 means none) 1236 * Saved pool identifier for cleancache (-1 means none)
1237 */ 1237 */
1238 int cleancache_poolid; 1238 int cleancache_poolid;
1239 1239
1240 struct shrinker s_shrink; /* per-sb shrinker handle */ 1240 struct shrinker s_shrink; /* per-sb shrinker handle */
1241 1241
1242 /* Number of inodes with nlink == 0 but still referenced */ 1242 /* Number of inodes with nlink == 0 but still referenced */
1243 atomic_long_t s_remove_count; 1243 atomic_long_t s_remove_count;
1244 1244
1245 /* Being remounted read-only */ 1245 /* Being remounted read-only */
1246 int s_readonly_remount; 1246 int s_readonly_remount;
1247 1247
1248 /* AIO completions deferred from interrupt context */ 1248 /* AIO completions deferred from interrupt context */
1249 struct workqueue_struct *s_dio_done_wq; 1249 struct workqueue_struct *s_dio_done_wq;
1250 1250
1251 /* 1251 /*
1252 * Keep the lru lists last in the structure so they always sit on their 1252 * Keep the lru lists last in the structure so they always sit on their
1253 * own individual cachelines. 1253 * own individual cachelines.
1254 */ 1254 */
1255 struct list_lru s_dentry_lru ____cacheline_aligned_in_smp; 1255 struct list_lru s_dentry_lru ____cacheline_aligned_in_smp;
1256 struct list_lru s_inode_lru ____cacheline_aligned_in_smp; 1256 struct list_lru s_inode_lru ____cacheline_aligned_in_smp;
1257 struct rcu_head rcu; 1257 struct rcu_head rcu;
1258 }; 1258 };
1259 1259
1260 extern struct timespec current_fs_time(struct super_block *sb); 1260 extern struct timespec current_fs_time(struct super_block *sb);
1261 1261
1262 /* 1262 /*
1263 * Snapshotting support. 1263 * Snapshotting support.
1264 */ 1264 */
1265 1265
1266 void __sb_end_write(struct super_block *sb, int level); 1266 void __sb_end_write(struct super_block *sb, int level);
1267 int __sb_start_write(struct super_block *sb, int level, bool wait); 1267 int __sb_start_write(struct super_block *sb, int level, bool wait);
1268 1268
1269 /** 1269 /**
1270 * sb_end_write - drop write access to a superblock 1270 * sb_end_write - drop write access to a superblock
1271 * @sb: the super we wrote to 1271 * @sb: the super we wrote to
1272 * 1272 *
1273 * Decrement number of writers to the filesystem. Wake up possible waiters 1273 * Decrement number of writers to the filesystem. Wake up possible waiters
1274 * wanting to freeze the filesystem. 1274 * wanting to freeze the filesystem.
1275 */ 1275 */
1276 static inline void sb_end_write(struct super_block *sb) 1276 static inline void sb_end_write(struct super_block *sb)
1277 { 1277 {
1278 __sb_end_write(sb, SB_FREEZE_WRITE); 1278 __sb_end_write(sb, SB_FREEZE_WRITE);
1279 } 1279 }
1280 1280
1281 /** 1281 /**
1282 * sb_end_pagefault - drop write access to a superblock from a page fault 1282 * sb_end_pagefault - drop write access to a superblock from a page fault
1283 * @sb: the super we wrote to 1283 * @sb: the super we wrote to
1284 * 1284 *
1285 * Decrement number of processes handling write page fault to the filesystem. 1285 * Decrement number of processes handling write page fault to the filesystem.
1286 * Wake up possible waiters wanting to freeze the filesystem. 1286 * Wake up possible waiters wanting to freeze the filesystem.
1287 */ 1287 */
1288 static inline void sb_end_pagefault(struct super_block *sb) 1288 static inline void sb_end_pagefault(struct super_block *sb)
1289 { 1289 {
1290 __sb_end_write(sb, SB_FREEZE_PAGEFAULT); 1290 __sb_end_write(sb, SB_FREEZE_PAGEFAULT);
1291 } 1291 }
1292 1292
1293 /** 1293 /**
1294 * sb_end_intwrite - drop write access to a superblock for internal fs purposes 1294 * sb_end_intwrite - drop write access to a superblock for internal fs purposes
1295 * @sb: the super we wrote to 1295 * @sb: the super we wrote to
1296 * 1296 *
1297 * Decrement fs-internal number of writers to the filesystem. Wake up possible 1297 * Decrement fs-internal number of writers to the filesystem. Wake up possible
1298 * waiters wanting to freeze the filesystem. 1298 * waiters wanting to freeze the filesystem.
1299 */ 1299 */
1300 static inline void sb_end_intwrite(struct super_block *sb) 1300 static inline void sb_end_intwrite(struct super_block *sb)
1301 { 1301 {
1302 __sb_end_write(sb, SB_FREEZE_FS); 1302 __sb_end_write(sb, SB_FREEZE_FS);
1303 } 1303 }
1304 1304
1305 /** 1305 /**
1306 * sb_start_write - get write access to a superblock 1306 * sb_start_write - get write access to a superblock
1307 * @sb: the super we write to 1307 * @sb: the super we write to
1308 * 1308 *
1309 * When a process wants to write data or metadata to a file system (i.e. dirty 1309 * When a process wants to write data or metadata to a file system (i.e. dirty
1310 * a page or an inode), it should embed the operation in a sb_start_write() - 1310 * a page or an inode), it should embed the operation in a sb_start_write() -
1311 * sb_end_write() pair to get exclusion against file system freezing. This 1311 * sb_end_write() pair to get exclusion against file system freezing. This
1312 * function increments number of writers preventing freezing. If the file 1312 * function increments number of writers preventing freezing. If the file
1313 * system is already frozen, the function waits until the file system is 1313 * system is already frozen, the function waits until the file system is
1314 * thawed. 1314 * thawed.
1315 * 1315 *
1316 * Since freeze protection behaves as a lock, users have to preserve 1316 * Since freeze protection behaves as a lock, users have to preserve
1317 * ordering of freeze protection and other filesystem locks. Generally, 1317 * ordering of freeze protection and other filesystem locks. Generally,
1318 * freeze protection should be the outermost lock. In particular, we have: 1318 * freeze protection should be the outermost lock. In particular, we have:
1319 * 1319 *
1320 * sb_start_write 1320 * sb_start_write
1321 * -> i_mutex (write path, truncate, directory ops, ...) 1321 * -> i_mutex (write path, truncate, directory ops, ...)
1322 * -> s_umount (freeze_super, thaw_super) 1322 * -> s_umount (freeze_super, thaw_super)
1323 */ 1323 */
1324 static inline void sb_start_write(struct super_block *sb) 1324 static inline void sb_start_write(struct super_block *sb)
1325 { 1325 {
1326 __sb_start_write(sb, SB_FREEZE_WRITE, true); 1326 __sb_start_write(sb, SB_FREEZE_WRITE, true);
1327 } 1327 }
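/*
 * Illustrative sketch only: a path that is about to dirty data or metadata
 * brackets the modification with the freeze-protection pair described
 * above.  example_write_op() is made up; the real work is elided.
 */
static int example_write_op(struct inode *inode)
{
	int ret = 0;

	sb_start_write(inode->i_sb);
	/* ... dirty pages and/or metadata here ... */
	sb_end_write(inode->i_sb);
	return ret;
}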
1328 1328
1329 static inline int sb_start_write_trylock(struct super_block *sb) 1329 static inline int sb_start_write_trylock(struct super_block *sb)
1330 { 1330 {
1331 return __sb_start_write(sb, SB_FREEZE_WRITE, false); 1331 return __sb_start_write(sb, SB_FREEZE_WRITE, false);
1332 } 1332 }
1333 1333
1334 /** 1334 /**
1335 * sb_start_pagefault - get write access to a superblock from a page fault 1335 * sb_start_pagefault - get write access to a superblock from a page fault
1336 * @sb: the super we write to 1336 * @sb: the super we write to
1337 * 1337 *
1338 * When a process starts handling write page fault, it should embed the 1338 * When a process starts handling write page fault, it should embed the
1339 * operation into sb_start_pagefault() - sb_end_pagefault() pair to get 1339 * operation into sb_start_pagefault() - sb_end_pagefault() pair to get
1340 * exclusion against file system freezing. This is needed since the page fault 1340 * exclusion against file system freezing. This is needed since the page fault
1341 * is going to dirty a page. This function increments number of running page 1341 * is going to dirty a page. This function increments number of running page
1342 * faults preventing freezing. If the file system is already frozen, the 1342 * faults preventing freezing. If the file system is already frozen, the
1343 * function waits until the file system is thawed. 1343 * function waits until the file system is thawed.
1344 * 1344 *
1345 * Since page fault freeze protection behaves as a lock, users have to preserve 1345 * Since page fault freeze protection behaves as a lock, users have to preserve
1346 * ordering of freeze protection and other filesystem locks. It is advised to 1346 * ordering of freeze protection and other filesystem locks. It is advised to
1347 * put sb_start_pagefault() close to mmap_sem in lock ordering. Page fault 1347 * put sb_start_pagefault() close to mmap_sem in lock ordering. Page fault
1348 * handling code implies lock dependency: 1348 * handling code implies lock dependency:
1349 * 1349 *
1350 * mmap_sem 1350 * mmap_sem
1351 * -> sb_start_pagefault 1351 * -> sb_start_pagefault
1352 */ 1352 */
1353 static inline void sb_start_pagefault(struct super_block *sb) 1353 static inline void sb_start_pagefault(struct super_block *sb)
1354 { 1354 {
1355 __sb_start_write(sb, SB_FREEZE_PAGEFAULT, true); 1355 __sb_start_write(sb, SB_FREEZE_PAGEFAULT, true);
1356 } 1356 }
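/*
 * Illustrative sketch only: a ->page_mkwrite() handler takes page-fault
 * freeze protection around the part that dirties the page, under the
 * mmap_sem -> sb_start_pagefault ordering noted above.  The handler name is
 * made up and the page-dirtying step is elided.
 */
static int example_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct super_block *sb = file_inode(vma->vm_file)->i_sb;
	int ret;

	sb_start_pagefault(sb);
	/* lock and dirty the page here, e.g. via block_page_mkwrite() */
	ret = VM_FAULT_LOCKED;
	sb_end_pagefault(sb);
	return ret;
}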
1357 1357
1358 /* 1358 /*
1359 * sb_start_intwrite - get write access to a superblock for internal fs purposes 1359 * sb_start_intwrite - get write access to a superblock for internal fs purposes
1360 * @sb: the super we write to 1360 * @sb: the super we write to
1361 * 1361 *
1362 * This is the third level of protection against filesystem freezing. It is 1362 * This is the third level of protection against filesystem freezing. It is
1363 * free for use by a filesystem. The only requirement is that it must rank 1363 * free for use by a filesystem. The only requirement is that it must rank
1364 * below sb_start_pagefault. 1364 * below sb_start_pagefault.
1365 * 1365 *
1366 * For example, a filesystem can call sb_start_intwrite() when starting a 1366 * For example, a filesystem can call sb_start_intwrite() when starting a
1367 * transaction, which somewhat eases handling of freezing for internal sources 1367 * transaction, which somewhat eases handling of freezing for internal sources
1368 * of filesystem changes (internal fs threads, discarding preallocation on file 1368 * of filesystem changes (internal fs threads, discarding preallocation on file
1369 * close, etc.). 1369 * close, etc.).
1370 */ 1370 */
1371 static inline void sb_start_intwrite(struct super_block *sb) 1371 static inline void sb_start_intwrite(struct super_block *sb)
1372 { 1372 {
1373 __sb_start_write(sb, SB_FREEZE_FS, true); 1373 __sb_start_write(sb, SB_FREEZE_FS, true);
1374 } 1374 }
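
A minimal sketch of the internal-writer case mentioned above; example_internal_commit() is a hypothetical helper, and the point is simply the pairing with sb_end_intwrite():

static void example_internal_commit(struct super_block *sb)
{
	sb_start_intwrite(sb);		/* freeze_super() will wait for this writer */
	/* ... start and commit an internal transaction here ... */
	sb_end_intwrite(sb);
}
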
1375 1375
1376 1376
1377 extern bool inode_owner_or_capable(const struct inode *inode); 1377 extern bool inode_owner_or_capable(const struct inode *inode);
1378 1378
1379 /* 1379 /*
1380 * VFS helper functions. 1380 * VFS helper functions.
1381 */ 1381 */
1382 extern int vfs_create(struct inode *, struct dentry *, umode_t, bool); 1382 extern int vfs_create(struct inode *, struct dentry *, umode_t, bool);
1383 extern int vfs_mkdir(struct inode *, struct dentry *, umode_t); 1383 extern int vfs_mkdir(struct inode *, struct dentry *, umode_t);
1384 extern int vfs_mknod(struct inode *, struct dentry *, umode_t, dev_t); 1384 extern int vfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
1385 extern int vfs_symlink(struct inode *, struct dentry *, const char *); 1385 extern int vfs_symlink(struct inode *, struct dentry *, const char *);
1386 extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct inode **); 1386 extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct inode **);
1387 extern int vfs_rmdir(struct inode *, struct dentry *); 1387 extern int vfs_rmdir(struct inode *, struct dentry *);
1388 extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); 1388 extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
1389 extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); 1389 extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int);
1390 1390
1391 /* 1391 /*
1392 * VFS dentry helper functions. 1392 * VFS dentry helper functions.
1393 */ 1393 */
1394 extern void dentry_unhash(struct dentry *dentry); 1394 extern void dentry_unhash(struct dentry *dentry);
1395 1395
1396 /* 1396 /*
1397 * VFS file helper functions. 1397 * VFS file helper functions.
1398 */ 1398 */
1399 extern void inode_init_owner(struct inode *inode, const struct inode *dir, 1399 extern void inode_init_owner(struct inode *inode, const struct inode *dir,
1400 umode_t mode); 1400 umode_t mode);
1401 /* 1401 /*
1402 * VFS FS_IOC_FIEMAP helper definitions. 1402 * VFS FS_IOC_FIEMAP helper definitions.
1403 */ 1403 */
1404 struct fiemap_extent_info { 1404 struct fiemap_extent_info {
1405 unsigned int fi_flags; /* Flags as passed from user */ 1405 unsigned int fi_flags; /* Flags as passed from user */
1406 unsigned int fi_extents_mapped; /* Number of mapped extents */ 1406 unsigned int fi_extents_mapped; /* Number of mapped extents */
1407 unsigned int fi_extents_max; /* Size of fiemap_extent array */ 1407 unsigned int fi_extents_max; /* Size of fiemap_extent array */
1408 struct fiemap_extent __user *fi_extents_start; /* Start of 1408 struct fiemap_extent __user *fi_extents_start; /* Start of
1409 fiemap_extent array */ 1409 fiemap_extent array */
1410 }; 1410 };
1411 int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, 1411 int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical,
1412 u64 phys, u64 len, u32 flags); 1412 u64 phys, u64 len, u32 flags);
1413 int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); 1413 int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags);
1414 1414
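To show how these FIEMAP helpers fit together, here is a hedged sketch of a trivial ->fiemap() method that reports one contiguous extent; example_fiemap() and the 1:1 mapping are illustrative only, and FIEMAP_FLAG_SYNC/FIEMAP_EXTENT_LAST come from linux/fiemap.h:

static int example_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
			  u64 start, u64 len)
{
	int ret;

	/* reject flags we do not handle; FIEMAP_FLAG_SYNC is accepted */
	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
	if (ret)
		return ret;

	/* pretend the file maps 1:1 onto the device: one extent, marked last */
	ret = fiemap_fill_next_extent(fieinfo, start, start, len,
				      FIEMAP_EXTENT_LAST);
	return ret < 0 ? ret : 0;
}
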
1415 /* 1415 /*
1416 * File types 1416 * File types
1417 * 1417 *
1418 * NOTE! These match bits 12..15 of stat.st_mode 1418 * NOTE! These match bits 12..15 of stat.st_mode
1419 * (ie "(i_mode >> 12) & 15"). 1419 * (ie "(i_mode >> 12) & 15").
1420 */ 1420 */
1421 #define DT_UNKNOWN 0 1421 #define DT_UNKNOWN 0
1422 #define DT_FIFO 1 1422 #define DT_FIFO 1
1423 #define DT_CHR 2 1423 #define DT_CHR 2
1424 #define DT_DIR 4 1424 #define DT_DIR 4
1425 #define DT_BLK 6 1425 #define DT_BLK 6
1426 #define DT_REG 8 1426 #define DT_REG 8
1427 #define DT_LNK 10 1427 #define DT_LNK 10
1428 #define DT_SOCK 12 1428 #define DT_SOCK 12
1429 #define DT_WHT 14 1429 #define DT_WHT 14
1430 1430
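A hedged one-liner showing the relationship spelled out in the NOTE above; example_mode_to_dtype() is an illustrative helper, not part of this header:

static inline unsigned char example_mode_to_dtype(umode_t mode)
{
	return (mode >> 12) & 15;	/* e.g. S_IFREG (0100000) >> 12 == 8 == DT_REG */
}
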
1431 /* 1431 /*
1432 * This is the "filldir" function type, used by readdir() to let 1432 * This is the "filldir" function type, used by readdir() to let
1433 * the kernel specify what kind of dirent layout it wants to have. 1433 * the kernel specify what kind of dirent layout it wants to have.
1434 * This allows the kernel to read directories into kernel space or 1434 * This allows the kernel to read directories into kernel space or
1435 * to have different dirent layouts depending on the binary type. 1435 * to have different dirent layouts depending on the binary type.
1436 */ 1436 */
1437 typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned); 1437 typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned);
1438 struct dir_context { 1438 struct dir_context {
1439 const filldir_t actor; 1439 const filldir_t actor;
1440 loff_t pos; 1440 loff_t pos;
1441 }; 1441 };
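
As a hedged sketch of how an ->iterate() instance drives the filldir callback through struct dir_context, assuming the dir_emit()/dir_emit_dots() helpers provided elsewhere in this header; example_iterate() and its single fixed entry are illustrative only:

static int example_iterate(struct file *file, struct dir_context *ctx)
{
	if (!dir_emit_dots(file, ctx))		/* "." and ".." at pos 0 and 1 */
		return 0;
	if (ctx->pos == 2) {
		/* name, namelen, inode number, DT_* type */
		if (!dir_emit(ctx, "hello", 5, 1234, DT_REG))
			return 0;
		ctx->pos++;
	}
	return 0;
}
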
1442 1442
1443 struct block_device_operations; 1443 struct block_device_operations;
1444 1444
1445 /* These macros are for out-of-tree modules to test that 1445 /* These macros are for out-of-tree modules to test that
1446 * the kernel supports the unlocked_ioctl and compat_ioctl 1446 * the kernel supports the unlocked_ioctl and compat_ioctl
1447 * fields in struct file_operations. */ 1447 * fields in struct file_operations. */
1448 #define HAVE_COMPAT_IOCTL 1 1448 #define HAVE_COMPAT_IOCTL 1
1449 #define HAVE_UNLOCKED_IOCTL 1 1449 #define HAVE_UNLOCKED_IOCTL 1
1450 1450
1451 struct file_operations { 1451 struct file_operations {
1452 struct module *owner; 1452 struct module *owner;
1453 loff_t (*llseek) (struct file *, loff_t, int); 1453 loff_t (*llseek) (struct file *, loff_t, int);
1454 ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); 1454 ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
1455 ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); 1455 ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
1456 ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); 1456 ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
1457 ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); 1457 ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
1458 int (*iterate) (struct file *, struct dir_context *); 1458 int (*iterate) (struct file *, struct dir_context *);
1459 unsigned int (*poll) (struct file *, struct poll_table_struct *); 1459 unsigned int (*poll) (struct file *, struct poll_table_struct *);
1460 long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); 1460 long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
1461 long (*compat_ioctl) (struct file *, unsigned int, unsigned long); 1461 long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
1462 int (*mmap) (struct file *, struct vm_area_struct *); 1462 int (*mmap) (struct file *, struct vm_area_struct *);
1463 int (*open) (struct inode *, struct file *); 1463 int (*open) (struct inode *, struct file *);
1464 int (*flush) (struct file *, fl_owner_t id); 1464 int (*flush) (struct file *, fl_owner_t id);
1465 int (*release) (struct inode *, struct file *); 1465 int (*release) (struct inode *, struct file *);
1466 int (*fsync) (struct file *, loff_t, loff_t, int datasync); 1466 int (*fsync) (struct file *, loff_t, loff_t, int datasync);
1467 int (*aio_fsync) (struct kiocb *, int datasync); 1467 int (*aio_fsync) (struct kiocb *, int datasync);
1468 int (*fasync) (int, struct file *, int); 1468 int (*fasync) (int, struct file *, int);
1469 int (*lock) (struct file *, int, struct file_lock *); 1469 int (*lock) (struct file *, int, struct file_lock *);
1470 ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); 1470 ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
1471 unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); 1471 unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
1472 int (*check_flags)(int); 1472 int (*check_flags)(int);
1473 int (*flock) (struct file *, int, struct file_lock *); 1473 int (*flock) (struct file *, int, struct file_lock *);
1474 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); 1474 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
1475 ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); 1475 ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
1476 int (*setlease)(struct file *, long, struct file_lock **); 1476 int (*setlease)(struct file *, long, struct file_lock **);
1477 long (*fallocate)(struct file *file, int mode, loff_t offset, 1477 long (*fallocate)(struct file *file, int mode, loff_t offset,
1478 loff_t len); 1478 loff_t len);
1479 int (*show_fdinfo)(struct seq_file *m, struct file *f); 1479 int (*show_fdinfo)(struct seq_file *m, struct file *f);
1480 }; 1480 };
1481 1481
1482 struct inode_operations { 1482 struct inode_operations {
1483 struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); 1483 struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
1484 void * (*follow_link) (struct dentry *, struct nameidata *); 1484 void * (*follow_link) (struct dentry *, struct nameidata *);
1485 int (*permission) (struct inode *, int); 1485 int (*permission) (struct inode *, int);
1486 struct posix_acl * (*get_acl)(struct inode *, int); 1486 struct posix_acl * (*get_acl)(struct inode *, int);
1487 1487
1488 int (*readlink) (struct dentry *, char __user *,int); 1488 int (*readlink) (struct dentry *, char __user *,int);
1489 void (*put_link) (struct dentry *, struct nameidata *, void *); 1489 void (*put_link) (struct dentry *, struct nameidata *, void *);
1490 1490
1491 int (*create) (struct inode *,struct dentry *, umode_t, bool); 1491 int (*create) (struct inode *,struct dentry *, umode_t, bool);
1492 int (*link) (struct dentry *,struct inode *,struct dentry *); 1492 int (*link) (struct dentry *,struct inode *,struct dentry *);
1493 int (*unlink) (struct inode *,struct dentry *); 1493 int (*unlink) (struct inode *,struct dentry *);
1494 int (*symlink) (struct inode *,struct dentry *,const char *); 1494 int (*symlink) (struct inode *,struct dentry *,const char *);
1495 int (*mkdir) (struct inode *,struct dentry *,umode_t); 1495 int (*mkdir) (struct inode *,struct dentry *,umode_t);
1496 int (*rmdir) (struct inode *,struct dentry *); 1496 int (*rmdir) (struct inode *,struct dentry *);
1497 int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t); 1497 int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
1498 int (*rename) (struct inode *, struct dentry *, 1498 int (*rename) (struct inode *, struct dentry *,
1499 struct inode *, struct dentry *); 1499 struct inode *, struct dentry *);
1500 int (*rename2) (struct inode *, struct dentry *, 1500 int (*rename2) (struct inode *, struct dentry *,
1501 struct inode *, struct dentry *, unsigned int); 1501 struct inode *, struct dentry *, unsigned int);
1502 int (*setattr) (struct dentry *, struct iattr *); 1502 int (*setattr) (struct dentry *, struct iattr *);
1503 int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); 1503 int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
1504 int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); 1504 int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
1505 ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); 1505 ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
1506 ssize_t (*listxattr) (struct dentry *, char *, size_t); 1506 ssize_t (*listxattr) (struct dentry *, char *, size_t);
1507 int (*removexattr) (struct dentry *, const char *); 1507 int (*removexattr) (struct dentry *, const char *);
1508 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, 1508 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
1509 u64 len); 1509 u64 len);
1510 int (*update_time)(struct inode *, struct timespec *, int); 1510 int (*update_time)(struct inode *, struct timespec *, int);
1511 int (*atomic_open)(struct inode *, struct dentry *, 1511 int (*atomic_open)(struct inode *, struct dentry *,
1512 struct file *, unsigned open_flag, 1512 struct file *, unsigned open_flag,
1513 umode_t create_mode, int *opened); 1513 umode_t create_mode, int *opened);
1514 int (*tmpfile) (struct inode *, struct dentry *, umode_t); 1514 int (*tmpfile) (struct inode *, struct dentry *, umode_t);
1515 int (*set_acl)(struct inode *, struct posix_acl *, int); 1515 int (*set_acl)(struct inode *, struct posix_acl *, int);
1516 } ____cacheline_aligned; 1516 } ____cacheline_aligned;
1517 1517
1518 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, 1518 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
1519 unsigned long nr_segs, unsigned long fast_segs, 1519 unsigned long nr_segs, unsigned long fast_segs,
1520 struct iovec *fast_pointer, 1520 struct iovec *fast_pointer,
1521 struct iovec **ret_pointer); 1521 struct iovec **ret_pointer);
1522 1522
1523 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); 1523 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
1524 extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); 1524 extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
1525 extern ssize_t vfs_readv(struct file *, const struct iovec __user *, 1525 extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
1526 unsigned long, loff_t *); 1526 unsigned long, loff_t *);
1527 extern ssize_t vfs_writev(struct file *, const struct iovec __user *, 1527 extern ssize_t vfs_writev(struct file *, const struct iovec __user *,
1528 unsigned long, loff_t *); 1528 unsigned long, loff_t *);
1529 1529
1530 struct super_operations { 1530 struct super_operations {
1531 struct inode *(*alloc_inode)(struct super_block *sb); 1531 struct inode *(*alloc_inode)(struct super_block *sb);
1532 void (*destroy_inode)(struct inode *); 1532 void (*destroy_inode)(struct inode *);
1533 1533
1534 void (*dirty_inode) (struct inode *, int flags); 1534 void (*dirty_inode) (struct inode *, int flags);
1535 int (*write_inode) (struct inode *, struct writeback_control *wbc); 1535 int (*write_inode) (struct inode *, struct writeback_control *wbc);
1536 int (*drop_inode) (struct inode *); 1536 int (*drop_inode) (struct inode *);
1537 void (*evict_inode) (struct inode *); 1537 void (*evict_inode) (struct inode *);
1538 void (*put_super) (struct super_block *); 1538 void (*put_super) (struct super_block *);
1539 int (*sync_fs)(struct super_block *sb, int wait); 1539 int (*sync_fs)(struct super_block *sb, int wait);
1540 int (*freeze_fs) (struct super_block *); 1540 int (*freeze_fs) (struct super_block *);
1541 int (*unfreeze_fs) (struct super_block *); 1541 int (*unfreeze_fs) (struct super_block *);
1542 int (*statfs) (struct dentry *, struct kstatfs *); 1542 int (*statfs) (struct dentry *, struct kstatfs *);
1543 int (*remount_fs) (struct super_block *, int *, char *); 1543 int (*remount_fs) (struct super_block *, int *, char *);
1544 void (*umount_begin) (struct super_block *); 1544 void (*umount_begin) (struct super_block *);
1545 1545
1546 int (*show_options)(struct seq_file *, struct dentry *); 1546 int (*show_options)(struct seq_file *, struct dentry *);
1547 int (*show_devname)(struct seq_file *, struct dentry *); 1547 int (*show_devname)(struct seq_file *, struct dentry *);
1548 int (*show_path)(struct seq_file *, struct dentry *); 1548 int (*show_path)(struct seq_file *, struct dentry *);
1549 int (*show_stats)(struct seq_file *, struct dentry *); 1549 int (*show_stats)(struct seq_file *, struct dentry *);
1550 #ifdef CONFIG_QUOTA 1550 #ifdef CONFIG_QUOTA
1551 ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); 1551 ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
1552 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); 1552 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
1553 #endif 1553 #endif
1554 int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); 1554 int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
1555 long (*nr_cached_objects)(struct super_block *, int); 1555 long (*nr_cached_objects)(struct super_block *, int);
1556 long (*free_cached_objects)(struct super_block *, long, int); 1556 long (*free_cached_objects)(struct super_block *, long, int);
1557 }; 1557 };
1558 1558
1559 /* 1559 /*
1560 * Inode flags - they have no relation to superblock flags now 1560 * Inode flags - they have no relation to superblock flags now
1561 */ 1561 */
1562 #define S_SYNC 1 /* Writes are synced at once */ 1562 #define S_SYNC 1 /* Writes are synced at once */
1563 #define S_NOATIME 2 /* Do not update access times */ 1563 #define S_NOATIME 2 /* Do not update access times */
1564 #define S_APPEND 4 /* Append-only file */ 1564 #define S_APPEND 4 /* Append-only file */
1565 #define S_IMMUTABLE 8 /* Immutable file */ 1565 #define S_IMMUTABLE 8 /* Immutable file */
1566 #define S_DEAD 16 /* removed, but still open directory */ 1566 #define S_DEAD 16 /* removed, but still open directory */
1567 #define S_NOQUOTA 32 /* Inode is not counted to quota */ 1567 #define S_NOQUOTA 32 /* Inode is not counted to quota */
1568 #define S_DIRSYNC 64 /* Directory modifications are synchronous */ 1568 #define S_DIRSYNC 64 /* Directory modifications are synchronous */
1569 #define S_NOCMTIME 128 /* Do not update file c/mtime */ 1569 #define S_NOCMTIME 128 /* Do not update file c/mtime */
1570 #define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */ 1570 #define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */
1571 #define S_PRIVATE 512 /* Inode is fs-internal */ 1571 #define S_PRIVATE 512 /* Inode is fs-internal */
1572 #define S_IMA 1024 /* Inode has an associated IMA struct */ 1572 #define S_IMA 1024 /* Inode has an associated IMA struct */
1573 #define S_AUTOMOUNT 2048 /* Automount/referral quasi-directory */ 1573 #define S_AUTOMOUNT 2048 /* Automount/referral quasi-directory */
1574 #define S_NOSEC 4096 /* no suid or xattr security attributes */ 1574 #define S_NOSEC 4096 /* no suid or xattr security attributes */
1575 1575
1576 /* 1576 /*
1577 * Note that nosuid etc flags are inode-specific: setting some file-system 1577 * Note that nosuid etc flags are inode-specific: setting some file-system
1578 * flags just means all the inodes inherit those flags by default. It might be 1578 * flags just means all the inodes inherit those flags by default. It might be
1579 * possible to override them selectively if you really wanted to with some 1579 * possible to override them selectively if you really wanted to with some
1580 * ioctl() that is not currently implemented. 1580 * ioctl() that is not currently implemented.
1581 * 1581 *
1582 * Exception: MS_RDONLY is always applied to the entire file system. 1582 * Exception: MS_RDONLY is always applied to the entire file system.
1583 * 1583 *
1584 * Unfortunately, it is possible to change a filesystem's flags while it is mounted 1584 * Unfortunately, it is possible to change a filesystem's flags while it is mounted
1585 * with files in use. This means that the existing inodes will not have their 1585 * with files in use. This means that the existing inodes will not have their
1586 * i_flags updated. Hence, i_flags no longer inherit the superblock mount 1586 * i_flags updated. Hence, i_flags no longer inherit the superblock mount
1587 * flags, so these have to be checked separately. -- rmk@arm.uk.linux.org 1587 * flags, so these have to be checked separately. -- rmk@arm.uk.linux.org
1588 */ 1588 */
1589 #define __IS_FLG(inode, flg) ((inode)->i_sb->s_flags & (flg)) 1589 #define __IS_FLG(inode, flg) ((inode)->i_sb->s_flags & (flg))
1590 1590
1591 #define IS_RDONLY(inode) ((inode)->i_sb->s_flags & MS_RDONLY) 1591 #define IS_RDONLY(inode) ((inode)->i_sb->s_flags & MS_RDONLY)
1592 #define IS_SYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS) || \ 1592 #define IS_SYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS) || \
1593 ((inode)->i_flags & S_SYNC)) 1593 ((inode)->i_flags & S_SYNC))
1594 #define IS_DIRSYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \ 1594 #define IS_DIRSYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \
1595 ((inode)->i_flags & (S_SYNC|S_DIRSYNC))) 1595 ((inode)->i_flags & (S_SYNC|S_DIRSYNC)))
1596 #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK) 1596 #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK)
1597 #define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME) 1597 #define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME)
1598 #define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION) 1598 #define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION)
1599 1599
1600 #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) 1600 #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA)
1601 #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) 1601 #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND)
1602 #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) 1602 #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE)
1603 #define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL) 1603 #define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL)
1604 1604
1605 #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) 1605 #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD)
1606 #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME) 1606 #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME)
1607 #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE) 1607 #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE)
1608 #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE) 1608 #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE)
1609 #define IS_IMA(inode) ((inode)->i_flags & S_IMA) 1609 #define IS_IMA(inode) ((inode)->i_flags & S_IMA)
1610 #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) 1610 #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT)
1611 #define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC) 1611 #define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC)
1612 1612
1613 /* 1613 /*
1614 * Inode state bits. Protected by inode->i_lock 1614 * Inode state bits. Protected by inode->i_lock
1615 * 1615 *
1616 * Three bits determine the dirty state of the inode, I_DIRTY_SYNC, 1616 * Three bits determine the dirty state of the inode, I_DIRTY_SYNC,
1617 * I_DIRTY_DATASYNC and I_DIRTY_PAGES. 1617 * I_DIRTY_DATASYNC and I_DIRTY_PAGES.
1618 * 1618 *
1619 * Four bits define the lifetime of an inode. Initially, inodes are I_NEW, 1619 * Four bits define the lifetime of an inode. Initially, inodes are I_NEW,
1620 * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at 1620 * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at
1621 * various stages of removing an inode. 1621 * various stages of removing an inode.
1622 * 1622 *
1623 * Two bits are used for locking and completion notification, I_NEW and I_SYNC. 1623 * Two bits are used for locking and completion notification, I_NEW and I_SYNC.
1624 * 1624 *
1625 * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on 1625 * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on
1626 * fdatasync(). i_atime is the usual cause. 1626 * fdatasync(). i_atime is the usual cause.
1627 * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of 1627 * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of
1628 * these changes separately from I_DIRTY_SYNC so that we 1628 * these changes separately from I_DIRTY_SYNC so that we
1629 * don't have to write inode on fdatasync() when only 1629 * don't have to write inode on fdatasync() when only
1630 * mtime has changed in it. 1630 * mtime has changed in it.
1631 * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. 1631 * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean.
1632 * I_NEW Serves as both a mutex and completion notification. 1632 * I_NEW Serves as both a mutex and completion notification.
1633 * New inodes set I_NEW. If two processes both create 1633 * New inodes set I_NEW. If two processes both create
1634 * the same inode, one of them will release its inode and 1634 * the same inode, one of them will release its inode and
1635 * wait for I_NEW to be released before returning. 1635 * wait for I_NEW to be released before returning.
1636 * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can 1636 * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can
1637 * also cause waiting on I_NEW, without I_NEW actually 1637 * also cause waiting on I_NEW, without I_NEW actually
1638 * being set. find_inode() uses this to prevent returning 1638 * being set. find_inode() uses this to prevent returning
1639 * nearly-dead inodes. 1639 * nearly-dead inodes.
1640 * I_WILL_FREE Must be set when calling write_inode_now() if i_count 1640 * I_WILL_FREE Must be set when calling write_inode_now() if i_count
1641 * is zero. I_FREEING must be set when I_WILL_FREE is 1641 * is zero. I_FREEING must be set when I_WILL_FREE is
1642 * cleared. 1642 * cleared.
1643 * I_FREEING Set when inode is about to be freed but still has dirty 1643 * I_FREEING Set when inode is about to be freed but still has dirty
1644 * pages or buffers attached or the inode itself is still 1644 * pages or buffers attached or the inode itself is still
1645 * dirty. 1645 * dirty.
1646 * I_CLEAR Added by clear_inode(). In this state the inode is 1646 * I_CLEAR Added by clear_inode(). In this state the inode is
1647 * clean and can be destroyed. Inode keeps I_FREEING. 1647 * clean and can be destroyed. Inode keeps I_FREEING.
1648 * 1648 *
1649 * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are 1649 * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are
1650 * prohibited for many purposes. iget() must wait for 1650 * prohibited for many purposes. iget() must wait for
1651 * the inode to be completely released, then create it 1651 * the inode to be completely released, then create it
1652 * anew. Other functions will just ignore such inodes, 1652 * anew. Other functions will just ignore such inodes,
1653 * if appropriate. I_NEW is used for waiting. 1653 * if appropriate. I_NEW is used for waiting.
1654 * 1654 *
1655 * I_SYNC Writeback of inode is running. The bit is set during 1655 * I_SYNC Writeback of inode is running. The bit is set during
1656 * data writeback, and cleared with a wakeup on the bit 1656 * data writeback, and cleared with a wakeup on the bit
1657 * address once it is done. The bit is also used to pin 1657 * address once it is done. The bit is also used to pin
1658 * the inode in memory for the flusher thread. 1658 * the inode in memory for the flusher thread.
1659 * 1659 *
1660 * I_REFERENCED Marks the inode as recently referenced on the LRU list. 1660 * I_REFERENCED Marks the inode as recently referenced on the LRU list.
1661 * 1661 *
1662 * I_DIO_WAKEUP Never set. Only used as a key for wait_on_bit(). 1662 * I_DIO_WAKEUP Never set. Only used as a key for wait_on_bit().
1663 * 1663 *
1664 * Q: What is the difference between I_WILL_FREE and I_FREEING? 1664 * Q: What is the difference between I_WILL_FREE and I_FREEING?
1665 */ 1665 */
1666 #define I_DIRTY_SYNC (1 << 0) 1666 #define I_DIRTY_SYNC (1 << 0)
1667 #define I_DIRTY_DATASYNC (1 << 1) 1667 #define I_DIRTY_DATASYNC (1 << 1)
1668 #define I_DIRTY_PAGES (1 << 2) 1668 #define I_DIRTY_PAGES (1 << 2)
1669 #define __I_NEW 3 1669 #define __I_NEW 3
1670 #define I_NEW (1 << __I_NEW) 1670 #define I_NEW (1 << __I_NEW)
1671 #define I_WILL_FREE (1 << 4) 1671 #define I_WILL_FREE (1 << 4)
1672 #define I_FREEING (1 << 5) 1672 #define I_FREEING (1 << 5)
1673 #define I_CLEAR (1 << 6) 1673 #define I_CLEAR (1 << 6)
1674 #define __I_SYNC 7 1674 #define __I_SYNC 7
1675 #define I_SYNC (1 << __I_SYNC) 1675 #define I_SYNC (1 << __I_SYNC)
1676 #define I_REFERENCED (1 << 8) 1676 #define I_REFERENCED (1 << 8)
1677 #define __I_DIO_WAKEUP 9 1677 #define __I_DIO_WAKEUP 9
1678 #define I_DIO_WAKEUP (1 << __I_DIO_WAKEUP) 1678 #define I_DIO_WAKEUP (1 << __I_DIO_WAKEUP)
1679 #define I_LINKABLE (1 << 10) 1679 #define I_LINKABLE (1 << 10)
1680 1680
1681 #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) 1681 #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
1682 1682
1683 extern void __mark_inode_dirty(struct inode *, int); 1683 extern void __mark_inode_dirty(struct inode *, int);
1684 static inline void mark_inode_dirty(struct inode *inode) 1684 static inline void mark_inode_dirty(struct inode *inode)
1685 { 1685 {
1686 __mark_inode_dirty(inode, I_DIRTY); 1686 __mark_inode_dirty(inode, I_DIRTY);
1687 } 1687 }
1688 1688
1689 static inline void mark_inode_dirty_sync(struct inode *inode) 1689 static inline void mark_inode_dirty_sync(struct inode *inode)
1690 { 1690 {
1691 __mark_inode_dirty(inode, I_DIRTY_SYNC); 1691 __mark_inode_dirty(inode, I_DIRTY_SYNC);
1692 } 1692 }
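
A hedged example of why the _sync variant exists, per the I_DIRTY_SYNC description above: a timestamp-only update need not be written out by fdatasync(); example_touch_atime() is an illustrative name, and CURRENT_TIME comes from linux/time.h:

static void example_touch_atime(struct inode *inode)
{
	inode->i_atime = CURRENT_TIME;		/* timestamp change only */
	mark_inode_dirty_sync(inode);		/* sets I_DIRTY_SYNC, not I_DIRTY_DATASYNC */
}
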
1693 1693
1694 extern void inc_nlink(struct inode *inode); 1694 extern void inc_nlink(struct inode *inode);
1695 extern void drop_nlink(struct inode *inode); 1695 extern void drop_nlink(struct inode *inode);
1696 extern void clear_nlink(struct inode *inode); 1696 extern void clear_nlink(struct inode *inode);
1697 extern void set_nlink(struct inode *inode, unsigned int nlink); 1697 extern void set_nlink(struct inode *inode, unsigned int nlink);
1698 1698
1699 static inline void inode_inc_link_count(struct inode *inode) 1699 static inline void inode_inc_link_count(struct inode *inode)
1700 { 1700 {
1701 inc_nlink(inode); 1701 inc_nlink(inode);
1702 mark_inode_dirty(inode); 1702 mark_inode_dirty(inode);
1703 } 1703 }
1704 1704
1705 static inline void inode_dec_link_count(struct inode *inode) 1705 static inline void inode_dec_link_count(struct inode *inode)
1706 { 1706 {
1707 drop_nlink(inode); 1707 drop_nlink(inode);
1708 mark_inode_dirty(inode); 1708 mark_inode_dirty(inode);
1709 } 1709 }
1710 1710
1711 /** 1711 /**
1712 * inode_inc_iversion - increments i_version 1712 * inode_inc_iversion - increments i_version
1713 * @inode: inode that needs to be updated 1713 * @inode: inode that needs to be updated
1714 * 1714 *
1715 * Every time the inode is modified, the i_version field will be incremented. 1715 * Every time the inode is modified, the i_version field will be incremented.
1716 * The filesystem has to be mounted with the i_version flag. 1716 * The filesystem has to be mounted with the i_version flag.
1717 */ 1717 */
1718 1718
1719 static inline void inode_inc_iversion(struct inode *inode) 1719 static inline void inode_inc_iversion(struct inode *inode)
1720 { 1720 {
1721 spin_lock(&inode->i_lock); 1721 spin_lock(&inode->i_lock);
1722 inode->i_version++; 1722 inode->i_version++;
1723 spin_unlock(&inode->i_lock); 1723 spin_unlock(&inode->i_lock);
1724 } 1724 }
1725 1725
1726 enum file_time_flags { 1726 enum file_time_flags {
1727 S_ATIME = 1, 1727 S_ATIME = 1,
1728 S_MTIME = 2, 1728 S_MTIME = 2,
1729 S_CTIME = 4, 1729 S_CTIME = 4,
1730 S_VERSION = 8, 1730 S_VERSION = 8,
1731 }; 1731 };
1732 1732
1733 extern void touch_atime(const struct path *); 1733 extern void touch_atime(const struct path *);
1734 static inline void file_accessed(struct file *file) 1734 static inline void file_accessed(struct file *file)
1735 { 1735 {
1736 if (!(file->f_flags & O_NOATIME)) 1736 if (!(file->f_flags & O_NOATIME))
1737 touch_atime(&file->f_path); 1737 touch_atime(&file->f_path);
1738 } 1738 }
1739 1739
1740 int sync_inode(struct inode *inode, struct writeback_control *wbc); 1740 int sync_inode(struct inode *inode, struct writeback_control *wbc);
1741 int sync_inode_metadata(struct inode *inode, int wait); 1741 int sync_inode_metadata(struct inode *inode, int wait);
1742 1742
1743 struct file_system_type { 1743 struct file_system_type {
1744 const char *name; 1744 const char *name;
1745 int fs_flags; 1745 int fs_flags;
1746 #define FS_REQUIRES_DEV 1 1746 #define FS_REQUIRES_DEV 1
1747 #define FS_BINARY_MOUNTDATA 2 1747 #define FS_BINARY_MOUNTDATA 2
1748 #define FS_HAS_SUBTYPE 4 1748 #define FS_HAS_SUBTYPE 4
1749 #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ 1749 #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */
1750 #define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */ 1750 #define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */
1751 #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ 1751 #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
1752 struct dentry *(*mount) (struct file_system_type *, int, 1752 struct dentry *(*mount) (struct file_system_type *, int,
1753 const char *, void *); 1753 const char *, void *);
1754 void (*kill_sb) (struct super_block *); 1754 void (*kill_sb) (struct super_block *);
1755 struct module *owner; 1755 struct module *owner;
1756 struct file_system_type * next; 1756 struct file_system_type * next;
1757 struct hlist_head fs_supers; 1757 struct hlist_head fs_supers;
1758 1758
1759 struct lock_class_key s_lock_key; 1759 struct lock_class_key s_lock_key;
1760 struct lock_class_key s_umount_key; 1760 struct lock_class_key s_umount_key;
1761 struct lock_class_key s_vfs_rename_key; 1761 struct lock_class_key s_vfs_rename_key;
1762 struct lock_class_key s_writers_key[SB_FREEZE_LEVELS]; 1762 struct lock_class_key s_writers_key[SB_FREEZE_LEVELS];
1763 1763
1764 struct lock_class_key i_lock_key; 1764 struct lock_class_key i_lock_key;
1765 struct lock_class_key i_mutex_key; 1765 struct lock_class_key i_mutex_key;
1766 struct lock_class_key i_mutex_dir_key; 1766 struct lock_class_key i_mutex_dir_key;
1767 }; 1767 };
1768 1768
1769 #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME) 1769 #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME)
1770 1770
1771 extern struct dentry *mount_ns(struct file_system_type *fs_type, int flags, 1771 extern struct dentry *mount_ns(struct file_system_type *fs_type, int flags,
1772 void *data, int (*fill_super)(struct super_block *, void *, int)); 1772 void *data, int (*fill_super)(struct super_block *, void *, int));
1773 extern struct dentry *mount_bdev(struct file_system_type *fs_type, 1773 extern struct dentry *mount_bdev(struct file_system_type *fs_type,
1774 int flags, const char *dev_name, void *data, 1774 int flags, const char *dev_name, void *data,
1775 int (*fill_super)(struct super_block *, void *, int)); 1775 int (*fill_super)(struct super_block *, void *, int));
1776 extern struct dentry *mount_single(struct file_system_type *fs_type, 1776 extern struct dentry *mount_single(struct file_system_type *fs_type,
1777 int flags, void *data, 1777 int flags, void *data,
1778 int (*fill_super)(struct super_block *, void *, int)); 1778 int (*fill_super)(struct super_block *, void *, int));
1779 extern struct dentry *mount_nodev(struct file_system_type *fs_type, 1779 extern struct dentry *mount_nodev(struct file_system_type *fs_type,
1780 int flags, void *data, 1780 int flags, void *data,
1781 int (*fill_super)(struct super_block *, void *, int)); 1781 int (*fill_super)(struct super_block *, void *, int));
1782 extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path); 1782 extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path);
1783 void generic_shutdown_super(struct super_block *sb); 1783 void generic_shutdown_super(struct super_block *sb);
1784 void kill_block_super(struct super_block *sb); 1784 void kill_block_super(struct super_block *sb);
1785 void kill_anon_super(struct super_block *sb); 1785 void kill_anon_super(struct super_block *sb);
1786 void kill_litter_super(struct super_block *sb); 1786 void kill_litter_super(struct super_block *sb);
1787 void deactivate_super(struct super_block *sb); 1787 void deactivate_super(struct super_block *sb);
1788 void deactivate_locked_super(struct super_block *sb); 1788 void deactivate_locked_super(struct super_block *sb);
1789 int set_anon_super(struct super_block *s, void *data); 1789 int set_anon_super(struct super_block *s, void *data);
1790 int get_anon_bdev(dev_t *); 1790 int get_anon_bdev(dev_t *);
1791 void free_anon_bdev(dev_t); 1791 void free_anon_bdev(dev_t);
1792 struct super_block *sget(struct file_system_type *type, 1792 struct super_block *sget(struct file_system_type *type,
1793 int (*test)(struct super_block *,void *), 1793 int (*test)(struct super_block *,void *),
1794 int (*set)(struct super_block *,void *), 1794 int (*set)(struct super_block *,void *),
1795 int flags, void *data); 1795 int flags, void *data);
1796 extern struct dentry *mount_pseudo(struct file_system_type *, char *, 1796 extern struct dentry *mount_pseudo(struct file_system_type *, char *,
1797 const struct super_operations *ops, 1797 const struct super_operations *ops,
1798 const struct dentry_operations *dops, 1798 const struct dentry_operations *dops,
1799 unsigned long); 1799 unsigned long);
1800 1800
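Tying the pieces above together, a hedged sketch of how a block-device filesystem typically defines and registers its file_system_type; "examplefs", example_mount() and example_fill_super() are illustrative names, and THIS_MODULE comes from linux/module.h:

static int example_fill_super(struct super_block *sb, void *data, int silent);

static struct dentry *example_mount(struct file_system_type *fs_type,
				    int flags, const char *dev_name, void *data)
{
	return mount_bdev(fs_type, flags, dev_name, data, example_fill_super);
}

static struct file_system_type example_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "examplefs",
	.fs_flags	= FS_REQUIRES_DEV,
	.mount		= example_mount,
	.kill_sb	= kill_block_super,
};
MODULE_ALIAS_FS("examplefs");

/* module init/exit would then call register_filesystem()/unregister_filesystem() */
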
1801 /* Alas, no aliases. Too much hassle with bringing module.h everywhere */ 1801 /* Alas, no aliases. Too much hassle with bringing module.h everywhere */
1802 #define fops_get(fops) \ 1802 #define fops_get(fops) \
1803 (((fops) && try_module_get((fops)->owner) ? (fops) : NULL)) 1803 (((fops) && try_module_get((fops)->owner) ? (fops) : NULL))
1804 #define fops_put(fops) \ 1804 #define fops_put(fops) \
1805 do { if (fops) module_put((fops)->owner); } while(0) 1805 do { if (fops) module_put((fops)->owner); } while(0)
1806 /* 1806 /*
1807 * This one is to be used *ONLY* from ->open() instances. 1807 * This one is to be used *ONLY* from ->open() instances.
1808 * fops must be non-NULL, pinned down *and* module dependencies 1808 * fops must be non-NULL, pinned down *and* module dependencies
1809 * should be sufficient to pin the caller down as well. 1809 * should be sufficient to pin the caller down as well.
1810 */ 1810 */
1811 #define replace_fops(f, fops) \ 1811 #define replace_fops(f, fops) \
1812 do { \ 1812 do { \
1813 struct file *__file = (f); \ 1813 struct file *__file = (f); \
1814 fops_put(__file->f_op); \ 1814 fops_put(__file->f_op); \
1815 BUG_ON(!(__file->f_op = (fops))); \ 1815 BUG_ON(!(__file->f_op = (fops))); \
1816 } while(0) 1816 } while(0)
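
A hedged sketch of the ->open() pattern this macro is meant for: choose the real file_operations based on the minor number and hand the open off to it. example_open(), example_variant_fops and the minor check are illustrative only:

static int example_open(struct inode *inode, struct file *file)
{
	if (iminor(inode) != 0)
		return -ENODEV;

	/* example_variant_fops is pinned by this module, as required above */
	replace_fops(file, &example_variant_fops);
	if (file->f_op->open)
		return file->f_op->open(inode, file);
	return 0;
}
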
1817 1817
1818 extern int register_filesystem(struct file_system_type *); 1818 extern int register_filesystem(struct file_system_type *);
1819 extern int unregister_filesystem(struct file_system_type *); 1819 extern int unregister_filesystem(struct file_system_type *);
1820 extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data); 1820 extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data);
1821 #define kern_mount(type) kern_mount_data(type, NULL) 1821 #define kern_mount(type) kern_mount_data(type, NULL)
1822 extern void kern_unmount(struct vfsmount *mnt); 1822 extern void kern_unmount(struct vfsmount *mnt);
1823 extern int may_umount_tree(struct vfsmount *); 1823 extern int may_umount_tree(struct vfsmount *);
1824 extern int may_umount(struct vfsmount *); 1824 extern int may_umount(struct vfsmount *);
1825 extern long do_mount(const char *, const char *, const char *, unsigned long, void *); 1825 extern long do_mount(const char *, const char *, const char *, unsigned long, void *);
1826 extern struct vfsmount *collect_mounts(struct path *); 1826 extern struct vfsmount *collect_mounts(struct path *);
1827 extern void drop_collected_mounts(struct vfsmount *); 1827 extern void drop_collected_mounts(struct vfsmount *);
1828 extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, 1828 extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *,
1829 struct vfsmount *); 1829 struct vfsmount *);
1830 extern int vfs_statfs(struct path *, struct kstatfs *); 1830 extern int vfs_statfs(struct path *, struct kstatfs *);
1831 extern int user_statfs(const char __user *, struct kstatfs *); 1831 extern int user_statfs(const char __user *, struct kstatfs *);
1832 extern int fd_statfs(int, struct kstatfs *); 1832 extern int fd_statfs(int, struct kstatfs *);
1833 extern int vfs_ustat(dev_t, struct kstatfs *); 1833 extern int vfs_ustat(dev_t, struct kstatfs *);
1834 extern int freeze_super(struct super_block *super); 1834 extern int freeze_super(struct super_block *super);
1835 extern int thaw_super(struct super_block *super); 1835 extern int thaw_super(struct super_block *super);
1836 extern bool our_mnt(struct vfsmount *mnt); 1836 extern bool our_mnt(struct vfsmount *mnt);
1837 extern bool fs_fully_visible(struct file_system_type *); 1837 extern bool fs_fully_visible(struct file_system_type *);
1838 1838
1839 extern int current_umask(void); 1839 extern int current_umask(void);
1840 1840
1841 extern void ihold(struct inode * inode); 1841 extern void ihold(struct inode * inode);
1842 extern void iput(struct inode *); 1842 extern void iput(struct inode *);
1843 1843
1844 static inline struct inode *file_inode(struct file *f) 1844 static inline struct inode *file_inode(struct file *f)
1845 { 1845 {
1846 return f->f_inode; 1846 return f->f_inode;
1847 } 1847 }
1848 1848
1849 /* /sys/fs */ 1849 /* /sys/fs */
1850 extern struct kobject *fs_kobj; 1850 extern struct kobject *fs_kobj;
1851 1851
1852 #define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK) 1852 #define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK)
1853 1853
1854 #define FLOCK_VERIFY_READ 1 1854 #define FLOCK_VERIFY_READ 1
1855 #define FLOCK_VERIFY_WRITE 2 1855 #define FLOCK_VERIFY_WRITE 2
1856 1856
1857 #ifdef CONFIG_FILE_LOCKING 1857 #ifdef CONFIG_FILE_LOCKING
1858 extern int locks_mandatory_locked(struct file *); 1858 extern int locks_mandatory_locked(struct file *);
1859 extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t); 1859 extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t);
1860 1860
1861 /* 1861 /*
1862 * Candidates for mandatory locking have the setgid bit set 1862 * Candidates for mandatory locking have the setgid bit set
1863 * but no group execute bit - an otherwise meaningless combination. 1863 * but no group execute bit - an otherwise meaningless combination.
1864 */ 1864 */
1865 1865
1866 static inline int __mandatory_lock(struct inode *ino) 1866 static inline int __mandatory_lock(struct inode *ino)
1867 { 1867 {
1868 return (ino->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID; 1868 return (ino->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID;
1869 } 1869 }
1870 1870
1871 /* 1871 /*
1872 * ... and these candidates should be on an MS_MANDLOCK-mounted fs, 1872 * ... and these candidates should be on an MS_MANDLOCK-mounted fs,
1873 * otherwise they will be advisory locks 1873 * otherwise they will be advisory locks
1874 */ 1874 */
1875 1875
1876 static inline int mandatory_lock(struct inode *ino) 1876 static inline int mandatory_lock(struct inode *ino)
1877 { 1877 {
1878 return IS_MANDLOCK(ino) && __mandatory_lock(ino); 1878 return IS_MANDLOCK(ino) && __mandatory_lock(ino);
1879 } 1879 }
1880 1880
1881 static inline int locks_verify_locked(struct file *file) 1881 static inline int locks_verify_locked(struct file *file)
1882 { 1882 {
1883 if (mandatory_lock(file_inode(file))) 1883 if (mandatory_lock(file_inode(file)))
1884 return locks_mandatory_locked(file); 1884 return locks_mandatory_locked(file);
1885 return 0; 1885 return 0;
1886 } 1886 }
1887 1887
1888 static inline int locks_verify_truncate(struct inode *inode, 1888 static inline int locks_verify_truncate(struct inode *inode,
1889 struct file *filp, 1889 struct file *filp,
1890 loff_t size) 1890 loff_t size)
1891 { 1891 {
1892 if (inode->i_flock && mandatory_lock(inode)) 1892 if (inode->i_flock && mandatory_lock(inode))
1893 return locks_mandatory_area( 1893 return locks_mandatory_area(
1894 FLOCK_VERIFY_WRITE, inode, filp, 1894 FLOCK_VERIFY_WRITE, inode, filp,
1895 size < inode->i_size ? size : inode->i_size, 1895 size < inode->i_size ? size : inode->i_size,
1896 (size < inode->i_size ? inode->i_size - size 1896 (size < inode->i_size ? inode->i_size - size
1897 : size - inode->i_size) 1897 : size - inode->i_size)
1898 ); 1898 );
1899 return 0; 1899 return 0;
1900 } 1900 }
1901 1901
1902 static inline int break_lease(struct inode *inode, unsigned int mode) 1902 static inline int break_lease(struct inode *inode, unsigned int mode)
1903 { 1903 {
1904 /* 1904 /*
1905 * Since this check is lockless, we must ensure that any refcounts 1905 * Since this check is lockless, we must ensure that any refcounts
1906 * taken are done before checking inode->i_flock. Otherwise, we could 1906 * taken are done before checking inode->i_flock. Otherwise, we could
1907 * end up racing with tasks trying to set a new lease on this file. 1907 * end up racing with tasks trying to set a new lease on this file.
1908 */ 1908 */
1909 smp_mb(); 1909 smp_mb();
1910 if (inode->i_flock) 1910 if (inode->i_flock)
1911 return __break_lease(inode, mode, FL_LEASE); 1911 return __break_lease(inode, mode, FL_LEASE);
1912 return 0; 1912 return 0;
1913 } 1913 }
1914 1914
1915 static inline int break_deleg(struct inode *inode, unsigned int mode) 1915 static inline int break_deleg(struct inode *inode, unsigned int mode)
1916 { 1916 {
1917 if (inode->i_flock) 1917 if (inode->i_flock)
1918 return __break_lease(inode, mode, FL_DELEG); 1918 return __break_lease(inode, mode, FL_DELEG);
1919 return 0; 1919 return 0;
1920 } 1920 }
1921 1921
1922 static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode) 1922 static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode)
1923 { 1923 {
1924 int ret; 1924 int ret;
1925 1925
1926 ret = break_deleg(inode, O_WRONLY|O_NONBLOCK); 1926 ret = break_deleg(inode, O_WRONLY|O_NONBLOCK);
1927 if (ret == -EWOULDBLOCK && delegated_inode) { 1927 if (ret == -EWOULDBLOCK && delegated_inode) {
1928 *delegated_inode = inode; 1928 *delegated_inode = inode;
1929 ihold(inode); 1929 ihold(inode);
1930 } 1930 }
1931 return ret; 1931 return ret;
1932 } 1932 }
1933 1933
1934 static inline int break_deleg_wait(struct inode **delegated_inode) 1934 static inline int break_deleg_wait(struct inode **delegated_inode)
1935 { 1935 {
1936 int ret; 1936 int ret;
1937 1937
1938 ret = break_deleg(*delegated_inode, O_WRONLY); 1938 ret = break_deleg(*delegated_inode, O_WRONLY);
1939 iput(*delegated_inode); 1939 iput(*delegated_inode);
1940 *delegated_inode = NULL; 1940 *delegated_inode = NULL;
1941 return ret; 1941 return ret;
1942 } 1942 }
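
To make the intended calling convention concrete, here is a hedged sketch of the retry pattern these helpers support: a vfs_unlink() caller passes a delegated_inode pointer down and, if a delegation had to be broken non-blockingly, waits for it and retries. example_unlink_retry() is illustrative only and directory locking is elided:

static int example_unlink_retry(struct inode *dir, struct dentry *dentry)
{
	struct inode *delegated_inode = NULL;
	int error;
retry:
	error = vfs_unlink(dir, dentry, &delegated_inode);
	if (delegated_inode) {
		/* try_break_deleg() stashed the inode; wait for the break, then retry */
		error = break_deleg_wait(&delegated_inode);
		if (!error)
			goto retry;
	}
	return error;
}
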
1943 1943
1944 #else /* !CONFIG_FILE_LOCKING */ 1944 #else /* !CONFIG_FILE_LOCKING */
1945 static inline int locks_mandatory_locked(struct file *file) 1945 static inline int locks_mandatory_locked(struct file *file)
1946 { 1946 {
1947 return 0; 1947 return 0;
1948 } 1948 }
1949 1949
1950 static inline int locks_mandatory_area(int rw, struct inode *inode, 1950 static inline int locks_mandatory_area(int rw, struct inode *inode,
1951 struct file *filp, loff_t offset, 1951 struct file *filp, loff_t offset,
1952 size_t count) 1952 size_t count)
1953 { 1953 {
1954 return 0; 1954 return 0;
1955 } 1955 }
1956 1956
1957 static inline int __mandatory_lock(struct inode *inode) 1957 static inline int __mandatory_lock(struct inode *inode)
1958 { 1958 {
1959 return 0; 1959 return 0;
1960 } 1960 }
1961 1961
1962 static inline int mandatory_lock(struct inode *inode) 1962 static inline int mandatory_lock(struct inode *inode)
1963 { 1963 {
1964 return 0; 1964 return 0;
1965 } 1965 }
1966 1966
1967 static inline int locks_verify_locked(struct file *file) 1967 static inline int locks_verify_locked(struct file *file)
1968 { 1968 {
1969 return 0; 1969 return 0;
1970 } 1970 }
1971 1971
1972 static inline int locks_verify_truncate(struct inode *inode, struct file *filp, 1972 static inline int locks_verify_truncate(struct inode *inode, struct file *filp,
1973 size_t size) 1973 size_t size)
1974 { 1974 {
1975 return 0; 1975 return 0;
1976 } 1976 }
1977 1977
1978 static inline int break_lease(struct inode *inode, unsigned int mode) 1978 static inline int break_lease(struct inode *inode, unsigned int mode)
1979 { 1979 {
1980 return 0; 1980 return 0;
1981 } 1981 }
1982 1982
1983 static inline int break_deleg(struct inode *inode, unsigned int mode) 1983 static inline int break_deleg(struct inode *inode, unsigned int mode)
1984 { 1984 {
1985 return 0; 1985 return 0;
1986 } 1986 }
1987 1987
1988 static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode) 1988 static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode)
1989 { 1989 {
1990 return 0; 1990 return 0;
1991 } 1991 }
1992 1992
1993 static inline int break_deleg_wait(struct inode **delegated_inode) 1993 static inline int break_deleg_wait(struct inode **delegated_inode)
1994 { 1994 {
1995 BUG(); 1995 BUG();
1996 return 0; 1996 return 0;
1997 } 1997 }
1998 1998
1999 #endif /* CONFIG_FILE_LOCKING */ 1999 #endif /* CONFIG_FILE_LOCKING */
2000 2000
2001 /* fs/open.c */ 2001 /* fs/open.c */
2002 struct audit_names; 2002 struct audit_names;
2003 struct filename { 2003 struct filename {
2004 const char *name; /* pointer to actual string */ 2004 const char *name; /* pointer to actual string */
2005 const __user char *uptr; /* original userland pointer */ 2005 const __user char *uptr; /* original userland pointer */
2006 struct audit_names *aname; 2006 struct audit_names *aname;
2007 bool separate; /* should "name" be freed? */ 2007 bool separate; /* should "name" be freed? */
2008 }; 2008 };
2009 2009
2010 extern long vfs_truncate(struct path *, loff_t); 2010 extern long vfs_truncate(struct path *, loff_t);
2011 extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, 2011 extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
2012 struct file *filp); 2012 struct file *filp);
2013 extern int do_fallocate(struct file *file, int mode, loff_t offset, 2013 extern int do_fallocate(struct file *file, int mode, loff_t offset,
2014 loff_t len); 2014 loff_t len);
2015 extern long do_sys_open(int dfd, const char __user *filename, int flags, 2015 extern long do_sys_open(int dfd, const char __user *filename, int flags,
2016 umode_t mode); 2016 umode_t mode);
2017 extern struct file *file_open_name(struct filename *, int, umode_t); 2017 extern struct file *file_open_name(struct filename *, int, umode_t);
2018 extern struct file *filp_open(const char *, int, umode_t); 2018 extern struct file *filp_open(const char *, int, umode_t);
2019 extern struct file *file_open_root(struct dentry *, struct vfsmount *, 2019 extern struct file *file_open_root(struct dentry *, struct vfsmount *,
2020 const char *, int); 2020 const char *, int);
2021 extern struct file * dentry_open(const struct path *, int, const struct cred *); 2021 extern struct file * dentry_open(const struct path *, int, const struct cred *);
2022 extern int filp_close(struct file *, fl_owner_t id); 2022 extern int filp_close(struct file *, fl_owner_t id);
2023 2023
2024 extern struct filename *getname(const char __user *); 2024 extern struct filename *getname(const char __user *);
2025 extern struct filename *getname_kernel(const char *); 2025 extern struct filename *getname_kernel(const char *);
2026 2026
2027 enum { 2027 enum {
2028 FILE_CREATED = 1, 2028 FILE_CREATED = 1,
2029 FILE_OPENED = 2 2029 FILE_OPENED = 2
2030 }; 2030 };
2031 extern int finish_open(struct file *file, struct dentry *dentry, 2031 extern int finish_open(struct file *file, struct dentry *dentry,
2032 int (*open)(struct inode *, struct file *), 2032 int (*open)(struct inode *, struct file *),
2033 int *opened); 2033 int *opened);
2034 extern int finish_no_open(struct file *file, struct dentry *dentry); 2034 extern int finish_no_open(struct file *file, struct dentry *dentry);
2035 2035
2036 /* fs/ioctl.c */ 2036 /* fs/ioctl.c */
2037 2037
2038 extern int ioctl_preallocate(struct file *filp, void __user *argp); 2038 extern int ioctl_preallocate(struct file *filp, void __user *argp);
2039 2039
2040 /* fs/dcache.c */ 2040 /* fs/dcache.c */
2041 extern void __init vfs_caches_init_early(void); 2041 extern void __init vfs_caches_init_early(void);
2042 extern void __init vfs_caches_init(unsigned long); 2042 extern void __init vfs_caches_init(unsigned long);
2043 2043
2044 extern struct kmem_cache *names_cachep; 2044 extern struct kmem_cache *names_cachep;
2045 2045
2046 extern void final_putname(struct filename *name); 2046 extern void final_putname(struct filename *name);
2047 2047
2048 #define __getname() kmem_cache_alloc(names_cachep, GFP_KERNEL) 2048 #define __getname() kmem_cache_alloc(names_cachep, GFP_KERNEL)
2049 #define __putname(name) kmem_cache_free(names_cachep, (void *)(name)) 2049 #define __putname(name) kmem_cache_free(names_cachep, (void *)(name))
2050 #ifndef CONFIG_AUDITSYSCALL 2050 #ifndef CONFIG_AUDITSYSCALL
2051 #define putname(name) final_putname(name) 2051 #define putname(name) final_putname(name)
2052 #else 2052 #else
2053 extern void putname(struct filename *name); 2053 extern void putname(struct filename *name);
2054 #endif 2054 #endif
2055 2055
2056 #ifdef CONFIG_BLOCK 2056 #ifdef CONFIG_BLOCK
2057 extern int register_blkdev(unsigned int, const char *); 2057 extern int register_blkdev(unsigned int, const char *);
2058 extern void unregister_blkdev(unsigned int, const char *); 2058 extern void unregister_blkdev(unsigned int, const char *);
2059 extern struct block_device *bdget(dev_t); 2059 extern struct block_device *bdget(dev_t);
2060 extern struct block_device *bdgrab(struct block_device *bdev); 2060 extern struct block_device *bdgrab(struct block_device *bdev);
2061 extern void bd_set_size(struct block_device *, loff_t size); 2061 extern void bd_set_size(struct block_device *, loff_t size);
2062 extern void bd_forget(struct inode *inode); 2062 extern void bd_forget(struct inode *inode);
2063 extern void bdput(struct block_device *); 2063 extern void bdput(struct block_device *);
2064 extern void invalidate_bdev(struct block_device *); 2064 extern void invalidate_bdev(struct block_device *);
2065 extern void iterate_bdevs(void (*)(struct block_device *, void *), void *); 2065 extern void iterate_bdevs(void (*)(struct block_device *, void *), void *);
2066 extern int sync_blockdev(struct block_device *bdev); 2066 extern int sync_blockdev(struct block_device *bdev);
2067 extern void kill_bdev(struct block_device *); 2067 extern void kill_bdev(struct block_device *);
2068 extern struct super_block *freeze_bdev(struct block_device *); 2068 extern struct super_block *freeze_bdev(struct block_device *);
2069 extern void emergency_thaw_all(void); 2069 extern void emergency_thaw_all(void);
2070 extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); 2070 extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
2071 extern int fsync_bdev(struct block_device *); 2071 extern int fsync_bdev(struct block_device *);
2072 extern int sb_is_blkdev_sb(struct super_block *sb); 2072 extern int sb_is_blkdev_sb(struct super_block *sb);
2073 #else 2073 #else
2074 static inline void bd_forget(struct inode *inode) {} 2074 static inline void bd_forget(struct inode *inode) {}
2075 static inline int sync_blockdev(struct block_device *bdev) { return 0; } 2075 static inline int sync_blockdev(struct block_device *bdev) { return 0; }
2076 static inline void kill_bdev(struct block_device *bdev) {} 2076 static inline void kill_bdev(struct block_device *bdev) {}
2077 static inline void invalidate_bdev(struct block_device *bdev) {} 2077 static inline void invalidate_bdev(struct block_device *bdev) {}
2078 2078
2079 static inline struct super_block *freeze_bdev(struct block_device *sb) 2079 static inline struct super_block *freeze_bdev(struct block_device *sb)
2080 { 2080 {
2081 return NULL; 2081 return NULL;
2082 } 2082 }
2083 2083
2084 static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) 2084 static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb)
2085 { 2085 {
2086 return 0; 2086 return 0;
2087 } 2087 }
2088 2088
2089 static inline void iterate_bdevs(void (*f)(struct block_device *, void *), void *arg) 2089 static inline void iterate_bdevs(void (*f)(struct block_device *, void *), void *arg)
2090 { 2090 {
2091 } 2091 }
2092 2092
2093 static inline int sb_is_blkdev_sb(struct super_block *sb) 2093 static inline int sb_is_blkdev_sb(struct super_block *sb)
2094 { 2094 {
2095 return 0; 2095 return 0;
2096 } 2096 }
2097 #endif 2097 #endif
2098 extern int sync_filesystem(struct super_block *); 2098 extern int sync_filesystem(struct super_block *);
2099 extern const struct file_operations def_blk_fops; 2099 extern const struct file_operations def_blk_fops;
2100 extern const struct file_operations def_chr_fops; 2100 extern const struct file_operations def_chr_fops;
2101 extern const struct file_operations bad_sock_fops; 2101 extern const struct file_operations bad_sock_fops;
2102 #ifdef CONFIG_BLOCK 2102 #ifdef CONFIG_BLOCK
2103 extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); 2103 extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long);
2104 extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); 2104 extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
2105 extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); 2105 extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
2106 extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder); 2106 extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder);
2107 extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, 2107 extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
2108 void *holder); 2108 void *holder);
2109 extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, 2109 extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode,
2110 void *holder); 2110 void *holder);
2111 extern void blkdev_put(struct block_device *bdev, fmode_t mode); 2111 extern void blkdev_put(struct block_device *bdev, fmode_t mode);
2112 #ifdef CONFIG_SYSFS 2112 #ifdef CONFIG_SYSFS
2113 extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); 2113 extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
2114 extern void bd_unlink_disk_holder(struct block_device *bdev, 2114 extern void bd_unlink_disk_holder(struct block_device *bdev,
2115 struct gendisk *disk); 2115 struct gendisk *disk);
2116 #else 2116 #else
2117 static inline int bd_link_disk_holder(struct block_device *bdev, 2117 static inline int bd_link_disk_holder(struct block_device *bdev,
2118 struct gendisk *disk) 2118 struct gendisk *disk)
2119 { 2119 {
2120 return 0; 2120 return 0;
2121 } 2121 }
2122 static inline void bd_unlink_disk_holder(struct block_device *bdev, 2122 static inline void bd_unlink_disk_holder(struct block_device *bdev,
2123 struct gendisk *disk) 2123 struct gendisk *disk)
2124 { 2124 {
2125 } 2125 }
2126 #endif 2126 #endif
2127 #endif 2127 #endif
2128 2128
2129 /* fs/char_dev.c */ 2129 /* fs/char_dev.c */
2130 #define CHRDEV_MAJOR_HASH_SIZE 255 2130 #define CHRDEV_MAJOR_HASH_SIZE 255
2131 extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *); 2131 extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *);
2132 extern int register_chrdev_region(dev_t, unsigned, const char *); 2132 extern int register_chrdev_region(dev_t, unsigned, const char *);
2133 extern int __register_chrdev(unsigned int major, unsigned int baseminor, 2133 extern int __register_chrdev(unsigned int major, unsigned int baseminor,
2134 unsigned int count, const char *name, 2134 unsigned int count, const char *name,
2135 const struct file_operations *fops); 2135 const struct file_operations *fops);
2136 extern void __unregister_chrdev(unsigned int major, unsigned int baseminor, 2136 extern void __unregister_chrdev(unsigned int major, unsigned int baseminor,
2137 unsigned int count, const char *name); 2137 unsigned int count, const char *name);
2138 extern void unregister_chrdev_region(dev_t, unsigned); 2138 extern void unregister_chrdev_region(dev_t, unsigned);
2139 extern void chrdev_show(struct seq_file *,off_t); 2139 extern void chrdev_show(struct seq_file *,off_t);
2140 2140
2141 static inline int register_chrdev(unsigned int major, const char *name, 2141 static inline int register_chrdev(unsigned int major, const char *name,
2142 const struct file_operations *fops) 2142 const struct file_operations *fops)
2143 { 2143 {
2144 return __register_chrdev(major, 0, 256, name, fops); 2144 return __register_chrdev(major, 0, 256, name, fops);
2145 } 2145 }
2146 2146
2147 static inline void unregister_chrdev(unsigned int major, const char *name) 2147 static inline void unregister_chrdev(unsigned int major, const char *name)
2148 { 2148 {
2149 __unregister_chrdev(major, 0, 256, name); 2149 __unregister_chrdev(major, 0, 256, name);
2150 } 2150 }
2151 2151
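As a hedged illustration of the wrappers above: register_chrdev() claims minors 0-255 of one major for a single set of file_operations, and passing major 0 requests a dynamically allocated major. All names below (demo_fops, demo_init, "demo") are illustrative, not part of the header.

	/* Hypothetical minimal char-device registration using the wrappers above. */
	static const struct file_operations demo_fops = {
		.owner	= THIS_MODULE,
		.open	= simple_open,
		.llseek	= noop_llseek,
	};

	static int demo_major;

	static int __init demo_init(void)
	{
		demo_major = register_chrdev(0, "demo", &demo_fops);	/* 0 => dynamic major */
		return demo_major < 0 ? demo_major : 0;
	}

	static void __exit demo_exit(void)
	{
		unregister_chrdev(demo_major, "demo");
	}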
2152 /* fs/block_dev.c */ 2152 /* fs/block_dev.c */
2153 #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ 2153 #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */
2154 #define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */ 2154 #define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */
2155 2155
2156 #ifdef CONFIG_BLOCK 2156 #ifdef CONFIG_BLOCK
2157 #define BLKDEV_MAJOR_HASH_SIZE 255 2157 #define BLKDEV_MAJOR_HASH_SIZE 255
2158 extern const char *__bdevname(dev_t, char *buffer); 2158 extern const char *__bdevname(dev_t, char *buffer);
2159 extern const char *bdevname(struct block_device *bdev, char *buffer); 2159 extern const char *bdevname(struct block_device *bdev, char *buffer);
2160 extern struct block_device *lookup_bdev(const char *); 2160 extern struct block_device *lookup_bdev(const char *);
2161 extern void blkdev_show(struct seq_file *,off_t); 2161 extern void blkdev_show(struct seq_file *,off_t);
2162 2162
2163 #else 2163 #else
2164 #define BLKDEV_MAJOR_HASH_SIZE 0 2164 #define BLKDEV_MAJOR_HASH_SIZE 0
2165 #endif 2165 #endif
2166 2166
2167 extern void init_special_inode(struct inode *, umode_t, dev_t); 2167 extern void init_special_inode(struct inode *, umode_t, dev_t);
2168 2168
2169 /* Invalid inode operations -- fs/bad_inode.c */ 2169 /* Invalid inode operations -- fs/bad_inode.c */
2170 extern void make_bad_inode(struct inode *); 2170 extern void make_bad_inode(struct inode *);
2171 extern int is_bad_inode(struct inode *); 2171 extern int is_bad_inode(struct inode *);
2172 2172
2173 #ifdef CONFIG_BLOCK 2173 #ifdef CONFIG_BLOCK
2174 /* 2174 /*
2175 * return READ, READA, or WRITE 2175 * return READ, READA, or WRITE
2176 */ 2176 */
2177 #define bio_rw(bio) ((bio)->bi_rw & (RW_MASK | RWA_MASK)) 2177 #define bio_rw(bio) ((bio)->bi_rw & (RW_MASK | RWA_MASK))
2178 2178
2179 /* 2179 /*
2180 * return data direction, READ or WRITE 2180 * return data direction, READ or WRITE
2181 */ 2181 */
2182 #define bio_data_dir(bio) ((bio)->bi_rw & 1) 2182 #define bio_data_dir(bio) ((bio)->bi_rw & 1)
2183 2183
2184 extern void check_disk_size_change(struct gendisk *disk, 2184 extern void check_disk_size_change(struct gendisk *disk,
2185 struct block_device *bdev); 2185 struct block_device *bdev);
2186 extern int revalidate_disk(struct gendisk *); 2186 extern int revalidate_disk(struct gendisk *);
2187 extern int check_disk_change(struct block_device *); 2187 extern int check_disk_change(struct block_device *);
2188 extern int __invalidate_device(struct block_device *, bool); 2188 extern int __invalidate_device(struct block_device *, bool);
2189 extern int invalidate_partition(struct gendisk *, int); 2189 extern int invalidate_partition(struct gendisk *, int);
2190 #endif 2190 #endif
2191 unsigned long invalidate_mapping_pages(struct address_space *mapping, 2191 unsigned long invalidate_mapping_pages(struct address_space *mapping,
2192 pgoff_t start, pgoff_t end); 2192 pgoff_t start, pgoff_t end);
2193 2193
2194 static inline void invalidate_remote_inode(struct inode *inode) 2194 static inline void invalidate_remote_inode(struct inode *inode)
2195 { 2195 {
2196 if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 2196 if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
2197 S_ISLNK(inode->i_mode)) 2197 S_ISLNK(inode->i_mode))
2198 invalidate_mapping_pages(inode->i_mapping, 0, -1); 2198 invalidate_mapping_pages(inode->i_mapping, 0, -1);
2199 } 2199 }
2200 extern int invalidate_inode_pages2(struct address_space *mapping); 2200 extern int invalidate_inode_pages2(struct address_space *mapping);
2201 extern int invalidate_inode_pages2_range(struct address_space *mapping, 2201 extern int invalidate_inode_pages2_range(struct address_space *mapping,
2202 pgoff_t start, pgoff_t end); 2202 pgoff_t start, pgoff_t end);
2203 extern int write_inode_now(struct inode *, int); 2203 extern int write_inode_now(struct inode *, int);
2204 extern int filemap_fdatawrite(struct address_space *); 2204 extern int filemap_fdatawrite(struct address_space *);
2205 extern int filemap_flush(struct address_space *); 2205 extern int filemap_flush(struct address_space *);
2206 extern int filemap_fdatawait(struct address_space *); 2206 extern int filemap_fdatawait(struct address_space *);
2207 extern int filemap_fdatawait_range(struct address_space *, loff_t lstart, 2207 extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
2208 loff_t lend); 2208 loff_t lend);
2209 extern int filemap_write_and_wait(struct address_space *mapping); 2209 extern int filemap_write_and_wait(struct address_space *mapping);
2210 extern int filemap_write_and_wait_range(struct address_space *mapping, 2210 extern int filemap_write_and_wait_range(struct address_space *mapping,
2211 loff_t lstart, loff_t lend); 2211 loff_t lstart, loff_t lend);
2212 extern int __filemap_fdatawrite_range(struct address_space *mapping, 2212 extern int __filemap_fdatawrite_range(struct address_space *mapping,
2213 loff_t start, loff_t end, int sync_mode); 2213 loff_t start, loff_t end, int sync_mode);
2214 extern int filemap_fdatawrite_range(struct address_space *mapping, 2214 extern int filemap_fdatawrite_range(struct address_space *mapping,
2215 loff_t start, loff_t end); 2215 loff_t start, loff_t end);
2216 2216
2217 extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, 2217 extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end,
2218 int datasync); 2218 int datasync);
2219 extern int vfs_fsync(struct file *file, int datasync); 2219 extern int vfs_fsync(struct file *file, int datasync);
2220 static inline int generic_write_sync(struct file *file, loff_t pos, loff_t count) 2220 static inline int generic_write_sync(struct file *file, loff_t pos, loff_t count)
2221 { 2221 {
2222 if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host)) 2222 if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host))
2223 return 0; 2223 return 0;
2224 return vfs_fsync_range(file, pos, pos + count - 1, 2224 return vfs_fsync_range(file, pos, pos + count - 1,
2225 (file->f_flags & __O_SYNC) ? 0 : 1); 2225 (file->f_flags & __O_SYNC) ? 0 : 1);
2226 } 2226 }
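A sketch of how a write path typically uses generic_write_sync() once data has been copied: only files opened with O_SYNC/O_DSYNC (or living on a sync-mounted inode) actually reach vfs_fsync_range(); everyone else returns immediately. The variables written, file and pos are assumed to come from the surrounding ->aio_write implementation.

	/* Hypothetical tail of a write implementation. */
	if (written > 0) {
		ssize_t err = generic_write_sync(file, pos, written);
		if (err < 0)
			written = err;		/* report the sync failure instead */
	}
	return written;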
2227 extern void emergency_sync(void); 2227 extern void emergency_sync(void);
2228 extern void emergency_remount(void); 2228 extern void emergency_remount(void);
2229 #ifdef CONFIG_BLOCK 2229 #ifdef CONFIG_BLOCK
2230 extern sector_t bmap(struct inode *, sector_t); 2230 extern sector_t bmap(struct inode *, sector_t);
2231 #endif 2231 #endif
2232 extern int notify_change(struct dentry *, struct iattr *, struct inode **); 2232 extern int notify_change(struct dentry *, struct iattr *, struct inode **);
2233 extern int inode_permission(struct inode *, int); 2233 extern int inode_permission(struct inode *, int);
2234 extern int generic_permission(struct inode *, int); 2234 extern int generic_permission(struct inode *, int);
2235 2235
2236 static inline bool execute_ok(struct inode *inode) 2236 static inline bool execute_ok(struct inode *inode)
2237 { 2237 {
2238 return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode); 2238 return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode);
2239 } 2239 }
2240 2240
2241 static inline void file_start_write(struct file *file) 2241 static inline void file_start_write(struct file *file)
2242 { 2242 {
2243 if (!S_ISREG(file_inode(file)->i_mode)) 2243 if (!S_ISREG(file_inode(file)->i_mode))
2244 return; 2244 return;
2245 __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true); 2245 __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true);
2246 } 2246 }
2247 2247
2248 static inline bool file_start_write_trylock(struct file *file) 2248 static inline bool file_start_write_trylock(struct file *file)
2249 { 2249 {
2250 if (!S_ISREG(file_inode(file)->i_mode)) 2250 if (!S_ISREG(file_inode(file)->i_mode))
2251 return true; 2251 return true;
2252 return __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, false); 2252 return __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, false);
2253 } 2253 }
2254 2254
2255 static inline void file_end_write(struct file *file) 2255 static inline void file_end_write(struct file *file)
2256 { 2256 {
2257 if (!S_ISREG(file_inode(file)->i_mode)) 2257 if (!S_ISREG(file_inode(file)->i_mode))
2258 return; 2258 return;
2259 __sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE); 2259 __sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE);
2260 } 2260 }
2261 2261
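The pair above brackets writes with superblock freeze protection (SB_FREEZE_WRITE) for regular files. A minimal, hypothetical calling pattern, with do_demo_write() standing in for the actual write-out:

	/* Hypothetical caller: hold freeze protection for the duration of a write. */
	ssize_t ret;

	file_start_write(file);
	ret = do_demo_write(file, buf, count, &pos);	/* illustrative helper */
	file_end_write(file);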
2262 /* 2262 /*
2263 * get_write_access() gets write permission for a file. 2263 * get_write_access() gets write permission for a file.
2264 * put_write_access() releases this write permission. 2264 * put_write_access() releases this write permission.
2265 * This is used for regular files. 2265 * This is used for regular files.
2266 * We cannot support write (and maybe mmap read-write shared) accesses and 2266 * We cannot support write (and maybe mmap read-write shared) accesses and
2267 * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode 2267 * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode
2268 * can have the following values: 2268 * can have the following values:
2269 * 0: no writers, no VM_DENYWRITE mappings 2269 * 0: no writers, no VM_DENYWRITE mappings
2270 * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist 2270 * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist
2271 * > 0: (i_writecount) users are writing to the file. 2271 * > 0: (i_writecount) users are writing to the file.
2272 * 2272 *
2273 * Normally we operate on that counter with atomic_{inc,dec} and it's safe 2273 * Normally we operate on that counter with atomic_{inc,dec} and it's safe
2274 * except for the cases where we don't hold i_writecount yet. Then we need to 2274 * except for the cases where we don't hold i_writecount yet. Then we need to
2275 * use {get,deny}_write_access() - these functions check the sign and refuse 2275 * use {get,deny}_write_access() - these functions check the sign and refuse
2276 * to do the change if sign is wrong. 2276 * to do the change if sign is wrong.
2277 */ 2277 */
2278 static inline int get_write_access(struct inode *inode) 2278 static inline int get_write_access(struct inode *inode)
2279 { 2279 {
2280 return atomic_inc_unless_negative(&inode->i_writecount) ? 0 : -ETXTBSY; 2280 return atomic_inc_unless_negative(&inode->i_writecount) ? 0 : -ETXTBSY;
2281 } 2281 }
2282 static inline int deny_write_access(struct file *file) 2282 static inline int deny_write_access(struct file *file)
2283 { 2283 {
2284 struct inode *inode = file_inode(file); 2284 struct inode *inode = file_inode(file);
2285 return atomic_dec_unless_positive(&inode->i_writecount) ? 0 : -ETXTBSY; 2285 return atomic_dec_unless_positive(&inode->i_writecount) ? 0 : -ETXTBSY;
2286 } 2286 }
2287 static inline void put_write_access(struct inode * inode) 2287 static inline void put_write_access(struct inode * inode)
2288 { 2288 {
2289 atomic_dec(&inode->i_writecount); 2289 atomic_dec(&inode->i_writecount);
2290 } 2290 }
2291 static inline void allow_write_access(struct file *file) 2291 static inline void allow_write_access(struct file *file)
2292 { 2292 {
2293 if (file) 2293 if (file)
2294 atomic_inc(&file_inode(file)->i_writecount); 2294 atomic_inc(&file_inode(file)->i_writecount);
2295 } 2295 }
2296 static inline bool inode_is_open_for_write(const struct inode *inode) 2296 static inline bool inode_is_open_for_write(const struct inode *inode)
2297 { 2297 {
2298 return atomic_read(&inode->i_writecount) > 0; 2298 return atomic_read(&inode->i_writecount) > 0;
2299 } 2299 }
2300 2300
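To make the sign convention above concrete, a hedged sketch: a writer pushes i_writecount positive with get_write_access() and drops it with put_write_access(), while a deny-writer (exec-style) would instead push it negative with deny_write_access(); each side fails with -ETXTBSY if the other already holds the count. The function name below is illustrative.

	/* Hypothetical writer side: bump i_writecount unless a deny-writer holds it. */
	static int demo_write_to_inode(struct inode *inode)
	{
		int err = get_write_access(inode);

		if (err)
			return err;		/* -ETXTBSY: VM_DENYWRITE mapping exists */
		/* ... modify the file ... */
		put_write_access(inode);
		return 0;
	}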
2301 #ifdef CONFIG_IMA 2301 #ifdef CONFIG_IMA
2302 static inline void i_readcount_dec(struct inode *inode) 2302 static inline void i_readcount_dec(struct inode *inode)
2303 { 2303 {
2304 BUG_ON(!atomic_read(&inode->i_readcount)); 2304 BUG_ON(!atomic_read(&inode->i_readcount));
2305 atomic_dec(&inode->i_readcount); 2305 atomic_dec(&inode->i_readcount);
2306 } 2306 }
2307 static inline void i_readcount_inc(struct inode *inode) 2307 static inline void i_readcount_inc(struct inode *inode)
2308 { 2308 {
2309 atomic_inc(&inode->i_readcount); 2309 atomic_inc(&inode->i_readcount);
2310 } 2310 }
2311 #else 2311 #else
2312 static inline void i_readcount_dec(struct inode *inode) 2312 static inline void i_readcount_dec(struct inode *inode)
2313 { 2313 {
2314 return; 2314 return;
2315 } 2315 }
2316 static inline void i_readcount_inc(struct inode *inode) 2316 static inline void i_readcount_inc(struct inode *inode)
2317 { 2317 {
2318 return; 2318 return;
2319 } 2319 }
2320 #endif 2320 #endif
2321 extern int do_pipe_flags(int *, int); 2321 extern int do_pipe_flags(int *, int);
2322 2322
2323 extern int kernel_read(struct file *, loff_t, char *, unsigned long); 2323 extern int kernel_read(struct file *, loff_t, char *, unsigned long);
2324 extern ssize_t kernel_write(struct file *, const char *, size_t, loff_t); 2324 extern ssize_t kernel_write(struct file *, const char *, size_t, loff_t);
2325 extern struct file * open_exec(const char *); 2325 extern struct file * open_exec(const char *);
2326 2326
2327 /* fs/dcache.c -- generic fs support functions */ 2327 /* fs/dcache.c -- generic fs support functions */
2328 extern int is_subdir(struct dentry *, struct dentry *); 2328 extern int is_subdir(struct dentry *, struct dentry *);
2329 extern int path_is_under(struct path *, struct path *); 2329 extern int path_is_under(struct path *, struct path *);
2330 2330
2331 #include <linux/err.h> 2331 #include <linux/err.h>
2332 2332
2333 /* needed for stackable file system support */ 2333 /* needed for stackable file system support */
2334 extern loff_t default_llseek(struct file *file, loff_t offset, int whence); 2334 extern loff_t default_llseek(struct file *file, loff_t offset, int whence);
2335 2335
2336 extern loff_t vfs_llseek(struct file *file, loff_t offset, int whence); 2336 extern loff_t vfs_llseek(struct file *file, loff_t offset, int whence);
2337 2337
2338 extern int inode_init_always(struct super_block *, struct inode *); 2338 extern int inode_init_always(struct super_block *, struct inode *);
2339 extern void inode_init_once(struct inode *); 2339 extern void inode_init_once(struct inode *);
2340 extern void address_space_init_once(struct address_space *mapping); 2340 extern void address_space_init_once(struct address_space *mapping);
2341 extern struct inode * igrab(struct inode *); 2341 extern struct inode * igrab(struct inode *);
2342 extern ino_t iunique(struct super_block *, ino_t); 2342 extern ino_t iunique(struct super_block *, ino_t);
2343 extern int inode_needs_sync(struct inode *inode); 2343 extern int inode_needs_sync(struct inode *inode);
2344 extern int generic_delete_inode(struct inode *inode); 2344 extern int generic_delete_inode(struct inode *inode);
2345 static inline int generic_drop_inode(struct inode *inode) 2345 static inline int generic_drop_inode(struct inode *inode)
2346 { 2346 {
2347 return !inode->i_nlink || inode_unhashed(inode); 2347 return !inode->i_nlink || inode_unhashed(inode);
2348 } 2348 }
2349 2349
2350 extern struct inode *ilookup5_nowait(struct super_block *sb, 2350 extern struct inode *ilookup5_nowait(struct super_block *sb,
2351 unsigned long hashval, int (*test)(struct inode *, void *), 2351 unsigned long hashval, int (*test)(struct inode *, void *),
2352 void *data); 2352 void *data);
2353 extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval, 2353 extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
2354 int (*test)(struct inode *, void *), void *data); 2354 int (*test)(struct inode *, void *), void *data);
2355 extern struct inode *ilookup(struct super_block *sb, unsigned long ino); 2355 extern struct inode *ilookup(struct super_block *sb, unsigned long ino);
2356 2356
2357 extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *); 2357 extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *);
2358 extern struct inode * iget_locked(struct super_block *, unsigned long); 2358 extern struct inode * iget_locked(struct super_block *, unsigned long);
2359 extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); 2359 extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *);
2360 extern int insert_inode_locked(struct inode *); 2360 extern int insert_inode_locked(struct inode *);
2361 #ifdef CONFIG_DEBUG_LOCK_ALLOC 2361 #ifdef CONFIG_DEBUG_LOCK_ALLOC
2362 extern void lockdep_annotate_inode_mutex_key(struct inode *inode); 2362 extern void lockdep_annotate_inode_mutex_key(struct inode *inode);
2363 #else 2363 #else
2364 static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { }; 2364 static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { };
2365 #endif 2365 #endif
2366 extern void unlock_new_inode(struct inode *); 2366 extern void unlock_new_inode(struct inode *);
2367 extern unsigned int get_next_ino(void); 2367 extern unsigned int get_next_ino(void);
2368 2368
2369 extern void __iget(struct inode * inode); 2369 extern void __iget(struct inode * inode);
2370 extern void iget_failed(struct inode *); 2370 extern void iget_failed(struct inode *);
2371 extern void clear_inode(struct inode *); 2371 extern void clear_inode(struct inode *);
2372 extern void __destroy_inode(struct inode *); 2372 extern void __destroy_inode(struct inode *);
2373 extern struct inode *new_inode_pseudo(struct super_block *sb); 2373 extern struct inode *new_inode_pseudo(struct super_block *sb);
2374 extern struct inode *new_inode(struct super_block *sb); 2374 extern struct inode *new_inode(struct super_block *sb);
2375 extern void free_inode_nonrcu(struct inode *inode); 2375 extern void free_inode_nonrcu(struct inode *inode);
2376 extern int should_remove_suid(struct dentry *); 2376 extern int should_remove_suid(struct dentry *);
2377 extern int file_remove_suid(struct file *); 2377 extern int file_remove_suid(struct file *);
2378 2378
2379 extern void __insert_inode_hash(struct inode *, unsigned long hashval); 2379 extern void __insert_inode_hash(struct inode *, unsigned long hashval);
2380 static inline void insert_inode_hash(struct inode *inode) 2380 static inline void insert_inode_hash(struct inode *inode)
2381 { 2381 {
2382 __insert_inode_hash(inode, inode->i_ino); 2382 __insert_inode_hash(inode, inode->i_ino);
2383 } 2383 }
2384 2384
2385 extern void __remove_inode_hash(struct inode *); 2385 extern void __remove_inode_hash(struct inode *);
2386 static inline void remove_inode_hash(struct inode *inode) 2386 static inline void remove_inode_hash(struct inode *inode)
2387 { 2387 {
2388 if (!inode_unhashed(inode)) 2388 if (!inode_unhashed(inode))
2389 __remove_inode_hash(inode); 2389 __remove_inode_hash(inode);
2390 } 2390 }
2391 2391
2392 extern void inode_sb_list_add(struct inode *inode); 2392 extern void inode_sb_list_add(struct inode *inode);
2393 2393
2394 #ifdef CONFIG_BLOCK 2394 #ifdef CONFIG_BLOCK
2395 extern void submit_bio(int, struct bio *); 2395 extern void submit_bio(int, struct bio *);
2396 extern int bdev_read_only(struct block_device *); 2396 extern int bdev_read_only(struct block_device *);
2397 #endif 2397 #endif
2398 extern int set_blocksize(struct block_device *, int); 2398 extern int set_blocksize(struct block_device *, int);
2399 extern int sb_set_blocksize(struct super_block *, int); 2399 extern int sb_set_blocksize(struct super_block *, int);
2400 extern int sb_min_blocksize(struct super_block *, int); 2400 extern int sb_min_blocksize(struct super_block *, int);
2401 2401
2402 extern int generic_file_mmap(struct file *, struct vm_area_struct *); 2402 extern int generic_file_mmap(struct file *, struct vm_area_struct *);
2403 extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); 2403 extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
2404 extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr, 2404 extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr,
2405 unsigned long size, pgoff_t pgoff); 2405 unsigned long size, pgoff_t pgoff);
2406 int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); 2406 int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
2407 extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); 2407 extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
2408 extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long); 2408 extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long);
2409 extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); 2409 extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
2410 extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, 2410 extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *,
2411 unsigned long *, loff_t, size_t, size_t); 2411 unsigned long *, loff_t, size_t, size_t);
2412 extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t); 2412 extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
2413 extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); 2413 extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
2414 extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); 2414 extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
2415 extern int generic_segment_checks(const struct iovec *iov, 2415 extern int generic_segment_checks(const struct iovec *iov,
2416 unsigned long *nr_segs, size_t *count, int access_flags); 2416 unsigned long *nr_segs, size_t *count, int access_flags);
2417 2417
2418 /* fs/block_dev.c */ 2418 /* fs/block_dev.c */
2419 extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, 2419 extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
2420 unsigned long nr_segs, loff_t pos); 2420 unsigned long nr_segs, loff_t pos);
2421 extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end, 2421 extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
2422 int datasync); 2422 int datasync);
2423 extern void block_sync_page(struct page *page); 2423 extern void block_sync_page(struct page *page);
2424 2424
2425 /* fs/splice.c */ 2425 /* fs/splice.c */
2426 extern ssize_t generic_file_splice_read(struct file *, loff_t *, 2426 extern ssize_t generic_file_splice_read(struct file *, loff_t *,
2427 struct pipe_inode_info *, size_t, unsigned int); 2427 struct pipe_inode_info *, size_t, unsigned int);
2428 extern ssize_t default_file_splice_read(struct file *, loff_t *, 2428 extern ssize_t default_file_splice_read(struct file *, loff_t *,
2429 struct pipe_inode_info *, size_t, unsigned int); 2429 struct pipe_inode_info *, size_t, unsigned int);
2430 extern ssize_t generic_file_splice_write(struct pipe_inode_info *, 2430 extern ssize_t generic_file_splice_write(struct pipe_inode_info *,
2431 struct file *, loff_t *, size_t, unsigned int); 2431 struct file *, loff_t *, size_t, unsigned int);
2432 extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, 2432 extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
2433 struct file *out, loff_t *, size_t len, unsigned int flags); 2433 struct file *out, loff_t *, size_t len, unsigned int flags);
2434 2434
2435 extern void 2435 extern void
2436 file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); 2436 file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
2437 extern loff_t noop_llseek(struct file *file, loff_t offset, int whence); 2437 extern loff_t noop_llseek(struct file *file, loff_t offset, int whence);
2438 extern loff_t no_llseek(struct file *file, loff_t offset, int whence); 2438 extern loff_t no_llseek(struct file *file, loff_t offset, int whence);
2439 extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize); 2439 extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize);
2440 extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence); 2440 extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence);
2441 extern loff_t generic_file_llseek_size(struct file *file, loff_t offset, 2441 extern loff_t generic_file_llseek_size(struct file *file, loff_t offset,
2442 int whence, loff_t maxsize, loff_t eof); 2442 int whence, loff_t maxsize, loff_t eof);
2443 extern loff_t fixed_size_llseek(struct file *file, loff_t offset, 2443 extern loff_t fixed_size_llseek(struct file *file, loff_t offset,
2444 int whence, loff_t size); 2444 int whence, loff_t size);
2445 extern int generic_file_open(struct inode * inode, struct file * filp); 2445 extern int generic_file_open(struct inode * inode, struct file * filp);
2446 extern int nonseekable_open(struct inode * inode, struct file * filp); 2446 extern int nonseekable_open(struct inode * inode, struct file * filp);
2447 2447
2448 #ifdef CONFIG_FS_XIP 2448 #ifdef CONFIG_FS_XIP
2449 extern ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len, 2449 extern ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len,
2450 loff_t *ppos); 2450 loff_t *ppos);
2451 extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma); 2451 extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma);
2452 extern ssize_t xip_file_write(struct file *filp, const char __user *buf, 2452 extern ssize_t xip_file_write(struct file *filp, const char __user *buf,
2453 size_t len, loff_t *ppos); 2453 size_t len, loff_t *ppos);
2454 extern int xip_truncate_page(struct address_space *mapping, loff_t from); 2454 extern int xip_truncate_page(struct address_space *mapping, loff_t from);
2455 #else 2455 #else
2456 static inline int xip_truncate_page(struct address_space *mapping, loff_t from) 2456 static inline int xip_truncate_page(struct address_space *mapping, loff_t from)
2457 { 2457 {
2458 return 0; 2458 return 0;
2459 } 2459 }
2460 #endif 2460 #endif
2461 2461
2462 #ifdef CONFIG_BLOCK 2462 #ifdef CONFIG_BLOCK
2463 typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode, 2463 typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode,
2464 loff_t file_offset); 2464 loff_t file_offset);
2465 2465
2466 enum { 2466 enum {
2467 /* need locking between buffered and direct access */ 2467 /* need locking between buffered and direct access */
2468 DIO_LOCKING = 0x01, 2468 DIO_LOCKING = 0x01,
2469 2469
2470 /* filesystem does not support filling holes */ 2470 /* filesystem does not support filling holes */
2471 DIO_SKIP_HOLES = 0x02, 2471 DIO_SKIP_HOLES = 0x02,
2472 2472
2473 /* filesystem can handle aio writes beyond i_size */ 2473 /* filesystem can handle aio writes beyond i_size */
2474 DIO_ASYNC_EXTEND = 0x04, 2474 DIO_ASYNC_EXTEND = 0x04,
2475 }; 2475 };
2476 2476
2477 void dio_end_io(struct bio *bio, int error); 2477 void dio_end_io(struct bio *bio, int error);
2478 2478
2479 ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, 2479 ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
2480 struct block_device *bdev, const struct iovec *iov, loff_t offset, 2480 struct block_device *bdev, const struct iovec *iov, loff_t offset,
2481 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, 2481 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
2482 dio_submit_t submit_io, int flags); 2482 dio_submit_t submit_io, int flags);
2483 2483
2484 static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, 2484 static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb,
2485 struct inode *inode, const struct iovec *iov, loff_t offset, 2485 struct inode *inode, const struct iovec *iov, loff_t offset,
2486 unsigned long nr_segs, get_block_t get_block) 2486 unsigned long nr_segs, get_block_t get_block)
2487 { 2487 {
2488 return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 2488 return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
2489 offset, nr_segs, get_block, NULL, NULL, 2489 offset, nr_segs, get_block, NULL, NULL,
2490 DIO_LOCKING | DIO_SKIP_HOLES); 2490 DIO_LOCKING | DIO_SKIP_HOLES);
2491 } 2491 }
2492 #endif 2492 #endif
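A sketch of how a block-based filesystem's ->direct_IO method would funnel into the blockdev_direct_IO() wrapper above, using the v3.15-era iovec-based signature; demo_get_block is a hypothetical get_block_t supplied by the filesystem, not something defined here.

	/* Hypothetical address_space_operations ->direct_IO implementation. */
	static ssize_t demo_direct_IO(int rw, struct kiocb *iocb,
				      const struct iovec *iov, loff_t offset,
				      unsigned long nr_segs)
	{
		struct inode *inode = file_inode(iocb->ki_filp);

		return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
					  demo_get_block);
	}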
2493 2493
2494 void inode_dio_wait(struct inode *inode); 2494 void inode_dio_wait(struct inode *inode);
2495 void inode_dio_done(struct inode *inode); 2495 void inode_dio_done(struct inode *inode);
2496 2496
2497 extern void inode_set_flags(struct inode *inode, unsigned int flags, 2497 extern void inode_set_flags(struct inode *inode, unsigned int flags,
2498 unsigned int mask); 2498 unsigned int mask);
2499 2499
2500 extern const struct file_operations generic_ro_fops; 2500 extern const struct file_operations generic_ro_fops;
2501 2501
2502 #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) 2502 #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
2503 2503
2504 extern int readlink_copy(char __user *, int, const char *); 2504 extern int readlink_copy(char __user *, int, const char *);
2505 extern int page_readlink(struct dentry *, char __user *, int); 2505 extern int page_readlink(struct dentry *, char __user *, int);
2506 extern void *page_follow_link_light(struct dentry *, struct nameidata *); 2506 extern void *page_follow_link_light(struct dentry *, struct nameidata *);
2507 extern void page_put_link(struct dentry *, struct nameidata *, void *); 2507 extern void page_put_link(struct dentry *, struct nameidata *, void *);
2508 extern int __page_symlink(struct inode *inode, const char *symname, int len, 2508 extern int __page_symlink(struct inode *inode, const char *symname, int len,
2509 int nofs); 2509 int nofs);
2510 extern int page_symlink(struct inode *inode, const char *symname, int len); 2510 extern int page_symlink(struct inode *inode, const char *symname, int len);
2511 extern const struct inode_operations page_symlink_inode_operations; 2511 extern const struct inode_operations page_symlink_inode_operations;
2512 extern void kfree_put_link(struct dentry *, struct nameidata *, void *); 2512 extern void kfree_put_link(struct dentry *, struct nameidata *, void *);
2513 extern int generic_readlink(struct dentry *, char __user *, int); 2513 extern int generic_readlink(struct dentry *, char __user *, int);
2514 extern void generic_fillattr(struct inode *, struct kstat *); 2514 extern void generic_fillattr(struct inode *, struct kstat *);
2515 int vfs_getattr_nosec(struct path *path, struct kstat *stat); 2515 int vfs_getattr_nosec(struct path *path, struct kstat *stat);
2516 extern int vfs_getattr(struct path *, struct kstat *); 2516 extern int vfs_getattr(struct path *, struct kstat *);
2517 void __inode_add_bytes(struct inode *inode, loff_t bytes); 2517 void __inode_add_bytes(struct inode *inode, loff_t bytes);
2518 void inode_add_bytes(struct inode *inode, loff_t bytes); 2518 void inode_add_bytes(struct inode *inode, loff_t bytes);
2519 void __inode_sub_bytes(struct inode *inode, loff_t bytes); 2519 void __inode_sub_bytes(struct inode *inode, loff_t bytes);
2520 void inode_sub_bytes(struct inode *inode, loff_t bytes); 2520 void inode_sub_bytes(struct inode *inode, loff_t bytes);
2521 loff_t inode_get_bytes(struct inode *inode); 2521 loff_t inode_get_bytes(struct inode *inode);
2522 void inode_set_bytes(struct inode *inode, loff_t bytes); 2522 void inode_set_bytes(struct inode *inode, loff_t bytes);
2523 2523
2524 extern int vfs_readdir(struct file *, filldir_t, void *); 2524 extern int vfs_readdir(struct file *, filldir_t, void *);
2525 extern int iterate_dir(struct file *, struct dir_context *); 2525 extern int iterate_dir(struct file *, struct dir_context *);
2526 2526
2527 extern int vfs_stat(const char __user *, struct kstat *); 2527 extern int vfs_stat(const char __user *, struct kstat *);
2528 extern int vfs_lstat(const char __user *, struct kstat *); 2528 extern int vfs_lstat(const char __user *, struct kstat *);
2529 extern int vfs_fstat(unsigned int, struct kstat *); 2529 extern int vfs_fstat(unsigned int, struct kstat *);
2530 extern int vfs_fstatat(int , const char __user *, struct kstat *, int); 2530 extern int vfs_fstatat(int , const char __user *, struct kstat *, int);
2531 2531
2532 extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, 2532 extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
2533 unsigned long arg); 2533 unsigned long arg);
2534 extern int __generic_block_fiemap(struct inode *inode, 2534 extern int __generic_block_fiemap(struct inode *inode,
2535 struct fiemap_extent_info *fieinfo, 2535 struct fiemap_extent_info *fieinfo,
2536 loff_t start, loff_t len, 2536 loff_t start, loff_t len,
2537 get_block_t *get_block); 2537 get_block_t *get_block);
2538 extern int generic_block_fiemap(struct inode *inode, 2538 extern int generic_block_fiemap(struct inode *inode,
2539 struct fiemap_extent_info *fieinfo, u64 start, 2539 struct fiemap_extent_info *fieinfo, u64 start,
2540 u64 len, get_block_t *get_block); 2540 u64 len, get_block_t *get_block);
2541 2541
2542 extern void get_filesystem(struct file_system_type *fs); 2542 extern void get_filesystem(struct file_system_type *fs);
2543 extern void put_filesystem(struct file_system_type *fs); 2543 extern void put_filesystem(struct file_system_type *fs);
2544 extern struct file_system_type *get_fs_type(const char *name); 2544 extern struct file_system_type *get_fs_type(const char *name);
2545 extern struct super_block *get_super(struct block_device *); 2545 extern struct super_block *get_super(struct block_device *);
2546 extern struct super_block *get_super_thawed(struct block_device *); 2546 extern struct super_block *get_super_thawed(struct block_device *);
2547 extern struct super_block *get_active_super(struct block_device *bdev); 2547 extern struct super_block *get_active_super(struct block_device *bdev);
2548 extern void drop_super(struct super_block *sb); 2548 extern void drop_super(struct super_block *sb);
2549 extern void iterate_supers(void (*)(struct super_block *, void *), void *); 2549 extern void iterate_supers(void (*)(struct super_block *, void *), void *);
2550 extern void iterate_supers_type(struct file_system_type *, 2550 extern void iterate_supers_type(struct file_system_type *,
2551 void (*)(struct super_block *, void *), void *); 2551 void (*)(struct super_block *, void *), void *);
2552 2552
2553 extern int dcache_dir_open(struct inode *, struct file *); 2553 extern int dcache_dir_open(struct inode *, struct file *);
2554 extern int dcache_dir_close(struct inode *, struct file *); 2554 extern int dcache_dir_close(struct inode *, struct file *);
2555 extern loff_t dcache_dir_lseek(struct file *, loff_t, int); 2555 extern loff_t dcache_dir_lseek(struct file *, loff_t, int);
2556 extern int dcache_readdir(struct file *, struct dir_context *); 2556 extern int dcache_readdir(struct file *, struct dir_context *);
2557 extern int simple_setattr(struct dentry *, struct iattr *); 2557 extern int simple_setattr(struct dentry *, struct iattr *);
2558 extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *); 2558 extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *);
2559 extern int simple_statfs(struct dentry *, struct kstatfs *); 2559 extern int simple_statfs(struct dentry *, struct kstatfs *);
2560 extern int simple_open(struct inode *inode, struct file *file); 2560 extern int simple_open(struct inode *inode, struct file *file);
2561 extern int simple_link(struct dentry *, struct inode *, struct dentry *); 2561 extern int simple_link(struct dentry *, struct inode *, struct dentry *);
2562 extern int simple_unlink(struct inode *, struct dentry *); 2562 extern int simple_unlink(struct inode *, struct dentry *);
2563 extern int simple_rmdir(struct inode *, struct dentry *); 2563 extern int simple_rmdir(struct inode *, struct dentry *);
2564 extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); 2564 extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
2565 extern int noop_fsync(struct file *, loff_t, loff_t, int); 2565 extern int noop_fsync(struct file *, loff_t, loff_t, int);
2566 extern int simple_empty(struct dentry *); 2566 extern int simple_empty(struct dentry *);
2567 extern int simple_readpage(struct file *file, struct page *page); 2567 extern int simple_readpage(struct file *file, struct page *page);
2568 extern int simple_write_begin(struct file *file, struct address_space *mapping, 2568 extern int simple_write_begin(struct file *file, struct address_space *mapping,
2569 loff_t pos, unsigned len, unsigned flags, 2569 loff_t pos, unsigned len, unsigned flags,
2570 struct page **pagep, void **fsdata); 2570 struct page **pagep, void **fsdata);
2571 extern int simple_write_end(struct file *file, struct address_space *mapping, 2571 extern int simple_write_end(struct file *file, struct address_space *mapping,
2572 loff_t pos, unsigned len, unsigned copied, 2572 loff_t pos, unsigned len, unsigned copied,
2573 struct page *page, void *fsdata); 2573 struct page *page, void *fsdata);
2574 extern int always_delete_dentry(const struct dentry *); 2574 extern int always_delete_dentry(const struct dentry *);
2575 extern struct inode *alloc_anon_inode(struct super_block *); 2575 extern struct inode *alloc_anon_inode(struct super_block *);
2576 extern const struct dentry_operations simple_dentry_operations; 2576 extern const struct dentry_operations simple_dentry_operations;
2577 2577
2578 extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags); 2578 extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags);
2579 extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); 2579 extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *);
2580 extern const struct file_operations simple_dir_operations; 2580 extern const struct file_operations simple_dir_operations;
2581 extern const struct inode_operations simple_dir_inode_operations; 2581 extern const struct inode_operations simple_dir_inode_operations;
2582 struct tree_descr { char *name; const struct file_operations *ops; int mode; }; 2582 struct tree_descr { char *name; const struct file_operations *ops; int mode; };
2583 struct dentry *d_alloc_name(struct dentry *, const char *); 2583 struct dentry *d_alloc_name(struct dentry *, const char *);
2584 extern int simple_fill_super(struct super_block *, unsigned long, struct tree_descr *); 2584 extern int simple_fill_super(struct super_block *, unsigned long, struct tree_descr *);
2585 extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count); 2585 extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count);
2586 extern void simple_release_fs(struct vfsmount **mount, int *count); 2586 extern void simple_release_fs(struct vfsmount **mount, int *count);
2587 2587
2588 extern ssize_t simple_read_from_buffer(void __user *to, size_t count, 2588 extern ssize_t simple_read_from_buffer(void __user *to, size_t count,
2589 loff_t *ppos, const void *from, size_t available); 2589 loff_t *ppos, const void *from, size_t available);
2590 extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, 2590 extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
2591 const void __user *from, size_t count); 2591 const void __user *from, size_t count);
2592 2592
2593 extern int generic_file_fsync(struct file *, loff_t, loff_t, int); 2593 extern int generic_file_fsync(struct file *, loff_t, loff_t, int);
2594 2594
2595 extern int generic_check_addressable(unsigned, u64); 2595 extern int generic_check_addressable(unsigned, u64);
2596 2596
2597 #ifdef CONFIG_MIGRATION 2597 #ifdef CONFIG_MIGRATION
2598 extern int buffer_migrate_page(struct address_space *, 2598 extern int buffer_migrate_page(struct address_space *,
2599 struct page *, struct page *, 2599 struct page *, struct page *,
2600 enum migrate_mode); 2600 enum migrate_mode);
2601 #else 2601 #else
2602 #define buffer_migrate_page NULL 2602 #define buffer_migrate_page NULL
2603 #endif 2603 #endif
2604 2604
2605 extern int inode_change_ok(const struct inode *, struct iattr *); 2605 extern int inode_change_ok(const struct inode *, struct iattr *);
2606 extern int inode_newsize_ok(const struct inode *, loff_t offset); 2606 extern int inode_newsize_ok(const struct inode *, loff_t offset);
2607 extern void setattr_copy(struct inode *inode, const struct iattr *attr); 2607 extern void setattr_copy(struct inode *inode, const struct iattr *attr);
2608 2608
2609 extern int file_update_time(struct file *file); 2609 extern int file_update_time(struct file *file);
2610 2610
2611 extern int generic_show_options(struct seq_file *m, struct dentry *root); 2611 extern int generic_show_options(struct seq_file *m, struct dentry *root);
2612 extern void save_mount_options(struct super_block *sb, char *options); 2612 extern void save_mount_options(struct super_block *sb, char *options);
2613 extern void replace_mount_options(struct super_block *sb, char *options); 2613 extern void replace_mount_options(struct super_block *sb, char *options);
2614 2614
2615 static inline ino_t parent_ino(struct dentry *dentry) 2615 static inline ino_t parent_ino(struct dentry *dentry)
2616 { 2616 {
2617 ino_t res; 2617 ino_t res;
2618 2618
2619 /* 2619 /*
2620 * Don't strictly need d_lock here? If the parent ino could change 2620 * Don't strictly need d_lock here? If the parent ino could change
2621 * then surely we'd have a deeper race in the caller? 2621 * then surely we'd have a deeper race in the caller?
2622 */ 2622 */
2623 spin_lock(&dentry->d_lock); 2623 spin_lock(&dentry->d_lock);
2624 res = dentry->d_parent->d_inode->i_ino; 2624 res = dentry->d_parent->d_inode->i_ino;
2625 spin_unlock(&dentry->d_lock); 2625 spin_unlock(&dentry->d_lock);
2626 return res; 2626 return res;
2627 } 2627 }
2628 2628
2629 /* Transaction based IO helpers */ 2629 /* Transaction based IO helpers */
2630 2630
2631 /* 2631 /*
2632 * An argresp is stored in an allocated page and holds the 2632 * An argresp is stored in an allocated page and holds the
2633 * size of the argument or response, along with its content 2633 * size of the argument or response, along with its content
2634 */ 2634 */
2635 struct simple_transaction_argresp { 2635 struct simple_transaction_argresp {
2636 ssize_t size; 2636 ssize_t size;
2637 char data[0]; 2637 char data[0];
2638 }; 2638 };
2639 2639
2640 #define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp)) 2640 #define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp))
2641 2641
2642 char *simple_transaction_get(struct file *file, const char __user *buf, 2642 char *simple_transaction_get(struct file *file, const char __user *buf,
2643 size_t size); 2643 size_t size);
2644 ssize_t simple_transaction_read(struct file *file, char __user *buf, 2644 ssize_t simple_transaction_read(struct file *file, char __user *buf,
2645 size_t size, loff_t *pos); 2645 size_t size, loff_t *pos);
2646 int simple_transaction_release(struct inode *inode, struct file *file); 2646 int simple_transaction_release(struct inode *inode, struct file *file);
2647 2647
2648 void simple_transaction_set(struct file *file, size_t n); 2648 void simple_transaction_set(struct file *file, size_t n);
2649 2649
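These helpers implement a one-request/one-reply pattern: the write handler obtains a page-backed buffer with simple_transaction_get(), builds its reply in place, and publishes the reply length with simple_transaction_set(); a subsequent read returns that reply via simple_transaction_read(). A hedged sketch of the write side (handler name and reply handling are illustrative):

	/* Hypothetical write side of a transaction-style control file. */
	static ssize_t demo_transaction_write(struct file *file, const char __user *buf,
					      size_t size, loff_t *pos)
	{
		char *data = simple_transaction_get(file, buf, size);

		if (IS_ERR(data))
			return PTR_ERR(data);
		/* ... parse the request in data[0..size) and build a reply in place ... */
		simple_transaction_set(file, strlen(data));	/* reply length for later reads */
		return size;
	}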
2650 /* 2650 /*
2651 * simple attribute files 2651 * simple attribute files
2652 * 2652 *
2653 * These attributes behave similarly to those in sysfs: 2653 * These attributes behave similarly to those in sysfs:
2654 * 2654 *
2655 * Writing to an attribute immediately sets a value; an open file can be 2655 * Writing to an attribute immediately sets a value; an open file can be
2656 * written to multiple times. 2656 * written to multiple times.
2657 * 2657 *
2658 * Reading from an attribute creates a buffer from the value that might get 2658 * Reading from an attribute creates a buffer from the value that might get
2659 * read with multiple read calls. When the attribute has been read 2659 * read with multiple read calls. When the attribute has been read
2660 * completely, no further read calls are possible until the file is opened 2660 * completely, no further read calls are possible until the file is opened
2661 * again. 2661 * again.
2662 * 2662 *
2663 * All attributes contain a text representation of a numeric value 2663 * All attributes contain a text representation of a numeric value
2664 * that is accessed with the get() and set() functions. 2664 * that is accessed with the get() and set() functions.
2665 */ 2665 */
2666 #define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \ 2666 #define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \
2667 static int __fops ## _open(struct inode *inode, struct file *file) \ 2667 static int __fops ## _open(struct inode *inode, struct file *file) \
2668 { \ 2668 { \
2669 __simple_attr_check_format(__fmt, 0ull); \ 2669 __simple_attr_check_format(__fmt, 0ull); \
2670 return simple_attr_open(inode, file, __get, __set, __fmt); \ 2670 return simple_attr_open(inode, file, __get, __set, __fmt); \
2671 } \ 2671 } \
2672 static const struct file_operations __fops = { \ 2672 static const struct file_operations __fops = { \
2673 .owner = THIS_MODULE, \ 2673 .owner = THIS_MODULE, \
2674 .open = __fops ## _open, \ 2674 .open = __fops ## _open, \
2675 .release = simple_attr_release, \ 2675 .release = simple_attr_release, \
2676 .read = simple_attr_read, \ 2676 .read = simple_attr_read, \
2677 .write = simple_attr_write, \ 2677 .write = simple_attr_write, \
2678 .llseek = generic_file_llseek, \ 2678 .llseek = generic_file_llseek, \
2679 }; 2679 };
2680 2680
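A hedged usage sketch of the macro above: wire a u64 variable to get()/set() callbacks and let DEFINE_SIMPLE_ATTRIBUTE generate the file_operations (typically then handed to something like debugfs_create_file()); all names below are illustrative.

	static u64 demo_value;

	static int demo_value_get(void *data, u64 *val)
	{
		*val = *(u64 *)data;
		return 0;
	}

	static int demo_value_set(void *data, u64 val)
	{
		*(u64 *)data = val;
		return 0;
	}

	/* Generates demo_value_fops with open/read/write/llseek wired up. */
	DEFINE_SIMPLE_ATTRIBUTE(demo_value_fops, demo_value_get, demo_value_set, "%llu\n");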
2681 static inline __printf(1, 2) 2681 static inline __printf(1, 2)
2682 void __simple_attr_check_format(const char *fmt, ...) 2682 void __simple_attr_check_format(const char *fmt, ...)
2683 { 2683 {
2684 /* don't do anything, just let the compiler check the arguments; */ 2684 /* don't do anything, just let the compiler check the arguments; */
2685 } 2685 }
2686 2686
2687 int simple_attr_open(struct inode *inode, struct file *file, 2687 int simple_attr_open(struct inode *inode, struct file *file,
2688 int (*get)(void *, u64 *), int (*set)(void *, u64), 2688 int (*get)(void *, u64 *), int (*set)(void *, u64),
2689 const char *fmt); 2689 const char *fmt);
2690 int simple_attr_release(struct inode *inode, struct file *file); 2690 int simple_attr_release(struct inode *inode, struct file *file);
2691 ssize_t simple_attr_read(struct file *file, char __user *buf, 2691 ssize_t simple_attr_read(struct file *file, char __user *buf,
2692 size_t len, loff_t *ppos); 2692 size_t len, loff_t *ppos);
2693 ssize_t simple_attr_write(struct file *file, const char __user *buf, 2693 ssize_t simple_attr_write(struct file *file, const char __user *buf,
2694 size_t len, loff_t *ppos); 2694 size_t len, loff_t *ppos);
2695 2695
2696 struct ctl_table; 2696 struct ctl_table;
2697 int proc_nr_files(struct ctl_table *table, int write, 2697 int proc_nr_files(struct ctl_table *table, int write,
2698 void __user *buffer, size_t *lenp, loff_t *ppos); 2698 void __user *buffer, size_t *lenp, loff_t *ppos);
2699 int proc_nr_dentry(struct ctl_table *table, int write, 2699 int proc_nr_dentry(struct ctl_table *table, int write,
2700 void __user *buffer, size_t *lenp, loff_t *ppos); 2700 void __user *buffer, size_t *lenp, loff_t *ppos);
2701 int proc_nr_inodes(struct ctl_table *table, int write, 2701 int proc_nr_inodes(struct ctl_table *table, int write,
2702 void __user *buffer, size_t *lenp, loff_t *ppos); 2702 void __user *buffer, size_t *lenp, loff_t *ppos);
2703 int __init get_filesystem_list(char *buf); 2703 int __init get_filesystem_list(char *buf);
2704 2704
2705 #define __FMODE_EXEC ((__force int) FMODE_EXEC) 2705 #define __FMODE_EXEC ((__force int) FMODE_EXEC)
2706 #define __FMODE_NONOTIFY ((__force int) FMODE_NONOTIFY) 2706 #define __FMODE_NONOTIFY ((__force int) FMODE_NONOTIFY)
2707 2707
2708 #define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE]) 2708 #define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
2709 #define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \ 2709 #define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \
2710 (flag & __FMODE_NONOTIFY))) 2710 (flag & __FMODE_NONOTIFY)))
2711 2711
2712 static inline int is_sxid(umode_t mode) 2712 static inline int is_sxid(umode_t mode)
2713 { 2713 {
2714 return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP)); 2714 return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP));
2715 } 2715 }
2716 2716
2717 static inline void inode_has_no_xattr(struct inode *inode) 2717 static inline void inode_has_no_xattr(struct inode *inode)
2718 { 2718 {
2719 if (!is_sxid(inode->i_mode) && (inode->i_sb->s_flags & MS_NOSEC)) 2719 if (!is_sxid(inode->i_mode) && (inode->i_sb->s_flags & MS_NOSEC))
2720 inode->i_flags |= S_NOSEC; 2720 inode->i_flags |= S_NOSEC;
2721 } 2721 }
2722 2722
2723 static inline bool dir_emit(struct dir_context *ctx, 2723 static inline bool dir_emit(struct dir_context *ctx,
2724 const char *name, int namelen, 2724 const char *name, int namelen,
2725 u64 ino, unsigned type) 2725 u64 ino, unsigned type)
2726 { 2726 {
2727 return ctx->actor(ctx, name, namelen, ctx->pos, ino, type) == 0; 2727 return ctx->actor(ctx, name, namelen, ctx->pos, ino, type) == 0;
2728 } 2728 }
2729 static inline bool dir_emit_dot(struct file *file, struct dir_context *ctx) 2729 static inline bool dir_emit_dot(struct file *file, struct dir_context *ctx)
2730 { 2730 {
2731 return ctx->actor(ctx, ".", 1, ctx->pos, 2731 return ctx->actor(ctx, ".", 1, ctx->pos,
2732 file->f_path.dentry->d_inode->i_ino, DT_DIR) == 0; 2732 file->f_path.dentry->d_inode->i_ino, DT_DIR) == 0;
2733 } 2733 }
2734 static inline bool dir_emit_dotdot(struct file *file, struct dir_context *ctx) 2734 static inline bool dir_emit_dotdot(struct file *file, struct dir_context *ctx)
2735 { 2735 {
2736 return ctx->actor(ctx, "..", 2, ctx->pos, 2736 return ctx->actor(ctx, "..", 2, ctx->pos,
2737 parent_ino(file->f_path.dentry), DT_DIR) == 0; 2737 parent_ino(file->f_path.dentry), DT_DIR) == 0;
2738 } 2738 }
2739 static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx) 2739 static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx)
2740 { 2740 {
2741 if (ctx->pos == 0) { 2741 if (ctx->pos == 0) {
2742 if (!dir_emit_dot(file, ctx)) 2742 if (!dir_emit_dot(file, ctx))
2743 return false; 2743 return false;
2744 ctx->pos = 1; 2744 ctx->pos = 1;
2745 } 2745 }
2746 if (ctx->pos == 1) { 2746 if (ctx->pos == 1) {
2747 if (!dir_emit_dotdot(file, ctx)) 2747 if (!dir_emit_dotdot(file, ctx))
2748 return false; 2748 return false;
2749 ctx->pos = 2; 2749 ctx->pos = 2;
2750 } 2750 }
2751 return true; 2751 return true;
2752 } 2752 }
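The emit helpers above are what an ->iterate() implementation calls to feed entries to the dir_context actor, advancing ctx->pos as it goes. A minimal, hypothetical iterator that lists ".", ".." and a single made-up entry:

	/* Hypothetical ->iterate() for a one-entry directory. */
	static int demo_iterate(struct file *file, struct dir_context *ctx)
	{
		if (!dir_emit_dots(file, ctx))
			return 0;
		if (ctx->pos == 2) {
			if (!dir_emit(ctx, "hello", 5, 100 /* inode number */, DT_REG))
				return 0;
			ctx->pos++;
		}
		return 0;
	}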
2753 static inline bool dir_relax(struct inode *inode) 2753 static inline bool dir_relax(struct inode *inode)
2754 { 2754 {
2755 mutex_unlock(&inode->i_mutex); 2755 mutex_unlock(&inode->i_mutex);
2756 mutex_lock(&inode->i_mutex); 2756 mutex_lock(&inode->i_mutex);
2757 return !IS_DEADDIR(inode); 2757 return !IS_DEADDIR(inode);
2758 } 2758 }
2759 2759
2760 #endif /* _LINUX_FS_H */ 2760 #endif /* _LINUX_FS_H */
2761 2761
include/uapi/asm-generic/fcntl.h
1 #ifndef _ASM_GENERIC_FCNTL_H 1 #ifndef _ASM_GENERIC_FCNTL_H
2 #define _ASM_GENERIC_FCNTL_H 2 #define _ASM_GENERIC_FCNTL_H
3 3
4 #include <linux/types.h> 4 #include <linux/types.h>
5 5
6 /* 6 /*
7 * FMODE_EXEC is 0x20 7 * FMODE_EXEC is 0x20
8 * FMODE_NONOTIFY is 0x1000000 8 * FMODE_NONOTIFY is 0x1000000
9 * These cannot be used by userspace O_* until internal and external open 9 * These cannot be used by userspace O_* until internal and external open
10 * flags are split. 10 * flags are split.
11 * -Eric Paris 11 * -Eric Paris
12 */ 12 */
13 13
14 /* 14 /*
15 * When introducing new O_* bits, please check its uniqueness in fcntl_init(). 15 * When introducing new O_* bits, please check its uniqueness in fcntl_init().
16 */ 16 */
17 17
18 #define O_ACCMODE 00000003 18 #define O_ACCMODE 00000003
19 #define O_RDONLY 00000000 19 #define O_RDONLY 00000000
20 #define O_WRONLY 00000001 20 #define O_WRONLY 00000001
21 #define O_RDWR 00000002 21 #define O_RDWR 00000002
22 #ifndef O_CREAT 22 #ifndef O_CREAT
23 #define O_CREAT 00000100 /* not fcntl */ 23 #define O_CREAT 00000100 /* not fcntl */
24 #endif 24 #endif
25 #ifndef O_EXCL 25 #ifndef O_EXCL
26 #define O_EXCL 00000200 /* not fcntl */ 26 #define O_EXCL 00000200 /* not fcntl */
27 #endif 27 #endif
28 #ifndef O_NOCTTY 28 #ifndef O_NOCTTY
29 #define O_NOCTTY 00000400 /* not fcntl */ 29 #define O_NOCTTY 00000400 /* not fcntl */
30 #endif 30 #endif
31 #ifndef O_TRUNC 31 #ifndef O_TRUNC
32 #define O_TRUNC 00001000 /* not fcntl */ 32 #define O_TRUNC 00001000 /* not fcntl */
33 #endif 33 #endif
34 #ifndef O_APPEND 34 #ifndef O_APPEND
35 #define O_APPEND 00002000 35 #define O_APPEND 00002000
36 #endif 36 #endif
37 #ifndef O_NONBLOCK 37 #ifndef O_NONBLOCK
38 #define O_NONBLOCK 00004000 38 #define O_NONBLOCK 00004000
39 #endif 39 #endif
40 #ifndef O_DSYNC 40 #ifndef O_DSYNC
41 #define O_DSYNC 00010000 /* used to be O_SYNC, see below */ 41 #define O_DSYNC 00010000 /* used to be O_SYNC, see below */
42 #endif 42 #endif
43 #ifndef FASYNC 43 #ifndef FASYNC
44 #define FASYNC 00020000 /* fcntl, for BSD compatibility */ 44 #define FASYNC 00020000 /* fcntl, for BSD compatibility */
45 #endif 45 #endif
46 #ifndef O_DIRECT 46 #ifndef O_DIRECT
47 #define O_DIRECT 00040000 /* direct disk access hint */ 47 #define O_DIRECT 00040000 /* direct disk access hint */
48 #endif 48 #endif
49 #ifndef O_LARGEFILE 49 #ifndef O_LARGEFILE
50 #define O_LARGEFILE 00100000 50 #define O_LARGEFILE 00100000
51 #endif 51 #endif
52 #ifndef O_DIRECTORY 52 #ifndef O_DIRECTORY
53 #define O_DIRECTORY 00200000 /* must be a directory */ 53 #define O_DIRECTORY 00200000 /* must be a directory */
54 #endif 54 #endif
55 #ifndef O_NOFOLLOW 55 #ifndef O_NOFOLLOW
56 #define O_NOFOLLOW 00400000 /* don't follow links */ 56 #define O_NOFOLLOW 00400000 /* don't follow links */
57 #endif 57 #endif
58 #ifndef O_NOATIME 58 #ifndef O_NOATIME
59 #define O_NOATIME 01000000 59 #define O_NOATIME 01000000
60 #endif 60 #endif
61 #ifndef O_CLOEXEC 61 #ifndef O_CLOEXEC
62 #define O_CLOEXEC 02000000 /* set close_on_exec */ 62 #define O_CLOEXEC 02000000 /* set close_on_exec */
63 #endif 63 #endif
64 64
65 /* 65 /*
66 * Before Linux 2.6.33 only O_DSYNC semantics were implemented, but using 66 * Before Linux 2.6.33 only O_DSYNC semantics were implemented, but using
67 * the O_SYNC flag. We continue to use the existing numerical value 67 * the O_SYNC flag. We continue to use the existing numerical value
68 * for O_DSYNC semantics now, but using the correct symbolic name for it. 68 * for O_DSYNC semantics now, but using the correct symbolic name for it.
69 * This new value is used to request true Posix O_SYNC semantics. It is 69 * This new value is used to request true Posix O_SYNC semantics. It is
70 * defined in this strange way to make sure applications compiled against 70 * defined in this strange way to make sure applications compiled against
71 * new headers get at least O_DSYNC semantics on older kernels. 71 * new headers get at least O_DSYNC semantics on older kernels.
72 * 72 *
73 * This has the nice side-effect that we can simply test for O_DSYNC 73 * This has the nice side-effect that we can simply test for O_DSYNC
74 * wherever we do not care if O_DSYNC or O_SYNC is used. 74 * wherever we do not care if O_DSYNC or O_SYNC is used.
75 * 75 *
76 * Note: __O_SYNC must never be used directly. 76 * Note: __O_SYNC must never be used directly.
77 */ 77 */
78 #ifndef O_SYNC 78 #ifndef O_SYNC
79 #define __O_SYNC 04000000 79 #define __O_SYNC 04000000
80 #define O_SYNC (__O_SYNC|O_DSYNC) 80 #define O_SYNC (__O_SYNC|O_DSYNC)
81 #endif 81 #endif
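In practice the note above means callers that only care about "some form of sync" can test the O_DSYNC bit alone. A quick userspace check of that property (illustrative, not kernel code):

	#include <fcntl.h>
	#include <stdio.h>

	int main(void)
	{
		int dsync_open = O_WRONLY | O_DSYNC;
		int sync_open  = O_WRONLY | O_SYNC;	/* O_SYNC == __O_SYNC | O_DSYNC */

		/* Both prints show 1: testing O_DSYNC covers O_SYNC as well. */
		printf("%d %d\n", !!(dsync_open & O_DSYNC), !!(sync_open & O_DSYNC));
		return 0;
	}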
82 82
83 #ifndef O_PATH 83 #ifndef O_PATH
84 #define O_PATH 010000000 84 #define O_PATH 010000000
85 #endif 85 #endif
86 86
87 #ifndef __O_TMPFILE 87 #ifndef __O_TMPFILE
88 #define __O_TMPFILE 020000000 88 #define __O_TMPFILE 020000000
89 #endif 89 #endif
90 90
91 /* a horrid kludge trying to make sure that this will fail on old kernels */ 91 /* a horrid kludge trying to make sure that this will fail on old kernels */
92 #define O_TMPFILE (__O_TMPFILE | O_DIRECTORY) 92 #define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
93 #define O_TMPFILE_MASK (__O_TMPFILE | O_DIRECTORY | O_CREAT) 93 #define O_TMPFILE_MASK (__O_TMPFILE | O_DIRECTORY | O_CREAT)
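The kludge works because a kernel that does not understand __O_TMPFILE still sees O_DIRECTORY combined with a write access mode and refuses the open, rather than silently creating a named file. A hedged userspace sketch of typical O_TMPFILE usage (requires a glibc and kernel that support the flag):

	#define _GNU_SOURCE		/* for O_TMPFILE on glibc */
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		/* Create an unnamed temporary file inside /tmp. */
		int fd = open("/tmp", O_TMPFILE | O_RDWR, 0600);
		if (fd < 0) {
			perror("O_TMPFILE");	/* old kernels fail here instead */
			return 1;
		}
		write(fd, "scratch\n", 8);	/* data vanishes when fd is closed */
		close(fd);
		return 0;
	}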
94 94
95 #ifndef O_NDELAY 95 #ifndef O_NDELAY
96 #define O_NDELAY O_NONBLOCK 96 #define O_NDELAY O_NONBLOCK
97 #endif 97 #endif
98 98
99 #define F_DUPFD 0 /* dup */ 99 #define F_DUPFD 0 /* dup */
100 #define F_GETFD 1 /* get close_on_exec */ 100 #define F_GETFD 1 /* get close_on_exec */
101 #define F_SETFD 2 /* set/clear close_on_exec */ 101 #define F_SETFD 2 /* set/clear close_on_exec */
102 #define F_GETFL 3 /* get file->f_flags */ 102 #define F_GETFL 3 /* get file->f_flags */
103 #define F_SETFL 4 /* set file->f_flags */ 103 #define F_SETFL 4 /* set file->f_flags */
104 #ifndef F_GETLK 104 #ifndef F_GETLK
105 #define F_GETLK 5 105 #define F_GETLK 5
106 #define F_SETLK 6 106 #define F_SETLK 6
107 #define F_SETLKW 7 107 #define F_SETLKW 7
108 #endif 108 #endif
109 #ifndef F_SETOWN 109 #ifndef F_SETOWN
110 #define F_SETOWN 8 /* for sockets. */ 110 #define F_SETOWN 8 /* for sockets. */
111 #define F_GETOWN 9 /* for sockets. */ 111 #define F_GETOWN 9 /* for sockets. */
112 #endif 112 #endif
113 #ifndef F_SETSIG 113 #ifndef F_SETSIG
114 #define F_SETSIG 10 /* for sockets. */ 114 #define F_SETSIG 10 /* for sockets. */
115 #define F_GETSIG 11 /* for sockets. */ 115 #define F_GETSIG 11 /* for sockets. */
116 #endif 116 #endif
117 117
118 #ifndef CONFIG_64BIT 118 #ifndef CONFIG_64BIT
119 #ifndef F_GETLK64 119 #ifndef F_GETLK64
120 #define F_GETLK64 12 /* using 'struct flock64' */ 120 #define F_GETLK64 12 /* using 'struct flock64' */
121 #define F_SETLK64 13 121 #define F_SETLK64 13
122 #define F_SETLKW64 14 122 #define F_SETLKW64 14
123 #endif 123 #endif
124 #endif 124 #endif
125 125
126 #ifndef F_SETOWN_EX 126 #ifndef F_SETOWN_EX
127 #define F_SETOWN_EX 15 127 #define F_SETOWN_EX 15
128 #define F_GETOWN_EX 16 128 #define F_GETOWN_EX 16
129 #endif 129 #endif
130 130
131 #ifndef F_GETOWNER_UIDS 131 #ifndef F_GETOWNER_UIDS
132 #define F_GETOWNER_UIDS 17 132 #define F_GETOWNER_UIDS 17
133 #endif 133 #endif
134 134
135 /* 135 /*
136 * fd "private" POSIX locks. 136 * Open File Description Locks
137 * 137 *
138 * Usually POSIX locks held by a process are released on *any* close and are 138 * Usually record locks held by a process are released on *any* close and are
139 * not inherited across a fork(). 139 * not inherited across a fork().
140 * 140 *
141 * These cmd values will set locks that conflict with normal POSIX locks, but 141 * These cmd values will set locks that conflict with process-associated
142 * are "owned" by the opened file, not the process. This means that they are 142 * record locks, but are "owned" by the open file description, not the
143 * inherited across fork() like BSD (flock) locks, and they are only released 143 * process. This means that they are inherited across fork() like BSD (flock)
144 * automatically when the last reference to the open file against which 144 * locks, and they are only released automatically when the last reference to
145 * they were acquired is put. 145 * the open file against which they were acquired is put.
146 */ 146 */
147 #define F_GETLKP 36 147 #define F_OFD_GETLK 36
148 #define F_SETLKP 37 148 #define F_OFD_SETLK 37
149 #define F_SETLKPW 38 149 #define F_OFD_SETLKW 38
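For reference, a minimal userspace sketch of taking one of these locks (assumes a glibc that exposes the F_OFD_* constants; note the kernel rejects OFD requests whose l_pid is non-zero):

	#define _GNU_SOURCE		/* F_OFD_SETLK via <fcntl.h> on glibc */
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/tmp/ofd-demo", O_RDWR | O_CREAT, 0644);
		struct flock fl = {
			.l_type   = F_WRLCK,
			.l_whence = SEEK_SET,
			.l_start  = 0,
			.l_len    = 0,		/* 0 == lock the whole file */
			.l_pid    = 0,		/* must be 0 for OFD locks */
		};

		if (fd < 0 || fcntl(fd, F_OFD_SETLK, &fl) < 0) {
			perror("F_OFD_SETLK");
			return 1;
		}
		/* The lock belongs to this open file description: it survives fork()
		 * and is dropped only when the last descriptor sharing it is closed. */
		close(fd);
		return 0;
	}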
150 150
151 #define F_OWNER_TID 0 151 #define F_OWNER_TID 0
152 #define F_OWNER_PID 1 152 #define F_OWNER_PID 1
153 #define F_OWNER_PGRP 2 153 #define F_OWNER_PGRP 2
154 154
155 struct f_owner_ex { 155 struct f_owner_ex {
156 int type; 156 int type;
157 __kernel_pid_t pid; 157 __kernel_pid_t pid;
158 }; 158 };
159 159
160 /* for F_[GET|SET]FL */ 160 /* for F_[GET|SET]FL */
161 #define FD_CLOEXEC 1 /* actually anything with low bit set goes */ 161 #define FD_CLOEXEC 1 /* actually anything with low bit set goes */
162 162
163 /* for posix fcntl() and lockf() */ 163 /* for posix fcntl() and lockf() */
164 #ifndef F_RDLCK 164 #ifndef F_RDLCK
165 #define F_RDLCK 0 165 #define F_RDLCK 0
166 #define F_WRLCK 1 166 #define F_WRLCK 1
167 #define F_UNLCK 2 167 #define F_UNLCK 2
168 #endif 168 #endif
169 169
170 /* for old implementation of bsd flock () */ 170 /* for old implementation of bsd flock () */
171 #ifndef F_EXLCK 171 #ifndef F_EXLCK
172 #define F_EXLCK 4 /* or 3 */ 172 #define F_EXLCK 4 /* or 3 */
173 #define F_SHLCK 8 /* or 4 */ 173 #define F_SHLCK 8 /* or 4 */
174 #endif 174 #endif
175 175
176 /* operations for bsd flock(), also used by the kernel implementation */ 176 /* operations for bsd flock(), also used by the kernel implementation */
177 #define LOCK_SH 1 /* shared lock */ 177 #define LOCK_SH 1 /* shared lock */
178 #define LOCK_EX 2 /* exclusive lock */ 178 #define LOCK_EX 2 /* exclusive lock */
179 #define LOCK_NB 4 /* or'd with one of the above to prevent 179 #define LOCK_NB 4 /* or'd with one of the above to prevent
180 blocking */ 180 blocking */
181 #define LOCK_UN 8 /* remove lock */ 181 #define LOCK_UN 8 /* remove lock */
182 182
183 #define LOCK_MAND 32 /* This is a mandatory flock ... */ 183 #define LOCK_MAND 32 /* This is a mandatory flock ... */
184 #define LOCK_READ 64 /* which allows concurrent read operations */ 184 #define LOCK_READ 64 /* which allows concurrent read operations */
185 #define LOCK_WRITE 128 /* which allows concurrent write operations */ 185 #define LOCK_WRITE 128 /* which allows concurrent write operations */
186 #define LOCK_RW 192 /* which allows concurrent read & write ops */ 186 #define LOCK_RW 192 /* which allows concurrent read & write ops */
187 187
188 #define F_LINUX_SPECIFIC_BASE 1024 188 #define F_LINUX_SPECIFIC_BASE 1024
189 189
190 #ifndef HAVE_ARCH_STRUCT_FLOCK 190 #ifndef HAVE_ARCH_STRUCT_FLOCK
191 #ifndef __ARCH_FLOCK_PAD 191 #ifndef __ARCH_FLOCK_PAD
192 #define __ARCH_FLOCK_PAD 192 #define __ARCH_FLOCK_PAD
193 #endif 193 #endif
194 194
195 struct flock { 195 struct flock {
196 short l_type; 196 short l_type;
197 short l_whence; 197 short l_whence;
198 __kernel_off_t l_start; 198 __kernel_off_t l_start;
199 __kernel_off_t l_len; 199 __kernel_off_t l_len;
200 __kernel_pid_t l_pid; 200 __kernel_pid_t l_pid;
201 __ARCH_FLOCK_PAD 201 __ARCH_FLOCK_PAD
202 }; 202 };
203 #endif 203 #endif
204 204
205 #ifndef HAVE_ARCH_STRUCT_FLOCK64 205 #ifndef HAVE_ARCH_STRUCT_FLOCK64
206 #ifndef __ARCH_FLOCK64_PAD 206 #ifndef __ARCH_FLOCK64_PAD
207 #define __ARCH_FLOCK64_PAD 207 #define __ARCH_FLOCK64_PAD
208 #endif 208 #endif
209 209
210 struct flock64 { 210 struct flock64 {
211 short l_type; 211 short l_type;
212 short l_whence; 212 short l_whence;
213 __kernel_loff_t l_start; 213 __kernel_loff_t l_start;
214 __kernel_loff_t l_len; 214 __kernel_loff_t l_len;
215 __kernel_pid_t l_pid; 215 __kernel_pid_t l_pid;
216 __ARCH_FLOCK64_PAD 216 __ARCH_FLOCK64_PAD
217 }; 217 };
218 #endif 218 #endif
219 219
220 #endif /* _ASM_GENERIC_FCNTL_H */ 220 #endif /* _ASM_GENERIC_FCNTL_H */
221 221
security/selinux/hooks.c
1 /* 1 /*
2 * NSA Security-Enhanced Linux (SELinux) security module 2 * NSA Security-Enhanced Linux (SELinux) security module
3 * 3 *
4 * This file contains the SELinux hook function implementations. 4 * This file contains the SELinux hook function implementations.
5 * 5 *
6 * Authors: Stephen Smalley, <sds@epoch.ncsc.mil> 6 * Authors: Stephen Smalley, <sds@epoch.ncsc.mil>
7 * Chris Vance, <cvance@nai.com> 7 * Chris Vance, <cvance@nai.com>
8 * Wayne Salamon, <wsalamon@nai.com> 8 * Wayne Salamon, <wsalamon@nai.com>
9 * James Morris <jmorris@redhat.com> 9 * James Morris <jmorris@redhat.com>
10 * 10 *
11 * Copyright (C) 2001,2002 Networks Associates Technology, Inc. 11 * Copyright (C) 2001,2002 Networks Associates Technology, Inc.
12 * Copyright (C) 2003-2008 Red Hat, Inc., James Morris <jmorris@redhat.com> 12 * Copyright (C) 2003-2008 Red Hat, Inc., James Morris <jmorris@redhat.com>
13 * Eric Paris <eparis@redhat.com> 13 * Eric Paris <eparis@redhat.com>
14 * Copyright (C) 2004-2005 Trusted Computer Solutions, Inc. 14 * Copyright (C) 2004-2005 Trusted Computer Solutions, Inc.
15 * <dgoeddel@trustedcs.com> 15 * <dgoeddel@trustedcs.com>
16 * Copyright (C) 2006, 2007, 2009 Hewlett-Packard Development Company, L.P. 16 * Copyright (C) 2006, 2007, 2009 Hewlett-Packard Development Company, L.P.
17 * Paul Moore <paul@paul-moore.com> 17 * Paul Moore <paul@paul-moore.com>
18 * Copyright (C) 2007 Hitachi Software Engineering Co., Ltd. 18 * Copyright (C) 2007 Hitachi Software Engineering Co., Ltd.
19 * Yuichi Nakamura <ynakam@hitachisoft.jp> 19 * Yuichi Nakamura <ynakam@hitachisoft.jp>
20 * 20 *
21 * This program is free software; you can redistribute it and/or modify 21 * This program is free software; you can redistribute it and/or modify
22 * it under the terms of the GNU General Public License version 2, 22 * it under the terms of the GNU General Public License version 2,
23 * as published by the Free Software Foundation. 23 * as published by the Free Software Foundation.
24 */ 24 */
25 25
26 #include <linux/init.h> 26 #include <linux/init.h>
27 #include <linux/kd.h> 27 #include <linux/kd.h>
28 #include <linux/kernel.h> 28 #include <linux/kernel.h>
29 #include <linux/tracehook.h> 29 #include <linux/tracehook.h>
30 #include <linux/errno.h> 30 #include <linux/errno.h>
31 #include <linux/sched.h> 31 #include <linux/sched.h>
32 #include <linux/security.h> 32 #include <linux/security.h>
33 #include <linux/xattr.h> 33 #include <linux/xattr.h>
34 #include <linux/capability.h> 34 #include <linux/capability.h>
35 #include <linux/unistd.h> 35 #include <linux/unistd.h>
36 #include <linux/mm.h> 36 #include <linux/mm.h>
37 #include <linux/mman.h> 37 #include <linux/mman.h>
38 #include <linux/slab.h> 38 #include <linux/slab.h>
39 #include <linux/pagemap.h> 39 #include <linux/pagemap.h>
40 #include <linux/proc_fs.h> 40 #include <linux/proc_fs.h>
41 #include <linux/swap.h> 41 #include <linux/swap.h>
42 #include <linux/spinlock.h> 42 #include <linux/spinlock.h>
43 #include <linux/syscalls.h> 43 #include <linux/syscalls.h>
44 #include <linux/dcache.h> 44 #include <linux/dcache.h>
45 #include <linux/file.h> 45 #include <linux/file.h>
46 #include <linux/fdtable.h> 46 #include <linux/fdtable.h>
47 #include <linux/namei.h> 47 #include <linux/namei.h>
48 #include <linux/mount.h> 48 #include <linux/mount.h>
49 #include <linux/netfilter_ipv4.h> 49 #include <linux/netfilter_ipv4.h>
50 #include <linux/netfilter_ipv6.h> 50 #include <linux/netfilter_ipv6.h>
51 #include <linux/tty.h> 51 #include <linux/tty.h>
52 #include <net/icmp.h> 52 #include <net/icmp.h>
53 #include <net/ip.h> /* for local_port_range[] */ 53 #include <net/ip.h> /* for local_port_range[] */
54 #include <net/sock.h> 54 #include <net/sock.h>
55 #include <net/tcp.h> /* struct or_callable used in sock_rcv_skb */ 55 #include <net/tcp.h> /* struct or_callable used in sock_rcv_skb */
56 #include <net/inet_connection_sock.h> 56 #include <net/inet_connection_sock.h>
57 #include <net/net_namespace.h> 57 #include <net/net_namespace.h>
58 #include <net/netlabel.h> 58 #include <net/netlabel.h>
59 #include <linux/uaccess.h> 59 #include <linux/uaccess.h>
60 #include <asm/ioctls.h> 60 #include <asm/ioctls.h>
61 #include <linux/atomic.h> 61 #include <linux/atomic.h>
62 #include <linux/bitops.h> 62 #include <linux/bitops.h>
63 #include <linux/interrupt.h> 63 #include <linux/interrupt.h>
64 #include <linux/netdevice.h> /* for network interface checks */ 64 #include <linux/netdevice.h> /* for network interface checks */
65 #include <net/netlink.h> 65 #include <net/netlink.h>
66 #include <linux/tcp.h> 66 #include <linux/tcp.h>
67 #include <linux/udp.h> 67 #include <linux/udp.h>
68 #include <linux/dccp.h> 68 #include <linux/dccp.h>
69 #include <linux/quota.h> 69 #include <linux/quota.h>
70 #include <linux/un.h> /* for Unix socket types */ 70 #include <linux/un.h> /* for Unix socket types */
71 #include <net/af_unix.h> /* for Unix socket types */ 71 #include <net/af_unix.h> /* for Unix socket types */
72 #include <linux/parser.h> 72 #include <linux/parser.h>
73 #include <linux/nfs_mount.h> 73 #include <linux/nfs_mount.h>
74 #include <net/ipv6.h> 74 #include <net/ipv6.h>
75 #include <linux/hugetlb.h> 75 #include <linux/hugetlb.h>
76 #include <linux/personality.h> 76 #include <linux/personality.h>
77 #include <linux/audit.h> 77 #include <linux/audit.h>
78 #include <linux/string.h> 78 #include <linux/string.h>
79 #include <linux/selinux.h> 79 #include <linux/selinux.h>
80 #include <linux/mutex.h> 80 #include <linux/mutex.h>
81 #include <linux/posix-timers.h> 81 #include <linux/posix-timers.h>
82 #include <linux/syslog.h> 82 #include <linux/syslog.h>
83 #include <linux/user_namespace.h> 83 #include <linux/user_namespace.h>
84 #include <linux/export.h> 84 #include <linux/export.h>
85 #include <linux/msg.h> 85 #include <linux/msg.h>
86 #include <linux/shm.h> 86 #include <linux/shm.h>
87 87
88 #include "avc.h" 88 #include "avc.h"
89 #include "objsec.h" 89 #include "objsec.h"
90 #include "netif.h" 90 #include "netif.h"
91 #include "netnode.h" 91 #include "netnode.h"
92 #include "netport.h" 92 #include "netport.h"
93 #include "xfrm.h" 93 #include "xfrm.h"
94 #include "netlabel.h" 94 #include "netlabel.h"
95 #include "audit.h" 95 #include "audit.h"
96 #include "avc_ss.h" 96 #include "avc_ss.h"
97 97
98 extern struct security_operations *security_ops; 98 extern struct security_operations *security_ops;
99 99
100 /* SECMARK reference count */ 100 /* SECMARK reference count */
101 static atomic_t selinux_secmark_refcount = ATOMIC_INIT(0); 101 static atomic_t selinux_secmark_refcount = ATOMIC_INIT(0);
102 102
103 #ifdef CONFIG_SECURITY_SELINUX_DEVELOP 103 #ifdef CONFIG_SECURITY_SELINUX_DEVELOP
104 int selinux_enforcing; 104 int selinux_enforcing;
105 105
106 static int __init enforcing_setup(char *str) 106 static int __init enforcing_setup(char *str)
107 { 107 {
108 unsigned long enforcing; 108 unsigned long enforcing;
109 if (!kstrtoul(str, 0, &enforcing)) 109 if (!kstrtoul(str, 0, &enforcing))
110 selinux_enforcing = enforcing ? 1 : 0; 110 selinux_enforcing = enforcing ? 1 : 0;
111 return 1; 111 return 1;
112 } 112 }
113 __setup("enforcing=", enforcing_setup); 113 __setup("enforcing=", enforcing_setup);
114 #endif 114 #endif
115 115
116 #ifdef CONFIG_SECURITY_SELINUX_BOOTPARAM 116 #ifdef CONFIG_SECURITY_SELINUX_BOOTPARAM
117 int selinux_enabled = CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE; 117 int selinux_enabled = CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE;
118 118
119 static int __init selinux_enabled_setup(char *str) 119 static int __init selinux_enabled_setup(char *str)
120 { 120 {
121 unsigned long enabled; 121 unsigned long enabled;
122 if (!kstrtoul(str, 0, &enabled)) 122 if (!kstrtoul(str, 0, &enabled))
123 selinux_enabled = enabled ? 1 : 0; 123 selinux_enabled = enabled ? 1 : 0;
124 return 1; 124 return 1;
125 } 125 }
126 __setup("selinux=", selinux_enabled_setup); 126 __setup("selinux=", selinux_enabled_setup);
127 #else 127 #else
128 int selinux_enabled = 1; 128 int selinux_enabled = 1;
129 #endif 129 #endif
130 130
131 static struct kmem_cache *sel_inode_cache; 131 static struct kmem_cache *sel_inode_cache;
132 132
133 /** 133 /**
134 * selinux_secmark_enabled - Check to see if SECMARK is currently enabled 134 * selinux_secmark_enabled - Check to see if SECMARK is currently enabled
135 * 135 *
136 * Description: 136 * Description:
137 * This function checks the SECMARK reference counter to see if any SECMARK 137 * This function checks the SECMARK reference counter to see if any SECMARK
138 * targets are currently configured, if the reference counter is greater than 138 * targets are currently configured, if the reference counter is greater than
139 * zero SECMARK is considered to be enabled. Returns true (1) if SECMARK is 139 * zero SECMARK is considered to be enabled. Returns true (1) if SECMARK is
140 * enabled, false (0) if SECMARK is disabled. If the always_check_network 140 * enabled, false (0) if SECMARK is disabled. If the always_check_network
141 * policy capability is enabled, SECMARK is always considered enabled. 141 * policy capability is enabled, SECMARK is always considered enabled.
142 * 142 *
143 */ 143 */
144 static int selinux_secmark_enabled(void) 144 static int selinux_secmark_enabled(void)
145 { 145 {
146 return (selinux_policycap_alwaysnetwork || atomic_read(&selinux_secmark_refcount)); 146 return (selinux_policycap_alwaysnetwork || atomic_read(&selinux_secmark_refcount));
147 } 147 }
148 148
149 /** 149 /**
150 * selinux_peerlbl_enabled - Check to see if peer labeling is currently enabled 150 * selinux_peerlbl_enabled - Check to see if peer labeling is currently enabled
151 * 151 *
152 * Description: 152 * Description:
153 * This function checks if NetLabel or labeled IPSEC is enabled. Returns true 153 * This function checks if NetLabel or labeled IPSEC is enabled. Returns true
154 * (1) if any are enabled or false (0) if neither are enabled. If the 154 * (1) if any are enabled or false (0) if neither are enabled. If the
155 * always_check_network policy capability is enabled, peer labeling 155 * always_check_network policy capability is enabled, peer labeling
156 * is always considered enabled. 156 * is always considered enabled.
157 * 157 *
158 */ 158 */
159 static int selinux_peerlbl_enabled(void) 159 static int selinux_peerlbl_enabled(void)
160 { 160 {
161 return (selinux_policycap_alwaysnetwork || netlbl_enabled() || selinux_xfrm_enabled()); 161 return (selinux_policycap_alwaysnetwork || netlbl_enabled() || selinux_xfrm_enabled());
162 } 162 }
163 163
164 /* 164 /*
165 * initialise the security for the init task 165 * initialise the security for the init task
166 */ 166 */
167 static void cred_init_security(void) 167 static void cred_init_security(void)
168 { 168 {
169 struct cred *cred = (struct cred *) current->real_cred; 169 struct cred *cred = (struct cred *) current->real_cred;
170 struct task_security_struct *tsec; 170 struct task_security_struct *tsec;
171 171
172 tsec = kzalloc(sizeof(struct task_security_struct), GFP_KERNEL); 172 tsec = kzalloc(sizeof(struct task_security_struct), GFP_KERNEL);
173 if (!tsec) 173 if (!tsec)
174 panic("SELinux: Failed to initialize initial task.\n"); 174 panic("SELinux: Failed to initialize initial task.\n");
175 175
176 tsec->osid = tsec->sid = SECINITSID_KERNEL; 176 tsec->osid = tsec->sid = SECINITSID_KERNEL;
177 cred->security = tsec; 177 cred->security = tsec;
178 } 178 }
179 179
180 /* 180 /*
181 * get the security ID of a set of credentials 181 * get the security ID of a set of credentials
182 */ 182 */
183 static inline u32 cred_sid(const struct cred *cred) 183 static inline u32 cred_sid(const struct cred *cred)
184 { 184 {
185 const struct task_security_struct *tsec; 185 const struct task_security_struct *tsec;
186 186
187 tsec = cred->security; 187 tsec = cred->security;
188 return tsec->sid; 188 return tsec->sid;
189 } 189 }
190 190
191 /* 191 /*
192 * get the objective security ID of a task 192 * get the objective security ID of a task
193 */ 193 */
194 static inline u32 task_sid(const struct task_struct *task) 194 static inline u32 task_sid(const struct task_struct *task)
195 { 195 {
196 u32 sid; 196 u32 sid;
197 197
198 rcu_read_lock(); 198 rcu_read_lock();
199 sid = cred_sid(__task_cred(task)); 199 sid = cred_sid(__task_cred(task));
200 rcu_read_unlock(); 200 rcu_read_unlock();
201 return sid; 201 return sid;
202 } 202 }
203 203
204 /* 204 /*
205 * get the subjective security ID of the current task 205 * get the subjective security ID of the current task
206 */ 206 */
207 static inline u32 current_sid(void) 207 static inline u32 current_sid(void)
208 { 208 {
209 const struct task_security_struct *tsec = current_security(); 209 const struct task_security_struct *tsec = current_security();
210 210
211 return tsec->sid; 211 return tsec->sid;
212 } 212 }
213 213
214 /* Allocate and free functions for each kind of security blob. */ 214 /* Allocate and free functions for each kind of security blob. */
215 215
216 static int inode_alloc_security(struct inode *inode) 216 static int inode_alloc_security(struct inode *inode)
217 { 217 {
218 struct inode_security_struct *isec; 218 struct inode_security_struct *isec;
219 u32 sid = current_sid(); 219 u32 sid = current_sid();
220 220
221 isec = kmem_cache_zalloc(sel_inode_cache, GFP_NOFS); 221 isec = kmem_cache_zalloc(sel_inode_cache, GFP_NOFS);
222 if (!isec) 222 if (!isec)
223 return -ENOMEM; 223 return -ENOMEM;
224 224
225 mutex_init(&isec->lock); 225 mutex_init(&isec->lock);
226 INIT_LIST_HEAD(&isec->list); 226 INIT_LIST_HEAD(&isec->list);
227 isec->inode = inode; 227 isec->inode = inode;
228 isec->sid = SECINITSID_UNLABELED; 228 isec->sid = SECINITSID_UNLABELED;
229 isec->sclass = SECCLASS_FILE; 229 isec->sclass = SECCLASS_FILE;
230 isec->task_sid = sid; 230 isec->task_sid = sid;
231 inode->i_security = isec; 231 inode->i_security = isec;
232 232
233 return 0; 233 return 0;
234 } 234 }
235 235
236 static void inode_free_rcu(struct rcu_head *head) 236 static void inode_free_rcu(struct rcu_head *head)
237 { 237 {
238 struct inode_security_struct *isec; 238 struct inode_security_struct *isec;
239 239
240 isec = container_of(head, struct inode_security_struct, rcu); 240 isec = container_of(head, struct inode_security_struct, rcu);
241 kmem_cache_free(sel_inode_cache, isec); 241 kmem_cache_free(sel_inode_cache, isec);
242 } 242 }
243 243
244 static void inode_free_security(struct inode *inode) 244 static void inode_free_security(struct inode *inode)
245 { 245 {
246 struct inode_security_struct *isec = inode->i_security; 246 struct inode_security_struct *isec = inode->i_security;
247 struct superblock_security_struct *sbsec = inode->i_sb->s_security; 247 struct superblock_security_struct *sbsec = inode->i_sb->s_security;
248 248
249 spin_lock(&sbsec->isec_lock); 249 spin_lock(&sbsec->isec_lock);
250 if (!list_empty(&isec->list)) 250 if (!list_empty(&isec->list))
251 list_del_init(&isec->list); 251 list_del_init(&isec->list);
252 spin_unlock(&sbsec->isec_lock); 252 spin_unlock(&sbsec->isec_lock);
253 253
254 /* 254 /*
255 * The inode may still be referenced in a path walk and 255 * The inode may still be referenced in a path walk and
256 * a call to selinux_inode_permission() can be made 256 * a call to selinux_inode_permission() can be made
257 * after inode_free_security() is called. Ideally, the VFS 257 * after inode_free_security() is called. Ideally, the VFS
258 * wouldn't do this, but fixing that is a much harder 258 * wouldn't do this, but fixing that is a much harder
259 * job. For now, simply free the i_security via RCU, and 259 * job. For now, simply free the i_security via RCU, and
260 * leave the current inode->i_security pointer intact. 260 * leave the current inode->i_security pointer intact.
261 * The inode will be freed after the RCU grace period too. 261 * The inode will be freed after the RCU grace period too.
262 */ 262 */
263 call_rcu(&isec->rcu, inode_free_rcu); 263 call_rcu(&isec->rcu, inode_free_rcu);
264 } 264 }
265 265
266 static int file_alloc_security(struct file *file) 266 static int file_alloc_security(struct file *file)
267 { 267 {
268 struct file_security_struct *fsec; 268 struct file_security_struct *fsec;
269 u32 sid = current_sid(); 269 u32 sid = current_sid();
270 270
271 fsec = kzalloc(sizeof(struct file_security_struct), GFP_KERNEL); 271 fsec = kzalloc(sizeof(struct file_security_struct), GFP_KERNEL);
272 if (!fsec) 272 if (!fsec)
273 return -ENOMEM; 273 return -ENOMEM;
274 274
275 fsec->sid = sid; 275 fsec->sid = sid;
276 fsec->fown_sid = sid; 276 fsec->fown_sid = sid;
277 file->f_security = fsec; 277 file->f_security = fsec;
278 278
279 return 0; 279 return 0;
280 } 280 }
281 281
282 static void file_free_security(struct file *file) 282 static void file_free_security(struct file *file)
283 { 283 {
284 struct file_security_struct *fsec = file->f_security; 284 struct file_security_struct *fsec = file->f_security;
285 file->f_security = NULL; 285 file->f_security = NULL;
286 kfree(fsec); 286 kfree(fsec);
287 } 287 }
288 288
289 static int superblock_alloc_security(struct super_block *sb) 289 static int superblock_alloc_security(struct super_block *sb)
290 { 290 {
291 struct superblock_security_struct *sbsec; 291 struct superblock_security_struct *sbsec;
292 292
293 sbsec = kzalloc(sizeof(struct superblock_security_struct), GFP_KERNEL); 293 sbsec = kzalloc(sizeof(struct superblock_security_struct), GFP_KERNEL);
294 if (!sbsec) 294 if (!sbsec)
295 return -ENOMEM; 295 return -ENOMEM;
296 296
297 mutex_init(&sbsec->lock); 297 mutex_init(&sbsec->lock);
298 INIT_LIST_HEAD(&sbsec->isec_head); 298 INIT_LIST_HEAD(&sbsec->isec_head);
299 spin_lock_init(&sbsec->isec_lock); 299 spin_lock_init(&sbsec->isec_lock);
300 sbsec->sb = sb; 300 sbsec->sb = sb;
301 sbsec->sid = SECINITSID_UNLABELED; 301 sbsec->sid = SECINITSID_UNLABELED;
302 sbsec->def_sid = SECINITSID_FILE; 302 sbsec->def_sid = SECINITSID_FILE;
303 sbsec->mntpoint_sid = SECINITSID_UNLABELED; 303 sbsec->mntpoint_sid = SECINITSID_UNLABELED;
304 sb->s_security = sbsec; 304 sb->s_security = sbsec;
305 305
306 return 0; 306 return 0;
307 } 307 }
308 308
309 static void superblock_free_security(struct super_block *sb) 309 static void superblock_free_security(struct super_block *sb)
310 { 310 {
311 struct superblock_security_struct *sbsec = sb->s_security; 311 struct superblock_security_struct *sbsec = sb->s_security;
312 sb->s_security = NULL; 312 sb->s_security = NULL;
313 kfree(sbsec); 313 kfree(sbsec);
314 } 314 }
315 315
316 /* The file system's label must be initialized prior to use. */ 316 /* The file system's label must be initialized prior to use. */
317 317
318 static const char *labeling_behaviors[7] = { 318 static const char *labeling_behaviors[7] = {
319 "uses xattr", 319 "uses xattr",
320 "uses transition SIDs", 320 "uses transition SIDs",
321 "uses task SIDs", 321 "uses task SIDs",
322 "uses genfs_contexts", 322 "uses genfs_contexts",
323 "not configured for labeling", 323 "not configured for labeling",
324 "uses mountpoint labeling", 324 "uses mountpoint labeling",
325 "uses native labeling", 325 "uses native labeling",
326 }; 326 };
327 327
328 static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dentry); 328 static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dentry);
329 329
330 static inline int inode_doinit(struct inode *inode) 330 static inline int inode_doinit(struct inode *inode)
331 { 331 {
332 return inode_doinit_with_dentry(inode, NULL); 332 return inode_doinit_with_dentry(inode, NULL);
333 } 333 }
334 334
335 enum { 335 enum {
336 Opt_error = -1, 336 Opt_error = -1,
337 Opt_context = 1, 337 Opt_context = 1,
338 Opt_fscontext = 2, 338 Opt_fscontext = 2,
339 Opt_defcontext = 3, 339 Opt_defcontext = 3,
340 Opt_rootcontext = 4, 340 Opt_rootcontext = 4,
341 Opt_labelsupport = 5, 341 Opt_labelsupport = 5,
342 Opt_nextmntopt = 6, 342 Opt_nextmntopt = 6,
343 }; 343 };
344 344
345 #define NUM_SEL_MNT_OPTS (Opt_nextmntopt - 1) 345 #define NUM_SEL_MNT_OPTS (Opt_nextmntopt - 1)
346 346
347 static const match_table_t tokens = { 347 static const match_table_t tokens = {
348 {Opt_context, CONTEXT_STR "%s"}, 348 {Opt_context, CONTEXT_STR "%s"},
349 {Opt_fscontext, FSCONTEXT_STR "%s"}, 349 {Opt_fscontext, FSCONTEXT_STR "%s"},
350 {Opt_defcontext, DEFCONTEXT_STR "%s"}, 350 {Opt_defcontext, DEFCONTEXT_STR "%s"},
351 {Opt_rootcontext, ROOTCONTEXT_STR "%s"}, 351 {Opt_rootcontext, ROOTCONTEXT_STR "%s"},
352 {Opt_labelsupport, LABELSUPP_STR}, 352 {Opt_labelsupport, LABELSUPP_STR},
353 {Opt_error, NULL}, 353 {Opt_error, NULL},
354 }; 354 };
355 355
356 #define SEL_MOUNT_FAIL_MSG "SELinux: duplicate or incompatible mount options\n" 356 #define SEL_MOUNT_FAIL_MSG "SELinux: duplicate or incompatible mount options\n"
357 357
358 static int may_context_mount_sb_relabel(u32 sid, 358 static int may_context_mount_sb_relabel(u32 sid,
359 struct superblock_security_struct *sbsec, 359 struct superblock_security_struct *sbsec,
360 const struct cred *cred) 360 const struct cred *cred)
361 { 361 {
362 const struct task_security_struct *tsec = cred->security; 362 const struct task_security_struct *tsec = cred->security;
363 int rc; 363 int rc;
364 364
365 rc = avc_has_perm(tsec->sid, sbsec->sid, SECCLASS_FILESYSTEM, 365 rc = avc_has_perm(tsec->sid, sbsec->sid, SECCLASS_FILESYSTEM,
366 FILESYSTEM__RELABELFROM, NULL); 366 FILESYSTEM__RELABELFROM, NULL);
367 if (rc) 367 if (rc)
368 return rc; 368 return rc;
369 369
370 rc = avc_has_perm(tsec->sid, sid, SECCLASS_FILESYSTEM, 370 rc = avc_has_perm(tsec->sid, sid, SECCLASS_FILESYSTEM,
371 FILESYSTEM__RELABELTO, NULL); 371 FILESYSTEM__RELABELTO, NULL);
372 return rc; 372 return rc;
373 } 373 }
374 374
375 static int may_context_mount_inode_relabel(u32 sid, 375 static int may_context_mount_inode_relabel(u32 sid,
376 struct superblock_security_struct *sbsec, 376 struct superblock_security_struct *sbsec,
377 const struct cred *cred) 377 const struct cred *cred)
378 { 378 {
379 const struct task_security_struct *tsec = cred->security; 379 const struct task_security_struct *tsec = cred->security;
380 int rc; 380 int rc;
381 rc = avc_has_perm(tsec->sid, sbsec->sid, SECCLASS_FILESYSTEM, 381 rc = avc_has_perm(tsec->sid, sbsec->sid, SECCLASS_FILESYSTEM,
382 FILESYSTEM__RELABELFROM, NULL); 382 FILESYSTEM__RELABELFROM, NULL);
383 if (rc) 383 if (rc)
384 return rc; 384 return rc;
385 385
386 rc = avc_has_perm(sid, sbsec->sid, SECCLASS_FILESYSTEM, 386 rc = avc_has_perm(sid, sbsec->sid, SECCLASS_FILESYSTEM,
387 FILESYSTEM__ASSOCIATE, NULL); 387 FILESYSTEM__ASSOCIATE, NULL);
388 return rc; 388 return rc;
389 } 389 }
390 390
391 static int selinux_is_sblabel_mnt(struct super_block *sb) 391 static int selinux_is_sblabel_mnt(struct super_block *sb)
392 { 392 {
393 struct superblock_security_struct *sbsec = sb->s_security; 393 struct superblock_security_struct *sbsec = sb->s_security;
394 394
395 if (sbsec->behavior == SECURITY_FS_USE_XATTR || 395 if (sbsec->behavior == SECURITY_FS_USE_XATTR ||
396 sbsec->behavior == SECURITY_FS_USE_TRANS || 396 sbsec->behavior == SECURITY_FS_USE_TRANS ||
397 sbsec->behavior == SECURITY_FS_USE_TASK) 397 sbsec->behavior == SECURITY_FS_USE_TASK)
398 return 1; 398 return 1;
399 399
400 /* Special handling for sysfs. Is genfs but also has setxattr handler*/ 400 /* Special handling for sysfs. Is genfs but also has setxattr handler*/
401 if (strncmp(sb->s_type->name, "sysfs", sizeof("sysfs")) == 0) 401 if (strncmp(sb->s_type->name, "sysfs", sizeof("sysfs")) == 0)
402 return 1; 402 return 1;
403 403
404 /* 404 /*
405 * Special handling for rootfs. Is genfs but supports 405 * Special handling for rootfs. Is genfs but supports
406 * setting SELinux context on in-core inodes. 406 * setting SELinux context on in-core inodes.
407 */ 407 */
408 if (strncmp(sb->s_type->name, "rootfs", sizeof("rootfs")) == 0) 408 if (strncmp(sb->s_type->name, "rootfs", sizeof("rootfs")) == 0)
409 return 1; 409 return 1;
410 410
411 return 0; 411 return 0;
412 } 412 }
413 413
414 static int sb_finish_set_opts(struct super_block *sb) 414 static int sb_finish_set_opts(struct super_block *sb)
415 { 415 {
416 struct superblock_security_struct *sbsec = sb->s_security; 416 struct superblock_security_struct *sbsec = sb->s_security;
417 struct dentry *root = sb->s_root; 417 struct dentry *root = sb->s_root;
418 struct inode *root_inode = root->d_inode; 418 struct inode *root_inode = root->d_inode;
419 int rc = 0; 419 int rc = 0;
420 420
421 if (sbsec->behavior == SECURITY_FS_USE_XATTR) { 421 if (sbsec->behavior == SECURITY_FS_USE_XATTR) {
422 /* Make sure that the xattr handler exists and that no 422 /* Make sure that the xattr handler exists and that no
423 error other than -ENODATA is returned by getxattr on 423 error other than -ENODATA is returned by getxattr on
424 the root directory. -ENODATA is ok, as this may be 424 the root directory. -ENODATA is ok, as this may be
425 the first boot of the SELinux kernel before we have 425 the first boot of the SELinux kernel before we have
426 assigned xattr values to the filesystem. */ 426 assigned xattr values to the filesystem. */
427 if (!root_inode->i_op->getxattr) { 427 if (!root_inode->i_op->getxattr) {
428 printk(KERN_WARNING "SELinux: (dev %s, type %s) has no " 428 printk(KERN_WARNING "SELinux: (dev %s, type %s) has no "
429 "xattr support\n", sb->s_id, sb->s_type->name); 429 "xattr support\n", sb->s_id, sb->s_type->name);
430 rc = -EOPNOTSUPP; 430 rc = -EOPNOTSUPP;
431 goto out; 431 goto out;
432 } 432 }
433 rc = root_inode->i_op->getxattr(root, XATTR_NAME_SELINUX, NULL, 0); 433 rc = root_inode->i_op->getxattr(root, XATTR_NAME_SELINUX, NULL, 0);
434 if (rc < 0 && rc != -ENODATA) { 434 if (rc < 0 && rc != -ENODATA) {
435 if (rc == -EOPNOTSUPP) 435 if (rc == -EOPNOTSUPP)
436 printk(KERN_WARNING "SELinux: (dev %s, type " 436 printk(KERN_WARNING "SELinux: (dev %s, type "
437 "%s) has no security xattr handler\n", 437 "%s) has no security xattr handler\n",
438 sb->s_id, sb->s_type->name); 438 sb->s_id, sb->s_type->name);
439 else 439 else
440 printk(KERN_WARNING "SELinux: (dev %s, type " 440 printk(KERN_WARNING "SELinux: (dev %s, type "
441 "%s) getxattr errno %d\n", sb->s_id, 441 "%s) getxattr errno %d\n", sb->s_id,
442 sb->s_type->name, -rc); 442 sb->s_type->name, -rc);
443 goto out; 443 goto out;
444 } 444 }
445 } 445 }
446 446
447 if (sbsec->behavior > ARRAY_SIZE(labeling_behaviors)) 447 if (sbsec->behavior > ARRAY_SIZE(labeling_behaviors))
448 printk(KERN_ERR "SELinux: initialized (dev %s, type %s), unknown behavior\n", 448 printk(KERN_ERR "SELinux: initialized (dev %s, type %s), unknown behavior\n",
449 sb->s_id, sb->s_type->name); 449 sb->s_id, sb->s_type->name);
450 else 450 else
451 printk(KERN_DEBUG "SELinux: initialized (dev %s, type %s), %s\n", 451 printk(KERN_DEBUG "SELinux: initialized (dev %s, type %s), %s\n",
452 sb->s_id, sb->s_type->name, 452 sb->s_id, sb->s_type->name,
453 labeling_behaviors[sbsec->behavior-1]); 453 labeling_behaviors[sbsec->behavior-1]);
454 454
455 sbsec->flags |= SE_SBINITIALIZED; 455 sbsec->flags |= SE_SBINITIALIZED;
456 if (selinux_is_sblabel_mnt(sb)) 456 if (selinux_is_sblabel_mnt(sb))
457 sbsec->flags |= SBLABEL_MNT; 457 sbsec->flags |= SBLABEL_MNT;
458 458
459 /* Initialize the root inode. */ 459 /* Initialize the root inode. */
460 rc = inode_doinit_with_dentry(root_inode, root); 460 rc = inode_doinit_with_dentry(root_inode, root);
461 461
462 /* Initialize any other inodes associated with the superblock, e.g. 462 /* Initialize any other inodes associated with the superblock, e.g.
463 inodes created prior to initial policy load or inodes created 463 inodes created prior to initial policy load or inodes created
464 during get_sb by a pseudo filesystem that directly 464 during get_sb by a pseudo filesystem that directly
465 populates itself. */ 465 populates itself. */
466 spin_lock(&sbsec->isec_lock); 466 spin_lock(&sbsec->isec_lock);
467 next_inode: 467 next_inode:
468 if (!list_empty(&sbsec->isec_head)) { 468 if (!list_empty(&sbsec->isec_head)) {
469 struct inode_security_struct *isec = 469 struct inode_security_struct *isec =
470 list_entry(sbsec->isec_head.next, 470 list_entry(sbsec->isec_head.next,
471 struct inode_security_struct, list); 471 struct inode_security_struct, list);
472 struct inode *inode = isec->inode; 472 struct inode *inode = isec->inode;
473 spin_unlock(&sbsec->isec_lock); 473 spin_unlock(&sbsec->isec_lock);
474 inode = igrab(inode); 474 inode = igrab(inode);
475 if (inode) { 475 if (inode) {
476 if (!IS_PRIVATE(inode)) 476 if (!IS_PRIVATE(inode))
477 inode_doinit(inode); 477 inode_doinit(inode);
478 iput(inode); 478 iput(inode);
479 } 479 }
480 spin_lock(&sbsec->isec_lock); 480 spin_lock(&sbsec->isec_lock);
481 list_del_init(&isec->list); 481 list_del_init(&isec->list);
482 goto next_inode; 482 goto next_inode;
483 } 483 }
484 spin_unlock(&sbsec->isec_lock); 484 spin_unlock(&sbsec->isec_lock);
485 out: 485 out:
486 return rc; 486 return rc;
487 } 487 }
488 488
489 /* 489 /*
490 * This function should allow an FS to ask what its mount security 490 * This function should allow an FS to ask what its mount security
491 * options were so it can use those later for submounts, displaying 491 * options were so it can use those later for submounts, displaying
492 * mount options, or whatever. 492 * mount options, or whatever.
493 */ 493 */
494 static int selinux_get_mnt_opts(const struct super_block *sb, 494 static int selinux_get_mnt_opts(const struct super_block *sb,
495 struct security_mnt_opts *opts) 495 struct security_mnt_opts *opts)
496 { 496 {
497 int rc = 0, i; 497 int rc = 0, i;
498 struct superblock_security_struct *sbsec = sb->s_security; 498 struct superblock_security_struct *sbsec = sb->s_security;
499 char *context = NULL; 499 char *context = NULL;
500 u32 len; 500 u32 len;
501 char tmp; 501 char tmp;
502 502
503 security_init_mnt_opts(opts); 503 security_init_mnt_opts(opts);
504 504
505 if (!(sbsec->flags & SE_SBINITIALIZED)) 505 if (!(sbsec->flags & SE_SBINITIALIZED))
506 return -EINVAL; 506 return -EINVAL;
507 507
508 if (!ss_initialized) 508 if (!ss_initialized)
509 return -EINVAL; 509 return -EINVAL;
510 510
511 /* make sure we always check enough bits to cover the mask */ 511 /* make sure we always check enough bits to cover the mask */
512 BUILD_BUG_ON(SE_MNTMASK >= (1 << NUM_SEL_MNT_OPTS)); 512 BUILD_BUG_ON(SE_MNTMASK >= (1 << NUM_SEL_MNT_OPTS));
513 513
514 tmp = sbsec->flags & SE_MNTMASK; 514 tmp = sbsec->flags & SE_MNTMASK;
515 /* count the number of mount options for this sb */ 515 /* count the number of mount options for this sb */
516 for (i = 0; i < NUM_SEL_MNT_OPTS; i++) { 516 for (i = 0; i < NUM_SEL_MNT_OPTS; i++) {
517 if (tmp & 0x01) 517 if (tmp & 0x01)
518 opts->num_mnt_opts++; 518 opts->num_mnt_opts++;
519 tmp >>= 1; 519 tmp >>= 1;
520 } 520 }
521 /* Check if the Label support flag is set */ 521 /* Check if the Label support flag is set */
522 if (sbsec->flags & SBLABEL_MNT) 522 if (sbsec->flags & SBLABEL_MNT)
523 opts->num_mnt_opts++; 523 opts->num_mnt_opts++;
524 524
525 opts->mnt_opts = kcalloc(opts->num_mnt_opts, sizeof(char *), GFP_ATOMIC); 525 opts->mnt_opts = kcalloc(opts->num_mnt_opts, sizeof(char *), GFP_ATOMIC);
526 if (!opts->mnt_opts) { 526 if (!opts->mnt_opts) {
527 rc = -ENOMEM; 527 rc = -ENOMEM;
528 goto out_free; 528 goto out_free;
529 } 529 }
530 530
531 opts->mnt_opts_flags = kcalloc(opts->num_mnt_opts, sizeof(int), GFP_ATOMIC); 531 opts->mnt_opts_flags = kcalloc(opts->num_mnt_opts, sizeof(int), GFP_ATOMIC);
532 if (!opts->mnt_opts_flags) { 532 if (!opts->mnt_opts_flags) {
533 rc = -ENOMEM; 533 rc = -ENOMEM;
534 goto out_free; 534 goto out_free;
535 } 535 }
536 536
537 i = 0; 537 i = 0;
538 if (sbsec->flags & FSCONTEXT_MNT) { 538 if (sbsec->flags & FSCONTEXT_MNT) {
539 rc = security_sid_to_context(sbsec->sid, &context, &len); 539 rc = security_sid_to_context(sbsec->sid, &context, &len);
540 if (rc) 540 if (rc)
541 goto out_free; 541 goto out_free;
542 opts->mnt_opts[i] = context; 542 opts->mnt_opts[i] = context;
543 opts->mnt_opts_flags[i++] = FSCONTEXT_MNT; 543 opts->mnt_opts_flags[i++] = FSCONTEXT_MNT;
544 } 544 }
545 if (sbsec->flags & CONTEXT_MNT) { 545 if (sbsec->flags & CONTEXT_MNT) {
546 rc = security_sid_to_context(sbsec->mntpoint_sid, &context, &len); 546 rc = security_sid_to_context(sbsec->mntpoint_sid, &context, &len);
547 if (rc) 547 if (rc)
548 goto out_free; 548 goto out_free;
549 opts->mnt_opts[i] = context; 549 opts->mnt_opts[i] = context;
550 opts->mnt_opts_flags[i++] = CONTEXT_MNT; 550 opts->mnt_opts_flags[i++] = CONTEXT_MNT;
551 } 551 }
552 if (sbsec->flags & DEFCONTEXT_MNT) { 552 if (sbsec->flags & DEFCONTEXT_MNT) {
553 rc = security_sid_to_context(sbsec->def_sid, &context, &len); 553 rc = security_sid_to_context(sbsec->def_sid, &context, &len);
554 if (rc) 554 if (rc)
555 goto out_free; 555 goto out_free;
556 opts->mnt_opts[i] = context; 556 opts->mnt_opts[i] = context;
557 opts->mnt_opts_flags[i++] = DEFCONTEXT_MNT; 557 opts->mnt_opts_flags[i++] = DEFCONTEXT_MNT;
558 } 558 }
559 if (sbsec->flags & ROOTCONTEXT_MNT) { 559 if (sbsec->flags & ROOTCONTEXT_MNT) {
560 struct inode *root = sbsec->sb->s_root->d_inode; 560 struct inode *root = sbsec->sb->s_root->d_inode;
561 struct inode_security_struct *isec = root->i_security; 561 struct inode_security_struct *isec = root->i_security;
562 562
563 rc = security_sid_to_context(isec->sid, &context, &len); 563 rc = security_sid_to_context(isec->sid, &context, &len);
564 if (rc) 564 if (rc)
565 goto out_free; 565 goto out_free;
566 opts->mnt_opts[i] = context; 566 opts->mnt_opts[i] = context;
567 opts->mnt_opts_flags[i++] = ROOTCONTEXT_MNT; 567 opts->mnt_opts_flags[i++] = ROOTCONTEXT_MNT;
568 } 568 }
569 if (sbsec->flags & SBLABEL_MNT) { 569 if (sbsec->flags & SBLABEL_MNT) {
570 opts->mnt_opts[i] = NULL; 570 opts->mnt_opts[i] = NULL;
571 opts->mnt_opts_flags[i++] = SBLABEL_MNT; 571 opts->mnt_opts_flags[i++] = SBLABEL_MNT;
572 } 572 }
573 573
574 BUG_ON(i != opts->num_mnt_opts); 574 BUG_ON(i != opts->num_mnt_opts);
575 575
576 return 0; 576 return 0;
577 577
578 out_free: 578 out_free:
579 security_free_mnt_opts(opts); 579 security_free_mnt_opts(opts);
580 return rc; 580 return rc;
581 } 581 }
582 582
583 static int bad_option(struct superblock_security_struct *sbsec, char flag, 583 static int bad_option(struct superblock_security_struct *sbsec, char flag,
584 u32 old_sid, u32 new_sid) 584 u32 old_sid, u32 new_sid)
585 { 585 {
586 char mnt_flags = sbsec->flags & SE_MNTMASK; 586 char mnt_flags = sbsec->flags & SE_MNTMASK;
587 587
588 /* check if the old mount command had the same options */ 588 /* check if the old mount command had the same options */
589 if (sbsec->flags & SE_SBINITIALIZED) 589 if (sbsec->flags & SE_SBINITIALIZED)
590 if (!(sbsec->flags & flag) || 590 if (!(sbsec->flags & flag) ||
591 (old_sid != new_sid)) 591 (old_sid != new_sid))
592 return 1; 592 return 1;
593 593
594 /* check if we were passed the same options twice, 594 /* check if we were passed the same options twice,
595 * aka someone passed context=a,context=b 595 * aka someone passed context=a,context=b
596 */ 596 */
597 if (!(sbsec->flags & SE_SBINITIALIZED)) 597 if (!(sbsec->flags & SE_SBINITIALIZED))
598 if (mnt_flags & flag) 598 if (mnt_flags & flag)
599 return 1; 599 return 1;
600 return 0; 600 return 0;
601 } 601 }
602 602
603 /* 603 /*
604 * Allow filesystems with binary mount data to explicitly set mount point 604 * Allow filesystems with binary mount data to explicitly set mount point
605 * labeling information. 605 * labeling information.
606 */ 606 */
607 static int selinux_set_mnt_opts(struct super_block *sb, 607 static int selinux_set_mnt_opts(struct super_block *sb,
608 struct security_mnt_opts *opts, 608 struct security_mnt_opts *opts,
609 unsigned long kern_flags, 609 unsigned long kern_flags,
610 unsigned long *set_kern_flags) 610 unsigned long *set_kern_flags)
611 { 611 {
612 const struct cred *cred = current_cred(); 612 const struct cred *cred = current_cred();
613 int rc = 0, i; 613 int rc = 0, i;
614 struct superblock_security_struct *sbsec = sb->s_security; 614 struct superblock_security_struct *sbsec = sb->s_security;
615 const char *name = sb->s_type->name; 615 const char *name = sb->s_type->name;
616 struct inode *inode = sbsec->sb->s_root->d_inode; 616 struct inode *inode = sbsec->sb->s_root->d_inode;
617 struct inode_security_struct *root_isec = inode->i_security; 617 struct inode_security_struct *root_isec = inode->i_security;
618 u32 fscontext_sid = 0, context_sid = 0, rootcontext_sid = 0; 618 u32 fscontext_sid = 0, context_sid = 0, rootcontext_sid = 0;
619 u32 defcontext_sid = 0; 619 u32 defcontext_sid = 0;
620 char **mount_options = opts->mnt_opts; 620 char **mount_options = opts->mnt_opts;
621 int *flags = opts->mnt_opts_flags; 621 int *flags = opts->mnt_opts_flags;
622 int num_opts = opts->num_mnt_opts; 622 int num_opts = opts->num_mnt_opts;
623 623
624 mutex_lock(&sbsec->lock); 624 mutex_lock(&sbsec->lock);
625 625
626 if (!ss_initialized) { 626 if (!ss_initialized) {
627 if (!num_opts) { 627 if (!num_opts) {
628 /* Defer initialization until selinux_complete_init, 628 /* Defer initialization until selinux_complete_init,
629 after the initial policy is loaded and the security 629 after the initial policy is loaded and the security
630 server is ready to handle calls. */ 630 server is ready to handle calls. */
631 goto out; 631 goto out;
632 } 632 }
633 rc = -EINVAL; 633 rc = -EINVAL;
634 printk(KERN_WARNING "SELinux: Unable to set superblock options " 634 printk(KERN_WARNING "SELinux: Unable to set superblock options "
635 "before the security server is initialized\n"); 635 "before the security server is initialized\n");
636 goto out; 636 goto out;
637 } 637 }
638 if (kern_flags && !set_kern_flags) { 638 if (kern_flags && !set_kern_flags) {
639 /* Specifying internal flags without providing a place to 639 /* Specifying internal flags without providing a place to
640 * place the results is not allowed */ 640 * place the results is not allowed */
641 rc = -EINVAL; 641 rc = -EINVAL;
642 goto out; 642 goto out;
643 } 643 }
644 644
645 /* 645 /*
646 * Binary mount data FS will come through this function twice. Once 646 * Binary mount data FS will come through this function twice. Once
647 * from an explicit call and once from the generic calls from the vfs. 647 * from an explicit call and once from the generic calls from the vfs.
648 * Since the generic VFS calls will not contain any security mount data 648 * Since the generic VFS calls will not contain any security mount data
649 * we need to skip the double mount verification. 649 * we need to skip the double mount verification.
650 * 650 *
651 * This does open a hole in which we will not notice if the first 651 * This does open a hole in which we will not notice if the first
652 * mount using this sb set explicit options and a second mount using 652 * mount using this sb set explicit options and a second mount using
653 * this sb does not set any security options. (The first options 653 * this sb does not set any security options. (The first options
654 * will be used for both mounts) 654 * will be used for both mounts)
655 */ 655 */
656 if ((sbsec->flags & SE_SBINITIALIZED) && (sb->s_type->fs_flags & FS_BINARY_MOUNTDATA) 656 if ((sbsec->flags & SE_SBINITIALIZED) && (sb->s_type->fs_flags & FS_BINARY_MOUNTDATA)
657 && (num_opts == 0)) 657 && (num_opts == 0))
658 goto out; 658 goto out;
659 659
660 /* 660 /*
661 * parse the mount options, check if they are valid sids. 661 * parse the mount options, check if they are valid sids.
662 * also check if someone is trying to mount the same sb more 662 * also check if someone is trying to mount the same sb more
663 * than once with different security options. 663 * than once with different security options.
664 */ 664 */
665 for (i = 0; i < num_opts; i++) { 665 for (i = 0; i < num_opts; i++) {
666 u32 sid; 666 u32 sid;
667 667
668 if (flags[i] == SBLABEL_MNT) 668 if (flags[i] == SBLABEL_MNT)
669 continue; 669 continue;
670 rc = security_context_to_sid(mount_options[i], 670 rc = security_context_to_sid(mount_options[i],
671 strlen(mount_options[i]), &sid, GFP_KERNEL); 671 strlen(mount_options[i]), &sid, GFP_KERNEL);
672 if (rc) { 672 if (rc) {
673 printk(KERN_WARNING "SELinux: security_context_to_sid" 673 printk(KERN_WARNING "SELinux: security_context_to_sid"
674 "(%s) failed for (dev %s, type %s) errno=%d\n", 674 "(%s) failed for (dev %s, type %s) errno=%d\n",
675 mount_options[i], sb->s_id, name, rc); 675 mount_options[i], sb->s_id, name, rc);
676 goto out; 676 goto out;
677 } 677 }
678 switch (flags[i]) { 678 switch (flags[i]) {
679 case FSCONTEXT_MNT: 679 case FSCONTEXT_MNT:
680 fscontext_sid = sid; 680 fscontext_sid = sid;
681 681
682 if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, 682 if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid,
683 fscontext_sid)) 683 fscontext_sid))
684 goto out_double_mount; 684 goto out_double_mount;
685 685
686 sbsec->flags |= FSCONTEXT_MNT; 686 sbsec->flags |= FSCONTEXT_MNT;
687 break; 687 break;
688 case CONTEXT_MNT: 688 case CONTEXT_MNT:
689 context_sid = sid; 689 context_sid = sid;
690 690
691 if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, 691 if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid,
692 context_sid)) 692 context_sid))
693 goto out_double_mount; 693 goto out_double_mount;
694 694
695 sbsec->flags |= CONTEXT_MNT; 695 sbsec->flags |= CONTEXT_MNT;
696 break; 696 break;
697 case ROOTCONTEXT_MNT: 697 case ROOTCONTEXT_MNT:
698 rootcontext_sid = sid; 698 rootcontext_sid = sid;
699 699
700 if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, 700 if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid,
701 rootcontext_sid)) 701 rootcontext_sid))
702 goto out_double_mount; 702 goto out_double_mount;
703 703
704 sbsec->flags |= ROOTCONTEXT_MNT; 704 sbsec->flags |= ROOTCONTEXT_MNT;
705 705
706 break; 706 break;
707 case DEFCONTEXT_MNT: 707 case DEFCONTEXT_MNT:
708 defcontext_sid = sid; 708 defcontext_sid = sid;
709 709
710 if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, 710 if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid,
711 defcontext_sid)) 711 defcontext_sid))
712 goto out_double_mount; 712 goto out_double_mount;
713 713
714 sbsec->flags |= DEFCONTEXT_MNT; 714 sbsec->flags |= DEFCONTEXT_MNT;
715 715
716 break; 716 break;
717 default: 717 default:
718 rc = -EINVAL; 718 rc = -EINVAL;
719 goto out; 719 goto out;
720 } 720 }
721 } 721 }
722 722
723 if (sbsec->flags & SE_SBINITIALIZED) { 723 if (sbsec->flags & SE_SBINITIALIZED) {
724 /* previously mounted with options, but not on this attempt? */ 724 /* previously mounted with options, but not on this attempt? */
725 if ((sbsec->flags & SE_MNTMASK) && !num_opts) 725 if ((sbsec->flags & SE_MNTMASK) && !num_opts)
726 goto out_double_mount; 726 goto out_double_mount;
727 rc = 0; 727 rc = 0;
728 goto out; 728 goto out;
729 } 729 }
730 730
731 if (strcmp(sb->s_type->name, "proc") == 0) 731 if (strcmp(sb->s_type->name, "proc") == 0)
732 sbsec->flags |= SE_SBPROC; 732 sbsec->flags |= SE_SBPROC;
733 733
734 if (!sbsec->behavior) { 734 if (!sbsec->behavior) {
735 /* 735 /*
736 * Determine the labeling behavior to use for this 736 * Determine the labeling behavior to use for this
737 * filesystem type. 737 * filesystem type.
738 */ 738 */
739 rc = security_fs_use(sb); 739 rc = security_fs_use(sb);
740 if (rc) { 740 if (rc) {
741 printk(KERN_WARNING 741 printk(KERN_WARNING
742 "%s: security_fs_use(%s) returned %d\n", 742 "%s: security_fs_use(%s) returned %d\n",
743 __func__, sb->s_type->name, rc); 743 __func__, sb->s_type->name, rc);
744 goto out; 744 goto out;
745 } 745 }
746 } 746 }
747 /* sets the context of the superblock for the fs being mounted. */ 747 /* sets the context of the superblock for the fs being mounted. */
748 if (fscontext_sid) { 748 if (fscontext_sid) {
749 rc = may_context_mount_sb_relabel(fscontext_sid, sbsec, cred); 749 rc = may_context_mount_sb_relabel(fscontext_sid, sbsec, cred);
750 if (rc) 750 if (rc)
751 goto out; 751 goto out;
752 752
753 sbsec->sid = fscontext_sid; 753 sbsec->sid = fscontext_sid;
754 } 754 }
755 755
756 /* 756 /*
757 * Switch to using mount point labeling behavior. 757 * Switch to using mount point labeling behavior.
758 * sets the label used on all files below the mountpoint, and will set 758 * sets the label used on all files below the mountpoint, and will set
759 * the superblock context if not already set. 759 * the superblock context if not already set.
760 */ 760 */
761 if (kern_flags & SECURITY_LSM_NATIVE_LABELS && !context_sid) { 761 if (kern_flags & SECURITY_LSM_NATIVE_LABELS && !context_sid) {
762 sbsec->behavior = SECURITY_FS_USE_NATIVE; 762 sbsec->behavior = SECURITY_FS_USE_NATIVE;
763 *set_kern_flags |= SECURITY_LSM_NATIVE_LABELS; 763 *set_kern_flags |= SECURITY_LSM_NATIVE_LABELS;
764 } 764 }
765 765
766 if (context_sid) { 766 if (context_sid) {
767 if (!fscontext_sid) { 767 if (!fscontext_sid) {
768 rc = may_context_mount_sb_relabel(context_sid, sbsec, 768 rc = may_context_mount_sb_relabel(context_sid, sbsec,
769 cred); 769 cred);
770 if (rc) 770 if (rc)
771 goto out; 771 goto out;
772 sbsec->sid = context_sid; 772 sbsec->sid = context_sid;
773 } else { 773 } else {
774 rc = may_context_mount_inode_relabel(context_sid, sbsec, 774 rc = may_context_mount_inode_relabel(context_sid, sbsec,
775 cred); 775 cred);
776 if (rc) 776 if (rc)
777 goto out; 777 goto out;
778 } 778 }
779 if (!rootcontext_sid) 779 if (!rootcontext_sid)
780 rootcontext_sid = context_sid; 780 rootcontext_sid = context_sid;
781 781
782 sbsec->mntpoint_sid = context_sid; 782 sbsec->mntpoint_sid = context_sid;
783 sbsec->behavior = SECURITY_FS_USE_MNTPOINT; 783 sbsec->behavior = SECURITY_FS_USE_MNTPOINT;
784 } 784 }
785 785
786 if (rootcontext_sid) { 786 if (rootcontext_sid) {
787 rc = may_context_mount_inode_relabel(rootcontext_sid, sbsec, 787 rc = may_context_mount_inode_relabel(rootcontext_sid, sbsec,
788 cred); 788 cred);
789 if (rc) 789 if (rc)
790 goto out; 790 goto out;
791 791
792 root_isec->sid = rootcontext_sid; 792 root_isec->sid = rootcontext_sid;
793 root_isec->initialized = 1; 793 root_isec->initialized = 1;
794 } 794 }
795 795
796 if (defcontext_sid) { 796 if (defcontext_sid) {
797 if (sbsec->behavior != SECURITY_FS_USE_XATTR && 797 if (sbsec->behavior != SECURITY_FS_USE_XATTR &&
798 sbsec->behavior != SECURITY_FS_USE_NATIVE) { 798 sbsec->behavior != SECURITY_FS_USE_NATIVE) {
799 rc = -EINVAL; 799 rc = -EINVAL;
800 printk(KERN_WARNING "SELinux: defcontext option is " 800 printk(KERN_WARNING "SELinux: defcontext option is "
801 "invalid for this filesystem type\n"); 801 "invalid for this filesystem type\n");
802 goto out; 802 goto out;
803 } 803 }
804 804
805 if (defcontext_sid != sbsec->def_sid) { 805 if (defcontext_sid != sbsec->def_sid) {
806 rc = may_context_mount_inode_relabel(defcontext_sid, 806 rc = may_context_mount_inode_relabel(defcontext_sid,
807 sbsec, cred); 807 sbsec, cred);
808 if (rc) 808 if (rc)
809 goto out; 809 goto out;
810 } 810 }
811 811
812 sbsec->def_sid = defcontext_sid; 812 sbsec->def_sid = defcontext_sid;
813 } 813 }
814 814
815 rc = sb_finish_set_opts(sb); 815 rc = sb_finish_set_opts(sb);
816 out: 816 out:
817 mutex_unlock(&sbsec->lock); 817 mutex_unlock(&sbsec->lock);
818 return rc; 818 return rc;
819 out_double_mount: 819 out_double_mount:
820 rc = -EINVAL; 820 rc = -EINVAL;
821 printk(KERN_WARNING "SELinux: mount invalid. Same superblock, different " 821 printk(KERN_WARNING "SELinux: mount invalid. Same superblock, different "
822 "security settings for (dev %s, type %s)\n", sb->s_id, name); 822 "security settings for (dev %s, type %s)\n", sb->s_id, name);
823 goto out; 823 goto out;
824 } 824 }
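The context= and fscontext= options handled above arrive as mount data from userspace. A minimal sketch of how such data is passed, assuming an SELinux-enabled system, an existing /mnt/test directory, and a context string that is valid under the loaded policy (all three are assumptions, not part of this change):

/* Hedged sketch: mount a tmpfs with a fixed SELinux context supplied
 * via the data argument of mount(2). The mount point and the context
 * string below are illustrative assumptions; the call needs root. */
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/mount.h>

int main(void)
{
	const char *target = "/mnt/test";	/* assumed to exist */
	const char *opts = "context=system_u:object_r:tmp_t:s0";

	if (mount("tmpfs", target, "tmpfs", 0, opts) != 0) {
		fprintf(stderr, "mount: %s\n", strerror(errno));
		return 1;
	}
	printf("mounted %s with %s\n", target, opts);
	return 0;
}

Remounting the same superblock with different security options is exactly what the out_double_mount path above rejects.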
825 825
826 static int selinux_cmp_sb_context(const struct super_block *oldsb, 826 static int selinux_cmp_sb_context(const struct super_block *oldsb,
827 const struct super_block *newsb) 827 const struct super_block *newsb)
828 { 828 {
829 struct superblock_security_struct *old = oldsb->s_security; 829 struct superblock_security_struct *old = oldsb->s_security;
830 struct superblock_security_struct *new = newsb->s_security; 830 struct superblock_security_struct *new = newsb->s_security;
831 char oldflags = old->flags & SE_MNTMASK; 831 char oldflags = old->flags & SE_MNTMASK;
832 char newflags = new->flags & SE_MNTMASK; 832 char newflags = new->flags & SE_MNTMASK;
833 833
834 if (oldflags != newflags) 834 if (oldflags != newflags)
835 goto mismatch; 835 goto mismatch;
836 if ((oldflags & FSCONTEXT_MNT) && old->sid != new->sid) 836 if ((oldflags & FSCONTEXT_MNT) && old->sid != new->sid)
837 goto mismatch; 837 goto mismatch;
838 if ((oldflags & CONTEXT_MNT) && old->mntpoint_sid != new->mntpoint_sid) 838 if ((oldflags & CONTEXT_MNT) && old->mntpoint_sid != new->mntpoint_sid)
839 goto mismatch; 839 goto mismatch;
840 if ((oldflags & DEFCONTEXT_MNT) && old->def_sid != new->def_sid) 840 if ((oldflags & DEFCONTEXT_MNT) && old->def_sid != new->def_sid)
841 goto mismatch; 841 goto mismatch;
842 if (oldflags & ROOTCONTEXT_MNT) { 842 if (oldflags & ROOTCONTEXT_MNT) {
843 struct inode_security_struct *oldroot = oldsb->s_root->d_inode->i_security; 843 struct inode_security_struct *oldroot = oldsb->s_root->d_inode->i_security;
844 struct inode_security_struct *newroot = newsb->s_root->d_inode->i_security; 844 struct inode_security_struct *newroot = newsb->s_root->d_inode->i_security;
845 if (oldroot->sid != newroot->sid) 845 if (oldroot->sid != newroot->sid)
846 goto mismatch; 846 goto mismatch;
847 } 847 }
848 return 0; 848 return 0;
849 mismatch: 849 mismatch:
850 printk(KERN_WARNING "SELinux: mount invalid. Same superblock, " 850 printk(KERN_WARNING "SELinux: mount invalid. Same superblock, "
851 "different security settings for (dev %s, " 851 "different security settings for (dev %s, "
852 "type %s)\n", newsb->s_id, newsb->s_type->name); 852 "type %s)\n", newsb->s_id, newsb->s_type->name);
853 return -EBUSY; 853 return -EBUSY;
854 } 854 }
855 855
856 static int selinux_sb_clone_mnt_opts(const struct super_block *oldsb, 856 static int selinux_sb_clone_mnt_opts(const struct super_block *oldsb,
857 struct super_block *newsb) 857 struct super_block *newsb)
858 { 858 {
859 const struct superblock_security_struct *oldsbsec = oldsb->s_security; 859 const struct superblock_security_struct *oldsbsec = oldsb->s_security;
860 struct superblock_security_struct *newsbsec = newsb->s_security; 860 struct superblock_security_struct *newsbsec = newsb->s_security;
861 861
862 int set_fscontext = (oldsbsec->flags & FSCONTEXT_MNT); 862 int set_fscontext = (oldsbsec->flags & FSCONTEXT_MNT);
863 int set_context = (oldsbsec->flags & CONTEXT_MNT); 863 int set_context = (oldsbsec->flags & CONTEXT_MNT);
864 int set_rootcontext = (oldsbsec->flags & ROOTCONTEXT_MNT); 864 int set_rootcontext = (oldsbsec->flags & ROOTCONTEXT_MNT);
865 865
866 /* 866 /*
867 * if the parent was able to be mounted it clearly had no special lsm 867 * if the parent was able to be mounted it clearly had no special lsm
868 * mount options. thus we can safely deal with this superblock later 868 * mount options. thus we can safely deal with this superblock later
869 */ 869 */
870 if (!ss_initialized) 870 if (!ss_initialized)
871 return 0; 871 return 0;
872 872
873 /* how can we clone if the old one wasn't set up?? */ 873 /* how can we clone if the old one wasn't set up?? */
874 BUG_ON(!(oldsbsec->flags & SE_SBINITIALIZED)); 874 BUG_ON(!(oldsbsec->flags & SE_SBINITIALIZED));
875 875
876 /* if fs is reusing a sb, make sure that the contexts match */ 876 /* if fs is reusing a sb, make sure that the contexts match */
877 if (newsbsec->flags & SE_SBINITIALIZED) 877 if (newsbsec->flags & SE_SBINITIALIZED)
878 return selinux_cmp_sb_context(oldsb, newsb); 878 return selinux_cmp_sb_context(oldsb, newsb);
879 879
880 mutex_lock(&newsbsec->lock); 880 mutex_lock(&newsbsec->lock);
881 881
882 newsbsec->flags = oldsbsec->flags; 882 newsbsec->flags = oldsbsec->flags;
883 883
884 newsbsec->sid = oldsbsec->sid; 884 newsbsec->sid = oldsbsec->sid;
885 newsbsec->def_sid = oldsbsec->def_sid; 885 newsbsec->def_sid = oldsbsec->def_sid;
886 newsbsec->behavior = oldsbsec->behavior; 886 newsbsec->behavior = oldsbsec->behavior;
887 887
888 if (set_context) { 888 if (set_context) {
889 u32 sid = oldsbsec->mntpoint_sid; 889 u32 sid = oldsbsec->mntpoint_sid;
890 890
891 if (!set_fscontext) 891 if (!set_fscontext)
892 newsbsec->sid = sid; 892 newsbsec->sid = sid;
893 if (!set_rootcontext) { 893 if (!set_rootcontext) {
894 struct inode *newinode = newsb->s_root->d_inode; 894 struct inode *newinode = newsb->s_root->d_inode;
895 struct inode_security_struct *newisec = newinode->i_security; 895 struct inode_security_struct *newisec = newinode->i_security;
896 newisec->sid = sid; 896 newisec->sid = sid;
897 } 897 }
898 newsbsec->mntpoint_sid = sid; 898 newsbsec->mntpoint_sid = sid;
899 } 899 }
900 if (set_rootcontext) { 900 if (set_rootcontext) {
901 const struct inode *oldinode = oldsb->s_root->d_inode; 901 const struct inode *oldinode = oldsb->s_root->d_inode;
902 const struct inode_security_struct *oldisec = oldinode->i_security; 902 const struct inode_security_struct *oldisec = oldinode->i_security;
903 struct inode *newinode = newsb->s_root->d_inode; 903 struct inode *newinode = newsb->s_root->d_inode;
904 struct inode_security_struct *newisec = newinode->i_security; 904 struct inode_security_struct *newisec = newinode->i_security;
905 905
906 newisec->sid = oldisec->sid; 906 newisec->sid = oldisec->sid;
907 } 907 }
908 908
909 sb_finish_set_opts(newsb); 909 sb_finish_set_opts(newsb);
910 mutex_unlock(&newsbsec->lock); 910 mutex_unlock(&newsbsec->lock);
911 return 0; 911 return 0;
912 } 912 }
913 913
914 static int selinux_parse_opts_str(char *options, 914 static int selinux_parse_opts_str(char *options,
915 struct security_mnt_opts *opts) 915 struct security_mnt_opts *opts)
916 { 916 {
917 char *p; 917 char *p;
918 char *context = NULL, *defcontext = NULL; 918 char *context = NULL, *defcontext = NULL;
919 char *fscontext = NULL, *rootcontext = NULL; 919 char *fscontext = NULL, *rootcontext = NULL;
920 int rc, num_mnt_opts = 0; 920 int rc, num_mnt_opts = 0;
921 921
922 opts->num_mnt_opts = 0; 922 opts->num_mnt_opts = 0;
923 923
924 /* Standard string-based options. */ 924 /* Standard string-based options. */
925 while ((p = strsep(&options, "|")) != NULL) { 925 while ((p = strsep(&options, "|")) != NULL) {
926 int token; 926 int token;
927 substring_t args[MAX_OPT_ARGS]; 927 substring_t args[MAX_OPT_ARGS];
928 928
929 if (!*p) 929 if (!*p)
930 continue; 930 continue;
931 931
932 token = match_token(p, tokens, args); 932 token = match_token(p, tokens, args);
933 933
934 switch (token) { 934 switch (token) {
935 case Opt_context: 935 case Opt_context:
936 if (context || defcontext) { 936 if (context || defcontext) {
937 rc = -EINVAL; 937 rc = -EINVAL;
938 printk(KERN_WARNING SEL_MOUNT_FAIL_MSG); 938 printk(KERN_WARNING SEL_MOUNT_FAIL_MSG);
939 goto out_err; 939 goto out_err;
940 } 940 }
941 context = match_strdup(&args[0]); 941 context = match_strdup(&args[0]);
942 if (!context) { 942 if (!context) {
943 rc = -ENOMEM; 943 rc = -ENOMEM;
944 goto out_err; 944 goto out_err;
945 } 945 }
946 break; 946 break;
947 947
948 case Opt_fscontext: 948 case Opt_fscontext:
949 if (fscontext) { 949 if (fscontext) {
950 rc = -EINVAL; 950 rc = -EINVAL;
951 printk(KERN_WARNING SEL_MOUNT_FAIL_MSG); 951 printk(KERN_WARNING SEL_MOUNT_FAIL_MSG);
952 goto out_err; 952 goto out_err;
953 } 953 }
954 fscontext = match_strdup(&args[0]); 954 fscontext = match_strdup(&args[0]);
955 if (!fscontext) { 955 if (!fscontext) {
956 rc = -ENOMEM; 956 rc = -ENOMEM;
957 goto out_err; 957 goto out_err;
958 } 958 }
959 break; 959 break;
960 960
961 case Opt_rootcontext: 961 case Opt_rootcontext:
962 if (rootcontext) { 962 if (rootcontext) {
963 rc = -EINVAL; 963 rc = -EINVAL;
964 printk(KERN_WARNING SEL_MOUNT_FAIL_MSG); 964 printk(KERN_WARNING SEL_MOUNT_FAIL_MSG);
965 goto out_err; 965 goto out_err;
966 } 966 }
967 rootcontext = match_strdup(&args[0]); 967 rootcontext = match_strdup(&args[0]);
968 if (!rootcontext) { 968 if (!rootcontext) {
969 rc = -ENOMEM; 969 rc = -ENOMEM;
970 goto out_err; 970 goto out_err;
971 } 971 }
972 break; 972 break;
973 973
974 case Opt_defcontext: 974 case Opt_defcontext:
975 if (context || defcontext) { 975 if (context || defcontext) {
976 rc = -EINVAL; 976 rc = -EINVAL;
977 printk(KERN_WARNING SEL_MOUNT_FAIL_MSG); 977 printk(KERN_WARNING SEL_MOUNT_FAIL_MSG);
978 goto out_err; 978 goto out_err;
979 } 979 }
980 defcontext = match_strdup(&args[0]); 980 defcontext = match_strdup(&args[0]);
981 if (!defcontext) { 981 if (!defcontext) {
982 rc = -ENOMEM; 982 rc = -ENOMEM;
983 goto out_err; 983 goto out_err;
984 } 984 }
985 break; 985 break;
986 case Opt_labelsupport: 986 case Opt_labelsupport:
987 break; 987 break;
988 default: 988 default:
989 rc = -EINVAL; 989 rc = -EINVAL;
990 printk(KERN_WARNING "SELinux: unknown mount option\n"); 990 printk(KERN_WARNING "SELinux: unknown mount option\n");
991 goto out_err; 991 goto out_err;
992 992
993 } 993 }
994 } 994 }
995 995
996 rc = -ENOMEM; 996 rc = -ENOMEM;
997 opts->mnt_opts = kcalloc(NUM_SEL_MNT_OPTS, sizeof(char *), GFP_ATOMIC); 997 opts->mnt_opts = kcalloc(NUM_SEL_MNT_OPTS, sizeof(char *), GFP_ATOMIC);
998 if (!opts->mnt_opts) 998 if (!opts->mnt_opts)
999 goto out_err; 999 goto out_err;
1000 1000
1001 opts->mnt_opts_flags = kcalloc(NUM_SEL_MNT_OPTS, sizeof(int), GFP_ATOMIC); 1001 opts->mnt_opts_flags = kcalloc(NUM_SEL_MNT_OPTS, sizeof(int), GFP_ATOMIC);
1002 if (!opts->mnt_opts_flags) { 1002 if (!opts->mnt_opts_flags) {
1003 kfree(opts->mnt_opts); 1003 kfree(opts->mnt_opts);
1004 goto out_err; 1004 goto out_err;
1005 } 1005 }
1006 1006
1007 if (fscontext) { 1007 if (fscontext) {
1008 opts->mnt_opts[num_mnt_opts] = fscontext; 1008 opts->mnt_opts[num_mnt_opts] = fscontext;
1009 opts->mnt_opts_flags[num_mnt_opts++] = FSCONTEXT_MNT; 1009 opts->mnt_opts_flags[num_mnt_opts++] = FSCONTEXT_MNT;
1010 } 1010 }
1011 if (context) { 1011 if (context) {
1012 opts->mnt_opts[num_mnt_opts] = context; 1012 opts->mnt_opts[num_mnt_opts] = context;
1013 opts->mnt_opts_flags[num_mnt_opts++] = CONTEXT_MNT; 1013 opts->mnt_opts_flags[num_mnt_opts++] = CONTEXT_MNT;
1014 } 1014 }
1015 if (rootcontext) { 1015 if (rootcontext) {
1016 opts->mnt_opts[num_mnt_opts] = rootcontext; 1016 opts->mnt_opts[num_mnt_opts] = rootcontext;
1017 opts->mnt_opts_flags[num_mnt_opts++] = ROOTCONTEXT_MNT; 1017 opts->mnt_opts_flags[num_mnt_opts++] = ROOTCONTEXT_MNT;
1018 } 1018 }
1019 if (defcontext) { 1019 if (defcontext) {
1020 opts->mnt_opts[num_mnt_opts] = defcontext; 1020 opts->mnt_opts[num_mnt_opts] = defcontext;
1021 opts->mnt_opts_flags[num_mnt_opts++] = DEFCONTEXT_MNT; 1021 opts->mnt_opts_flags[num_mnt_opts++] = DEFCONTEXT_MNT;
1022 } 1022 }
1023 1023
1024 opts->num_mnt_opts = num_mnt_opts; 1024 opts->num_mnt_opts = num_mnt_opts;
1025 return 0; 1025 return 0;
1026 1026
1027 out_err: 1027 out_err:
1028 kfree(context); 1028 kfree(context);
1029 kfree(defcontext); 1029 kfree(defcontext);
1030 kfree(fscontext); 1030 kfree(fscontext);
1031 kfree(rootcontext); 1031 kfree(rootcontext);
1032 return rc; 1032 return rc;
1033 } 1033 }
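The parser above walks a '|'-separated option string with strsep() and match_token(), duplicating each value it keeps. A standalone userspace sketch of the same strsep() pattern, with a made-up input string and simple '=' splitting standing in for match_token():

/* Hedged sketch of '|'-separated option parsing with strsep(),
 * mirroring the loop above; the input and option names are made up. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	char *options = strdup("context=u:r:tmp_t|rootcontext=u:r:root_t||defcontext=u:r:def_t");
	char *cursor = options;
	char *p;

	while ((p = strsep(&cursor, "|")) != NULL) {
		char *eq;

		if (!*p)		/* skip empty tokens, as the loop above does */
			continue;
		eq = strchr(p, '=');
		if (!eq) {
			fprintf(stderr, "unknown mount option: %s\n", p);
			continue;
		}
		*eq = '\0';
		printf("option '%s' -> value '%s'\n", p, eq + 1);
	}
	free(options);
	return 0;
}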
1034 /* 1034 /*
1035 * Parse the string mount options and use them to set up the sbsec. 1035 * Parse the string mount options and use them to set up the sbsec.
1036 */ 1036 */
1037 static int superblock_doinit(struct super_block *sb, void *data) 1037 static int superblock_doinit(struct super_block *sb, void *data)
1038 { 1038 {
1039 int rc = 0; 1039 int rc = 0;
1040 char *options = data; 1040 char *options = data;
1041 struct security_mnt_opts opts; 1041 struct security_mnt_opts opts;
1042 1042
1043 security_init_mnt_opts(&opts); 1043 security_init_mnt_opts(&opts);
1044 1044
1045 if (!data) 1045 if (!data)
1046 goto out; 1046 goto out;
1047 1047
1048 BUG_ON(sb->s_type->fs_flags & FS_BINARY_MOUNTDATA); 1048 BUG_ON(sb->s_type->fs_flags & FS_BINARY_MOUNTDATA);
1049 1049
1050 rc = selinux_parse_opts_str(options, &opts); 1050 rc = selinux_parse_opts_str(options, &opts);
1051 if (rc) 1051 if (rc)
1052 goto out_err; 1052 goto out_err;
1053 1053
1054 out: 1054 out:
1055 rc = selinux_set_mnt_opts(sb, &opts, 0, NULL); 1055 rc = selinux_set_mnt_opts(sb, &opts, 0, NULL);
1056 1056
1057 out_err: 1057 out_err:
1058 security_free_mnt_opts(&opts); 1058 security_free_mnt_opts(&opts);
1059 return rc; 1059 return rc;
1060 } 1060 }
1061 1061
1062 static void selinux_write_opts(struct seq_file *m, 1062 static void selinux_write_opts(struct seq_file *m,
1063 struct security_mnt_opts *opts) 1063 struct security_mnt_opts *opts)
1064 { 1064 {
1065 int i; 1065 int i;
1066 char *prefix; 1066 char *prefix;
1067 1067
1068 for (i = 0; i < opts->num_mnt_opts; i++) { 1068 for (i = 0; i < opts->num_mnt_opts; i++) {
1069 char *has_comma; 1069 char *has_comma;
1070 1070
1071 if (opts->mnt_opts[i]) 1071 if (opts->mnt_opts[i])
1072 has_comma = strchr(opts->mnt_opts[i], ','); 1072 has_comma = strchr(opts->mnt_opts[i], ',');
1073 else 1073 else
1074 has_comma = NULL; 1074 has_comma = NULL;
1075 1075
1076 switch (opts->mnt_opts_flags[i]) { 1076 switch (opts->mnt_opts_flags[i]) {
1077 case CONTEXT_MNT: 1077 case CONTEXT_MNT:
1078 prefix = CONTEXT_STR; 1078 prefix = CONTEXT_STR;
1079 break; 1079 break;
1080 case FSCONTEXT_MNT: 1080 case FSCONTEXT_MNT:
1081 prefix = FSCONTEXT_STR; 1081 prefix = FSCONTEXT_STR;
1082 break; 1082 break;
1083 case ROOTCONTEXT_MNT: 1083 case ROOTCONTEXT_MNT:
1084 prefix = ROOTCONTEXT_STR; 1084 prefix = ROOTCONTEXT_STR;
1085 break; 1085 break;
1086 case DEFCONTEXT_MNT: 1086 case DEFCONTEXT_MNT:
1087 prefix = DEFCONTEXT_STR; 1087 prefix = DEFCONTEXT_STR;
1088 break; 1088 break;
1089 case SBLABEL_MNT: 1089 case SBLABEL_MNT:
1090 seq_putc(m, ','); 1090 seq_putc(m, ',');
1091 seq_puts(m, LABELSUPP_STR); 1091 seq_puts(m, LABELSUPP_STR);
1092 continue; 1092 continue;
1093 default: 1093 default:
1094 BUG(); 1094 BUG();
1095 return; 1095 return;
1096 }; 1096 };
1097 /* we need a comma before each option */ 1097 /* we need a comma before each option */
1098 seq_putc(m, ','); 1098 seq_putc(m, ',');
1099 seq_puts(m, prefix); 1099 seq_puts(m, prefix);
1100 if (has_comma) 1100 if (has_comma)
1101 seq_putc(m, '\"'); 1101 seq_putc(m, '\"');
1102 seq_puts(m, opts->mnt_opts[i]); 1102 seq_puts(m, opts->mnt_opts[i]);
1103 if (has_comma) 1103 if (has_comma)
1104 seq_putc(m, '\"'); 1104 seq_putc(m, '\"');
1105 } 1105 }
1106 } 1106 }
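selinux_write_opts() prefixes each option with a comma and wraps the value in double quotes when it itself contains a comma, so the show-options output stays parseable. A tiny sketch of that quoting rule in isolation:

/* Hedged sketch of the quoting rule above: values containing a comma
 * are wrapped in double quotes in the emitted option string. */
#include <stdio.h>
#include <string.h>

static void write_opt(const char *prefix, const char *value)
{
	int quote = strchr(value, ',') != NULL;

	printf(",%s%s%s%s", prefix, quote ? "\"" : "", value, quote ? "\"" : "");
}

int main(void)
{
	write_opt("context=", "system_u:object_r:tmp_t:s0");
	write_opt("defcontext=", "a,b");	/* made-up value containing a comma */
	putchar('\n');
	return 0;
}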
1107 1107
1108 static int selinux_sb_show_options(struct seq_file *m, struct super_block *sb) 1108 static int selinux_sb_show_options(struct seq_file *m, struct super_block *sb)
1109 { 1109 {
1110 struct security_mnt_opts opts; 1110 struct security_mnt_opts opts;
1111 int rc; 1111 int rc;
1112 1112
1113 rc = selinux_get_mnt_opts(sb, &opts); 1113 rc = selinux_get_mnt_opts(sb, &opts);
1114 if (rc) { 1114 if (rc) {
1115 /* before policy load we may get EINVAL, don't show anything */ 1115 /* before policy load we may get EINVAL, don't show anything */
1116 if (rc == -EINVAL) 1116 if (rc == -EINVAL)
1117 rc = 0; 1117 rc = 0;
1118 return rc; 1118 return rc;
1119 } 1119 }
1120 1120
1121 selinux_write_opts(m, &opts); 1121 selinux_write_opts(m, &opts);
1122 1122
1123 security_free_mnt_opts(&opts); 1123 security_free_mnt_opts(&opts);
1124 1124
1125 return rc; 1125 return rc;
1126 } 1126 }
1127 1127
1128 static inline u16 inode_mode_to_security_class(umode_t mode) 1128 static inline u16 inode_mode_to_security_class(umode_t mode)
1129 { 1129 {
1130 switch (mode & S_IFMT) { 1130 switch (mode & S_IFMT) {
1131 case S_IFSOCK: 1131 case S_IFSOCK:
1132 return SECCLASS_SOCK_FILE; 1132 return SECCLASS_SOCK_FILE;
1133 case S_IFLNK: 1133 case S_IFLNK:
1134 return SECCLASS_LNK_FILE; 1134 return SECCLASS_LNK_FILE;
1135 case S_IFREG: 1135 case S_IFREG:
1136 return SECCLASS_FILE; 1136 return SECCLASS_FILE;
1137 case S_IFBLK: 1137 case S_IFBLK:
1138 return SECCLASS_BLK_FILE; 1138 return SECCLASS_BLK_FILE;
1139 case S_IFDIR: 1139 case S_IFDIR:
1140 return SECCLASS_DIR; 1140 return SECCLASS_DIR;
1141 case S_IFCHR: 1141 case S_IFCHR:
1142 return SECCLASS_CHR_FILE; 1142 return SECCLASS_CHR_FILE;
1143 case S_IFIFO: 1143 case S_IFIFO:
1144 return SECCLASS_FIFO_FILE; 1144 return SECCLASS_FIFO_FILE;
1145 1145
1146 } 1146 }
1147 1147
1148 return SECCLASS_FILE; 1148 return SECCLASS_FILE;
1149 } 1149 }
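inode_mode_to_security_class() keys purely off the S_IFMT bits of i_mode. The same switch can be exercised from userspace against lstat(2) output; a hedged sketch:

/* Hedged sketch: classify a path with the same S_IFMT switch the code
 * above applies to inode->i_mode, driven by lstat(2) from userspace. */
#include <stdio.h>
#include <sys/stat.h>

static const char *classify(mode_t mode)
{
	switch (mode & S_IFMT) {
	case S_IFSOCK:	return "sock_file";
	case S_IFLNK:	return "lnk_file";
	case S_IFREG:	return "file";
	case S_IFBLK:	return "blk_file";
	case S_IFDIR:	return "dir";
	case S_IFCHR:	return "chr_file";
	case S_IFIFO:	return "fifo_file";
	}
	return "file";			/* fallback, as above */
}

int main(int argc, char **argv)
{
	struct stat st;

	if (argc < 2 || lstat(argv[1], &st) != 0) {
		fprintf(stderr, "usage: %s <path>\n", argv[0]);
		return 1;
	}
	printf("%s: %s\n", argv[1], classify(st.st_mode));
	return 0;
}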
1150 1150
1151 static inline int default_protocol_stream(int protocol) 1151 static inline int default_protocol_stream(int protocol)
1152 { 1152 {
1153 return (protocol == IPPROTO_IP || protocol == IPPROTO_TCP); 1153 return (protocol == IPPROTO_IP || protocol == IPPROTO_TCP);
1154 } 1154 }
1155 1155
1156 static inline int default_protocol_dgram(int protocol) 1156 static inline int default_protocol_dgram(int protocol)
1157 { 1157 {
1158 return (protocol == IPPROTO_IP || protocol == IPPROTO_UDP); 1158 return (protocol == IPPROTO_IP || protocol == IPPROTO_UDP);
1159 } 1159 }
1160 1160
1161 static inline u16 socket_type_to_security_class(int family, int type, int protocol) 1161 static inline u16 socket_type_to_security_class(int family, int type, int protocol)
1162 { 1162 {
1163 switch (family) { 1163 switch (family) {
1164 case PF_UNIX: 1164 case PF_UNIX:
1165 switch (type) { 1165 switch (type) {
1166 case SOCK_STREAM: 1166 case SOCK_STREAM:
1167 case SOCK_SEQPACKET: 1167 case SOCK_SEQPACKET:
1168 return SECCLASS_UNIX_STREAM_SOCKET; 1168 return SECCLASS_UNIX_STREAM_SOCKET;
1169 case SOCK_DGRAM: 1169 case SOCK_DGRAM:
1170 return SECCLASS_UNIX_DGRAM_SOCKET; 1170 return SECCLASS_UNIX_DGRAM_SOCKET;
1171 } 1171 }
1172 break; 1172 break;
1173 case PF_INET: 1173 case PF_INET:
1174 case PF_INET6: 1174 case PF_INET6:
1175 switch (type) { 1175 switch (type) {
1176 case SOCK_STREAM: 1176 case SOCK_STREAM:
1177 if (default_protocol_stream(protocol)) 1177 if (default_protocol_stream(protocol))
1178 return SECCLASS_TCP_SOCKET; 1178 return SECCLASS_TCP_SOCKET;
1179 else 1179 else
1180 return SECCLASS_RAWIP_SOCKET; 1180 return SECCLASS_RAWIP_SOCKET;
1181 case SOCK_DGRAM: 1181 case SOCK_DGRAM:
1182 if (default_protocol_dgram(protocol)) 1182 if (default_protocol_dgram(protocol))
1183 return SECCLASS_UDP_SOCKET; 1183 return SECCLASS_UDP_SOCKET;
1184 else 1184 else
1185 return SECCLASS_RAWIP_SOCKET; 1185 return SECCLASS_RAWIP_SOCKET;
1186 case SOCK_DCCP: 1186 case SOCK_DCCP:
1187 return SECCLASS_DCCP_SOCKET; 1187 return SECCLASS_DCCP_SOCKET;
1188 default: 1188 default:
1189 return SECCLASS_RAWIP_SOCKET; 1189 return SECCLASS_RAWIP_SOCKET;
1190 } 1190 }
1191 break; 1191 break;
1192 case PF_NETLINK: 1192 case PF_NETLINK:
1193 switch (protocol) { 1193 switch (protocol) {
1194 case NETLINK_ROUTE: 1194 case NETLINK_ROUTE:
1195 return SECCLASS_NETLINK_ROUTE_SOCKET; 1195 return SECCLASS_NETLINK_ROUTE_SOCKET;
1196 case NETLINK_FIREWALL: 1196 case NETLINK_FIREWALL:
1197 return SECCLASS_NETLINK_FIREWALL_SOCKET; 1197 return SECCLASS_NETLINK_FIREWALL_SOCKET;
1198 case NETLINK_SOCK_DIAG: 1198 case NETLINK_SOCK_DIAG:
1199 return SECCLASS_NETLINK_TCPDIAG_SOCKET; 1199 return SECCLASS_NETLINK_TCPDIAG_SOCKET;
1200 case NETLINK_NFLOG: 1200 case NETLINK_NFLOG:
1201 return SECCLASS_NETLINK_NFLOG_SOCKET; 1201 return SECCLASS_NETLINK_NFLOG_SOCKET;
1202 case NETLINK_XFRM: 1202 case NETLINK_XFRM:
1203 return SECCLASS_NETLINK_XFRM_SOCKET; 1203 return SECCLASS_NETLINK_XFRM_SOCKET;
1204 case NETLINK_SELINUX: 1204 case NETLINK_SELINUX:
1205 return SECCLASS_NETLINK_SELINUX_SOCKET; 1205 return SECCLASS_NETLINK_SELINUX_SOCKET;
1206 case NETLINK_AUDIT: 1206 case NETLINK_AUDIT:
1207 return SECCLASS_NETLINK_AUDIT_SOCKET; 1207 return SECCLASS_NETLINK_AUDIT_SOCKET;
1208 case NETLINK_IP6_FW: 1208 case NETLINK_IP6_FW:
1209 return SECCLASS_NETLINK_IP6FW_SOCKET; 1209 return SECCLASS_NETLINK_IP6FW_SOCKET;
1210 case NETLINK_DNRTMSG: 1210 case NETLINK_DNRTMSG:
1211 return SECCLASS_NETLINK_DNRT_SOCKET; 1211 return SECCLASS_NETLINK_DNRT_SOCKET;
1212 case NETLINK_KOBJECT_UEVENT: 1212 case NETLINK_KOBJECT_UEVENT:
1213 return SECCLASS_NETLINK_KOBJECT_UEVENT_SOCKET; 1213 return SECCLASS_NETLINK_KOBJECT_UEVENT_SOCKET;
1214 default: 1214 default:
1215 return SECCLASS_NETLINK_SOCKET; 1215 return SECCLASS_NETLINK_SOCKET;
1216 } 1216 }
1217 case PF_PACKET: 1217 case PF_PACKET:
1218 return SECCLASS_PACKET_SOCKET; 1218 return SECCLASS_PACKET_SOCKET;
1219 case PF_KEY: 1219 case PF_KEY:
1220 return SECCLASS_KEY_SOCKET; 1220 return SECCLASS_KEY_SOCKET;
1221 case PF_APPLETALK: 1221 case PF_APPLETALK:
1222 return SECCLASS_APPLETALK_SOCKET; 1222 return SECCLASS_APPLETALK_SOCKET;
1223 } 1223 }
1224 1224
1225 return SECCLASS_SOCKET; 1225 return SECCLASS_SOCKET;
1226 } 1226 }
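For PF_INET/PF_INET6 sockets, protocol 0 (IPPROTO_IP) counts as the default protocol for the socket type, so socket(AF_INET, SOCK_STREAM, 0) is classed as a TCP socket and any non-default protocol falls back to the raw-IP class. A small sketch of just that decision:

/* Hedged sketch of the default-protocol rule above for INET sockets;
 * it deliberately omits the DCCP and netlink cases. */
#include <stdio.h>
#include <netinet/in.h>
#include <sys/socket.h>

static const char *inet_class(int type, int protocol)
{
	if (type == SOCK_STREAM)
		return (protocol == IPPROTO_IP || protocol == IPPROTO_TCP)
			? "tcp_socket" : "rawip_socket";
	if (type == SOCK_DGRAM)
		return (protocol == IPPROTO_IP || protocol == IPPROTO_UDP)
			? "udp_socket" : "rawip_socket";
	return "rawip_socket";
}

int main(void)
{
	printf("SOCK_STREAM, proto 0   -> %s\n", inet_class(SOCK_STREAM, 0));
	printf("SOCK_DGRAM,  proto 0   -> %s\n", inet_class(SOCK_DGRAM, 0));
	printf("SOCK_STREAM, proto 132 -> %s\n", inet_class(SOCK_STREAM, 132)); /* e.g. SCTP */
	return 0;
}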
1227 1227
1228 #ifdef CONFIG_PROC_FS 1228 #ifdef CONFIG_PROC_FS
1229 static int selinux_proc_get_sid(struct dentry *dentry, 1229 static int selinux_proc_get_sid(struct dentry *dentry,
1230 u16 tclass, 1230 u16 tclass,
1231 u32 *sid) 1231 u32 *sid)
1232 { 1232 {
1233 int rc; 1233 int rc;
1234 char *buffer, *path; 1234 char *buffer, *path;
1235 1235
1236 buffer = (char *)__get_free_page(GFP_KERNEL); 1236 buffer = (char *)__get_free_page(GFP_KERNEL);
1237 if (!buffer) 1237 if (!buffer)
1238 return -ENOMEM; 1238 return -ENOMEM;
1239 1239
1240 path = dentry_path_raw(dentry, buffer, PAGE_SIZE); 1240 path = dentry_path_raw(dentry, buffer, PAGE_SIZE);
1241 if (IS_ERR(path)) 1241 if (IS_ERR(path))
1242 rc = PTR_ERR(path); 1242 rc = PTR_ERR(path);
1243 else { 1243 else {
1244 /* each process gets a /proc/PID/ entry. Strip off the 1244 /* each process gets a /proc/PID/ entry. Strip off the
1245 * PID part to get a valid selinux labeling. 1245 * PID part to get a valid selinux labeling.
1246 * e.g. /proc/1/net/rpc/nfs -> /net/rpc/nfs */ 1246 * e.g. /proc/1/net/rpc/nfs -> /net/rpc/nfs */
1247 while (path[1] >= '0' && path[1] <= '9') { 1247 while (path[1] >= '0' && path[1] <= '9') {
1248 path[1] = '/'; 1248 path[1] = '/';
1249 path++; 1249 path++;
1250 } 1250 }
1251 rc = security_genfs_sid("proc", path, tclass, sid); 1251 rc = security_genfs_sid("proc", path, tclass, sid);
1252 } 1252 }
1253 free_page((unsigned long)buffer); 1253 free_page((unsigned long)buffer);
1254 return rc; 1254 return rc;
1255 } 1255 }
1256 #else 1256 #else
1257 static int selinux_proc_get_sid(struct dentry *dentry, 1257 static int selinux_proc_get_sid(struct dentry *dentry,
1258 u16 tclass, 1258 u16 tclass,
1259 u32 *sid) 1259 u32 *sid)
1260 { 1260 {
1261 return -EINVAL; 1261 return -EINVAL;
1262 } 1262 }
1263 #endif 1263 #endif
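selinux_proc_get_sid() strips the leading PID component from the dentry path so per-process proc entries map onto the generic proc labels: the loop overwrites digits with '/' and advances, turning "/1/net/rpc/nfs" into "/net/rpc/nfs" before the genfs lookup. A standalone sketch of that rewrite:

/* Hedged sketch of the PID-stripping loop above, run on a hard-coded
 * dentry_path_raw()-style string. */
#include <stdio.h>

int main(void)
{
	char buf[] = "/1234/net/rpc/nfs";
	char *path = buf;

	while (path[1] >= '0' && path[1] <= '9') {
		path[1] = '/';
		path++;
	}
	printf("stripped path: %s\n", path);	/* prints "/net/rpc/nfs" */
	return 0;
}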
1264 1264
1265 /* The inode's security attributes must be initialized before first use. */ 1265 /* The inode's security attributes must be initialized before first use. */
1266 static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dentry) 1266 static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dentry)
1267 { 1267 {
1268 struct superblock_security_struct *sbsec = NULL; 1268 struct superblock_security_struct *sbsec = NULL;
1269 struct inode_security_struct *isec = inode->i_security; 1269 struct inode_security_struct *isec = inode->i_security;
1270 u32 sid; 1270 u32 sid;
1271 struct dentry *dentry; 1271 struct dentry *dentry;
1272 #define INITCONTEXTLEN 255 1272 #define INITCONTEXTLEN 255
1273 char *context = NULL; 1273 char *context = NULL;
1274 unsigned len = 0; 1274 unsigned len = 0;
1275 int rc = 0; 1275 int rc = 0;
1276 1276
1277 if (isec->initialized) 1277 if (isec->initialized)
1278 goto out; 1278 goto out;
1279 1279
1280 mutex_lock(&isec->lock); 1280 mutex_lock(&isec->lock);
1281 if (isec->initialized) 1281 if (isec->initialized)
1282 goto out_unlock; 1282 goto out_unlock;
1283 1283
1284 sbsec = inode->i_sb->s_security; 1284 sbsec = inode->i_sb->s_security;
1285 if (!(sbsec->flags & SE_SBINITIALIZED)) { 1285 if (!(sbsec->flags & SE_SBINITIALIZED)) {
1286 /* Defer initialization until selinux_complete_init, 1286 /* Defer initialization until selinux_complete_init,
1287 after the initial policy is loaded and the security 1287 after the initial policy is loaded and the security
1288 server is ready to handle calls. */ 1288 server is ready to handle calls. */
1289 spin_lock(&sbsec->isec_lock); 1289 spin_lock(&sbsec->isec_lock);
1290 if (list_empty(&isec->list)) 1290 if (list_empty(&isec->list))
1291 list_add(&isec->list, &sbsec->isec_head); 1291 list_add(&isec->list, &sbsec->isec_head);
1292 spin_unlock(&sbsec->isec_lock); 1292 spin_unlock(&sbsec->isec_lock);
1293 goto out_unlock; 1293 goto out_unlock;
1294 } 1294 }
1295 1295
1296 switch (sbsec->behavior) { 1296 switch (sbsec->behavior) {
1297 case SECURITY_FS_USE_NATIVE: 1297 case SECURITY_FS_USE_NATIVE:
1298 break; 1298 break;
1299 case SECURITY_FS_USE_XATTR: 1299 case SECURITY_FS_USE_XATTR:
1300 if (!inode->i_op->getxattr) { 1300 if (!inode->i_op->getxattr) {
1301 isec->sid = sbsec->def_sid; 1301 isec->sid = sbsec->def_sid;
1302 break; 1302 break;
1303 } 1303 }
1304 1304
1305 /* Need a dentry, since the xattr API requires one. 1305 /* Need a dentry, since the xattr API requires one.
1306 Life would be simpler if we could just pass the inode. */ 1306 Life would be simpler if we could just pass the inode. */
1307 if (opt_dentry) { 1307 if (opt_dentry) {
1308 /* Called from d_instantiate or d_splice_alias. */ 1308 /* Called from d_instantiate or d_splice_alias. */
1309 dentry = dget(opt_dentry); 1309 dentry = dget(opt_dentry);
1310 } else { 1310 } else {
1311 /* Called from selinux_complete_init, try to find a dentry. */ 1311 /* Called from selinux_complete_init, try to find a dentry. */
1312 dentry = d_find_alias(inode); 1312 dentry = d_find_alias(inode);
1313 } 1313 }
1314 if (!dentry) { 1314 if (!dentry) {
1315 /* 1315 /*
1316 * this can be hit on boot when a file is accessed 1316 * this can be hit on boot when a file is accessed
1317 * before the policy is loaded. When we load policy we 1317 * before the policy is loaded. When we load policy we
1318 * may find inodes that have no dentry on the 1318 * may find inodes that have no dentry on the
1319 * sbsec->isec_head list. No reason to complain as these 1319 * sbsec->isec_head list. No reason to complain as these
1320 * will get fixed up the next time we go through 1320 * will get fixed up the next time we go through
1321 * inode_doinit with a dentry, before these inodes could 1321 * inode_doinit with a dentry, before these inodes could
1322 * be used again by userspace. 1322 * be used again by userspace.
1323 */ 1323 */
1324 goto out_unlock; 1324 goto out_unlock;
1325 } 1325 }
1326 1326
1327 len = INITCONTEXTLEN; 1327 len = INITCONTEXTLEN;
1328 context = kmalloc(len+1, GFP_NOFS); 1328 context = kmalloc(len+1, GFP_NOFS);
1329 if (!context) { 1329 if (!context) {
1330 rc = -ENOMEM; 1330 rc = -ENOMEM;
1331 dput(dentry); 1331 dput(dentry);
1332 goto out_unlock; 1332 goto out_unlock;
1333 } 1333 }
1334 context[len] = '\0'; 1334 context[len] = '\0';
1335 rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX, 1335 rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX,
1336 context, len); 1336 context, len);
1337 if (rc == -ERANGE) { 1337 if (rc == -ERANGE) {
1338 kfree(context); 1338 kfree(context);
1339 1339
1340 /* Need a larger buffer. Query for the right size. */ 1340 /* Need a larger buffer. Query for the right size. */
1341 rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX, 1341 rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX,
1342 NULL, 0); 1342 NULL, 0);
1343 if (rc < 0) { 1343 if (rc < 0) {
1344 dput(dentry); 1344 dput(dentry);
1345 goto out_unlock; 1345 goto out_unlock;
1346 } 1346 }
1347 len = rc; 1347 len = rc;
1348 context = kmalloc(len+1, GFP_NOFS); 1348 context = kmalloc(len+1, GFP_NOFS);
1349 if (!context) { 1349 if (!context) {
1350 rc = -ENOMEM; 1350 rc = -ENOMEM;
1351 dput(dentry); 1351 dput(dentry);
1352 goto out_unlock; 1352 goto out_unlock;
1353 } 1353 }
1354 context[len] = '\0'; 1354 context[len] = '\0';
1355 rc = inode->i_op->getxattr(dentry, 1355 rc = inode->i_op->getxattr(dentry,
1356 XATTR_NAME_SELINUX, 1356 XATTR_NAME_SELINUX,
1357 context, len); 1357 context, len);
1358 } 1358 }
1359 dput(dentry); 1359 dput(dentry);
1360 if (rc < 0) { 1360 if (rc < 0) {
1361 if (rc != -ENODATA) { 1361 if (rc != -ENODATA) {
1362 printk(KERN_WARNING "SELinux: %s: getxattr returned " 1362 printk(KERN_WARNING "SELinux: %s: getxattr returned "
1363 "%d for dev=%s ino=%ld\n", __func__, 1363 "%d for dev=%s ino=%ld\n", __func__,
1364 -rc, inode->i_sb->s_id, inode->i_ino); 1364 -rc, inode->i_sb->s_id, inode->i_ino);
1365 kfree(context); 1365 kfree(context);
1366 goto out_unlock; 1366 goto out_unlock;
1367 } 1367 }
1368 /* Map ENODATA to the default file SID */ 1368 /* Map ENODATA to the default file SID */
1369 sid = sbsec->def_sid; 1369 sid = sbsec->def_sid;
1370 rc = 0; 1370 rc = 0;
1371 } else { 1371 } else {
1372 rc = security_context_to_sid_default(context, rc, &sid, 1372 rc = security_context_to_sid_default(context, rc, &sid,
1373 sbsec->def_sid, 1373 sbsec->def_sid,
1374 GFP_NOFS); 1374 GFP_NOFS);
1375 if (rc) { 1375 if (rc) {
1376 char *dev = inode->i_sb->s_id; 1376 char *dev = inode->i_sb->s_id;
1377 unsigned long ino = inode->i_ino; 1377 unsigned long ino = inode->i_ino;
1378 1378
1379 if (rc == -EINVAL) { 1379 if (rc == -EINVAL) {
1380 if (printk_ratelimit()) 1380 if (printk_ratelimit())
1381 printk(KERN_NOTICE "SELinux: inode=%lu on dev=%s was found to have an invalid " 1381 printk(KERN_NOTICE "SELinux: inode=%lu on dev=%s was found to have an invalid "
1382 "context=%s. This indicates you may need to relabel the inode or the " 1382 "context=%s. This indicates you may need to relabel the inode or the "
1383 "filesystem in question.\n", ino, dev, context); 1383 "filesystem in question.\n", ino, dev, context);
1384 } else { 1384 } else {
1385 printk(KERN_WARNING "SELinux: %s: context_to_sid(%s) " 1385 printk(KERN_WARNING "SELinux: %s: context_to_sid(%s) "
1386 "returned %d for dev=%s ino=%ld\n", 1386 "returned %d for dev=%s ino=%ld\n",
1387 __func__, context, -rc, dev, ino); 1387 __func__, context, -rc, dev, ino);
1388 } 1388 }
1389 kfree(context); 1389 kfree(context);
1390 /* Leave with the unlabeled SID */ 1390 /* Leave with the unlabeled SID */
1391 rc = 0; 1391 rc = 0;
1392 break; 1392 break;
1393 } 1393 }
1394 } 1394 }
1395 kfree(context); 1395 kfree(context);
1396 isec->sid = sid; 1396 isec->sid = sid;
1397 break; 1397 break;
1398 case SECURITY_FS_USE_TASK: 1398 case SECURITY_FS_USE_TASK:
1399 isec->sid = isec->task_sid; 1399 isec->sid = isec->task_sid;
1400 break; 1400 break;
1401 case SECURITY_FS_USE_TRANS: 1401 case SECURITY_FS_USE_TRANS:
1402 /* Default to the fs SID. */ 1402 /* Default to the fs SID. */
1403 isec->sid = sbsec->sid; 1403 isec->sid = sbsec->sid;
1404 1404
1405 /* Try to obtain a transition SID. */ 1405 /* Try to obtain a transition SID. */
1406 isec->sclass = inode_mode_to_security_class(inode->i_mode); 1406 isec->sclass = inode_mode_to_security_class(inode->i_mode);
1407 rc = security_transition_sid(isec->task_sid, sbsec->sid, 1407 rc = security_transition_sid(isec->task_sid, sbsec->sid,
1408 isec->sclass, NULL, &sid); 1408 isec->sclass, NULL, &sid);
1409 if (rc) 1409 if (rc)
1410 goto out_unlock; 1410 goto out_unlock;
1411 isec->sid = sid; 1411 isec->sid = sid;
1412 break; 1412 break;
1413 case SECURITY_FS_USE_MNTPOINT: 1413 case SECURITY_FS_USE_MNTPOINT:
1414 isec->sid = sbsec->mntpoint_sid; 1414 isec->sid = sbsec->mntpoint_sid;
1415 break; 1415 break;
1416 default: 1416 default:
1417 /* Default to the fs superblock SID. */ 1417 /* Default to the fs superblock SID. */
1418 isec->sid = sbsec->sid; 1418 isec->sid = sbsec->sid;
1419 1419
1420 if ((sbsec->flags & SE_SBPROC) && !S_ISLNK(inode->i_mode)) { 1420 if ((sbsec->flags & SE_SBPROC) && !S_ISLNK(inode->i_mode)) {
1421 /* We must have a dentry to determine the label on 1421 /* We must have a dentry to determine the label on
1422 * procfs inodes */ 1422 * procfs inodes */
1423 if (opt_dentry) 1423 if (opt_dentry)
1424 /* Called from d_instantiate or 1424 /* Called from d_instantiate or
1425 * d_splice_alias. */ 1425 * d_splice_alias. */
1426 dentry = dget(opt_dentry); 1426 dentry = dget(opt_dentry);
1427 else 1427 else
1428 /* Called from selinux_complete_init, try to 1428 /* Called from selinux_complete_init, try to
1429 * find a dentry. */ 1429 * find a dentry. */
1430 dentry = d_find_alias(inode); 1430 dentry = d_find_alias(inode);
1431 /* 1431 /*
1432 * This can be hit on boot when a file is accessed 1432 * This can be hit on boot when a file is accessed
1433 * before the policy is loaded. When we load policy we 1433 * before the policy is loaded. When we load policy we
1434 * may find inodes that have no dentry on the 1434 * may find inodes that have no dentry on the
1435 * sbsec->isec_head list. No reason to complain as 1435 * sbsec->isec_head list. No reason to complain as
1436 * these will get fixed up the next time we go through 1436 * these will get fixed up the next time we go through
1437 * inode_doinit() with a dentry, before these inodes 1437 * inode_doinit() with a dentry, before these inodes
1438 * could be used again by userspace. 1438 * could be used again by userspace.
1439 */ 1439 */
1440 if (!dentry) 1440 if (!dentry)
1441 goto out_unlock; 1441 goto out_unlock;
1442 isec->sclass = inode_mode_to_security_class(inode->i_mode); 1442 isec->sclass = inode_mode_to_security_class(inode->i_mode);
1443 rc = selinux_proc_get_sid(dentry, isec->sclass, &sid); 1443 rc = selinux_proc_get_sid(dentry, isec->sclass, &sid);
1444 dput(dentry); 1444 dput(dentry);
1445 if (rc) 1445 if (rc)
1446 goto out_unlock; 1446 goto out_unlock;
1447 isec->sid = sid; 1447 isec->sid = sid;
1448 } 1448 }
1449 break; 1449 break;
1450 } 1450 }
1451 1451
1452 isec->initialized = 1; 1452 isec->initialized = 1;
1453 1453
1454 out_unlock: 1454 out_unlock:
1455 mutex_unlock(&isec->lock); 1455 mutex_unlock(&isec->lock);
1456 out: 1456 out:
1457 if (isec->sclass == SECCLASS_FILE) 1457 if (isec->sclass == SECCLASS_FILE)
1458 isec->sclass = inode_mode_to_security_class(inode->i_mode); 1458 isec->sclass = inode_mode_to_security_class(inode->i_mode);
1459 return rc; 1459 return rc;
1460 } 1460 }
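The SECURITY_FS_USE_XATTR branch above uses a common getxattr pattern: try a fixed-size buffer first, and on -ERANGE query the required size (NULL buffer, zero length) before retrying with a larger allocation. The same pattern from userspace, reading the security.selinux attribute (the default path below is just an example):

/* Hedged sketch of the getxattr() retry-on-ERANGE pattern above,
 * using lgetxattr(2) from userspace. */
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/xattr.h>

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "/etc/passwd";
	char small[32];
	char *buf = small;
	ssize_t len;

	len = lgetxattr(path, "security.selinux", small, sizeof(small) - 1);
	if (len < 0 && errno == ERANGE) {
		/* Need a larger buffer: query for the right size first. */
		len = lgetxattr(path, "security.selinux", NULL, 0);
		if (len < 0) {
			perror("lgetxattr");
			return 1;
		}
		buf = malloc(len + 1);
		if (!buf)
			return 1;
		len = lgetxattr(path, "security.selinux", buf, len);
	}
	if (len < 0) {
		perror("lgetxattr");
		return 1;
	}
	buf[len] = '\0';
	printf("%s: %s\n", path, buf);
	if (buf != small)
		free(buf);
	return 0;
}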
1461 1461
1462 /* Convert a Linux signal to an access vector. */ 1462 /* Convert a Linux signal to an access vector. */
1463 static inline u32 signal_to_av(int sig) 1463 static inline u32 signal_to_av(int sig)
1464 { 1464 {
1465 u32 perm = 0; 1465 u32 perm = 0;
1466 1466
1467 switch (sig) { 1467 switch (sig) {
1468 case SIGCHLD: 1468 case SIGCHLD:
1469 /* Commonly granted from child to parent. */ 1469 /* Commonly granted from child to parent. */
1470 perm = PROCESS__SIGCHLD; 1470 perm = PROCESS__SIGCHLD;
1471 break; 1471 break;
1472 case SIGKILL: 1472 case SIGKILL:
1473 /* Cannot be caught or ignored */ 1473 /* Cannot be caught or ignored */
1474 perm = PROCESS__SIGKILL; 1474 perm = PROCESS__SIGKILL;
1475 break; 1475 break;
1476 case SIGSTOP: 1476 case SIGSTOP:
1477 /* Cannot be caught or ignored */ 1477 /* Cannot be caught or ignored */
1478 perm = PROCESS__SIGSTOP; 1478 perm = PROCESS__SIGSTOP;
1479 break; 1479 break;
1480 default: 1480 default:
1481 /* All other signals. */ 1481 /* All other signals. */
1482 perm = PROCESS__SIGNAL; 1482 perm = PROCESS__SIGNAL;
1483 break; 1483 break;
1484 } 1484 }
1485 1485
1486 return perm; 1486 return perm;
1487 } 1487 }
1488 1488
1489 /* 1489 /*
1490 * Check permission between a pair of credentials 1490 * Check permission between a pair of credentials
1491 * fork check, ptrace check, etc. 1491 * fork check, ptrace check, etc.
1492 */ 1492 */
1493 static int cred_has_perm(const struct cred *actor, 1493 static int cred_has_perm(const struct cred *actor,
1494 const struct cred *target, 1494 const struct cred *target,
1495 u32 perms) 1495 u32 perms)
1496 { 1496 {
1497 u32 asid = cred_sid(actor), tsid = cred_sid(target); 1497 u32 asid = cred_sid(actor), tsid = cred_sid(target);
1498 1498
1499 return avc_has_perm(asid, tsid, SECCLASS_PROCESS, perms, NULL); 1499 return avc_has_perm(asid, tsid, SECCLASS_PROCESS, perms, NULL);
1500 } 1500 }
1501 1501
1502 /* 1502 /*
1503 * Check permission between a pair of tasks, e.g. signal checks, 1503 * Check permission between a pair of tasks, e.g. signal checks,
1504 * fork check, ptrace check, etc. 1504 * fork check, ptrace check, etc.
1505 * tsk1 is the actor and tsk2 is the target 1505 * tsk1 is the actor and tsk2 is the target
1506 * - this uses the default subjective creds of tsk1 1506 * - this uses the default subjective creds of tsk1
1507 */ 1507 */
1508 static int task_has_perm(const struct task_struct *tsk1, 1508 static int task_has_perm(const struct task_struct *tsk1,
1509 const struct task_struct *tsk2, 1509 const struct task_struct *tsk2,
1510 u32 perms) 1510 u32 perms)
1511 { 1511 {
1512 const struct task_security_struct *__tsec1, *__tsec2; 1512 const struct task_security_struct *__tsec1, *__tsec2;
1513 u32 sid1, sid2; 1513 u32 sid1, sid2;
1514 1514
1515 rcu_read_lock(); 1515 rcu_read_lock();
1516 __tsec1 = __task_cred(tsk1)->security; sid1 = __tsec1->sid; 1516 __tsec1 = __task_cred(tsk1)->security; sid1 = __tsec1->sid;
1517 __tsec2 = __task_cred(tsk2)->security; sid2 = __tsec2->sid; 1517 __tsec2 = __task_cred(tsk2)->security; sid2 = __tsec2->sid;
1518 rcu_read_unlock(); 1518 rcu_read_unlock();
1519 return avc_has_perm(sid1, sid2, SECCLASS_PROCESS, perms, NULL); 1519 return avc_has_perm(sid1, sid2, SECCLASS_PROCESS, perms, NULL);
1520 } 1520 }
1521 1521
1522 /* 1522 /*
1523 * Check permission between current and another task, e.g. signal checks, 1523 * Check permission between current and another task, e.g. signal checks,
1524 * fork check, ptrace check, etc. 1524 * fork check, ptrace check, etc.
1525 * current is the actor and tsk2 is the target 1525 * current is the actor and tsk2 is the target
1526 * - this uses current's subjective creds 1526 * - this uses current's subjective creds
1527 */ 1527 */
1528 static int current_has_perm(const struct task_struct *tsk, 1528 static int current_has_perm(const struct task_struct *tsk,
1529 u32 perms) 1529 u32 perms)
1530 { 1530 {
1531 u32 sid, tsid; 1531 u32 sid, tsid;
1532 1532
1533 sid = current_sid(); 1533 sid = current_sid();
1534 tsid = task_sid(tsk); 1534 tsid = task_sid(tsk);
1535 return avc_has_perm(sid, tsid, SECCLASS_PROCESS, perms, NULL); 1535 return avc_has_perm(sid, tsid, SECCLASS_PROCESS, perms, NULL);
1536 } 1536 }
1537 1537
1538 #if CAP_LAST_CAP > 63 1538 #if CAP_LAST_CAP > 63
1539 #error Fix SELinux to handle capabilities > 63. 1539 #error Fix SELinux to handle capabilities > 63.
1540 #endif 1540 #endif
1541 1541
1542 /* Check whether a task is allowed to use a capability. */ 1542 /* Check whether a task is allowed to use a capability. */
1543 static int cred_has_capability(const struct cred *cred, 1543 static int cred_has_capability(const struct cred *cred,
1544 int cap, int audit) 1544 int cap, int audit)
1545 { 1545 {
1546 struct common_audit_data ad; 1546 struct common_audit_data ad;
1547 struct av_decision avd; 1547 struct av_decision avd;
1548 u16 sclass; 1548 u16 sclass;
1549 u32 sid = cred_sid(cred); 1549 u32 sid = cred_sid(cred);
1550 u32 av = CAP_TO_MASK(cap); 1550 u32 av = CAP_TO_MASK(cap);
1551 int rc; 1551 int rc;
1552 1552
1553 ad.type = LSM_AUDIT_DATA_CAP; 1553 ad.type = LSM_AUDIT_DATA_CAP;
1554 ad.u.cap = cap; 1554 ad.u.cap = cap;
1555 1555
1556 switch (CAP_TO_INDEX(cap)) { 1556 switch (CAP_TO_INDEX(cap)) {
1557 case 0: 1557 case 0:
1558 sclass = SECCLASS_CAPABILITY; 1558 sclass = SECCLASS_CAPABILITY;
1559 break; 1559 break;
1560 case 1: 1560 case 1:
1561 sclass = SECCLASS_CAPABILITY2; 1561 sclass = SECCLASS_CAPABILITY2;
1562 break; 1562 break;
1563 default: 1563 default:
1564 printk(KERN_ERR 1564 printk(KERN_ERR
1565 "SELinux: out of range capability %d\n", cap); 1565 "SELinux: out of range capability %d\n", cap);
1566 BUG(); 1566 BUG();
1567 return -EINVAL; 1567 return -EINVAL;
1568 } 1568 }
1569 1569
1570 rc = avc_has_perm_noaudit(sid, sid, sclass, av, 0, &avd); 1570 rc = avc_has_perm_noaudit(sid, sid, sclass, av, 0, &avd);
1571 if (audit == SECURITY_CAP_AUDIT) { 1571 if (audit == SECURITY_CAP_AUDIT) {
1572 int rc2 = avc_audit(sid, sid, sclass, av, &avd, rc, &ad); 1572 int rc2 = avc_audit(sid, sid, sclass, av, &avd, rc, &ad);
1573 if (rc2) 1573 if (rc2)
1574 return rc2; 1574 return rc2;
1575 } 1575 }
1576 return rc; 1576 return rc;
1577 } 1577 }
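Capabilities are split across two 32-bit security classes: CAP_TO_INDEX() selects capability vs. capability2 and CAP_TO_MASK() selects the access-vector bit within it, which is why anything above 63 trips the #error above. A worked sketch of that arithmetic (the shift/mask forms below are stated as an assumption mirroring the kernel macros):

/* Hedged sketch: map a capability number to its class index and
 * access-vector bit, as the CAP_TO_INDEX/CAP_TO_MASK split implies. */
#include <stdio.h>

#define CAP_INDEX(cap)	((cap) >> 5)		/* cap / 32 */
#define CAP_MASK(cap)	(1u << ((cap) & 31))	/* bit within the 32-bit word */

int main(void)
{
	int caps[] = { 0 /* CAP_CHOWN */, 21 /* CAP_SYS_ADMIN */, 35 /* CAP_WAKE_ALARM */ };
	unsigned int i;

	for (i = 0; i < sizeof(caps) / sizeof(caps[0]); i++)
		printf("cap %2d -> class %s, av bit 0x%08x\n", caps[i],
		       CAP_INDEX(caps[i]) ? "capability2" : "capability",
		       CAP_MASK(caps[i]));
	return 0;
}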
1578 1578
1579 /* Check whether a task is allowed to use a system operation. */ 1579 /* Check whether a task is allowed to use a system operation. */
1580 static int task_has_system(struct task_struct *tsk, 1580 static int task_has_system(struct task_struct *tsk,
1581 u32 perms) 1581 u32 perms)
1582 { 1582 {
1583 u32 sid = task_sid(tsk); 1583 u32 sid = task_sid(tsk);
1584 1584
1585 return avc_has_perm(sid, SECINITSID_KERNEL, 1585 return avc_has_perm(sid, SECINITSID_KERNEL,
1586 SECCLASS_SYSTEM, perms, NULL); 1586 SECCLASS_SYSTEM, perms, NULL);
1587 } 1587 }
1588 1588
1589 /* Check whether a task has a particular permission to an inode. 1589 /* Check whether a task has a particular permission to an inode.
1590 The 'adp' parameter is optional and allows other audit 1590 The 'adp' parameter is optional and allows other audit
1591 data to be passed (e.g. the dentry). */ 1591 data to be passed (e.g. the dentry). */
1592 static int inode_has_perm(const struct cred *cred, 1592 static int inode_has_perm(const struct cred *cred,
1593 struct inode *inode, 1593 struct inode *inode,
1594 u32 perms, 1594 u32 perms,
1595 struct common_audit_data *adp) 1595 struct common_audit_data *adp)
1596 { 1596 {
1597 struct inode_security_struct *isec; 1597 struct inode_security_struct *isec;
1598 u32 sid; 1598 u32 sid;
1599 1599
1600 validate_creds(cred); 1600 validate_creds(cred);
1601 1601
1602 if (unlikely(IS_PRIVATE(inode))) 1602 if (unlikely(IS_PRIVATE(inode)))
1603 return 0; 1603 return 0;
1604 1604
1605 sid = cred_sid(cred); 1605 sid = cred_sid(cred);
1606 isec = inode->i_security; 1606 isec = inode->i_security;
1607 1607
1608 return avc_has_perm(sid, isec->sid, isec->sclass, perms, adp); 1608 return avc_has_perm(sid, isec->sid, isec->sclass, perms, adp);
1609 } 1609 }
1610 1610
1611 /* Same as inode_has_perm, but pass explicit audit data containing 1611 /* Same as inode_has_perm, but pass explicit audit data containing
1612 the dentry to help the auditing code to more easily generate the 1612 the dentry to help the auditing code to more easily generate the
1613 pathname if needed. */ 1613 pathname if needed. */
1614 static inline int dentry_has_perm(const struct cred *cred, 1614 static inline int dentry_has_perm(const struct cred *cred,
1615 struct dentry *dentry, 1615 struct dentry *dentry,
1616 u32 av) 1616 u32 av)
1617 { 1617 {
1618 struct inode *inode = dentry->d_inode; 1618 struct inode *inode = dentry->d_inode;
1619 struct common_audit_data ad; 1619 struct common_audit_data ad;
1620 1620
1621 ad.type = LSM_AUDIT_DATA_DENTRY; 1621 ad.type = LSM_AUDIT_DATA_DENTRY;
1622 ad.u.dentry = dentry; 1622 ad.u.dentry = dentry;
1623 return inode_has_perm(cred, inode, av, &ad); 1623 return inode_has_perm(cred, inode, av, &ad);
1624 } 1624 }
1625 1625
1626 /* Same as inode_has_perm, but pass explicit audit data containing 1626 /* Same as inode_has_perm, but pass explicit audit data containing
1627 the path to help the auditing code to more easily generate the 1627 the path to help the auditing code to more easily generate the
1628 pathname if needed. */ 1628 pathname if needed. */
1629 static inline int path_has_perm(const struct cred *cred, 1629 static inline int path_has_perm(const struct cred *cred,
1630 struct path *path, 1630 struct path *path,
1631 u32 av) 1631 u32 av)
1632 { 1632 {
1633 struct inode *inode = path->dentry->d_inode; 1633 struct inode *inode = path->dentry->d_inode;
1634 struct common_audit_data ad; 1634 struct common_audit_data ad;
1635 1635
1636 ad.type = LSM_AUDIT_DATA_PATH; 1636 ad.type = LSM_AUDIT_DATA_PATH;
1637 ad.u.path = *path; 1637 ad.u.path = *path;
1638 return inode_has_perm(cred, inode, av, &ad); 1638 return inode_has_perm(cred, inode, av, &ad);
1639 } 1639 }
1640 1640
1641 /* Same as path_has_perm, but uses the inode from the file struct. */ 1641 /* Same as path_has_perm, but uses the inode from the file struct. */
1642 static inline int file_path_has_perm(const struct cred *cred, 1642 static inline int file_path_has_perm(const struct cred *cred,
1643 struct file *file, 1643 struct file *file,
1644 u32 av) 1644 u32 av)
1645 { 1645 {
1646 struct common_audit_data ad; 1646 struct common_audit_data ad;
1647 1647
1648 ad.type = LSM_AUDIT_DATA_PATH; 1648 ad.type = LSM_AUDIT_DATA_PATH;
1649 ad.u.path = file->f_path; 1649 ad.u.path = file->f_path;
1650 return inode_has_perm(cred, file_inode(file), av, &ad); 1650 return inode_has_perm(cred, file_inode(file), av, &ad);
1651 } 1651 }
1652 1652
1653 /* Check whether a task can use an open file descriptor to 1653 /* Check whether a task can use an open file descriptor to
1654 access an inode in a given way. Check access to the 1654 access an inode in a given way. Check access to the
1655 descriptor itself, and then use dentry_has_perm to 1655 descriptor itself, and then use dentry_has_perm to
1656 check a particular permission to the file. 1656 check a particular permission to the file.
1657 Access to the descriptor is implicitly granted if it 1657 Access to the descriptor is implicitly granted if it
1658 has the same SID as the process. If av is zero, then 1658 has the same SID as the process. If av is zero, then
1659 access to the file is not checked, e.g. for cases 1659 access to the file is not checked, e.g. for cases
1660 where only the descriptor is affected like seek. */ 1660 where only the descriptor is affected like seek. */
1661 static int file_has_perm(const struct cred *cred, 1661 static int file_has_perm(const struct cred *cred,
1662 struct file *file, 1662 struct file *file,
1663 u32 av) 1663 u32 av)
1664 { 1664 {
1665 struct file_security_struct *fsec = file->f_security; 1665 struct file_security_struct *fsec = file->f_security;
1666 struct inode *inode = file_inode(file); 1666 struct inode *inode = file_inode(file);
1667 struct common_audit_data ad; 1667 struct common_audit_data ad;
1668 u32 sid = cred_sid(cred); 1668 u32 sid = cred_sid(cred);
1669 int rc; 1669 int rc;
1670 1670
1671 ad.type = LSM_AUDIT_DATA_PATH; 1671 ad.type = LSM_AUDIT_DATA_PATH;
1672 ad.u.path = file->f_path; 1672 ad.u.path = file->f_path;
1673 1673
1674 if (sid != fsec->sid) { 1674 if (sid != fsec->sid) {
1675 rc = avc_has_perm(sid, fsec->sid, 1675 rc = avc_has_perm(sid, fsec->sid,
1676 SECCLASS_FD, 1676 SECCLASS_FD,
1677 FD__USE, 1677 FD__USE,
1678 &ad); 1678 &ad);
1679 if (rc) 1679 if (rc)
1680 goto out; 1680 goto out;
1681 } 1681 }
1682 1682
1683 /* av is zero if only checking access to the descriptor. */ 1683 /* av is zero if only checking access to the descriptor. */
1684 rc = 0; 1684 rc = 0;
1685 if (av) 1685 if (av)
1686 rc = inode_has_perm(cred, inode, av, &ad); 1686 rc = inode_has_perm(cred, inode, av, &ad);
1687 1687
1688 out: 1688 out:
1689 return rc; 1689 return rc;
1690 } 1690 }
1691 1691
1692 /* Check whether a task can create a file. */ 1692 /* Check whether a task can create a file. */
1693 static int may_create(struct inode *dir, 1693 static int may_create(struct inode *dir,
1694 struct dentry *dentry, 1694 struct dentry *dentry,
1695 u16 tclass) 1695 u16 tclass)
1696 { 1696 {
1697 const struct task_security_struct *tsec = current_security(); 1697 const struct task_security_struct *tsec = current_security();
1698 struct inode_security_struct *dsec; 1698 struct inode_security_struct *dsec;
1699 struct superblock_security_struct *sbsec; 1699 struct superblock_security_struct *sbsec;
1700 u32 sid, newsid; 1700 u32 sid, newsid;
1701 struct common_audit_data ad; 1701 struct common_audit_data ad;
1702 int rc; 1702 int rc;
1703 1703
1704 dsec = dir->i_security; 1704 dsec = dir->i_security;
1705 sbsec = dir->i_sb->s_security; 1705 sbsec = dir->i_sb->s_security;
1706 1706
1707 sid = tsec->sid; 1707 sid = tsec->sid;
1708 newsid = tsec->create_sid; 1708 newsid = tsec->create_sid;
1709 1709
1710 ad.type = LSM_AUDIT_DATA_DENTRY; 1710 ad.type = LSM_AUDIT_DATA_DENTRY;
1711 ad.u.dentry = dentry; 1711 ad.u.dentry = dentry;
1712 1712
1713 rc = avc_has_perm(sid, dsec->sid, SECCLASS_DIR, 1713 rc = avc_has_perm(sid, dsec->sid, SECCLASS_DIR,
1714 DIR__ADD_NAME | DIR__SEARCH, 1714 DIR__ADD_NAME | DIR__SEARCH,
1715 &ad); 1715 &ad);
1716 if (rc) 1716 if (rc)
1717 return rc; 1717 return rc;
1718 1718
1719 if (!newsid || !(sbsec->flags & SBLABEL_MNT)) { 1719 if (!newsid || !(sbsec->flags & SBLABEL_MNT)) {
1720 rc = security_transition_sid(sid, dsec->sid, tclass, 1720 rc = security_transition_sid(sid, dsec->sid, tclass,
1721 &dentry->d_name, &newsid); 1721 &dentry->d_name, &newsid);
1722 if (rc) 1722 if (rc)
1723 return rc; 1723 return rc;
1724 } 1724 }
1725 1725
1726 rc = avc_has_perm(sid, newsid, tclass, FILE__CREATE, &ad); 1726 rc = avc_has_perm(sid, newsid, tclass, FILE__CREATE, &ad);
1727 if (rc) 1727 if (rc)
1728 return rc; 1728 return rc;
1729 1729
1730 return avc_has_perm(newsid, sbsec->sid, 1730 return avc_has_perm(newsid, sbsec->sid,
1731 SECCLASS_FILESYSTEM, 1731 SECCLASS_FILESYSTEM,
1732 FILESYSTEM__ASSOCIATE, &ad); 1732 FILESYSTEM__ASSOCIATE, &ad);
1733 } 1733 }
1734 1734
1735 /* Check whether a task can create a key. */ 1735 /* Check whether a task can create a key. */
1736 static int may_create_key(u32 ksid, 1736 static int may_create_key(u32 ksid,
1737 struct task_struct *ctx) 1737 struct task_struct *ctx)
1738 { 1738 {
1739 u32 sid = task_sid(ctx); 1739 u32 sid = task_sid(ctx);
1740 1740
1741 return avc_has_perm(sid, ksid, SECCLASS_KEY, KEY__CREATE, NULL); 1741 return avc_has_perm(sid, ksid, SECCLASS_KEY, KEY__CREATE, NULL);
1742 } 1742 }
1743 1743
1744 #define MAY_LINK 0 1744 #define MAY_LINK 0
1745 #define MAY_UNLINK 1 1745 #define MAY_UNLINK 1
1746 #define MAY_RMDIR 2 1746 #define MAY_RMDIR 2
1747 1747
1748 /* Check whether a task can link, unlink, or rmdir a file/directory. */ 1748 /* Check whether a task can link, unlink, or rmdir a file/directory. */
1749 static int may_link(struct inode *dir, 1749 static int may_link(struct inode *dir,
1750 struct dentry *dentry, 1750 struct dentry *dentry,
1751 int kind) 1751 int kind)
1752 1752
1753 { 1753 {
1754 struct inode_security_struct *dsec, *isec; 1754 struct inode_security_struct *dsec, *isec;
1755 struct common_audit_data ad; 1755 struct common_audit_data ad;
1756 u32 sid = current_sid(); 1756 u32 sid = current_sid();
1757 u32 av; 1757 u32 av;
1758 int rc; 1758 int rc;
1759 1759
1760 dsec = dir->i_security; 1760 dsec = dir->i_security;
1761 isec = dentry->d_inode->i_security; 1761 isec = dentry->d_inode->i_security;
1762 1762
1763 ad.type = LSM_AUDIT_DATA_DENTRY; 1763 ad.type = LSM_AUDIT_DATA_DENTRY;
1764 ad.u.dentry = dentry; 1764 ad.u.dentry = dentry;
1765 1765
1766 av = DIR__SEARCH; 1766 av = DIR__SEARCH;
1767 av |= (kind ? DIR__REMOVE_NAME : DIR__ADD_NAME); 1767 av |= (kind ? DIR__REMOVE_NAME : DIR__ADD_NAME);
1768 rc = avc_has_perm(sid, dsec->sid, SECCLASS_DIR, av, &ad); 1768 rc = avc_has_perm(sid, dsec->sid, SECCLASS_DIR, av, &ad);
1769 if (rc) 1769 if (rc)
1770 return rc; 1770 return rc;
1771 1771
1772 switch (kind) { 1772 switch (kind) {
1773 case MAY_LINK: 1773 case MAY_LINK:
1774 av = FILE__LINK; 1774 av = FILE__LINK;
1775 break; 1775 break;
1776 case MAY_UNLINK: 1776 case MAY_UNLINK:
1777 av = FILE__UNLINK; 1777 av = FILE__UNLINK;
1778 break; 1778 break;
1779 case MAY_RMDIR: 1779 case MAY_RMDIR:
1780 av = DIR__RMDIR; 1780 av = DIR__RMDIR;
1781 break; 1781 break;
1782 default: 1782 default:
1783 printk(KERN_WARNING "SELinux: %s: unrecognized kind %d\n", 1783 printk(KERN_WARNING "SELinux: %s: unrecognized kind %d\n",
1784 __func__, kind); 1784 __func__, kind);
1785 return 0; 1785 return 0;
1786 } 1786 }
1787 1787
1788 rc = avc_has_perm(sid, isec->sid, isec->sclass, av, &ad); 1788 rc = avc_has_perm(sid, isec->sid, isec->sclass, av, &ad);
1789 return rc; 1789 return rc;
1790 } 1790 }
1791 1791
1792 static inline int may_rename(struct inode *old_dir, 1792 static inline int may_rename(struct inode *old_dir,
1793 struct dentry *old_dentry, 1793 struct dentry *old_dentry,
1794 struct inode *new_dir, 1794 struct inode *new_dir,
1795 struct dentry *new_dentry) 1795 struct dentry *new_dentry)
1796 { 1796 {
1797 struct inode_security_struct *old_dsec, *new_dsec, *old_isec, *new_isec; 1797 struct inode_security_struct *old_dsec, *new_dsec, *old_isec, *new_isec;
1798 struct common_audit_data ad; 1798 struct common_audit_data ad;
1799 u32 sid = current_sid(); 1799 u32 sid = current_sid();
1800 u32 av; 1800 u32 av;
1801 int old_is_dir, new_is_dir; 1801 int old_is_dir, new_is_dir;
1802 int rc; 1802 int rc;
1803 1803
1804 old_dsec = old_dir->i_security; 1804 old_dsec = old_dir->i_security;
1805 old_isec = old_dentry->d_inode->i_security; 1805 old_isec = old_dentry->d_inode->i_security;
1806 old_is_dir = S_ISDIR(old_dentry->d_inode->i_mode); 1806 old_is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
1807 new_dsec = new_dir->i_security; 1807 new_dsec = new_dir->i_security;
1808 1808
1809 ad.type = LSM_AUDIT_DATA_DENTRY; 1809 ad.type = LSM_AUDIT_DATA_DENTRY;
1810 1810
1811 ad.u.dentry = old_dentry; 1811 ad.u.dentry = old_dentry;
1812 rc = avc_has_perm(sid, old_dsec->sid, SECCLASS_DIR, 1812 rc = avc_has_perm(sid, old_dsec->sid, SECCLASS_DIR,
1813 DIR__REMOVE_NAME | DIR__SEARCH, &ad); 1813 DIR__REMOVE_NAME | DIR__SEARCH, &ad);
1814 if (rc) 1814 if (rc)
1815 return rc; 1815 return rc;
1816 rc = avc_has_perm(sid, old_isec->sid, 1816 rc = avc_has_perm(sid, old_isec->sid,
1817 old_isec->sclass, FILE__RENAME, &ad); 1817 old_isec->sclass, FILE__RENAME, &ad);
1818 if (rc) 1818 if (rc)
1819 return rc; 1819 return rc;
1820 if (old_is_dir && new_dir != old_dir) { 1820 if (old_is_dir && new_dir != old_dir) {
1821 rc = avc_has_perm(sid, old_isec->sid, 1821 rc = avc_has_perm(sid, old_isec->sid,
1822 old_isec->sclass, DIR__REPARENT, &ad); 1822 old_isec->sclass, DIR__REPARENT, &ad);
1823 if (rc) 1823 if (rc)
1824 return rc; 1824 return rc;
1825 } 1825 }
1826 1826
1827 ad.u.dentry = new_dentry; 1827 ad.u.dentry = new_dentry;
1828 av = DIR__ADD_NAME | DIR__SEARCH; 1828 av = DIR__ADD_NAME | DIR__SEARCH;
1829 if (new_dentry->d_inode) 1829 if (new_dentry->d_inode)
1830 av |= DIR__REMOVE_NAME; 1830 av |= DIR__REMOVE_NAME;
1831 rc = avc_has_perm(sid, new_dsec->sid, SECCLASS_DIR, av, &ad); 1831 rc = avc_has_perm(sid, new_dsec->sid, SECCLASS_DIR, av, &ad);
1832 if (rc) 1832 if (rc)
1833 return rc; 1833 return rc;
1834 if (new_dentry->d_inode) { 1834 if (new_dentry->d_inode) {
1835 new_isec = new_dentry->d_inode->i_security; 1835 new_isec = new_dentry->d_inode->i_security;
1836 new_is_dir = S_ISDIR(new_dentry->d_inode->i_mode); 1836 new_is_dir = S_ISDIR(new_dentry->d_inode->i_mode);
1837 rc = avc_has_perm(sid, new_isec->sid, 1837 rc = avc_has_perm(sid, new_isec->sid,
1838 new_isec->sclass, 1838 new_isec->sclass,
1839 (new_is_dir ? DIR__RMDIR : FILE__UNLINK), &ad); 1839 (new_is_dir ? DIR__RMDIR : FILE__UNLINK), &ad);
1840 if (rc) 1840 if (rc)
1841 return rc; 1841 return rc;
1842 } 1842 }
1843 1843
1844 return 0; 1844 return 0;
1845 } 1845 }
1846 1846
1847 /* Check whether a task can perform a filesystem operation. */ 1847 /* Check whether a task can perform a filesystem operation. */
1848 static int superblock_has_perm(const struct cred *cred, 1848 static int superblock_has_perm(const struct cred *cred,
1849 struct super_block *sb, 1849 struct super_block *sb,
1850 u32 perms, 1850 u32 perms,
1851 struct common_audit_data *ad) 1851 struct common_audit_data *ad)
1852 { 1852 {
1853 struct superblock_security_struct *sbsec; 1853 struct superblock_security_struct *sbsec;
1854 u32 sid = cred_sid(cred); 1854 u32 sid = cred_sid(cred);
1855 1855
1856 sbsec = sb->s_security; 1856 sbsec = sb->s_security;
1857 return avc_has_perm(sid, sbsec->sid, SECCLASS_FILESYSTEM, perms, ad); 1857 return avc_has_perm(sid, sbsec->sid, SECCLASS_FILESYSTEM, perms, ad);
1858 } 1858 }
1859 1859
1860 /* Convert a Linux mode and permission mask to an access vector. */ 1860 /* Convert a Linux mode and permission mask to an access vector. */
1861 static inline u32 file_mask_to_av(int mode, int mask) 1861 static inline u32 file_mask_to_av(int mode, int mask)
1862 { 1862 {
1863 u32 av = 0; 1863 u32 av = 0;
1864 1864
1865 if (!S_ISDIR(mode)) { 1865 if (!S_ISDIR(mode)) {
1866 if (mask & MAY_EXEC) 1866 if (mask & MAY_EXEC)
1867 av |= FILE__EXECUTE; 1867 av |= FILE__EXECUTE;
1868 if (mask & MAY_READ) 1868 if (mask & MAY_READ)
1869 av |= FILE__READ; 1869 av |= FILE__READ;
1870 1870
1871 if (mask & MAY_APPEND) 1871 if (mask & MAY_APPEND)
1872 av |= FILE__APPEND; 1872 av |= FILE__APPEND;
1873 else if (mask & MAY_WRITE) 1873 else if (mask & MAY_WRITE)
1874 av |= FILE__WRITE; 1874 av |= FILE__WRITE;
1875 1875
1876 } else { 1876 } else {
1877 if (mask & MAY_EXEC) 1877 if (mask & MAY_EXEC)
1878 av |= DIR__SEARCH; 1878 av |= DIR__SEARCH;
1879 if (mask & MAY_WRITE) 1879 if (mask & MAY_WRITE)
1880 av |= DIR__WRITE; 1880 av |= DIR__WRITE;
1881 if (mask & MAY_READ) 1881 if (mask & MAY_READ)
1882 av |= DIR__READ; 1882 av |= DIR__READ;
1883 } 1883 }
1884 1884
1885 return av; 1885 return av;
1886 } 1886 }
1887 1887
1888 /* Convert a Linux file to an access vector. */ 1888 /* Convert a Linux file to an access vector. */
1889 static inline u32 file_to_av(struct file *file) 1889 static inline u32 file_to_av(struct file *file)
1890 { 1890 {
1891 u32 av = 0; 1891 u32 av = 0;
1892 1892
1893 if (file->f_mode & FMODE_READ) 1893 if (file->f_mode & FMODE_READ)
1894 av |= FILE__READ; 1894 av |= FILE__READ;
1895 if (file->f_mode & FMODE_WRITE) { 1895 if (file->f_mode & FMODE_WRITE) {
1896 if (file->f_flags & O_APPEND) 1896 if (file->f_flags & O_APPEND)
1897 av |= FILE__APPEND; 1897 av |= FILE__APPEND;
1898 else 1898 else
1899 av |= FILE__WRITE; 1899 av |= FILE__WRITE;
1900 } 1900 }
1901 if (!av) { 1901 if (!av) {
1902 /* 1902 /*
1903 * Special file opened with flags 3 for ioctl-only use. 1903 * Special file opened with flags 3 for ioctl-only use.
1904 */ 1904 */
1905 av = FILE__IOCTL; 1905 av = FILE__IOCTL;
1906 } 1906 }
1907 1907
1908 return av; 1908 return av;
1909 } 1909 }
1910 1910
1911 /* 1911 /*
1912 * Convert a file to an access vector and include the correct open 1912 * Convert a file to an access vector and include the correct open
1913 * permission. 1913 * permission.
1914 */ 1914 */
1915 static inline u32 open_file_to_av(struct file *file) 1915 static inline u32 open_file_to_av(struct file *file)
1916 { 1916 {
1917 u32 av = file_to_av(file); 1917 u32 av = file_to_av(file);
1918 1918
1919 if (selinux_policycap_openperm) 1919 if (selinux_policycap_openperm)
1920 av |= FILE__OPEN; 1920 av |= FILE__OPEN;
1921 1921
1922 return av; 1922 return av;
1923 } 1923 }
1924 1924
1925 /* Hook functions begin here. */ 1925 /* Hook functions begin here. */
1926 1926
1927 static int selinux_ptrace_access_check(struct task_struct *child, 1927 static int selinux_ptrace_access_check(struct task_struct *child,
1928 unsigned int mode) 1928 unsigned int mode)
1929 { 1929 {
1930 int rc; 1930 int rc;
1931 1931
1932 rc = cap_ptrace_access_check(child, mode); 1932 rc = cap_ptrace_access_check(child, mode);
1933 if (rc) 1933 if (rc)
1934 return rc; 1934 return rc;
1935 1935
1936 if (mode & PTRACE_MODE_READ) { 1936 if (mode & PTRACE_MODE_READ) {
1937 u32 sid = current_sid(); 1937 u32 sid = current_sid();
1938 u32 csid = task_sid(child); 1938 u32 csid = task_sid(child);
1939 return avc_has_perm(sid, csid, SECCLASS_FILE, FILE__READ, NULL); 1939 return avc_has_perm(sid, csid, SECCLASS_FILE, FILE__READ, NULL);
1940 } 1940 }
1941 1941
1942 return current_has_perm(child, PROCESS__PTRACE); 1942 return current_has_perm(child, PROCESS__PTRACE);
1943 } 1943 }
1944 1944
1945 static int selinux_ptrace_traceme(struct task_struct *parent) 1945 static int selinux_ptrace_traceme(struct task_struct *parent)
1946 { 1946 {
1947 int rc; 1947 int rc;
1948 1948
1949 rc = cap_ptrace_traceme(parent); 1949 rc = cap_ptrace_traceme(parent);
1950 if (rc) 1950 if (rc)
1951 return rc; 1951 return rc;
1952 1952
1953 return task_has_perm(parent, current, PROCESS__PTRACE); 1953 return task_has_perm(parent, current, PROCESS__PTRACE);
1954 } 1954 }
1955 1955
1956 static int selinux_capget(struct task_struct *target, kernel_cap_t *effective, 1956 static int selinux_capget(struct task_struct *target, kernel_cap_t *effective,
1957 kernel_cap_t *inheritable, kernel_cap_t *permitted) 1957 kernel_cap_t *inheritable, kernel_cap_t *permitted)
1958 { 1958 {
1959 int error; 1959 int error;
1960 1960
1961 error = current_has_perm(target, PROCESS__GETCAP); 1961 error = current_has_perm(target, PROCESS__GETCAP);
1962 if (error) 1962 if (error)
1963 return error; 1963 return error;
1964 1964
1965 return cap_capget(target, effective, inheritable, permitted); 1965 return cap_capget(target, effective, inheritable, permitted);
1966 } 1966 }
1967 1967
1968 static int selinux_capset(struct cred *new, const struct cred *old, 1968 static int selinux_capset(struct cred *new, const struct cred *old,
1969 const kernel_cap_t *effective, 1969 const kernel_cap_t *effective,
1970 const kernel_cap_t *inheritable, 1970 const kernel_cap_t *inheritable,
1971 const kernel_cap_t *permitted) 1971 const kernel_cap_t *permitted)
1972 { 1972 {
1973 int error; 1973 int error;
1974 1974
1975 error = cap_capset(new, old, 1975 error = cap_capset(new, old,
1976 effective, inheritable, permitted); 1976 effective, inheritable, permitted);
1977 if (error) 1977 if (error)
1978 return error; 1978 return error;
1979 1979
1980 return cred_has_perm(old, new, PROCESS__SETCAP); 1980 return cred_has_perm(old, new, PROCESS__SETCAP);
1981 } 1981 }
1982 1982
1983 /* 1983 /*
1984 * (This comment used to live with the selinux_task_setuid hook, 1984 * (This comment used to live with the selinux_task_setuid hook,
1985 * which was removed). 1985 * which was removed).
1986 * 1986 *
1987 * Since setuid only affects the current process, and since the SELinux 1987 * Since setuid only affects the current process, and since the SELinux
1988 * controls are not based on the Linux identity attributes, SELinux does not 1988 * controls are not based on the Linux identity attributes, SELinux does not
1989 * need to control this operation. However, SELinux does control the use of 1989 * need to control this operation. However, SELinux does control the use of
1990 * the CAP_SETUID and CAP_SETGID capabilities using the capable hook. 1990 * the CAP_SETUID and CAP_SETGID capabilities using the capable hook.
1991 */ 1991 */
1992 1992
1993 static int selinux_capable(const struct cred *cred, struct user_namespace *ns, 1993 static int selinux_capable(const struct cred *cred, struct user_namespace *ns,
1994 int cap, int audit) 1994 int cap, int audit)
1995 { 1995 {
1996 int rc; 1996 int rc;
1997 1997
1998 rc = cap_capable(cred, ns, cap, audit); 1998 rc = cap_capable(cred, ns, cap, audit);
1999 if (rc) 1999 if (rc)
2000 return rc; 2000 return rc;
2001 2001
2002 return cred_has_capability(cred, cap, audit); 2002 return cred_has_capability(cred, cap, audit);
2003 } 2003 }
2004 2004
2005 static int selinux_quotactl(int cmds, int type, int id, struct super_block *sb) 2005 static int selinux_quotactl(int cmds, int type, int id, struct super_block *sb)
2006 { 2006 {
2007 const struct cred *cred = current_cred(); 2007 const struct cred *cred = current_cred();
2008 int rc = 0; 2008 int rc = 0;
2009 2009
2010 if (!sb) 2010 if (!sb)
2011 return 0; 2011 return 0;
2012 2012
2013 switch (cmds) { 2013 switch (cmds) {
2014 case Q_SYNC: 2014 case Q_SYNC:
2015 case Q_QUOTAON: 2015 case Q_QUOTAON:
2016 case Q_QUOTAOFF: 2016 case Q_QUOTAOFF:
2017 case Q_SETINFO: 2017 case Q_SETINFO:
2018 case Q_SETQUOTA: 2018 case Q_SETQUOTA:
2019 rc = superblock_has_perm(cred, sb, FILESYSTEM__QUOTAMOD, NULL); 2019 rc = superblock_has_perm(cred, sb, FILESYSTEM__QUOTAMOD, NULL);
2020 break; 2020 break;
2021 case Q_GETFMT: 2021 case Q_GETFMT:
2022 case Q_GETINFO: 2022 case Q_GETINFO:
2023 case Q_GETQUOTA: 2023 case Q_GETQUOTA:
2024 rc = superblock_has_perm(cred, sb, FILESYSTEM__QUOTAGET, NULL); 2024 rc = superblock_has_perm(cred, sb, FILESYSTEM__QUOTAGET, NULL);
2025 break; 2025 break;
2026 default: 2026 default:
2027 rc = 0; /* let the kernel handle invalid cmds */ 2027 rc = 0; /* let the kernel handle invalid cmds */
2028 break; 2028 break;
2029 } 2029 }
2030 return rc; 2030 return rc;
2031 } 2031 }
2032 2032
2033 static int selinux_quota_on(struct dentry *dentry) 2033 static int selinux_quota_on(struct dentry *dentry)
2034 { 2034 {
2035 const struct cred *cred = current_cred(); 2035 const struct cred *cred = current_cred();
2036 2036
2037 return dentry_has_perm(cred, dentry, FILE__QUOTAON); 2037 return dentry_has_perm(cred, dentry, FILE__QUOTAON);
2038 } 2038 }
2039 2039
2040 static int selinux_syslog(int type) 2040 static int selinux_syslog(int type)
2041 { 2041 {
2042 int rc; 2042 int rc;
2043 2043
2044 switch (type) { 2044 switch (type) {
2045 case SYSLOG_ACTION_READ_ALL: /* Read last kernel messages */ 2045 case SYSLOG_ACTION_READ_ALL: /* Read last kernel messages */
2046 case SYSLOG_ACTION_SIZE_BUFFER: /* Return size of the log buffer */ 2046 case SYSLOG_ACTION_SIZE_BUFFER: /* Return size of the log buffer */
2047 rc = task_has_system(current, SYSTEM__SYSLOG_READ); 2047 rc = task_has_system(current, SYSTEM__SYSLOG_READ);
2048 break; 2048 break;
2049 case SYSLOG_ACTION_CONSOLE_OFF: /* Disable logging to console */ 2049 case SYSLOG_ACTION_CONSOLE_OFF: /* Disable logging to console */
2050 case SYSLOG_ACTION_CONSOLE_ON: /* Enable logging to console */ 2050 case SYSLOG_ACTION_CONSOLE_ON: /* Enable logging to console */
2051 /* Set level of messages printed to console */ 2051 /* Set level of messages printed to console */
2052 case SYSLOG_ACTION_CONSOLE_LEVEL: 2052 case SYSLOG_ACTION_CONSOLE_LEVEL:
2053 rc = task_has_system(current, SYSTEM__SYSLOG_CONSOLE); 2053 rc = task_has_system(current, SYSTEM__SYSLOG_CONSOLE);
2054 break; 2054 break;
2055 case SYSLOG_ACTION_CLOSE: /* Close log */ 2055 case SYSLOG_ACTION_CLOSE: /* Close log */
2056 case SYSLOG_ACTION_OPEN: /* Open log */ 2056 case SYSLOG_ACTION_OPEN: /* Open log */
2057 case SYSLOG_ACTION_READ: /* Read from log */ 2057 case SYSLOG_ACTION_READ: /* Read from log */
2058 case SYSLOG_ACTION_READ_CLEAR: /* Read/clear last kernel messages */ 2058 case SYSLOG_ACTION_READ_CLEAR: /* Read/clear last kernel messages */
2059 case SYSLOG_ACTION_CLEAR: /* Clear ring buffer */ 2059 case SYSLOG_ACTION_CLEAR: /* Clear ring buffer */
2060 default: 2060 default:
2061 rc = task_has_system(current, SYSTEM__SYSLOG_MOD); 2061 rc = task_has_system(current, SYSTEM__SYSLOG_MOD);
2062 break; 2062 break;
2063 } 2063 }
2064 return rc; 2064 return rc;
2065 } 2065 }
2066 2066
2067 /* 2067 /*
2068 * Check that a process has enough memory to allocate a new virtual 2068 * Check that a process has enough memory to allocate a new virtual
2069 * mapping. 0 means there is enough memory for the allocation to 2069 * mapping. 0 means there is enough memory for the allocation to
2070 * succeed and -ENOMEM implies there is not. 2070 * succeed and -ENOMEM implies there is not.
2071 * 2071 *
2072 * Do not audit the selinux permission check, as this is applied to all 2072 * Do not audit the selinux permission check, as this is applied to all
2073 * processes that allocate mappings. 2073 * processes that allocate mappings.
2074 */ 2074 */
2075 static int selinux_vm_enough_memory(struct mm_struct *mm, long pages) 2075 static int selinux_vm_enough_memory(struct mm_struct *mm, long pages)
2076 { 2076 {
2077 int rc, cap_sys_admin = 0; 2077 int rc, cap_sys_admin = 0;
2078 2078
2079 rc = selinux_capable(current_cred(), &init_user_ns, CAP_SYS_ADMIN, 2079 rc = selinux_capable(current_cred(), &init_user_ns, CAP_SYS_ADMIN,
2080 SECURITY_CAP_NOAUDIT); 2080 SECURITY_CAP_NOAUDIT);
2081 if (rc == 0) 2081 if (rc == 0)
2082 cap_sys_admin = 1; 2082 cap_sys_admin = 1;
2083 2083
2084 return __vm_enough_memory(mm, pages, cap_sys_admin); 2084 return __vm_enough_memory(mm, pages, cap_sys_admin);
2085 } 2085 }
2086 2086
2087 /* binprm security operations */ 2087 /* binprm security operations */
2088 2088
2089 static int selinux_bprm_set_creds(struct linux_binprm *bprm) 2089 static int selinux_bprm_set_creds(struct linux_binprm *bprm)
2090 { 2090 {
2091 const struct task_security_struct *old_tsec; 2091 const struct task_security_struct *old_tsec;
2092 struct task_security_struct *new_tsec; 2092 struct task_security_struct *new_tsec;
2093 struct inode_security_struct *isec; 2093 struct inode_security_struct *isec;
2094 struct common_audit_data ad; 2094 struct common_audit_data ad;
2095 struct inode *inode = file_inode(bprm->file); 2095 struct inode *inode = file_inode(bprm->file);
2096 int rc; 2096 int rc;
2097 2097
2098 rc = cap_bprm_set_creds(bprm); 2098 rc = cap_bprm_set_creds(bprm);
2099 if (rc) 2099 if (rc)
2100 return rc; 2100 return rc;
2101 2101
2102 /* SELinux context only depends on the initial program or script and not 2102 /* SELinux context only depends on the initial program or script and not
2103 * the script interpreter */ 2103 * the script interpreter */
2104 if (bprm->cred_prepared) 2104 if (bprm->cred_prepared)
2105 return 0; 2105 return 0;
2106 2106
2107 old_tsec = current_security(); 2107 old_tsec = current_security();
2108 new_tsec = bprm->cred->security; 2108 new_tsec = bprm->cred->security;
2109 isec = inode->i_security; 2109 isec = inode->i_security;
2110 2110
2111 /* Default to the current task SID. */ 2111 /* Default to the current task SID. */
2112 new_tsec->sid = old_tsec->sid; 2112 new_tsec->sid = old_tsec->sid;
2113 new_tsec->osid = old_tsec->sid; 2113 new_tsec->osid = old_tsec->sid;
2114 2114
2115 /* Reset fs, key, and sock SIDs on execve. */ 2115 /* Reset fs, key, and sock SIDs on execve. */
2116 new_tsec->create_sid = 0; 2116 new_tsec->create_sid = 0;
2117 new_tsec->keycreate_sid = 0; 2117 new_tsec->keycreate_sid = 0;
2118 new_tsec->sockcreate_sid = 0; 2118 new_tsec->sockcreate_sid = 0;
2119 2119
2120 if (old_tsec->exec_sid) { 2120 if (old_tsec->exec_sid) {
2121 new_tsec->sid = old_tsec->exec_sid; 2121 new_tsec->sid = old_tsec->exec_sid;
2122 /* Reset exec SID on execve. */ 2122 /* Reset exec SID on execve. */
2123 new_tsec->exec_sid = 0; 2123 new_tsec->exec_sid = 0;
2124 2124
2125 /* 2125 /*
2126 * Minimize confusion: if no_new_privs and a transition is 2126 * Minimize confusion: if no_new_privs and a transition is
2127 * explicitly requested, then fail the exec. 2127 * explicitly requested, then fail the exec.
2128 */ 2128 */
2129 if (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS) 2129 if (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS)
2130 return -EPERM; 2130 return -EPERM;
2131 } else { 2131 } else {
2132 /* Check for a default transition on this program. */ 2132 /* Check for a default transition on this program. */
2133 rc = security_transition_sid(old_tsec->sid, isec->sid, 2133 rc = security_transition_sid(old_tsec->sid, isec->sid,
2134 SECCLASS_PROCESS, NULL, 2134 SECCLASS_PROCESS, NULL,
2135 &new_tsec->sid); 2135 &new_tsec->sid);
2136 if (rc) 2136 if (rc)
2137 return rc; 2137 return rc;
2138 } 2138 }
2139 2139
2140 ad.type = LSM_AUDIT_DATA_PATH; 2140 ad.type = LSM_AUDIT_DATA_PATH;
2141 ad.u.path = bprm->file->f_path; 2141 ad.u.path = bprm->file->f_path;
2142 2142
2143 if ((bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) || 2143 if ((bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) ||
2144 (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS)) 2144 (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS))
2145 new_tsec->sid = old_tsec->sid; 2145 new_tsec->sid = old_tsec->sid;
2146 2146
2147 if (new_tsec->sid == old_tsec->sid) { 2147 if (new_tsec->sid == old_tsec->sid) {
2148 rc = avc_has_perm(old_tsec->sid, isec->sid, 2148 rc = avc_has_perm(old_tsec->sid, isec->sid,
2149 SECCLASS_FILE, FILE__EXECUTE_NO_TRANS, &ad); 2149 SECCLASS_FILE, FILE__EXECUTE_NO_TRANS, &ad);
2150 if (rc) 2150 if (rc)
2151 return rc; 2151 return rc;
2152 } else { 2152 } else {
2153 /* Check permissions for the transition. */ 2153 /* Check permissions for the transition. */
2154 rc = avc_has_perm(old_tsec->sid, new_tsec->sid, 2154 rc = avc_has_perm(old_tsec->sid, new_tsec->sid,
2155 SECCLASS_PROCESS, PROCESS__TRANSITION, &ad); 2155 SECCLASS_PROCESS, PROCESS__TRANSITION, &ad);
2156 if (rc) 2156 if (rc)
2157 return rc; 2157 return rc;
2158 2158
2159 rc = avc_has_perm(new_tsec->sid, isec->sid, 2159 rc = avc_has_perm(new_tsec->sid, isec->sid,
2160 SECCLASS_FILE, FILE__ENTRYPOINT, &ad); 2160 SECCLASS_FILE, FILE__ENTRYPOINT, &ad);
2161 if (rc) 2161 if (rc)
2162 return rc; 2162 return rc;
2163 2163
2164 /* Check for shared state */ 2164 /* Check for shared state */
2165 if (bprm->unsafe & LSM_UNSAFE_SHARE) { 2165 if (bprm->unsafe & LSM_UNSAFE_SHARE) {
2166 rc = avc_has_perm(old_tsec->sid, new_tsec->sid, 2166 rc = avc_has_perm(old_tsec->sid, new_tsec->sid,
2167 SECCLASS_PROCESS, PROCESS__SHARE, 2167 SECCLASS_PROCESS, PROCESS__SHARE,
2168 NULL); 2168 NULL);
2169 if (rc) 2169 if (rc)
2170 return -EPERM; 2170 return -EPERM;
2171 } 2171 }
2172 2172
2173 /* Make sure that anyone attempting to ptrace over a task that 2173 /* Make sure that anyone attempting to ptrace over a task that
2174 * changes its SID has the appropriate permission */ 2174 * changes its SID has the appropriate permission */
2175 if (bprm->unsafe & 2175 if (bprm->unsafe &
2176 (LSM_UNSAFE_PTRACE | LSM_UNSAFE_PTRACE_CAP)) { 2176 (LSM_UNSAFE_PTRACE | LSM_UNSAFE_PTRACE_CAP)) {
2177 struct task_struct *tracer; 2177 struct task_struct *tracer;
2178 struct task_security_struct *sec; 2178 struct task_security_struct *sec;
2179 u32 ptsid = 0; 2179 u32 ptsid = 0;
2180 2180
2181 rcu_read_lock(); 2181 rcu_read_lock();
2182 tracer = ptrace_parent(current); 2182 tracer = ptrace_parent(current);
2183 if (likely(tracer != NULL)) { 2183 if (likely(tracer != NULL)) {
2184 sec = __task_cred(tracer)->security; 2184 sec = __task_cred(tracer)->security;
2185 ptsid = sec->sid; 2185 ptsid = sec->sid;
2186 } 2186 }
2187 rcu_read_unlock(); 2187 rcu_read_unlock();
2188 2188
2189 if (ptsid != 0) { 2189 if (ptsid != 0) {
2190 rc = avc_has_perm(ptsid, new_tsec->sid, 2190 rc = avc_has_perm(ptsid, new_tsec->sid,
2191 SECCLASS_PROCESS, 2191 SECCLASS_PROCESS,
2192 PROCESS__PTRACE, NULL); 2192 PROCESS__PTRACE, NULL);
2193 if (rc) 2193 if (rc)
2194 return -EPERM; 2194 return -EPERM;
2195 } 2195 }
2196 } 2196 }
2197 2197
2198 /* Clear any possibly unsafe personality bits on exec: */ 2198 /* Clear any possibly unsafe personality bits on exec: */
2199 bprm->per_clear |= PER_CLEAR_ON_SETID; 2199 bprm->per_clear |= PER_CLEAR_ON_SETID;
2200 } 2200 }
2201 2201
2202 return 0; 2202 return 0;
2203 } 2203 }
2204 2204
2205 static int selinux_bprm_secureexec(struct linux_binprm *bprm) 2205 static int selinux_bprm_secureexec(struct linux_binprm *bprm)
2206 { 2206 {
2207 const struct task_security_struct *tsec = current_security(); 2207 const struct task_security_struct *tsec = current_security();
2208 u32 sid, osid; 2208 u32 sid, osid;
2209 int atsecure = 0; 2209 int atsecure = 0;
2210 2210
2211 sid = tsec->sid; 2211 sid = tsec->sid;
2212 osid = tsec->osid; 2212 osid = tsec->osid;
2213 2213
2214 if (osid != sid) { 2214 if (osid != sid) {
2215 /* Enable secure mode for SID transitions unless 2215 /* Enable secure mode for SID transitions unless
2216 the noatsecure permission is granted between 2216 the noatsecure permission is granted between
2217 the two SIDs, i.e. avc_has_perm() returns 0. */ 2217 the two SIDs, i.e. avc_has_perm() returns 0. */
2218 atsecure = avc_has_perm(osid, sid, 2218 atsecure = avc_has_perm(osid, sid,
2219 SECCLASS_PROCESS, 2219 SECCLASS_PROCESS,
2220 PROCESS__NOATSECURE, NULL); 2220 PROCESS__NOATSECURE, NULL);
2221 } 2221 }
2222 2222
2223 return (atsecure || cap_bprm_secureexec(bprm)); 2223 return (atsecure || cap_bprm_secureexec(bprm));
2224 } 2224 }
2225 2225
2226 static int match_file(const void *p, struct file *file, unsigned fd) 2226 static int match_file(const void *p, struct file *file, unsigned fd)
2227 { 2227 {
2228 return file_has_perm(p, file, file_to_av(file)) ? fd + 1 : 0; 2228 return file_has_perm(p, file, file_to_av(file)) ? fd + 1 : 0;
2229 } 2229 }
2230 2230
2231 /* Derived from fs/exec.c:flush_old_files. */ 2231 /* Derived from fs/exec.c:flush_old_files. */
2232 static inline void flush_unauthorized_files(const struct cred *cred, 2232 static inline void flush_unauthorized_files(const struct cred *cred,
2233 struct files_struct *files) 2233 struct files_struct *files)
2234 { 2234 {
2235 struct file *file, *devnull = NULL; 2235 struct file *file, *devnull = NULL;
2236 struct tty_struct *tty; 2236 struct tty_struct *tty;
2237 int drop_tty = 0; 2237 int drop_tty = 0;
2238 unsigned n; 2238 unsigned n;
2239 2239
2240 tty = get_current_tty(); 2240 tty = get_current_tty();
2241 if (tty) { 2241 if (tty) {
2242 spin_lock(&tty_files_lock); 2242 spin_lock(&tty_files_lock);
2243 if (!list_empty(&tty->tty_files)) { 2243 if (!list_empty(&tty->tty_files)) {
2244 struct tty_file_private *file_priv; 2244 struct tty_file_private *file_priv;
2245 2245
2246 /* Revalidate access to controlling tty. 2246 /* Revalidate access to controlling tty.
2247 Use file_path_has_perm on the tty path directly 2247 Use file_path_has_perm on the tty path directly
2248 rather than using file_has_perm, as this particular 2248 rather than using file_has_perm, as this particular
2249 open file may belong to another process and we are 2249 open file may belong to another process and we are
2250 only interested in the inode-based check here. */ 2250 only interested in the inode-based check here. */
2251 file_priv = list_first_entry(&tty->tty_files, 2251 file_priv = list_first_entry(&tty->tty_files,
2252 struct tty_file_private, list); 2252 struct tty_file_private, list);
2253 file = file_priv->file; 2253 file = file_priv->file;
2254 if (file_path_has_perm(cred, file, FILE__READ | FILE__WRITE)) 2254 if (file_path_has_perm(cred, file, FILE__READ | FILE__WRITE))
2255 drop_tty = 1; 2255 drop_tty = 1;
2256 } 2256 }
2257 spin_unlock(&tty_files_lock); 2257 spin_unlock(&tty_files_lock);
2258 tty_kref_put(tty); 2258 tty_kref_put(tty);
2259 } 2259 }
2260 /* Reset controlling tty. */ 2260 /* Reset controlling tty. */
2261 if (drop_tty) 2261 if (drop_tty)
2262 no_tty(); 2262 no_tty();
2263 2263
2264 /* Revalidate access to inherited open files. */ 2264 /* Revalidate access to inherited open files. */
2265 n = iterate_fd(files, 0, match_file, cred); 2265 n = iterate_fd(files, 0, match_file, cred);
2266 if (!n) /* none found? */ 2266 if (!n) /* none found? */
2267 return; 2267 return;
2268 2268
2269 devnull = dentry_open(&selinux_null, O_RDWR, cred); 2269 devnull = dentry_open(&selinux_null, O_RDWR, cred);
2270 if (IS_ERR(devnull)) 2270 if (IS_ERR(devnull))
2271 devnull = NULL; 2271 devnull = NULL;
2272 /* replace all the matching ones with this */ 2272 /* replace all the matching ones with this */
2273 do { 2273 do {
2274 replace_fd(n - 1, devnull, 0); 2274 replace_fd(n - 1, devnull, 0);
2275 } while ((n = iterate_fd(files, n, match_file, cred)) != 0); 2275 } while ((n = iterate_fd(files, n, match_file, cred)) != 0);
2276 if (devnull) 2276 if (devnull)
2277 fput(devnull); 2277 fput(devnull);
2278 } 2278 }
2279 2279
2280 /* 2280 /*
2281 * Prepare a process for imminent new credential changes due to exec 2281 * Prepare a process for imminent new credential changes due to exec
2282 */ 2282 */
2283 static void selinux_bprm_committing_creds(struct linux_binprm *bprm) 2283 static void selinux_bprm_committing_creds(struct linux_binprm *bprm)
2284 { 2284 {
2285 struct task_security_struct *new_tsec; 2285 struct task_security_struct *new_tsec;
2286 struct rlimit *rlim, *initrlim; 2286 struct rlimit *rlim, *initrlim;
2287 int rc, i; 2287 int rc, i;
2288 2288
2289 new_tsec = bprm->cred->security; 2289 new_tsec = bprm->cred->security;
2290 if (new_tsec->sid == new_tsec->osid) 2290 if (new_tsec->sid == new_tsec->osid)
2291 return; 2291 return;
2292 2292
2293 /* Close files for which the new task SID is not authorized. */ 2293 /* Close files for which the new task SID is not authorized. */
2294 flush_unauthorized_files(bprm->cred, current->files); 2294 flush_unauthorized_files(bprm->cred, current->files);
2295 2295
2296 /* Always clear parent death signal on SID transitions. */ 2296 /* Always clear parent death signal on SID transitions. */
2297 current->pdeath_signal = 0; 2297 current->pdeath_signal = 0;
2298 2298
2299 /* Check whether the new SID can inherit resource limits from the old 2299 /* Check whether the new SID can inherit resource limits from the old
2300 * SID. If not, reset all soft limits to the lower of the current 2300 * SID. If not, reset all soft limits to the lower of the current
2301 * task's hard limit and the init task's soft limit. 2301 * task's hard limit and the init task's soft limit.
2302 * 2302 *
2303 * Note that the setting of hard limits (even to lower them) can be 2303 * Note that the setting of hard limits (even to lower them) can be
2304 * controlled by the setrlimit check. The inclusion of the init task's 2304 * controlled by the setrlimit check. The inclusion of the init task's
2305 * soft limit into the computation is to avoid resetting soft limits 2305 * soft limit into the computation is to avoid resetting soft limits
2306 * higher than the default soft limit for cases where the default is 2306 * higher than the default soft limit for cases where the default is
2307 * lower than the hard limit, e.g. RLIMIT_CORE or RLIMIT_STACK. 2307 * lower than the hard limit, e.g. RLIMIT_CORE or RLIMIT_STACK.
2308 */ 2308 */
2309 rc = avc_has_perm(new_tsec->osid, new_tsec->sid, SECCLASS_PROCESS, 2309 rc = avc_has_perm(new_tsec->osid, new_tsec->sid, SECCLASS_PROCESS,
2310 PROCESS__RLIMITINH, NULL); 2310 PROCESS__RLIMITINH, NULL);
2311 if (rc) { 2311 if (rc) {
2312 /* protect against do_prlimit() */ 2312 /* protect against do_prlimit() */
2313 task_lock(current); 2313 task_lock(current);
2314 for (i = 0; i < RLIM_NLIMITS; i++) { 2314 for (i = 0; i < RLIM_NLIMITS; i++) {
2315 rlim = current->signal->rlim + i; 2315 rlim = current->signal->rlim + i;
2316 initrlim = init_task.signal->rlim + i; 2316 initrlim = init_task.signal->rlim + i;
2317 rlim->rlim_cur = min(rlim->rlim_max, initrlim->rlim_cur); 2317 rlim->rlim_cur = min(rlim->rlim_max, initrlim->rlim_cur);
2318 } 2318 }
2319 task_unlock(current); 2319 task_unlock(current);
2320 update_rlimit_cpu(current, rlimit(RLIMIT_CPU)); 2320 update_rlimit_cpu(current, rlimit(RLIMIT_CPU));
2321 } 2321 }
2322 } 2322 }
2323 2323
2324 /* 2324 /*
2325 * Clean up the process immediately after the installation of new credentials 2325 * Clean up the process immediately after the installation of new credentials
2326 * due to exec 2326 * due to exec
2327 */ 2327 */
2328 static void selinux_bprm_committed_creds(struct linux_binprm *bprm) 2328 static void selinux_bprm_committed_creds(struct linux_binprm *bprm)
2329 { 2329 {
2330 const struct task_security_struct *tsec = current_security(); 2330 const struct task_security_struct *tsec = current_security();
2331 struct itimerval itimer; 2331 struct itimerval itimer;
2332 u32 osid, sid; 2332 u32 osid, sid;
2333 int rc, i; 2333 int rc, i;
2334 2334
2335 osid = tsec->osid; 2335 osid = tsec->osid;
2336 sid = tsec->sid; 2336 sid = tsec->sid;
2337 2337
2338 if (sid == osid) 2338 if (sid == osid)
2339 return; 2339 return;
2340 2340
2341 /* Check whether the new SID can inherit signal state from the old SID. 2341 /* Check whether the new SID can inherit signal state from the old SID.
2342 * If not, clear itimers to avoid subsequent signal generation and 2342 * If not, clear itimers to avoid subsequent signal generation and
2343 * flush and unblock signals. 2343 * flush and unblock signals.
2344 * 2344 *
2345 * This must occur _after_ the task SID has been updated so that any 2345 * This must occur _after_ the task SID has been updated so that any
2346 * kill done after the flush will be checked against the new SID. 2346 * kill done after the flush will be checked against the new SID.
2347 */ 2347 */
2348 rc = avc_has_perm(osid, sid, SECCLASS_PROCESS, PROCESS__SIGINH, NULL); 2348 rc = avc_has_perm(osid, sid, SECCLASS_PROCESS, PROCESS__SIGINH, NULL);
2349 if (rc) { 2349 if (rc) {
2350 memset(&itimer, 0, sizeof itimer); 2350 memset(&itimer, 0, sizeof itimer);
2351 for (i = 0; i < 3; i++) 2351 for (i = 0; i < 3; i++)
2352 do_setitimer(i, &itimer, NULL); 2352 do_setitimer(i, &itimer, NULL);
2353 spin_lock_irq(&current->sighand->siglock); 2353 spin_lock_irq(&current->sighand->siglock);
2354 if (!(current->signal->flags & SIGNAL_GROUP_EXIT)) { 2354 if (!(current->signal->flags & SIGNAL_GROUP_EXIT)) {
2355 __flush_signals(current); 2355 __flush_signals(current);
2356 flush_signal_handlers(current, 1); 2356 flush_signal_handlers(current, 1);
2357 sigemptyset(&current->blocked); 2357 sigemptyset(&current->blocked);
2358 } 2358 }
2359 spin_unlock_irq(&current->sighand->siglock); 2359 spin_unlock_irq(&current->sighand->siglock);
2360 } 2360 }
2361 2361
2362 /* Wake up the parent if it is waiting so that it can recheck 2362 /* Wake up the parent if it is waiting so that it can recheck
2363 * wait permission to the new task SID. */ 2363 * wait permission to the new task SID. */
2364 read_lock(&tasklist_lock); 2364 read_lock(&tasklist_lock);
2365 __wake_up_parent(current, current->real_parent); 2365 __wake_up_parent(current, current->real_parent);
2366 read_unlock(&tasklist_lock); 2366 read_unlock(&tasklist_lock);
2367 } 2367 }
2368 2368
2369 /* superblock security operations */ 2369 /* superblock security operations */
2370 2370
2371 static int selinux_sb_alloc_security(struct super_block *sb) 2371 static int selinux_sb_alloc_security(struct super_block *sb)
2372 { 2372 {
2373 return superblock_alloc_security(sb); 2373 return superblock_alloc_security(sb);
2374 } 2374 }
2375 2375
2376 static void selinux_sb_free_security(struct super_block *sb) 2376 static void selinux_sb_free_security(struct super_block *sb)
2377 { 2377 {
2378 superblock_free_security(sb); 2378 superblock_free_security(sb);
2379 } 2379 }
2380 2380
2381 static inline int match_prefix(char *prefix, int plen, char *option, int olen) 2381 static inline int match_prefix(char *prefix, int plen, char *option, int olen)
2382 { 2382 {
2383 if (plen > olen) 2383 if (plen > olen)
2384 return 0; 2384 return 0;
2385 2385
2386 return !memcmp(prefix, option, plen); 2386 return !memcmp(prefix, option, plen);
2387 } 2387 }
2388 2388
2389 static inline int selinux_option(char *option, int len) 2389 static inline int selinux_option(char *option, int len)
2390 { 2390 {
2391 return (match_prefix(CONTEXT_STR, sizeof(CONTEXT_STR)-1, option, len) || 2391 return (match_prefix(CONTEXT_STR, sizeof(CONTEXT_STR)-1, option, len) ||
2392 match_prefix(FSCONTEXT_STR, sizeof(FSCONTEXT_STR)-1, option, len) || 2392 match_prefix(FSCONTEXT_STR, sizeof(FSCONTEXT_STR)-1, option, len) ||
2393 match_prefix(DEFCONTEXT_STR, sizeof(DEFCONTEXT_STR)-1, option, len) || 2393 match_prefix(DEFCONTEXT_STR, sizeof(DEFCONTEXT_STR)-1, option, len) ||
2394 match_prefix(ROOTCONTEXT_STR, sizeof(ROOTCONTEXT_STR)-1, option, len) || 2394 match_prefix(ROOTCONTEXT_STR, sizeof(ROOTCONTEXT_STR)-1, option, len) ||
2395 match_prefix(LABELSUPP_STR, sizeof(LABELSUPP_STR)-1, option, len)); 2395 match_prefix(LABELSUPP_STR, sizeof(LABELSUPP_STR)-1, option, len));
2396 } 2396 }
2397 2397
2398 static inline void take_option(char **to, char *from, int *first, int len) 2398 static inline void take_option(char **to, char *from, int *first, int len)
2399 { 2399 {
2400 if (!*first) { 2400 if (!*first) {
2401 **to = ','; 2401 **to = ',';
2402 *to += 1; 2402 *to += 1;
2403 } else 2403 } else
2404 *first = 0; 2404 *first = 0;
2405 memcpy(*to, from, len); 2405 memcpy(*to, from, len);
2406 *to += len; 2406 *to += len;
2407 } 2407 }
2408 2408
2409 static inline void take_selinux_option(char **to, char *from, int *first, 2409 static inline void take_selinux_option(char **to, char *from, int *first,
2410 int len) 2410 int len)
2411 { 2411 {
2412 int current_size = 0; 2412 int current_size = 0;
2413 2413
2414 if (!*first) { 2414 if (!*first) {
2415 **to = '|'; 2415 **to = '|';
2416 *to += 1; 2416 *to += 1;
2417 } else 2417 } else
2418 *first = 0; 2418 *first = 0;
2419 2419
2420 while (current_size < len) { 2420 while (current_size < len) {
2421 if (*from != '"') { 2421 if (*from != '"') {
2422 **to = *from; 2422 **to = *from;
2423 *to += 1; 2423 *to += 1;
2424 } 2424 }
2425 from += 1; 2425 from += 1;
2426 current_size += 1; 2426 current_size += 1;
2427 } 2427 }
2428 } 2428 }
2429 2429
2430 static int selinux_sb_copy_data(char *orig, char *copy) 2430 static int selinux_sb_copy_data(char *orig, char *copy)
2431 { 2431 {
2432 int fnosec, fsec, rc = 0; 2432 int fnosec, fsec, rc = 0;
2433 char *in_save, *in_curr, *in_end; 2433 char *in_save, *in_curr, *in_end;
2434 char *sec_curr, *nosec_save, *nosec; 2434 char *sec_curr, *nosec_save, *nosec;
2435 int open_quote = 0; 2435 int open_quote = 0;
2436 2436
2437 in_curr = orig; 2437 in_curr = orig;
2438 sec_curr = copy; 2438 sec_curr = copy;
2439 2439
2440 nosec = (char *)get_zeroed_page(GFP_KERNEL); 2440 nosec = (char *)get_zeroed_page(GFP_KERNEL);
2441 if (!nosec) { 2441 if (!nosec) {
2442 rc = -ENOMEM; 2442 rc = -ENOMEM;
2443 goto out; 2443 goto out;
2444 } 2444 }
2445 2445
2446 nosec_save = nosec; 2446 nosec_save = nosec;
2447 fnosec = fsec = 1; 2447 fnosec = fsec = 1;
2448 in_save = in_end = orig; 2448 in_save = in_end = orig;
2449 2449
2450 do { 2450 do {
2451 if (*in_end == '"') 2451 if (*in_end == '"')
2452 open_quote = !open_quote; 2452 open_quote = !open_quote;
2453 if ((*in_end == ',' && open_quote == 0) || 2453 if ((*in_end == ',' && open_quote == 0) ||
2454 *in_end == '\0') { 2454 *in_end == '\0') {
2455 int len = in_end - in_curr; 2455 int len = in_end - in_curr;
2456 2456
2457 if (selinux_option(in_curr, len)) 2457 if (selinux_option(in_curr, len))
2458 take_selinux_option(&sec_curr, in_curr, &fsec, len); 2458 take_selinux_option(&sec_curr, in_curr, &fsec, len);
2459 else 2459 else
2460 take_option(&nosec, in_curr, &fnosec, len); 2460 take_option(&nosec, in_curr, &fnosec, len);
2461 2461
2462 in_curr = in_end + 1; 2462 in_curr = in_end + 1;
2463 } 2463 }
2464 } while (*in_end++); 2464 } while (*in_end++);
2465 2465
2466 strcpy(in_save, nosec_save); 2466 strcpy(in_save, nosec_save);
2467 free_page((unsigned long)nosec_save); 2467 free_page((unsigned long)nosec_save);
2468 out: 2468 out:
2469 return rc; 2469 return rc;
2470 } 2470 }
2471 2471
2472 static int selinux_sb_remount(struct super_block *sb, void *data) 2472 static int selinux_sb_remount(struct super_block *sb, void *data)
2473 { 2473 {
2474 int rc, i, *flags; 2474 int rc, i, *flags;
2475 struct security_mnt_opts opts; 2475 struct security_mnt_opts opts;
2476 char *secdata, **mount_options; 2476 char *secdata, **mount_options;
2477 struct superblock_security_struct *sbsec = sb->s_security; 2477 struct superblock_security_struct *sbsec = sb->s_security;
2478 2478
2479 if (!(sbsec->flags & SE_SBINITIALIZED)) 2479 if (!(sbsec->flags & SE_SBINITIALIZED))
2480 return 0; 2480 return 0;
2481 2481
2482 if (!data) 2482 if (!data)
2483 return 0; 2483 return 0;
2484 2484
2485 if (sb->s_type->fs_flags & FS_BINARY_MOUNTDATA) 2485 if (sb->s_type->fs_flags & FS_BINARY_MOUNTDATA)
2486 return 0; 2486 return 0;
2487 2487
2488 security_init_mnt_opts(&opts); 2488 security_init_mnt_opts(&opts);
2489 secdata = alloc_secdata(); 2489 secdata = alloc_secdata();
2490 if (!secdata) 2490 if (!secdata)
2491 return -ENOMEM; 2491 return -ENOMEM;
2492 rc = selinux_sb_copy_data(data, secdata); 2492 rc = selinux_sb_copy_data(data, secdata);
2493 if (rc) 2493 if (rc)
2494 goto out_free_secdata; 2494 goto out_free_secdata;
2495 2495
2496 rc = selinux_parse_opts_str(secdata, &opts); 2496 rc = selinux_parse_opts_str(secdata, &opts);
2497 if (rc) 2497 if (rc)
2498 goto out_free_secdata; 2498 goto out_free_secdata;
2499 2499
2500 mount_options = opts.mnt_opts; 2500 mount_options = opts.mnt_opts;
2501 flags = opts.mnt_opts_flags; 2501 flags = opts.mnt_opts_flags;
2502 2502
2503 for (i = 0; i < opts.num_mnt_opts; i++) { 2503 for (i = 0; i < opts.num_mnt_opts; i++) {
2504 u32 sid; 2504 u32 sid;
2505 size_t len; 2505 size_t len;
2506 2506
2507 if (flags[i] == SBLABEL_MNT) 2507 if (flags[i] == SBLABEL_MNT)
2508 continue; 2508 continue;
2509 len = strlen(mount_options[i]); 2509 len = strlen(mount_options[i]);
2510 rc = security_context_to_sid(mount_options[i], len, &sid, 2510 rc = security_context_to_sid(mount_options[i], len, &sid,
2511 GFP_KERNEL); 2511 GFP_KERNEL);
2512 if (rc) { 2512 if (rc) {
2513 printk(KERN_WARNING "SELinux: security_context_to_sid" 2513 printk(KERN_WARNING "SELinux: security_context_to_sid"
2514 "(%s) failed for (dev %s, type %s) errno=%d\n", 2514 "(%s) failed for (dev %s, type %s) errno=%d\n",
2515 mount_options[i], sb->s_id, sb->s_type->name, rc); 2515 mount_options[i], sb->s_id, sb->s_type->name, rc);
2516 goto out_free_opts; 2516 goto out_free_opts;
2517 } 2517 }
2518 rc = -EINVAL; 2518 rc = -EINVAL;
2519 switch (flags[i]) { 2519 switch (flags[i]) {
2520 case FSCONTEXT_MNT: 2520 case FSCONTEXT_MNT:
2521 if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, sid)) 2521 if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, sid))
2522 goto out_bad_option; 2522 goto out_bad_option;
2523 break; 2523 break;
2524 case CONTEXT_MNT: 2524 case CONTEXT_MNT:
2525 if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, sid)) 2525 if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, sid))
2526 goto out_bad_option; 2526 goto out_bad_option;
2527 break; 2527 break;
2528 case ROOTCONTEXT_MNT: { 2528 case ROOTCONTEXT_MNT: {
2529 struct inode_security_struct *root_isec; 2529 struct inode_security_struct *root_isec;
2530 root_isec = sb->s_root->d_inode->i_security; 2530 root_isec = sb->s_root->d_inode->i_security;
2531 2531
2532 if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, sid)) 2532 if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, sid))
2533 goto out_bad_option; 2533 goto out_bad_option;
2534 break; 2534 break;
2535 } 2535 }
2536 case DEFCONTEXT_MNT: 2536 case DEFCONTEXT_MNT:
2537 if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, sid)) 2537 if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, sid))
2538 goto out_bad_option; 2538 goto out_bad_option;
2539 break; 2539 break;
2540 default: 2540 default:
2541 goto out_free_opts; 2541 goto out_free_opts;
2542 } 2542 }
2543 } 2543 }
2544 2544
2545 rc = 0; 2545 rc = 0;
2546 out_free_opts: 2546 out_free_opts:
2547 security_free_mnt_opts(&opts); 2547 security_free_mnt_opts(&opts);
2548 out_free_secdata: 2548 out_free_secdata:
2549 free_secdata(secdata); 2549 free_secdata(secdata);
2550 return rc; 2550 return rc;
2551 out_bad_option: 2551 out_bad_option:
2552 printk(KERN_WARNING "SELinux: unable to change security options " 2552 printk(KERN_WARNING "SELinux: unable to change security options "
2553 "during remount (dev %s, type=%s)\n", sb->s_id, 2553 "during remount (dev %s, type=%s)\n", sb->s_id,
2554 sb->s_type->name); 2554 sb->s_type->name);
2555 goto out_free_opts; 2555 goto out_free_opts;
2556 } 2556 }
2557 2557
2558 static int selinux_sb_kern_mount(struct super_block *sb, int flags, void *data) 2558 static int selinux_sb_kern_mount(struct super_block *sb, int flags, void *data)
2559 { 2559 {
2560 const struct cred *cred = current_cred(); 2560 const struct cred *cred = current_cred();
2561 struct common_audit_data ad; 2561 struct common_audit_data ad;
2562 int rc; 2562 int rc;
2563 2563
2564 rc = superblock_doinit(sb, data); 2564 rc = superblock_doinit(sb, data);
2565 if (rc) 2565 if (rc)
2566 return rc; 2566 return rc;
2567 2567
2568 /* Allow all mounts performed by the kernel */ 2568 /* Allow all mounts performed by the kernel */
2569 if (flags & MS_KERNMOUNT) 2569 if (flags & MS_KERNMOUNT)
2570 return 0; 2570 return 0;
2571 2571
2572 ad.type = LSM_AUDIT_DATA_DENTRY; 2572 ad.type = LSM_AUDIT_DATA_DENTRY;
2573 ad.u.dentry = sb->s_root; 2573 ad.u.dentry = sb->s_root;
2574 return superblock_has_perm(cred, sb, FILESYSTEM__MOUNT, &ad); 2574 return superblock_has_perm(cred, sb, FILESYSTEM__MOUNT, &ad);
2575 } 2575 }
2576 2576
2577 static int selinux_sb_statfs(struct dentry *dentry) 2577 static int selinux_sb_statfs(struct dentry *dentry)
2578 { 2578 {
2579 const struct cred *cred = current_cred(); 2579 const struct cred *cred = current_cred();
2580 struct common_audit_data ad; 2580 struct common_audit_data ad;
2581 2581
2582 ad.type = LSM_AUDIT_DATA_DENTRY; 2582 ad.type = LSM_AUDIT_DATA_DENTRY;
2583 ad.u.dentry = dentry->d_sb->s_root; 2583 ad.u.dentry = dentry->d_sb->s_root;
2584 return superblock_has_perm(cred, dentry->d_sb, FILESYSTEM__GETATTR, &ad); 2584 return superblock_has_perm(cred, dentry->d_sb, FILESYSTEM__GETATTR, &ad);
2585 } 2585 }
2586 2586
2587 static int selinux_mount(const char *dev_name, 2587 static int selinux_mount(const char *dev_name,
2588 struct path *path, 2588 struct path *path,
2589 const char *type, 2589 const char *type,
2590 unsigned long flags, 2590 unsigned long flags,
2591 void *data) 2591 void *data)
2592 { 2592 {
2593 const struct cred *cred = current_cred(); 2593 const struct cred *cred = current_cred();
2594 2594
2595 if (flags & MS_REMOUNT) 2595 if (flags & MS_REMOUNT)
2596 return superblock_has_perm(cred, path->dentry->d_sb, 2596 return superblock_has_perm(cred, path->dentry->d_sb,
2597 FILESYSTEM__REMOUNT, NULL); 2597 FILESYSTEM__REMOUNT, NULL);
2598 else 2598 else
2599 return path_has_perm(cred, path, FILE__MOUNTON); 2599 return path_has_perm(cred, path, FILE__MOUNTON);
2600 } 2600 }
2601 2601
2602 static int selinux_umount(struct vfsmount *mnt, int flags) 2602 static int selinux_umount(struct vfsmount *mnt, int flags)
2603 { 2603 {
2604 const struct cred *cred = current_cred(); 2604 const struct cred *cred = current_cred();
2605 2605
2606 return superblock_has_perm(cred, mnt->mnt_sb, 2606 return superblock_has_perm(cred, mnt->mnt_sb,
2607 FILESYSTEM__UNMOUNT, NULL); 2607 FILESYSTEM__UNMOUNT, NULL);
2608 } 2608 }
2609 2609
2610 /* inode security operations */ 2610 /* inode security operations */
2611 2611
2612 static int selinux_inode_alloc_security(struct inode *inode) 2612 static int selinux_inode_alloc_security(struct inode *inode)
2613 { 2613 {
2614 return inode_alloc_security(inode); 2614 return inode_alloc_security(inode);
2615 } 2615 }
2616 2616
2617 static void selinux_inode_free_security(struct inode *inode) 2617 static void selinux_inode_free_security(struct inode *inode)
2618 { 2618 {
2619 inode_free_security(inode); 2619 inode_free_security(inode);
2620 } 2620 }
2621 2621
2622 static int selinux_dentry_init_security(struct dentry *dentry, int mode, 2622 static int selinux_dentry_init_security(struct dentry *dentry, int mode,
2623 struct qstr *name, void **ctx, 2623 struct qstr *name, void **ctx,
2624 u32 *ctxlen) 2624 u32 *ctxlen)
2625 { 2625 {
2626 const struct cred *cred = current_cred(); 2626 const struct cred *cred = current_cred();
2627 struct task_security_struct *tsec; 2627 struct task_security_struct *tsec;
2628 struct inode_security_struct *dsec; 2628 struct inode_security_struct *dsec;
2629 struct superblock_security_struct *sbsec; 2629 struct superblock_security_struct *sbsec;
2630 struct inode *dir = dentry->d_parent->d_inode; 2630 struct inode *dir = dentry->d_parent->d_inode;
2631 u32 newsid; 2631 u32 newsid;
2632 int rc; 2632 int rc;
2633 2633
2634 tsec = cred->security; 2634 tsec = cred->security;
2635 dsec = dir->i_security; 2635 dsec = dir->i_security;
2636 sbsec = dir->i_sb->s_security; 2636 sbsec = dir->i_sb->s_security;
2637 2637
2638 if (tsec->create_sid && sbsec->behavior != SECURITY_FS_USE_MNTPOINT) { 2638 if (tsec->create_sid && sbsec->behavior != SECURITY_FS_USE_MNTPOINT) {
2639 newsid = tsec->create_sid; 2639 newsid = tsec->create_sid;
2640 } else { 2640 } else {
2641 rc = security_transition_sid(tsec->sid, dsec->sid, 2641 rc = security_transition_sid(tsec->sid, dsec->sid,
2642 inode_mode_to_security_class(mode), 2642 inode_mode_to_security_class(mode),
2643 name, 2643 name,
2644 &newsid); 2644 &newsid);
2645 if (rc) { 2645 if (rc) {
2646 printk(KERN_WARNING 2646 printk(KERN_WARNING
2647 "%s: security_transition_sid failed, rc=%d\n", 2647 "%s: security_transition_sid failed, rc=%d\n",
2648 __func__, -rc); 2648 __func__, -rc);
2649 return rc; 2649 return rc;
2650 } 2650 }
2651 } 2651 }
2652 2652
2653 return security_sid_to_context(newsid, (char **)ctx, ctxlen); 2653 return security_sid_to_context(newsid, (char **)ctx, ctxlen);
2654 } 2654 }
2655 2655
2656 static int selinux_inode_init_security(struct inode *inode, struct inode *dir, 2656 static int selinux_inode_init_security(struct inode *inode, struct inode *dir,
2657 const struct qstr *qstr, 2657 const struct qstr *qstr,
2658 const char **name, 2658 const char **name,
2659 void **value, size_t *len) 2659 void **value, size_t *len)
2660 { 2660 {
2661 const struct task_security_struct *tsec = current_security(); 2661 const struct task_security_struct *tsec = current_security();
2662 struct inode_security_struct *dsec; 2662 struct inode_security_struct *dsec;
2663 struct superblock_security_struct *sbsec; 2663 struct superblock_security_struct *sbsec;
2664 u32 sid, newsid, clen; 2664 u32 sid, newsid, clen;
2665 int rc; 2665 int rc;
2666 char *context; 2666 char *context;
2667 2667
2668 dsec = dir->i_security; 2668 dsec = dir->i_security;
2669 sbsec = dir->i_sb->s_security; 2669 sbsec = dir->i_sb->s_security;
2670 2670
2671 sid = tsec->sid; 2671 sid = tsec->sid;
2672 newsid = tsec->create_sid; 2672 newsid = tsec->create_sid;
2673 2673
2674 if ((sbsec->flags & SE_SBINITIALIZED) && 2674 if ((sbsec->flags & SE_SBINITIALIZED) &&
2675 (sbsec->behavior == SECURITY_FS_USE_MNTPOINT)) 2675 (sbsec->behavior == SECURITY_FS_USE_MNTPOINT))
2676 newsid = sbsec->mntpoint_sid; 2676 newsid = sbsec->mntpoint_sid;
2677 else if (!newsid || !(sbsec->flags & SBLABEL_MNT)) { 2677 else if (!newsid || !(sbsec->flags & SBLABEL_MNT)) {
2678 rc = security_transition_sid(sid, dsec->sid, 2678 rc = security_transition_sid(sid, dsec->sid,
2679 inode_mode_to_security_class(inode->i_mode), 2679 inode_mode_to_security_class(inode->i_mode),
2680 qstr, &newsid); 2680 qstr, &newsid);
2681 if (rc) { 2681 if (rc) {
2682 printk(KERN_WARNING "%s: " 2682 printk(KERN_WARNING "%s: "
2683 "security_transition_sid failed, rc=%d (dev=%s " 2683 "security_transition_sid failed, rc=%d (dev=%s "
2684 "ino=%ld)\n", 2684 "ino=%ld)\n",
2685 __func__, 2685 __func__,
2686 -rc, inode->i_sb->s_id, inode->i_ino); 2686 -rc, inode->i_sb->s_id, inode->i_ino);
2687 return rc; 2687 return rc;
2688 } 2688 }
2689 } 2689 }
2690 2690
2691 /* Possibly defer initialization to selinux_complete_init. */ 2691 /* Possibly defer initialization to selinux_complete_init. */
2692 if (sbsec->flags & SE_SBINITIALIZED) { 2692 if (sbsec->flags & SE_SBINITIALIZED) {
2693 struct inode_security_struct *isec = inode->i_security; 2693 struct inode_security_struct *isec = inode->i_security;
2694 isec->sclass = inode_mode_to_security_class(inode->i_mode); 2694 isec->sclass = inode_mode_to_security_class(inode->i_mode);
2695 isec->sid = newsid; 2695 isec->sid = newsid;
2696 isec->initialized = 1; 2696 isec->initialized = 1;
2697 } 2697 }
2698 2698
2699 if (!ss_initialized || !(sbsec->flags & SBLABEL_MNT)) 2699 if (!ss_initialized || !(sbsec->flags & SBLABEL_MNT))
2700 return -EOPNOTSUPP; 2700 return -EOPNOTSUPP;
2701 2701
2702 if (name) 2702 if (name)
2703 *name = XATTR_SELINUX_SUFFIX; 2703 *name = XATTR_SELINUX_SUFFIX;
2704 2704
2705 if (value && len) { 2705 if (value && len) {
2706 rc = security_sid_to_context_force(newsid, &context, &clen); 2706 rc = security_sid_to_context_force(newsid, &context, &clen);
2707 if (rc) 2707 if (rc)
2708 return rc; 2708 return rc;
2709 *value = context; 2709 *value = context;
2710 *len = clen; 2710 *len = clen;
2711 } 2711 }
2712 2712
2713 return 0; 2713 return 0;
2714 } 2714 }
2715 2715
2716 static int selinux_inode_create(struct inode *dir, struct dentry *dentry, umode_t mode) 2716 static int selinux_inode_create(struct inode *dir, struct dentry *dentry, umode_t mode)
2717 { 2717 {
2718 return may_create(dir, dentry, SECCLASS_FILE); 2718 return may_create(dir, dentry, SECCLASS_FILE);
2719 } 2719 }
2720 2720
2721 static int selinux_inode_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) 2721 static int selinux_inode_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
2722 { 2722 {
2723 return may_link(dir, old_dentry, MAY_LINK); 2723 return may_link(dir, old_dentry, MAY_LINK);
2724 } 2724 }
2725 2725
2726 static int selinux_inode_unlink(struct inode *dir, struct dentry *dentry) 2726 static int selinux_inode_unlink(struct inode *dir, struct dentry *dentry)
2727 { 2727 {
2728 return may_link(dir, dentry, MAY_UNLINK); 2728 return may_link(dir, dentry, MAY_UNLINK);
2729 } 2729 }
2730 2730
2731 static int selinux_inode_symlink(struct inode *dir, struct dentry *dentry, const char *name) 2731 static int selinux_inode_symlink(struct inode *dir, struct dentry *dentry, const char *name)
2732 { 2732 {
2733 return may_create(dir, dentry, SECCLASS_LNK_FILE); 2733 return may_create(dir, dentry, SECCLASS_LNK_FILE);
2734 } 2734 }
2735 2735
2736 static int selinux_inode_mkdir(struct inode *dir, struct dentry *dentry, umode_t mask) 2736 static int selinux_inode_mkdir(struct inode *dir, struct dentry *dentry, umode_t mask)
2737 { 2737 {
2738 return may_create(dir, dentry, SECCLASS_DIR); 2738 return may_create(dir, dentry, SECCLASS_DIR);
2739 } 2739 }
2740 2740
2741 static int selinux_inode_rmdir(struct inode *dir, struct dentry *dentry) 2741 static int selinux_inode_rmdir(struct inode *dir, struct dentry *dentry)
2742 { 2742 {
2743 return may_link(dir, dentry, MAY_RMDIR); 2743 return may_link(dir, dentry, MAY_RMDIR);
2744 } 2744 }
2745 2745
2746 static int selinux_inode_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) 2746 static int selinux_inode_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
2747 { 2747 {
2748 return may_create(dir, dentry, inode_mode_to_security_class(mode)); 2748 return may_create(dir, dentry, inode_mode_to_security_class(mode));
2749 } 2749 }
2750 2750
2751 static int selinux_inode_rename(struct inode *old_inode, struct dentry *old_dentry, 2751 static int selinux_inode_rename(struct inode *old_inode, struct dentry *old_dentry,
2752 struct inode *new_inode, struct dentry *new_dentry) 2752 struct inode *new_inode, struct dentry *new_dentry)
2753 { 2753 {
2754 return may_rename(old_inode, old_dentry, new_inode, new_dentry); 2754 return may_rename(old_inode, old_dentry, new_inode, new_dentry);
2755 } 2755 }
2756 2756
2757 static int selinux_inode_readlink(struct dentry *dentry) 2757 static int selinux_inode_readlink(struct dentry *dentry)
2758 { 2758 {
2759 const struct cred *cred = current_cred(); 2759 const struct cred *cred = current_cred();
2760 2760
2761 return dentry_has_perm(cred, dentry, FILE__READ); 2761 return dentry_has_perm(cred, dentry, FILE__READ);
2762 } 2762 }
2763 2763
2764 static int selinux_inode_follow_link(struct dentry *dentry, struct nameidata *nameidata) 2764 static int selinux_inode_follow_link(struct dentry *dentry, struct nameidata *nameidata)
2765 { 2765 {
2766 const struct cred *cred = current_cred(); 2766 const struct cred *cred = current_cred();
2767 2767
2768 return dentry_has_perm(cred, dentry, FILE__READ); 2768 return dentry_has_perm(cred, dentry, FILE__READ);
2769 } 2769 }
2770 2770
2771 static noinline int audit_inode_permission(struct inode *inode, 2771 static noinline int audit_inode_permission(struct inode *inode,
2772 u32 perms, u32 audited, u32 denied, 2772 u32 perms, u32 audited, u32 denied,
2773 unsigned flags) 2773 unsigned flags)
2774 { 2774 {
2775 struct common_audit_data ad; 2775 struct common_audit_data ad;
2776 struct inode_security_struct *isec = inode->i_security; 2776 struct inode_security_struct *isec = inode->i_security;
2777 int rc; 2777 int rc;
2778 2778
2779 ad.type = LSM_AUDIT_DATA_INODE; 2779 ad.type = LSM_AUDIT_DATA_INODE;
2780 ad.u.inode = inode; 2780 ad.u.inode = inode;
2781 2781
2782 rc = slow_avc_audit(current_sid(), isec->sid, isec->sclass, perms, 2782 rc = slow_avc_audit(current_sid(), isec->sid, isec->sclass, perms,
2783 audited, denied, &ad, flags); 2783 audited, denied, &ad, flags);
2784 if (rc) 2784 if (rc)
2785 return rc; 2785 return rc;
2786 return 0; 2786 return 0;
2787 } 2787 }
2788 2788
2789 static int selinux_inode_permission(struct inode *inode, int mask) 2789 static int selinux_inode_permission(struct inode *inode, int mask)
2790 { 2790 {
2791 const struct cred *cred = current_cred(); 2791 const struct cred *cred = current_cred();
2792 u32 perms; 2792 u32 perms;
2793 bool from_access; 2793 bool from_access;
2794 unsigned flags = mask & MAY_NOT_BLOCK; 2794 unsigned flags = mask & MAY_NOT_BLOCK;
2795 struct inode_security_struct *isec; 2795 struct inode_security_struct *isec;
2796 u32 sid; 2796 u32 sid;
2797 struct av_decision avd; 2797 struct av_decision avd;
2798 int rc, rc2; 2798 int rc, rc2;
2799 u32 audited, denied; 2799 u32 audited, denied;
2800 2800
2801 from_access = mask & MAY_ACCESS; 2801 from_access = mask & MAY_ACCESS;
2802 mask &= (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND); 2802 mask &= (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND);
2803 2803
2804 /* No permission to check. Existence test. */ 2804 /* No permission to check. Existence test. */
2805 if (!mask) 2805 if (!mask)
2806 return 0; 2806 return 0;
2807 2807
2808 validate_creds(cred); 2808 validate_creds(cred);
2809 2809
2810 if (unlikely(IS_PRIVATE(inode))) 2810 if (unlikely(IS_PRIVATE(inode)))
2811 return 0; 2811 return 0;
2812 2812
2813 perms = file_mask_to_av(inode->i_mode, mask); 2813 perms = file_mask_to_av(inode->i_mode, mask);
2814 2814
2815 sid = cred_sid(cred); 2815 sid = cred_sid(cred);
2816 isec = inode->i_security; 2816 isec = inode->i_security;
2817 2817
2818 rc = avc_has_perm_noaudit(sid, isec->sid, isec->sclass, perms, 0, &avd); 2818 rc = avc_has_perm_noaudit(sid, isec->sid, isec->sclass, perms, 0, &avd);
2819 audited = avc_audit_required(perms, &avd, rc, 2819 audited = avc_audit_required(perms, &avd, rc,
2820 from_access ? FILE__AUDIT_ACCESS : 0, 2820 from_access ? FILE__AUDIT_ACCESS : 0,
2821 &denied); 2821 &denied);
2822 if (likely(!audited)) 2822 if (likely(!audited))
2823 return rc; 2823 return rc;
2824 2824
2825 rc2 = audit_inode_permission(inode, perms, audited, denied, flags); 2825 rc2 = audit_inode_permission(inode, perms, audited, denied, flags);
2826 if (rc2) 2826 if (rc2)
2827 return rc2; 2827 return rc2;
2828 return rc; 2828 return rc;
2829 } 2829 }
2830 2830
2831 static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr) 2831 static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr)
2832 { 2832 {
2833 const struct cred *cred = current_cred(); 2833 const struct cred *cred = current_cred();
2834 unsigned int ia_valid = iattr->ia_valid; 2834 unsigned int ia_valid = iattr->ia_valid;
2835 __u32 av = FILE__WRITE; 2835 __u32 av = FILE__WRITE;
2836 2836
2837 /* ATTR_FORCE is just used for ATTR_KILL_S[UG]ID. */ 2837 /* ATTR_FORCE is just used for ATTR_KILL_S[UG]ID. */
2838 if (ia_valid & ATTR_FORCE) { 2838 if (ia_valid & ATTR_FORCE) {
2839 ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_MODE | 2839 ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_MODE |
2840 ATTR_FORCE); 2840 ATTR_FORCE);
2841 if (!ia_valid) 2841 if (!ia_valid)
2842 return 0; 2842 return 0;
2843 } 2843 }
2844 2844
2845 if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | 2845 if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID |
2846 ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_TIMES_SET)) 2846 ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_TIMES_SET))
2847 return dentry_has_perm(cred, dentry, FILE__SETATTR); 2847 return dentry_has_perm(cred, dentry, FILE__SETATTR);
2848 2848
2849 if (selinux_policycap_openperm && (ia_valid & ATTR_SIZE)) 2849 if (selinux_policycap_openperm && (ia_valid & ATTR_SIZE))
2850 av |= FILE__OPEN; 2850 av |= FILE__OPEN;
2851 2851
2852 return dentry_has_perm(cred, dentry, av); 2852 return dentry_has_perm(cred, dentry, av);
2853 } 2853 }
2854 2854
2855 static int selinux_inode_getattr(struct vfsmount *mnt, struct dentry *dentry) 2855 static int selinux_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
2856 { 2856 {
2857 const struct cred *cred = current_cred(); 2857 const struct cred *cred = current_cred();
2858 struct path path; 2858 struct path path;
2859 2859
2860 path.dentry = dentry; 2860 path.dentry = dentry;
2861 path.mnt = mnt; 2861 path.mnt = mnt;
2862 2862
2863 return path_has_perm(cred, &path, FILE__GETATTR); 2863 return path_has_perm(cred, &path, FILE__GETATTR);
2864 } 2864 }
2865 2865
2866 static int selinux_inode_setotherxattr(struct dentry *dentry, const char *name) 2866 static int selinux_inode_setotherxattr(struct dentry *dentry, const char *name)
2867 { 2867 {
2868 const struct cred *cred = current_cred(); 2868 const struct cred *cred = current_cred();
2869 2869
2870 if (!strncmp(name, XATTR_SECURITY_PREFIX, 2870 if (!strncmp(name, XATTR_SECURITY_PREFIX,
2871 sizeof XATTR_SECURITY_PREFIX - 1)) { 2871 sizeof XATTR_SECURITY_PREFIX - 1)) {
2872 if (!strcmp(name, XATTR_NAME_CAPS)) { 2872 if (!strcmp(name, XATTR_NAME_CAPS)) {
2873 if (!capable(CAP_SETFCAP)) 2873 if (!capable(CAP_SETFCAP))
2874 return -EPERM; 2874 return -EPERM;
2875 } else if (!capable(CAP_SYS_ADMIN)) { 2875 } else if (!capable(CAP_SYS_ADMIN)) {
2876 /* A different attribute in the security namespace. 2876 /* A different attribute in the security namespace.
2877 Restrict to administrator. */ 2877 Restrict to administrator. */
2878 return -EPERM; 2878 return -EPERM;
2879 } 2879 }
2880 } 2880 }
2881 2881
2882 /* Not an attribute we recognize, so just check the 2882 /* Not an attribute we recognize, so just check the
2883 ordinary setattr permission. */ 2883 ordinary setattr permission. */
2884 return dentry_has_perm(cred, dentry, FILE__SETATTR); 2884 return dentry_has_perm(cred, dentry, FILE__SETATTR);
2885 } 2885 }
2886 2886
2887 static int selinux_inode_setxattr(struct dentry *dentry, const char *name, 2887 static int selinux_inode_setxattr(struct dentry *dentry, const char *name,
2888 const void *value, size_t size, int flags) 2888 const void *value, size_t size, int flags)
2889 { 2889 {
2890 struct inode *inode = dentry->d_inode; 2890 struct inode *inode = dentry->d_inode;
2891 struct inode_security_struct *isec = inode->i_security; 2891 struct inode_security_struct *isec = inode->i_security;
2892 struct superblock_security_struct *sbsec; 2892 struct superblock_security_struct *sbsec;
2893 struct common_audit_data ad; 2893 struct common_audit_data ad;
2894 u32 newsid, sid = current_sid(); 2894 u32 newsid, sid = current_sid();
2895 int rc = 0; 2895 int rc = 0;
2896 2896
2897 if (strcmp(name, XATTR_NAME_SELINUX)) 2897 if (strcmp(name, XATTR_NAME_SELINUX))
2898 return selinux_inode_setotherxattr(dentry, name); 2898 return selinux_inode_setotherxattr(dentry, name);
2899 2899
2900 sbsec = inode->i_sb->s_security; 2900 sbsec = inode->i_sb->s_security;
2901 if (!(sbsec->flags & SBLABEL_MNT)) 2901 if (!(sbsec->flags & SBLABEL_MNT))
2902 return -EOPNOTSUPP; 2902 return -EOPNOTSUPP;
2903 2903
2904 if (!inode_owner_or_capable(inode)) 2904 if (!inode_owner_or_capable(inode))
2905 return -EPERM; 2905 return -EPERM;
2906 2906
2907 ad.type = LSM_AUDIT_DATA_DENTRY; 2907 ad.type = LSM_AUDIT_DATA_DENTRY;
2908 ad.u.dentry = dentry; 2908 ad.u.dentry = dentry;
2909 2909
2910 rc = avc_has_perm(sid, isec->sid, isec->sclass, 2910 rc = avc_has_perm(sid, isec->sid, isec->sclass,
2911 FILE__RELABELFROM, &ad); 2911 FILE__RELABELFROM, &ad);
2912 if (rc) 2912 if (rc)
2913 return rc; 2913 return rc;
2914 2914
2915 rc = security_context_to_sid(value, size, &newsid, GFP_KERNEL); 2915 rc = security_context_to_sid(value, size, &newsid, GFP_KERNEL);
2916 if (rc == -EINVAL) { 2916 if (rc == -EINVAL) {
2917 if (!capable(CAP_MAC_ADMIN)) { 2917 if (!capable(CAP_MAC_ADMIN)) {
2918 struct audit_buffer *ab; 2918 struct audit_buffer *ab;
2919 size_t audit_size; 2919 size_t audit_size;
2920 const char *str; 2920 const char *str;
2921 2921
2922 /* We strip a nul only if it is at the end, otherwise the 2922 /* We strip a nul only if it is at the end, otherwise the
2923 * context contains a nul and we should audit that */ 2923 * context contains a nul and we should audit that */
2924 if (value) { 2924 if (value) {
2925 str = value; 2925 str = value;
2926 if (str[size - 1] == '\0') 2926 if (str[size - 1] == '\0')
2927 audit_size = size - 1; 2927 audit_size = size - 1;
2928 else 2928 else
2929 audit_size = size; 2929 audit_size = size;
2930 } else { 2930 } else {
2931 str = ""; 2931 str = "";
2932 audit_size = 0; 2932 audit_size = 0;
2933 } 2933 }
2934 ab = audit_log_start(current->audit_context, GFP_ATOMIC, AUDIT_SELINUX_ERR); 2934 ab = audit_log_start(current->audit_context, GFP_ATOMIC, AUDIT_SELINUX_ERR);
2935 audit_log_format(ab, "op=setxattr invalid_context="); 2935 audit_log_format(ab, "op=setxattr invalid_context=");
2936 audit_log_n_untrustedstring(ab, value, audit_size); 2936 audit_log_n_untrustedstring(ab, value, audit_size);
2937 audit_log_end(ab); 2937 audit_log_end(ab);
2938 2938
2939 return rc; 2939 return rc;
2940 } 2940 }
2941 rc = security_context_to_sid_force(value, size, &newsid); 2941 rc = security_context_to_sid_force(value, size, &newsid);
2942 } 2942 }
2943 if (rc) 2943 if (rc)
2944 return rc; 2944 return rc;
2945 2945
2946 rc = avc_has_perm(sid, newsid, isec->sclass, 2946 rc = avc_has_perm(sid, newsid, isec->sclass,
2947 FILE__RELABELTO, &ad); 2947 FILE__RELABELTO, &ad);
2948 if (rc) 2948 if (rc)
2949 return rc; 2949 return rc;
2950 2950
2951 rc = security_validate_transition(isec->sid, newsid, sid, 2951 rc = security_validate_transition(isec->sid, newsid, sid,
2952 isec->sclass); 2952 isec->sclass);
2953 if (rc) 2953 if (rc)
2954 return rc; 2954 return rc;
2955 2955
2956 return avc_has_perm(newsid, 2956 return avc_has_perm(newsid,
2957 sbsec->sid, 2957 sbsec->sid,
2958 SECCLASS_FILESYSTEM, 2958 SECCLASS_FILESYSTEM,
2959 FILESYSTEM__ASSOCIATE, 2959 FILESYSTEM__ASSOCIATE,
2960 &ad); 2960 &ad);
2961 } 2961 }
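The relabelfrom/relabelto/validate_transition/associate checks above are driven from the ordinary setxattr(2) path whenever the attribute name is "security.selinux". A minimal userspace sketch of what reaches this hook (illustrative path and context string, not part of this commit; whether the relabel succeeds depends entirely on the loaded policy):

	/* sketch: relabel a file by writing the security.selinux xattr */
	#include <sys/xattr.h>
	#include <string.h>
	#include <stdio.h>

	int relabel(const char *path, const char *context)
	{
		/* writing the trailing NUL is the common convention; the hook
		 * strips it before mapping the context to a SID */
		if (setxattr(path, "security.selinux", context,
			     strlen(context) + 1, 0) < 0) {
			perror("setxattr(security.selinux)");
			return -1;
		}
		return 0;
	}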
2962 2962
2963 static void selinux_inode_post_setxattr(struct dentry *dentry, const char *name, 2963 static void selinux_inode_post_setxattr(struct dentry *dentry, const char *name,
2964 const void *value, size_t size, 2964 const void *value, size_t size,
2965 int flags) 2965 int flags)
2966 { 2966 {
2967 struct inode *inode = dentry->d_inode; 2967 struct inode *inode = dentry->d_inode;
2968 struct inode_security_struct *isec = inode->i_security; 2968 struct inode_security_struct *isec = inode->i_security;
2969 u32 newsid; 2969 u32 newsid;
2970 int rc; 2970 int rc;
2971 2971
2972 if (strcmp(name, XATTR_NAME_SELINUX)) { 2972 if (strcmp(name, XATTR_NAME_SELINUX)) {
2973 /* Not an attribute we recognize, so nothing to do. */ 2973 /* Not an attribute we recognize, so nothing to do. */
2974 return; 2974 return;
2975 } 2975 }
2976 2976
2977 rc = security_context_to_sid_force(value, size, &newsid); 2977 rc = security_context_to_sid_force(value, size, &newsid);
2978 if (rc) { 2978 if (rc) {
2979 printk(KERN_ERR "SELinux: unable to map context to SID" 2979 printk(KERN_ERR "SELinux: unable to map context to SID"
2980 "for (%s, %lu), rc=%d\n", 2980 "for (%s, %lu), rc=%d\n",
2981 inode->i_sb->s_id, inode->i_ino, -rc); 2981 inode->i_sb->s_id, inode->i_ino, -rc);
2982 return; 2982 return;
2983 } 2983 }
2984 2984
2985 isec->sclass = inode_mode_to_security_class(inode->i_mode); 2985 isec->sclass = inode_mode_to_security_class(inode->i_mode);
2986 isec->sid = newsid; 2986 isec->sid = newsid;
2987 isec->initialized = 1; 2987 isec->initialized = 1;
2988 2988
2989 return; 2989 return;
2990 } 2990 }
2991 2991
2992 static int selinux_inode_getxattr(struct dentry *dentry, const char *name) 2992 static int selinux_inode_getxattr(struct dentry *dentry, const char *name)
2993 { 2993 {
2994 const struct cred *cred = current_cred(); 2994 const struct cred *cred = current_cred();
2995 2995
2996 return dentry_has_perm(cred, dentry, FILE__GETATTR); 2996 return dentry_has_perm(cred, dentry, FILE__GETATTR);
2997 } 2997 }
2998 2998
2999 static int selinux_inode_listxattr(struct dentry *dentry) 2999 static int selinux_inode_listxattr(struct dentry *dentry)
3000 { 3000 {
3001 const struct cred *cred = current_cred(); 3001 const struct cred *cred = current_cred();
3002 3002
3003 return dentry_has_perm(cred, dentry, FILE__GETATTR); 3003 return dentry_has_perm(cred, dentry, FILE__GETATTR);
3004 } 3004 }
3005 3005
3006 static int selinux_inode_removexattr(struct dentry *dentry, const char *name) 3006 static int selinux_inode_removexattr(struct dentry *dentry, const char *name)
3007 { 3007 {
3008 if (strcmp(name, XATTR_NAME_SELINUX)) 3008 if (strcmp(name, XATTR_NAME_SELINUX))
3009 return selinux_inode_setotherxattr(dentry, name); 3009 return selinux_inode_setotherxattr(dentry, name);
3010 3010
3011 /* No one is allowed to remove a SELinux security label. 3011 /* No one is allowed to remove a SELinux security label.
3012 You can change the label, but all data must be labeled. */ 3012 You can change the label, but all data must be labeled. */
3013 return -EACCES; 3013 return -EACCES;
3014 } 3014 }
3015 3015
3016 /* 3016 /*
3017 * Copy the inode security context value to the user. 3017 * Copy the inode security context value to the user.
3018 * 3018 *
3019 * Permission check is handled by selinux_inode_getxattr hook. 3019 * Permission check is handled by selinux_inode_getxattr hook.
3020 */ 3020 */
3021 static int selinux_inode_getsecurity(const struct inode *inode, const char *name, void **buffer, bool alloc) 3021 static int selinux_inode_getsecurity(const struct inode *inode, const char *name, void **buffer, bool alloc)
3022 { 3022 {
3023 u32 size; 3023 u32 size;
3024 int error; 3024 int error;
3025 char *context = NULL; 3025 char *context = NULL;
3026 struct inode_security_struct *isec = inode->i_security; 3026 struct inode_security_struct *isec = inode->i_security;
3027 3027
3028 if (strcmp(name, XATTR_SELINUX_SUFFIX)) 3028 if (strcmp(name, XATTR_SELINUX_SUFFIX))
3029 return -EOPNOTSUPP; 3029 return -EOPNOTSUPP;
3030 3030
3031 /* 3031 /*
3032 * If the caller has CAP_MAC_ADMIN, then get the raw context 3032 * If the caller has CAP_MAC_ADMIN, then get the raw context
3033 * value even if it is not defined by current policy; otherwise, 3033 * value even if it is not defined by current policy; otherwise,
3034 * use the in-core value under current policy. 3034 * use the in-core value under current policy.
3035 * Use the non-auditing forms of the permission checks since 3035 * Use the non-auditing forms of the permission checks since
3036 * getxattr may be called by unprivileged processes commonly 3036 * getxattr may be called by unprivileged processes commonly
3037 * and lack of permission just means that we fall back to the 3037 * and lack of permission just means that we fall back to the
3038 * in-core context value, not a denial. 3038 * in-core context value, not a denial.
3039 */ 3039 */
3040 error = selinux_capable(current_cred(), &init_user_ns, CAP_MAC_ADMIN, 3040 error = selinux_capable(current_cred(), &init_user_ns, CAP_MAC_ADMIN,
3041 SECURITY_CAP_NOAUDIT); 3041 SECURITY_CAP_NOAUDIT);
3042 if (!error) 3042 if (!error)
3043 error = security_sid_to_context_force(isec->sid, &context, 3043 error = security_sid_to_context_force(isec->sid, &context,
3044 &size); 3044 &size);
3045 else 3045 else
3046 error = security_sid_to_context(isec->sid, &context, &size); 3046 error = security_sid_to_context(isec->sid, &context, &size);
3047 if (error) 3047 if (error)
3048 return error; 3048 return error;
3049 error = size; 3049 error = size;
3050 if (alloc) { 3050 if (alloc) {
3051 *buffer = context; 3051 *buffer = context;
3052 goto out_nofree; 3052 goto out_nofree;
3053 } 3053 }
3054 kfree(context); 3054 kfree(context);
3055 out_nofree: 3055 out_nofree:
3056 return error; 3056 return error;
3057 } 3057 }
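The read side of the same attribute goes through getxattr(2); the comment above is why an unprivileged caller gets the in-core, policy-mapped context back rather than a denial. A hedged sketch of that consumer (buffer size is an arbitrary choice):

	/* sketch: read a file's SELinux context via the xattr interface */
	#include <sys/xattr.h>
	#include <stdio.h>

	void print_label(const char *path)
	{
		char ctx[256];
		ssize_t n = getxattr(path, "security.selinux", ctx, sizeof(ctx) - 1);
		if (n < 0) {
			perror("getxattr(security.selinux)");
			return;
		}
		ctx[n] = '\0';	/* value may or may not already end in a NUL */
		printf("%s: %s\n", path, ctx);
	}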
3058 3058
3059 static int selinux_inode_setsecurity(struct inode *inode, const char *name, 3059 static int selinux_inode_setsecurity(struct inode *inode, const char *name,
3060 const void *value, size_t size, int flags) 3060 const void *value, size_t size, int flags)
3061 { 3061 {
3062 struct inode_security_struct *isec = inode->i_security; 3062 struct inode_security_struct *isec = inode->i_security;
3063 u32 newsid; 3063 u32 newsid;
3064 int rc; 3064 int rc;
3065 3065
3066 if (strcmp(name, XATTR_SELINUX_SUFFIX)) 3066 if (strcmp(name, XATTR_SELINUX_SUFFIX))
3067 return -EOPNOTSUPP; 3067 return -EOPNOTSUPP;
3068 3068
3069 if (!value || !size) 3069 if (!value || !size)
3070 return -EACCES; 3070 return -EACCES;
3071 3071
3072 rc = security_context_to_sid((void *)value, size, &newsid, GFP_KERNEL); 3072 rc = security_context_to_sid((void *)value, size, &newsid, GFP_KERNEL);
3073 if (rc) 3073 if (rc)
3074 return rc; 3074 return rc;
3075 3075
3076 isec->sclass = inode_mode_to_security_class(inode->i_mode); 3076 isec->sclass = inode_mode_to_security_class(inode->i_mode);
3077 isec->sid = newsid; 3077 isec->sid = newsid;
3078 isec->initialized = 1; 3078 isec->initialized = 1;
3079 return 0; 3079 return 0;
3080 } 3080 }
3081 3081
3082 static int selinux_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer_size) 3082 static int selinux_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer_size)
3083 { 3083 {
3084 const int len = sizeof(XATTR_NAME_SELINUX); 3084 const int len = sizeof(XATTR_NAME_SELINUX);
3085 if (buffer && len <= buffer_size) 3085 if (buffer && len <= buffer_size)
3086 memcpy(buffer, XATTR_NAME_SELINUX, len); 3086 memcpy(buffer, XATTR_NAME_SELINUX, len);
3087 return len; 3087 return len;
3088 } 3088 }
3089 3089
3090 static void selinux_inode_getsecid(const struct inode *inode, u32 *secid) 3090 static void selinux_inode_getsecid(const struct inode *inode, u32 *secid)
3091 { 3091 {
3092 struct inode_security_struct *isec = inode->i_security; 3092 struct inode_security_struct *isec = inode->i_security;
3093 *secid = isec->sid; 3093 *secid = isec->sid;
3094 } 3094 }
3095 3095
3096 /* file security operations */ 3096 /* file security operations */
3097 3097
3098 static int selinux_revalidate_file_permission(struct file *file, int mask) 3098 static int selinux_revalidate_file_permission(struct file *file, int mask)
3099 { 3099 {
3100 const struct cred *cred = current_cred(); 3100 const struct cred *cred = current_cred();
3101 struct inode *inode = file_inode(file); 3101 struct inode *inode = file_inode(file);
3102 3102
3103 /* file_mask_to_av won't add FILE__WRITE if MAY_APPEND is set */ 3103 /* file_mask_to_av won't add FILE__WRITE if MAY_APPEND is set */
3104 if ((file->f_flags & O_APPEND) && (mask & MAY_WRITE)) 3104 if ((file->f_flags & O_APPEND) && (mask & MAY_WRITE))
3105 mask |= MAY_APPEND; 3105 mask |= MAY_APPEND;
3106 3106
3107 return file_has_perm(cred, file, 3107 return file_has_perm(cred, file,
3108 file_mask_to_av(inode->i_mode, mask)); 3108 file_mask_to_av(inode->i_mode, mask));
3109 } 3109 }
3110 3110
3111 static int selinux_file_permission(struct file *file, int mask) 3111 static int selinux_file_permission(struct file *file, int mask)
3112 { 3112 {
3113 struct inode *inode = file_inode(file); 3113 struct inode *inode = file_inode(file);
3114 struct file_security_struct *fsec = file->f_security; 3114 struct file_security_struct *fsec = file->f_security;
3115 struct inode_security_struct *isec = inode->i_security; 3115 struct inode_security_struct *isec = inode->i_security;
3116 u32 sid = current_sid(); 3116 u32 sid = current_sid();
3117 3117
3118 if (!mask) 3118 if (!mask)
3119 /* No permission to check. Existence test. */ 3119 /* No permission to check. Existence test. */
3120 return 0; 3120 return 0;
3121 3121
3122 if (sid == fsec->sid && fsec->isid == isec->sid && 3122 if (sid == fsec->sid && fsec->isid == isec->sid &&
3123 fsec->pseqno == avc_policy_seqno()) 3123 fsec->pseqno == avc_policy_seqno())
3124 /* No change since file_open check. */ 3124 /* No change since file_open check. */
3125 return 0; 3125 return 0;
3126 3126
3127 return selinux_revalidate_file_permission(file, mask); 3127 return selinux_revalidate_file_permission(file, mask);
3128 } 3128 }
3129 3129
3130 static int selinux_file_alloc_security(struct file *file) 3130 static int selinux_file_alloc_security(struct file *file)
3131 { 3131 {
3132 return file_alloc_security(file); 3132 return file_alloc_security(file);
3133 } 3133 }
3134 3134
3135 static void selinux_file_free_security(struct file *file) 3135 static void selinux_file_free_security(struct file *file)
3136 { 3136 {
3137 file_free_security(file); 3137 file_free_security(file);
3138 } 3138 }
3139 3139
3140 static int selinux_file_ioctl(struct file *file, unsigned int cmd, 3140 static int selinux_file_ioctl(struct file *file, unsigned int cmd,
3141 unsigned long arg) 3141 unsigned long arg)
3142 { 3142 {
3143 const struct cred *cred = current_cred(); 3143 const struct cred *cred = current_cred();
3144 int error = 0; 3144 int error = 0;
3145 3145
3146 switch (cmd) { 3146 switch (cmd) {
3147 case FIONREAD: 3147 case FIONREAD:
3148 /* fall through */ 3148 /* fall through */
3149 case FIBMAP: 3149 case FIBMAP:
3150 /* fall through */ 3150 /* fall through */
3151 case FIGETBSZ: 3151 case FIGETBSZ:
3152 /* fall through */ 3152 /* fall through */
3153 case FS_IOC_GETFLAGS: 3153 case FS_IOC_GETFLAGS:
3154 /* fall through */ 3154 /* fall through */
3155 case FS_IOC_GETVERSION: 3155 case FS_IOC_GETVERSION:
3156 error = file_has_perm(cred, file, FILE__GETATTR); 3156 error = file_has_perm(cred, file, FILE__GETATTR);
3157 break; 3157 break;
3158 3158
3159 case FS_IOC_SETFLAGS: 3159 case FS_IOC_SETFLAGS:
3160 /* fall through */ 3160 /* fall through */
3161 case FS_IOC_SETVERSION: 3161 case FS_IOC_SETVERSION:
3162 error = file_has_perm(cred, file, FILE__SETATTR); 3162 error = file_has_perm(cred, file, FILE__SETATTR);
3163 break; 3163 break;
3164 3164
3165 /* sys_ioctl() checks */ 3165 /* sys_ioctl() checks */
3166 case FIONBIO: 3166 case FIONBIO:
3167 /* fall through */ 3167 /* fall through */
3168 case FIOASYNC: 3168 case FIOASYNC:
3169 error = file_has_perm(cred, file, 0); 3169 error = file_has_perm(cred, file, 0);
3170 break; 3170 break;
3171 3171
3172 case KDSKBENT: 3172 case KDSKBENT:
3173 case KDSKBSENT: 3173 case KDSKBSENT:
3174 error = cred_has_capability(cred, CAP_SYS_TTY_CONFIG, 3174 error = cred_has_capability(cred, CAP_SYS_TTY_CONFIG,
3175 SECURITY_CAP_AUDIT); 3175 SECURITY_CAP_AUDIT);
3176 break; 3176 break;
3177 3177
3178 /* default case assumes that the command will go 3178 /* default case assumes that the command will go
3179 * to the file's ioctl() function. 3179 * to the file's ioctl() function.
3180 */ 3180 */
3181 default: 3181 default:
3182 error = file_has_perm(cred, file, FILE__IOCTL); 3182 error = file_has_perm(cred, file, FILE__IOCTL);
3183 } 3183 }
3184 return error; 3184 return error;
3185 } 3185 }
3186 3186
3187 static int default_noexec; 3187 static int default_noexec;
3188 3188
3189 static int file_map_prot_check(struct file *file, unsigned long prot, int shared) 3189 static int file_map_prot_check(struct file *file, unsigned long prot, int shared)
3190 { 3190 {
3191 const struct cred *cred = current_cred(); 3191 const struct cred *cred = current_cred();
3192 int rc = 0; 3192 int rc = 0;
3193 3193
3194 if (default_noexec && 3194 if (default_noexec &&
3195 (prot & PROT_EXEC) && (!file || (!shared && (prot & PROT_WRITE)))) { 3195 (prot & PROT_EXEC) && (!file || (!shared && (prot & PROT_WRITE)))) {
3196 /* 3196 /*
3197 * We are making executable an anonymous mapping or a 3197 * We are making executable an anonymous mapping or a
3198 * private file mapping that will also be writable. 3198 * private file mapping that will also be writable.
3199 * This has an additional check. 3199 * This has an additional check.
3200 */ 3200 */
3201 rc = cred_has_perm(cred, cred, PROCESS__EXECMEM); 3201 rc = cred_has_perm(cred, cred, PROCESS__EXECMEM);
3202 if (rc) 3202 if (rc)
3203 goto error; 3203 goto error;
3204 } 3204 }
3205 3205
3206 if (file) { 3206 if (file) {
3207 /* read access is always possible with a mapping */ 3207 /* read access is always possible with a mapping */
3208 u32 av = FILE__READ; 3208 u32 av = FILE__READ;
3209 3209
3210 /* write access only matters if the mapping is shared */ 3210 /* write access only matters if the mapping is shared */
3211 if (shared && (prot & PROT_WRITE)) 3211 if (shared && (prot & PROT_WRITE))
3212 av |= FILE__WRITE; 3212 av |= FILE__WRITE;
3213 3213
3214 if (prot & PROT_EXEC) 3214 if (prot & PROT_EXEC)
3215 av |= FILE__EXECUTE; 3215 av |= FILE__EXECUTE;
3216 3216
3217 return file_has_perm(cred, file, av); 3217 return file_has_perm(cred, file, av);
3218 } 3218 }
3219 3219
3220 error: 3220 error:
3221 return rc; 3221 return rc;
3222 } 3222 }
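The "anonymous mapping or private writable file mapping made executable" branch above is what the classic execmem denial corresponds to. A small userspace sketch that exercises it (illustrative only; whether it is actually denied depends on the loaded policy and its booleans):

	#include <sys/mman.h>
	#include <stdio.h>

	int main(void)
	{
		/* writable + executable anonymous mapping -> PROCESS__EXECMEM check */
		void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (p == MAP_FAILED)
			perror("mmap");
		return 0;
	}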
3223 3223
3224 static int selinux_mmap_addr(unsigned long addr) 3224 static int selinux_mmap_addr(unsigned long addr)
3225 { 3225 {
3226 int rc; 3226 int rc;
3227 3227
3228 /* do DAC check on address space usage */ 3228 /* do DAC check on address space usage */
3229 rc = cap_mmap_addr(addr); 3229 rc = cap_mmap_addr(addr);
3230 if (rc) 3230 if (rc)
3231 return rc; 3231 return rc;
3232 3232
3233 if (addr < CONFIG_LSM_MMAP_MIN_ADDR) { 3233 if (addr < CONFIG_LSM_MMAP_MIN_ADDR) {
3234 u32 sid = current_sid(); 3234 u32 sid = current_sid();
3235 rc = avc_has_perm(sid, sid, SECCLASS_MEMPROTECT, 3235 rc = avc_has_perm(sid, sid, SECCLASS_MEMPROTECT,
3236 MEMPROTECT__MMAP_ZERO, NULL); 3236 MEMPROTECT__MMAP_ZERO, NULL);
3237 } 3237 }
3238 3238
3239 return rc; 3239 return rc;
3240 } 3240 }
3241 3241
3242 static int selinux_mmap_file(struct file *file, unsigned long reqprot, 3242 static int selinux_mmap_file(struct file *file, unsigned long reqprot,
3243 unsigned long prot, unsigned long flags) 3243 unsigned long prot, unsigned long flags)
3244 { 3244 {
3245 if (selinux_checkreqprot) 3245 if (selinux_checkreqprot)
3246 prot = reqprot; 3246 prot = reqprot;
3247 3247
3248 return file_map_prot_check(file, prot, 3248 return file_map_prot_check(file, prot,
3249 (flags & MAP_TYPE) == MAP_SHARED); 3249 (flags & MAP_TYPE) == MAP_SHARED);
3250 } 3250 }
3251 3251
3252 static int selinux_file_mprotect(struct vm_area_struct *vma, 3252 static int selinux_file_mprotect(struct vm_area_struct *vma,
3253 unsigned long reqprot, 3253 unsigned long reqprot,
3254 unsigned long prot) 3254 unsigned long prot)
3255 { 3255 {
3256 const struct cred *cred = current_cred(); 3256 const struct cred *cred = current_cred();
3257 3257
3258 if (selinux_checkreqprot) 3258 if (selinux_checkreqprot)
3259 prot = reqprot; 3259 prot = reqprot;
3260 3260
3261 if (default_noexec && 3261 if (default_noexec &&
3262 (prot & PROT_EXEC) && !(vma->vm_flags & VM_EXEC)) { 3262 (prot & PROT_EXEC) && !(vma->vm_flags & VM_EXEC)) {
3263 int rc = 0; 3263 int rc = 0;
3264 if (vma->vm_start >= vma->vm_mm->start_brk && 3264 if (vma->vm_start >= vma->vm_mm->start_brk &&
3265 vma->vm_end <= vma->vm_mm->brk) { 3265 vma->vm_end <= vma->vm_mm->brk) {
3266 rc = cred_has_perm(cred, cred, PROCESS__EXECHEAP); 3266 rc = cred_has_perm(cred, cred, PROCESS__EXECHEAP);
3267 } else if (!vma->vm_file && 3267 } else if (!vma->vm_file &&
3268 vma->vm_start <= vma->vm_mm->start_stack && 3268 vma->vm_start <= vma->vm_mm->start_stack &&
3269 vma->vm_end >= vma->vm_mm->start_stack) { 3269 vma->vm_end >= vma->vm_mm->start_stack) {
3270 rc = current_has_perm(current, PROCESS__EXECSTACK); 3270 rc = current_has_perm(current, PROCESS__EXECSTACK);
3271 } else if (vma->vm_file && vma->anon_vma) { 3271 } else if (vma->vm_file && vma->anon_vma) {
3272 /* 3272 /*
3273 * We are making executable a file mapping that has 3273 * We are making executable a file mapping that has
3274 * had some COW done. Since pages might have been 3274 * had some COW done. Since pages might have been
3275 * written, check ability to execute the possibly 3275 * written, check ability to execute the possibly
3276 * modified content. This typically should only 3276 * modified content. This typically should only
3277 * occur for text relocations. 3277 * occur for text relocations.
3278 */ 3278 */
3279 rc = file_has_perm(cred, vma->vm_file, FILE__EXECMOD); 3279 rc = file_has_perm(cred, vma->vm_file, FILE__EXECMOD);
3280 } 3280 }
3281 if (rc) 3281 if (rc)
3282 return rc; 3282 return rc;
3283 } 3283 }
3284 3284
3285 return file_map_prot_check(vma->vm_file, prot, vma->vm_flags&VM_SHARED); 3285 return file_map_prot_check(vma->vm_file, prot, vma->vm_flags&VM_SHARED);
3286 } 3286 }
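The COW/EXECMOD branch above is typically reached via text relocations, where the dynamic linker writes into a private mapping of a DSO and then flips it back to executable. A condensed sketch of the same sequence (fd is assumed to be an open, readable file; the page size is hard-coded for brevity):

	#include <sys/mman.h>

	int cow_then_exec(int fd)
	{
		char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
		if (p == MAP_FAILED)
			return -1;
		p[0] ^= 1;	/* dirty the page: forces copy-on-write */
		/* making modified private file pages executable -> FILE__EXECMOD */
		return mprotect(p, 4096, PROT_READ | PROT_EXEC);
	}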
3287 3287
3288 static int selinux_file_lock(struct file *file, unsigned int cmd) 3288 static int selinux_file_lock(struct file *file, unsigned int cmd)
3289 { 3289 {
3290 const struct cred *cred = current_cred(); 3290 const struct cred *cred = current_cred();
3291 3291
3292 return file_has_perm(cred, file, FILE__LOCK); 3292 return file_has_perm(cred, file, FILE__LOCK);
3293 } 3293 }
3294 3294
3295 static int selinux_file_fcntl(struct file *file, unsigned int cmd, 3295 static int selinux_file_fcntl(struct file *file, unsigned int cmd,
3296 unsigned long arg) 3296 unsigned long arg)
3297 { 3297 {
3298 const struct cred *cred = current_cred(); 3298 const struct cred *cred = current_cred();
3299 int err = 0; 3299 int err = 0;
3300 3300
3301 switch (cmd) { 3301 switch (cmd) {
3302 case F_SETFL: 3302 case F_SETFL:
3303 if ((file->f_flags & O_APPEND) && !(arg & O_APPEND)) { 3303 if ((file->f_flags & O_APPEND) && !(arg & O_APPEND)) {
3304 err = file_has_perm(cred, file, FILE__WRITE); 3304 err = file_has_perm(cred, file, FILE__WRITE);
3305 break; 3305 break;
3306 } 3306 }
3307 /* fall through */ 3307 /* fall through */
3308 case F_SETOWN: 3308 case F_SETOWN:
3309 case F_SETSIG: 3309 case F_SETSIG:
3310 case F_GETFL: 3310 case F_GETFL:
3311 case F_GETOWN: 3311 case F_GETOWN:
3312 case F_GETSIG: 3312 case F_GETSIG:
3313 case F_GETOWNER_UIDS: 3313 case F_GETOWNER_UIDS:
3314 /* Just check FD__USE permission */ 3314 /* Just check FD__USE permission */
3315 err = file_has_perm(cred, file, 0); 3315 err = file_has_perm(cred, file, 0);
3316 break; 3316 break;
3317 case F_GETLK: 3317 case F_GETLK:
3318 case F_SETLK: 3318 case F_SETLK:
3319 case F_SETLKW: 3319 case F_SETLKW:
3320 case F_GETLKP: 3320 case F_OFD_GETLK:
3321 case F_SETLKP: 3321 case F_OFD_SETLK:
3322 case F_SETLKPW: 3322 case F_OFD_SETLKW:
3323 #if BITS_PER_LONG == 32 3323 #if BITS_PER_LONG == 32
3324 case F_GETLK64: 3324 case F_GETLK64:
3325 case F_SETLK64: 3325 case F_SETLK64:
3326 case F_SETLKW64: 3326 case F_SETLKW64:
3327 #endif 3327 #endif
3328 err = file_has_perm(cred, file, FILE__LOCK); 3328 err = file_has_perm(cred, file, FILE__LOCK);
3329 break; 3329 break;
3330 } 3330 }
3331 3331
3332 return err; 3332 return err;
3333 } 3333 }
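This hunk is the SELinux side of the rename described in the merge message: the old F_GETLKP/F_SETLKP/F_SETLKPW commands become F_OFD_GETLK/F_OFD_SETLK/F_OFD_SETLKW, and both spellings take the same FILE__LOCK check. For reference, a minimal userspace user of the renamed commands (file name is illustrative; requires a libc new enough to expose the F_OFD_* constants, e.g. glibc with _GNU_SOURCE):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <string.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/tmp/ofd-demo", O_RDWR | O_CREAT, 0600);
		if (fd < 0) {
			perror("open");
			return 1;
		}

		struct flock fl;
		memset(&fl, 0, sizeof(fl));
		fl.l_type = F_WRLCK;
		fl.l_whence = SEEK_SET;
		fl.l_start = 0;
		fl.l_len = 0;	/* 0 = lock the whole file */
		fl.l_pid = 0;	/* must be zero for OFD locks */

		/* the lock belongs to the open file description, not the process */
		if (fcntl(fd, F_OFD_SETLK, &fl) < 0)
			perror("fcntl(F_OFD_SETLK)");

		close(fd);
		return 0;
	}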
3334 3334
3335 static int selinux_file_set_fowner(struct file *file) 3335 static int selinux_file_set_fowner(struct file *file)
3336 { 3336 {
3337 struct file_security_struct *fsec; 3337 struct file_security_struct *fsec;
3338 3338
3339 fsec = file->f_security; 3339 fsec = file->f_security;
3340 fsec->fown_sid = current_sid(); 3340 fsec->fown_sid = current_sid();
3341 3341
3342 return 0; 3342 return 0;
3343 } 3343 }
3344 3344
3345 static int selinux_file_send_sigiotask(struct task_struct *tsk, 3345 static int selinux_file_send_sigiotask(struct task_struct *tsk,
3346 struct fown_struct *fown, int signum) 3346 struct fown_struct *fown, int signum)
3347 { 3347 {
3348 struct file *file; 3348 struct file *file;
3349 u32 sid = task_sid(tsk); 3349 u32 sid = task_sid(tsk);
3350 u32 perm; 3350 u32 perm;
3351 struct file_security_struct *fsec; 3351 struct file_security_struct *fsec;
3352 3352
3353 /* struct fown_struct is never outside the context of a struct file */ 3353 /* struct fown_struct is never outside the context of a struct file */
3354 file = container_of(fown, struct file, f_owner); 3354 file = container_of(fown, struct file, f_owner);
3355 3355
3356 fsec = file->f_security; 3356 fsec = file->f_security;
3357 3357
3358 if (!signum) 3358 if (!signum)
3359 perm = signal_to_av(SIGIO); /* as per send_sigio_to_task */ 3359 perm = signal_to_av(SIGIO); /* as per send_sigio_to_task */
3360 else 3360 else
3361 perm = signal_to_av(signum); 3361 perm = signal_to_av(signum);
3362 3362
3363 return avc_has_perm(fsec->fown_sid, sid, 3363 return avc_has_perm(fsec->fown_sid, sid,
3364 SECCLASS_PROCESS, perm, NULL); 3364 SECCLASS_PROCESS, perm, NULL);
3365 } 3365 }
3366 3366
3367 static int selinux_file_receive(struct file *file) 3367 static int selinux_file_receive(struct file *file)
3368 { 3368 {
3369 const struct cred *cred = current_cred(); 3369 const struct cred *cred = current_cred();
3370 3370
3371 return file_has_perm(cred, file, file_to_av(file)); 3371 return file_has_perm(cred, file, file_to_av(file));
3372 } 3372 }
3373 3373
3374 static int selinux_file_open(struct file *file, const struct cred *cred) 3374 static int selinux_file_open(struct file *file, const struct cred *cred)
3375 { 3375 {
3376 struct file_security_struct *fsec; 3376 struct file_security_struct *fsec;
3377 struct inode_security_struct *isec; 3377 struct inode_security_struct *isec;
3378 3378
3379 fsec = file->f_security; 3379 fsec = file->f_security;
3380 isec = file_inode(file)->i_security; 3380 isec = file_inode(file)->i_security;
3381 /* 3381 /*
3382 * Save inode label and policy sequence number 3382 * Save inode label and policy sequence number
3383 * at open-time so that selinux_file_permission 3383 * at open-time so that selinux_file_permission
3384 * can determine whether revalidation is necessary. 3384 * can determine whether revalidation is necessary.
3385 * Task label is already saved in the file security 3385 * Task label is already saved in the file security
3386 * struct as its SID. 3386 * struct as its SID.
3387 */ 3387 */
3388 fsec->isid = isec->sid; 3388 fsec->isid = isec->sid;
3389 fsec->pseqno = avc_policy_seqno(); 3389 fsec->pseqno = avc_policy_seqno();
3390 /* 3390 /*
3391 * Since the inode label or policy seqno may have changed 3391 * Since the inode label or policy seqno may have changed
3392 * between the selinux_inode_permission check and the saving 3392 * between the selinux_inode_permission check and the saving
3393 * of state above, recheck that access is still permitted. 3393 * of state above, recheck that access is still permitted.
3394 * Otherwise, access might never be revalidated against the 3394 * Otherwise, access might never be revalidated against the
3395 * new inode label or new policy. 3395 * new inode label or new policy.
3396 * This check is not redundant - do not remove. 3396 * This check is not redundant - do not remove.
3397 */ 3397 */
3398 return file_path_has_perm(cred, file, open_file_to_av(file)); 3398 return file_path_has_perm(cred, file, open_file_to_av(file));
3399 } 3399 }
3400 3400
3401 /* task security operations */ 3401 /* task security operations */
3402 3402
3403 static int selinux_task_create(unsigned long clone_flags) 3403 static int selinux_task_create(unsigned long clone_flags)
3404 { 3404 {
3405 return current_has_perm(current, PROCESS__FORK); 3405 return current_has_perm(current, PROCESS__FORK);
3406 } 3406 }
3407 3407
3408 /* 3408 /*
3409 * allocate the SELinux part of blank credentials 3409 * allocate the SELinux part of blank credentials
3410 */ 3410 */
3411 static int selinux_cred_alloc_blank(struct cred *cred, gfp_t gfp) 3411 static int selinux_cred_alloc_blank(struct cred *cred, gfp_t gfp)
3412 { 3412 {
3413 struct task_security_struct *tsec; 3413 struct task_security_struct *tsec;
3414 3414
3415 tsec = kzalloc(sizeof(struct task_security_struct), gfp); 3415 tsec = kzalloc(sizeof(struct task_security_struct), gfp);
3416 if (!tsec) 3416 if (!tsec)
3417 return -ENOMEM; 3417 return -ENOMEM;
3418 3418
3419 cred->security = tsec; 3419 cred->security = tsec;
3420 return 0; 3420 return 0;
3421 } 3421 }
3422 3422
3423 /* 3423 /*
3424 * detach and free the LSM part of a set of credentials 3424 * detach and free the LSM part of a set of credentials
3425 */ 3425 */
3426 static void selinux_cred_free(struct cred *cred) 3426 static void selinux_cred_free(struct cred *cred)
3427 { 3427 {
3428 struct task_security_struct *tsec = cred->security; 3428 struct task_security_struct *tsec = cred->security;
3429 3429
3430 /* 3430 /*
3431 * cred->security == NULL if security_cred_alloc_blank() or 3431 * cred->security == NULL if security_cred_alloc_blank() or
3432 * security_prepare_creds() returned an error. 3432 * security_prepare_creds() returned an error.
3433 */ 3433 */
3434 BUG_ON(cred->security && (unsigned long) cred->security < PAGE_SIZE); 3434 BUG_ON(cred->security && (unsigned long) cred->security < PAGE_SIZE);
3435 cred->security = (void *) 0x7UL; 3435 cred->security = (void *) 0x7UL;
3436 kfree(tsec); 3436 kfree(tsec);
3437 } 3437 }
3438 3438
3439 /* 3439 /*
3440 * prepare a new set of credentials for modification 3440 * prepare a new set of credentials for modification
3441 */ 3441 */
3442 static int selinux_cred_prepare(struct cred *new, const struct cred *old, 3442 static int selinux_cred_prepare(struct cred *new, const struct cred *old,
3443 gfp_t gfp) 3443 gfp_t gfp)
3444 { 3444 {
3445 const struct task_security_struct *old_tsec; 3445 const struct task_security_struct *old_tsec;
3446 struct task_security_struct *tsec; 3446 struct task_security_struct *tsec;
3447 3447
3448 old_tsec = old->security; 3448 old_tsec = old->security;
3449 3449
3450 tsec = kmemdup(old_tsec, sizeof(struct task_security_struct), gfp); 3450 tsec = kmemdup(old_tsec, sizeof(struct task_security_struct), gfp);
3451 if (!tsec) 3451 if (!tsec)
3452 return -ENOMEM; 3452 return -ENOMEM;
3453 3453
3454 new->security = tsec; 3454 new->security = tsec;
3455 return 0; 3455 return 0;
3456 } 3456 }
3457 3457
3458 /* 3458 /*
3459 * transfer the SELinux data to a blank set of creds 3459 * transfer the SELinux data to a blank set of creds
3460 */ 3460 */
3461 static void selinux_cred_transfer(struct cred *new, const struct cred *old) 3461 static void selinux_cred_transfer(struct cred *new, const struct cred *old)
3462 { 3462 {
3463 const struct task_security_struct *old_tsec = old->security; 3463 const struct task_security_struct *old_tsec = old->security;
3464 struct task_security_struct *tsec = new->security; 3464 struct task_security_struct *tsec = new->security;
3465 3465
3466 *tsec = *old_tsec; 3466 *tsec = *old_tsec;
3467 } 3467 }
3468 3468
3469 /* 3469 /*
3470 * set the security data for a kernel service 3470 * set the security data for a kernel service
3471 * - all the creation contexts are set to unlabelled 3471 * - all the creation contexts are set to unlabelled
3472 */ 3472 */
3473 static int selinux_kernel_act_as(struct cred *new, u32 secid) 3473 static int selinux_kernel_act_as(struct cred *new, u32 secid)
3474 { 3474 {
3475 struct task_security_struct *tsec = new->security; 3475 struct task_security_struct *tsec = new->security;
3476 u32 sid = current_sid(); 3476 u32 sid = current_sid();
3477 int ret; 3477 int ret;
3478 3478
3479 ret = avc_has_perm(sid, secid, 3479 ret = avc_has_perm(sid, secid,
3480 SECCLASS_KERNEL_SERVICE, 3480 SECCLASS_KERNEL_SERVICE,
3481 KERNEL_SERVICE__USE_AS_OVERRIDE, 3481 KERNEL_SERVICE__USE_AS_OVERRIDE,
3482 NULL); 3482 NULL);
3483 if (ret == 0) { 3483 if (ret == 0) {
3484 tsec->sid = secid; 3484 tsec->sid = secid;
3485 tsec->create_sid = 0; 3485 tsec->create_sid = 0;
3486 tsec->keycreate_sid = 0; 3486 tsec->keycreate_sid = 0;
3487 tsec->sockcreate_sid = 0; 3487 tsec->sockcreate_sid = 0;
3488 } 3488 }
3489 return ret; 3489 return ret;
3490 } 3490 }
3491 3491
3492 /* 3492 /*
3493 * set the file creation context in a security record to the same as the 3493 * set the file creation context in a security record to the same as the
3494 * objective context of the specified inode 3494 * objective context of the specified inode
3495 */ 3495 */
3496 static int selinux_kernel_create_files_as(struct cred *new, struct inode *inode) 3496 static int selinux_kernel_create_files_as(struct cred *new, struct inode *inode)
3497 { 3497 {
3498 struct inode_security_struct *isec = inode->i_security; 3498 struct inode_security_struct *isec = inode->i_security;
3499 struct task_security_struct *tsec = new->security; 3499 struct task_security_struct *tsec = new->security;
3500 u32 sid = current_sid(); 3500 u32 sid = current_sid();
3501 int ret; 3501 int ret;
3502 3502
3503 ret = avc_has_perm(sid, isec->sid, 3503 ret = avc_has_perm(sid, isec->sid,
3504 SECCLASS_KERNEL_SERVICE, 3504 SECCLASS_KERNEL_SERVICE,
3505 KERNEL_SERVICE__CREATE_FILES_AS, 3505 KERNEL_SERVICE__CREATE_FILES_AS,
3506 NULL); 3506 NULL);
3507 3507
3508 if (ret == 0) 3508 if (ret == 0)
3509 tsec->create_sid = isec->sid; 3509 tsec->create_sid = isec->sid;
3510 return ret; 3510 return ret;
3511 } 3511 }
3512 3512
3513 static int selinux_kernel_module_request(char *kmod_name) 3513 static int selinux_kernel_module_request(char *kmod_name)
3514 { 3514 {
3515 u32 sid; 3515 u32 sid;
3516 struct common_audit_data ad; 3516 struct common_audit_data ad;
3517 3517
3518 sid = task_sid(current); 3518 sid = task_sid(current);
3519 3519
3520 ad.type = LSM_AUDIT_DATA_KMOD; 3520 ad.type = LSM_AUDIT_DATA_KMOD;
3521 ad.u.kmod_name = kmod_name; 3521 ad.u.kmod_name = kmod_name;
3522 3522
3523 return avc_has_perm(sid, SECINITSID_KERNEL, SECCLASS_SYSTEM, 3523 return avc_has_perm(sid, SECINITSID_KERNEL, SECCLASS_SYSTEM,
3524 SYSTEM__MODULE_REQUEST, &ad); 3524 SYSTEM__MODULE_REQUEST, &ad);
3525 } 3525 }
3526 3526
3527 static int selinux_task_setpgid(struct task_struct *p, pid_t pgid) 3527 static int selinux_task_setpgid(struct task_struct *p, pid_t pgid)
3528 { 3528 {
3529 return current_has_perm(p, PROCESS__SETPGID); 3529 return current_has_perm(p, PROCESS__SETPGID);
3530 } 3530 }
3531 3531
3532 static int selinux_task_getpgid(struct task_struct *p) 3532 static int selinux_task_getpgid(struct task_struct *p)
3533 { 3533 {
3534 return current_has_perm(p, PROCESS__GETPGID); 3534 return current_has_perm(p, PROCESS__GETPGID);
3535 } 3535 }
3536 3536
3537 static int selinux_task_getsid(struct task_struct *p) 3537 static int selinux_task_getsid(struct task_struct *p)
3538 { 3538 {
3539 return current_has_perm(p, PROCESS__GETSESSION); 3539 return current_has_perm(p, PROCESS__GETSESSION);
3540 } 3540 }
3541 3541
3542 static void selinux_task_getsecid(struct task_struct *p, u32 *secid) 3542 static void selinux_task_getsecid(struct task_struct *p, u32 *secid)
3543 { 3543 {
3544 *secid = task_sid(p); 3544 *secid = task_sid(p);
3545 } 3545 }
3546 3546
3547 static int selinux_task_setnice(struct task_struct *p, int nice) 3547 static int selinux_task_setnice(struct task_struct *p, int nice)
3548 { 3548 {
3549 int rc; 3549 int rc;
3550 3550
3551 rc = cap_task_setnice(p, nice); 3551 rc = cap_task_setnice(p, nice);
3552 if (rc) 3552 if (rc)
3553 return rc; 3553 return rc;
3554 3554
3555 return current_has_perm(p, PROCESS__SETSCHED); 3555 return current_has_perm(p, PROCESS__SETSCHED);
3556 } 3556 }
3557 3557
3558 static int selinux_task_setioprio(struct task_struct *p, int ioprio) 3558 static int selinux_task_setioprio(struct task_struct *p, int ioprio)
3559 { 3559 {
3560 int rc; 3560 int rc;
3561 3561
3562 rc = cap_task_setioprio(p, ioprio); 3562 rc = cap_task_setioprio(p, ioprio);
3563 if (rc) 3563 if (rc)
3564 return rc; 3564 return rc;
3565 3565
3566 return current_has_perm(p, PROCESS__SETSCHED); 3566 return current_has_perm(p, PROCESS__SETSCHED);
3567 } 3567 }
3568 3568
3569 static int selinux_task_getioprio(struct task_struct *p) 3569 static int selinux_task_getioprio(struct task_struct *p)
3570 { 3570 {
3571 return current_has_perm(p, PROCESS__GETSCHED); 3571 return current_has_perm(p, PROCESS__GETSCHED);
3572 } 3572 }
3573 3573
3574 static int selinux_task_setrlimit(struct task_struct *p, unsigned int resource, 3574 static int selinux_task_setrlimit(struct task_struct *p, unsigned int resource,
3575 struct rlimit *new_rlim) 3575 struct rlimit *new_rlim)
3576 { 3576 {
3577 struct rlimit *old_rlim = p->signal->rlim + resource; 3577 struct rlimit *old_rlim = p->signal->rlim + resource;
3578 3578
3579 /* Control the ability to change the hard limit (whether 3579 /* Control the ability to change the hard limit (whether
3580 lowering or raising it), so that the hard limit can 3580 lowering or raising it), so that the hard limit can
3581 later be used as a safe reset point for the soft limit 3581 later be used as a safe reset point for the soft limit
3582 upon context transitions. See selinux_bprm_committing_creds. */ 3582 upon context transitions. See selinux_bprm_committing_creds. */
3583 if (old_rlim->rlim_max != new_rlim->rlim_max) 3583 if (old_rlim->rlim_max != new_rlim->rlim_max)
3584 return current_has_perm(p, PROCESS__SETRLIMIT); 3584 return current_has_perm(p, PROCESS__SETRLIMIT);
3585 3585
3586 return 0; 3586 return 0;
3587 } 3587 }
3588 3588
3589 static int selinux_task_setscheduler(struct task_struct *p) 3589 static int selinux_task_setscheduler(struct task_struct *p)
3590 { 3590 {
3591 int rc; 3591 int rc;
3592 3592
3593 rc = cap_task_setscheduler(p); 3593 rc = cap_task_setscheduler(p);
3594 if (rc) 3594 if (rc)
3595 return rc; 3595 return rc;
3596 3596
3597 return current_has_perm(p, PROCESS__SETSCHED); 3597 return current_has_perm(p, PROCESS__SETSCHED);
3598 } 3598 }
3599 3599
3600 static int selinux_task_getscheduler(struct task_struct *p) 3600 static int selinux_task_getscheduler(struct task_struct *p)
3601 { 3601 {
3602 return current_has_perm(p, PROCESS__GETSCHED); 3602 return current_has_perm(p, PROCESS__GETSCHED);
3603 } 3603 }
3604 3604
3605 static int selinux_task_movememory(struct task_struct *p) 3605 static int selinux_task_movememory(struct task_struct *p)
3606 { 3606 {
3607 return current_has_perm(p, PROCESS__SETSCHED); 3607 return current_has_perm(p, PROCESS__SETSCHED);
3608 } 3608 }
3609 3609
3610 static int selinux_task_kill(struct task_struct *p, struct siginfo *info, 3610 static int selinux_task_kill(struct task_struct *p, struct siginfo *info,
3611 int sig, u32 secid) 3611 int sig, u32 secid)
3612 { 3612 {
3613 u32 perm; 3613 u32 perm;
3614 int rc; 3614 int rc;
3615 3615
3616 if (!sig) 3616 if (!sig)
3617 perm = PROCESS__SIGNULL; /* null signal; existence test */ 3617 perm = PROCESS__SIGNULL; /* null signal; existence test */
3618 else 3618 else
3619 perm = signal_to_av(sig); 3619 perm = signal_to_av(sig);
3620 if (secid) 3620 if (secid)
3621 rc = avc_has_perm(secid, task_sid(p), 3621 rc = avc_has_perm(secid, task_sid(p),
3622 SECCLASS_PROCESS, perm, NULL); 3622 SECCLASS_PROCESS, perm, NULL);
3623 else 3623 else
3624 rc = current_has_perm(p, perm); 3624 rc = current_has_perm(p, perm);
3625 return rc; 3625 return rc;
3626 } 3626 }
3627 3627
3628 static int selinux_task_wait(struct task_struct *p) 3628 static int selinux_task_wait(struct task_struct *p)
3629 { 3629 {
3630 return task_has_perm(p, current, PROCESS__SIGCHLD); 3630 return task_has_perm(p, current, PROCESS__SIGCHLD);
3631 } 3631 }
3632 3632
3633 static void selinux_task_to_inode(struct task_struct *p, 3633 static void selinux_task_to_inode(struct task_struct *p,
3634 struct inode *inode) 3634 struct inode *inode)
3635 { 3635 {
3636 struct inode_security_struct *isec = inode->i_security; 3636 struct inode_security_struct *isec = inode->i_security;
3637 u32 sid = task_sid(p); 3637 u32 sid = task_sid(p);
3638 3638
3639 isec->sid = sid; 3639 isec->sid = sid;
3640 isec->initialized = 1; 3640 isec->initialized = 1;
3641 } 3641 }
3642 3642
3643 /* Returns error only if unable to parse addresses */ 3643 /* Returns error only if unable to parse addresses */
3644 static int selinux_parse_skb_ipv4(struct sk_buff *skb, 3644 static int selinux_parse_skb_ipv4(struct sk_buff *skb,
3645 struct common_audit_data *ad, u8 *proto) 3645 struct common_audit_data *ad, u8 *proto)
3646 { 3646 {
3647 int offset, ihlen, ret = -EINVAL; 3647 int offset, ihlen, ret = -EINVAL;
3648 struct iphdr _iph, *ih; 3648 struct iphdr _iph, *ih;
3649 3649
3650 offset = skb_network_offset(skb); 3650 offset = skb_network_offset(skb);
3651 ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); 3651 ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
3652 if (ih == NULL) 3652 if (ih == NULL)
3653 goto out; 3653 goto out;
3654 3654
3655 ihlen = ih->ihl * 4; 3655 ihlen = ih->ihl * 4;
3656 if (ihlen < sizeof(_iph)) 3656 if (ihlen < sizeof(_iph))
3657 goto out; 3657 goto out;
3658 3658
3659 ad->u.net->v4info.saddr = ih->saddr; 3659 ad->u.net->v4info.saddr = ih->saddr;
3660 ad->u.net->v4info.daddr = ih->daddr; 3660 ad->u.net->v4info.daddr = ih->daddr;
3661 ret = 0; 3661 ret = 0;
3662 3662
3663 if (proto) 3663 if (proto)
3664 *proto = ih->protocol; 3664 *proto = ih->protocol;
3665 3665
3666 switch (ih->protocol) { 3666 switch (ih->protocol) {
3667 case IPPROTO_TCP: { 3667 case IPPROTO_TCP: {
3668 struct tcphdr _tcph, *th; 3668 struct tcphdr _tcph, *th;
3669 3669
3670 if (ntohs(ih->frag_off) & IP_OFFSET) 3670 if (ntohs(ih->frag_off) & IP_OFFSET)
3671 break; 3671 break;
3672 3672
3673 offset += ihlen; 3673 offset += ihlen;
3674 th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph); 3674 th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph);
3675 if (th == NULL) 3675 if (th == NULL)
3676 break; 3676 break;
3677 3677
3678 ad->u.net->sport = th->source; 3678 ad->u.net->sport = th->source;
3679 ad->u.net->dport = th->dest; 3679 ad->u.net->dport = th->dest;
3680 break; 3680 break;
3681 } 3681 }
3682 3682
3683 case IPPROTO_UDP: { 3683 case IPPROTO_UDP: {
3684 struct udphdr _udph, *uh; 3684 struct udphdr _udph, *uh;
3685 3685
3686 if (ntohs(ih->frag_off) & IP_OFFSET) 3686 if (ntohs(ih->frag_off) & IP_OFFSET)
3687 break; 3687 break;
3688 3688
3689 offset += ihlen; 3689 offset += ihlen;
3690 uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph); 3690 uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph);
3691 if (uh == NULL) 3691 if (uh == NULL)
3692 break; 3692 break;
3693 3693
3694 ad->u.net->sport = uh->source; 3694 ad->u.net->sport = uh->source;
3695 ad->u.net->dport = uh->dest; 3695 ad->u.net->dport = uh->dest;
3696 break; 3696 break;
3697 } 3697 }
3698 3698
3699 case IPPROTO_DCCP: { 3699 case IPPROTO_DCCP: {
3700 struct dccp_hdr _dccph, *dh; 3700 struct dccp_hdr _dccph, *dh;
3701 3701
3702 if (ntohs(ih->frag_off) & IP_OFFSET) 3702 if (ntohs(ih->frag_off) & IP_OFFSET)
3703 break; 3703 break;
3704 3704
3705 offset += ihlen; 3705 offset += ihlen;
3706 dh = skb_header_pointer(skb, offset, sizeof(_dccph), &_dccph); 3706 dh = skb_header_pointer(skb, offset, sizeof(_dccph), &_dccph);
3707 if (dh == NULL) 3707 if (dh == NULL)
3708 break; 3708 break;
3709 3709
3710 ad->u.net->sport = dh->dccph_sport; 3710 ad->u.net->sport = dh->dccph_sport;
3711 ad->u.net->dport = dh->dccph_dport; 3711 ad->u.net->dport = dh->dccph_dport;
3712 break; 3712 break;
3713 } 3713 }
3714 3714
3715 default: 3715 default:
3716 break; 3716 break;
3717 } 3717 }
3718 out: 3718 out:
3719 return ret; 3719 return ret;
3720 } 3720 }
3721 3721
3722 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 3722 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
3723 3723
3724 /* Returns error only if unable to parse addresses */ 3724 /* Returns error only if unable to parse addresses */
3725 static int selinux_parse_skb_ipv6(struct sk_buff *skb, 3725 static int selinux_parse_skb_ipv6(struct sk_buff *skb,
3726 struct common_audit_data *ad, u8 *proto) 3726 struct common_audit_data *ad, u8 *proto)
3727 { 3727 {
3728 u8 nexthdr; 3728 u8 nexthdr;
3729 int ret = -EINVAL, offset; 3729 int ret = -EINVAL, offset;
3730 struct ipv6hdr _ipv6h, *ip6; 3730 struct ipv6hdr _ipv6h, *ip6;
3731 __be16 frag_off; 3731 __be16 frag_off;
3732 3732
3733 offset = skb_network_offset(skb); 3733 offset = skb_network_offset(skb);
3734 ip6 = skb_header_pointer(skb, offset, sizeof(_ipv6h), &_ipv6h); 3734 ip6 = skb_header_pointer(skb, offset, sizeof(_ipv6h), &_ipv6h);
3735 if (ip6 == NULL) 3735 if (ip6 == NULL)
3736 goto out; 3736 goto out;
3737 3737
3738 ad->u.net->v6info.saddr = ip6->saddr; 3738 ad->u.net->v6info.saddr = ip6->saddr;
3739 ad->u.net->v6info.daddr = ip6->daddr; 3739 ad->u.net->v6info.daddr = ip6->daddr;
3740 ret = 0; 3740 ret = 0;
3741 3741
3742 nexthdr = ip6->nexthdr; 3742 nexthdr = ip6->nexthdr;
3743 offset += sizeof(_ipv6h); 3743 offset += sizeof(_ipv6h);
3744 offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off); 3744 offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off);
3745 if (offset < 0) 3745 if (offset < 0)
3746 goto out; 3746 goto out;
3747 3747
3748 if (proto) 3748 if (proto)
3749 *proto = nexthdr; 3749 *proto = nexthdr;
3750 3750
3751 switch (nexthdr) { 3751 switch (nexthdr) {
3752 case IPPROTO_TCP: { 3752 case IPPROTO_TCP: {
3753 struct tcphdr _tcph, *th; 3753 struct tcphdr _tcph, *th;
3754 3754
3755 th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph); 3755 th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph);
3756 if (th == NULL) 3756 if (th == NULL)
3757 break; 3757 break;
3758 3758
3759 ad->u.net->sport = th->source; 3759 ad->u.net->sport = th->source;
3760 ad->u.net->dport = th->dest; 3760 ad->u.net->dport = th->dest;
3761 break; 3761 break;
3762 } 3762 }
3763 3763
3764 case IPPROTO_UDP: { 3764 case IPPROTO_UDP: {
3765 struct udphdr _udph, *uh; 3765 struct udphdr _udph, *uh;
3766 3766
3767 uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph); 3767 uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph);
3768 if (uh == NULL) 3768 if (uh == NULL)
3769 break; 3769 break;
3770 3770
3771 ad->u.net->sport = uh->source; 3771 ad->u.net->sport = uh->source;
3772 ad->u.net->dport = uh->dest; 3772 ad->u.net->dport = uh->dest;
3773 break; 3773 break;
3774 } 3774 }
3775 3775
3776 case IPPROTO_DCCP: { 3776 case IPPROTO_DCCP: {
3777 struct dccp_hdr _dccph, *dh; 3777 struct dccp_hdr _dccph, *dh;
3778 3778
3779 dh = skb_header_pointer(skb, offset, sizeof(_dccph), &_dccph); 3779 dh = skb_header_pointer(skb, offset, sizeof(_dccph), &_dccph);
3780 if (dh == NULL) 3780 if (dh == NULL)
3781 break; 3781 break;
3782 3782
3783 ad->u.net->sport = dh->dccph_sport; 3783 ad->u.net->sport = dh->dccph_sport;
3784 ad->u.net->dport = dh->dccph_dport; 3784 ad->u.net->dport = dh->dccph_dport;
3785 break; 3785 break;
3786 } 3786 }
3787 3787
3788 /* includes fragments */ 3788 /* includes fragments */
3789 default: 3789 default:
3790 break; 3790 break;
3791 } 3791 }
3792 out: 3792 out:
3793 return ret; 3793 return ret;
3794 } 3794 }
3795 3795
3796 #endif /* IPV6 */ 3796 #endif /* IPV6 */
3797 3797
3798 static int selinux_parse_skb(struct sk_buff *skb, struct common_audit_data *ad, 3798 static int selinux_parse_skb(struct sk_buff *skb, struct common_audit_data *ad,
3799 char **_addrp, int src, u8 *proto) 3799 char **_addrp, int src, u8 *proto)
3800 { 3800 {
3801 char *addrp; 3801 char *addrp;
3802 int ret; 3802 int ret;
3803 3803
3804 switch (ad->u.net->family) { 3804 switch (ad->u.net->family) {
3805 case PF_INET: 3805 case PF_INET:
3806 ret = selinux_parse_skb_ipv4(skb, ad, proto); 3806 ret = selinux_parse_skb_ipv4(skb, ad, proto);
3807 if (ret) 3807 if (ret)
3808 goto parse_error; 3808 goto parse_error;
3809 addrp = (char *)(src ? &ad->u.net->v4info.saddr : 3809 addrp = (char *)(src ? &ad->u.net->v4info.saddr :
3810 &ad->u.net->v4info.daddr); 3810 &ad->u.net->v4info.daddr);
3811 goto okay; 3811 goto okay;
3812 3812
3813 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 3813 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
3814 case PF_INET6: 3814 case PF_INET6:
3815 ret = selinux_parse_skb_ipv6(skb, ad, proto); 3815 ret = selinux_parse_skb_ipv6(skb, ad, proto);
3816 if (ret) 3816 if (ret)
3817 goto parse_error; 3817 goto parse_error;
3818 addrp = (char *)(src ? &ad->u.net->v6info.saddr : 3818 addrp = (char *)(src ? &ad->u.net->v6info.saddr :
3819 &ad->u.net->v6info.daddr); 3819 &ad->u.net->v6info.daddr);
3820 goto okay; 3820 goto okay;
3821 #endif /* IPV6 */ 3821 #endif /* IPV6 */
3822 default: 3822 default:
3823 addrp = NULL; 3823 addrp = NULL;
3824 goto okay; 3824 goto okay;
3825 } 3825 }
3826 3826
3827 parse_error: 3827 parse_error:
3828 printk(KERN_WARNING 3828 printk(KERN_WARNING
3829 "SELinux: failure in selinux_parse_skb()," 3829 "SELinux: failure in selinux_parse_skb(),"
3830 " unable to parse packet\n"); 3830 " unable to parse packet\n");
3831 return ret; 3831 return ret;
3832 3832
3833 okay: 3833 okay:
3834 if (_addrp) 3834 if (_addrp)
3835 *_addrp = addrp; 3835 *_addrp = addrp;
3836 return 0; 3836 return 0;
3837 } 3837 }
3838 3838
3839 /** 3839 /**
3840 * selinux_skb_peerlbl_sid - Determine the peer label of a packet 3840 * selinux_skb_peerlbl_sid - Determine the peer label of a packet
3841 * @skb: the packet 3841 * @skb: the packet
3842 * @family: protocol family 3842 * @family: protocol family
3843 * @sid: the packet's peer label SID 3843 * @sid: the packet's peer label SID
3844 * 3844 *
3845 * Description: 3845 * Description:
3846 * Check the various different forms of network peer labeling and determine 3846 * Check the various different forms of network peer labeling and determine
3847 * the peer label/SID for the packet; most of the magic actually occurs in 3847 * the peer label/SID for the packet; most of the magic actually occurs in
3848 * the security server function security_net_peersid_cmp(). The function 3848 * the security server function security_net_peersid_cmp(). The function
3849 * returns zero if the value in @sid is valid (although it may be SECSID_NULL) 3849 * returns zero if the value in @sid is valid (although it may be SECSID_NULL)
3850 * or -EACCES if @sid is invalid due to inconsistencies with the different 3850 * or -EACCES if @sid is invalid due to inconsistencies with the different
3851 * peer labels. 3851 * peer labels.
3852 * 3852 *
3853 */ 3853 */
3854 static int selinux_skb_peerlbl_sid(struct sk_buff *skb, u16 family, u32 *sid) 3854 static int selinux_skb_peerlbl_sid(struct sk_buff *skb, u16 family, u32 *sid)
3855 { 3855 {
3856 int err; 3856 int err;
3857 u32 xfrm_sid; 3857 u32 xfrm_sid;
3858 u32 nlbl_sid; 3858 u32 nlbl_sid;
3859 u32 nlbl_type; 3859 u32 nlbl_type;
3860 3860
3861 err = selinux_xfrm_skb_sid(skb, &xfrm_sid); 3861 err = selinux_xfrm_skb_sid(skb, &xfrm_sid);
3862 if (unlikely(err)) 3862 if (unlikely(err))
3863 return -EACCES; 3863 return -EACCES;
3864 err = selinux_netlbl_skbuff_getsid(skb, family, &nlbl_type, &nlbl_sid); 3864 err = selinux_netlbl_skbuff_getsid(skb, family, &nlbl_type, &nlbl_sid);
3865 if (unlikely(err)) 3865 if (unlikely(err))
3866 return -EACCES; 3866 return -EACCES;
3867 3867
3868 err = security_net_peersid_resolve(nlbl_sid, nlbl_type, xfrm_sid, sid); 3868 err = security_net_peersid_resolve(nlbl_sid, nlbl_type, xfrm_sid, sid);
3869 if (unlikely(err)) { 3869 if (unlikely(err)) {
3870 printk(KERN_WARNING 3870 printk(KERN_WARNING
3871 "SELinux: failure in selinux_skb_peerlbl_sid()," 3871 "SELinux: failure in selinux_skb_peerlbl_sid(),"
3872 " unable to determine packet's peer label\n"); 3872 " unable to determine packet's peer label\n");
3873 return -EACCES; 3873 return -EACCES;
3874 } 3874 }
3875 3875
3876 return 0; 3876 return 0;
3877 } 3877 }
3878 3878
3879 /** 3879 /**
3880 * selinux_conn_sid - Determine the child socket label for a connection 3880 * selinux_conn_sid - Determine the child socket label for a connection
3881 * @sk_sid: the parent socket's SID 3881 * @sk_sid: the parent socket's SID
3882 * @skb_sid: the packet's SID 3882 * @skb_sid: the packet's SID
3883 * @conn_sid: the resulting connection SID 3883 * @conn_sid: the resulting connection SID
3884 * 3884 *
3885 * If @skb_sid is valid then the user:role:type information from @sk_sid is 3885 * If @skb_sid is valid then the user:role:type information from @sk_sid is
3886 * combined with the MLS information from @skb_sid in order to create 3886 * combined with the MLS information from @skb_sid in order to create
3887 * @conn_sid. If @skb_sid is not valid then @conn_sid is simply a copy 3887 * @conn_sid. If @skb_sid is not valid then @conn_sid is simply a copy
3888 * of @sk_sid. Returns zero on success, negative values on failure. 3888 * of @sk_sid. Returns zero on success, negative values on failure.
3889 * 3889 *
3890 */ 3890 */
3891 static int selinux_conn_sid(u32 sk_sid, u32 skb_sid, u32 *conn_sid) 3891 static int selinux_conn_sid(u32 sk_sid, u32 skb_sid, u32 *conn_sid)
3892 { 3892 {
3893 int err = 0; 3893 int err = 0;
3894 3894
3895 if (skb_sid != SECSID_NULL) 3895 if (skb_sid != SECSID_NULL)
3896 err = security_sid_mls_copy(sk_sid, skb_sid, conn_sid); 3896 err = security_sid_mls_copy(sk_sid, skb_sid, conn_sid);
3897 else 3897 else
3898 *conn_sid = sk_sid; 3898 *conn_sid = sk_sid;
3899 3899
3900 return err; 3900 return err;
3901 } 3901 }
3902 3902
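As a hypothetical illustration of the combination above: if @sk_sid resolves to system_u:system_r:httpd_t:s0 and @skb_sid carries the MLS range s0:c10, security_sid_mls_copy() produces a @conn_sid of system_u:system_r:httpd_t:s0:c10, i.e. the user:role:type of the listening socket paired with the MLS component taken from the packet. (The contexts here are made up purely for illustration.)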
3903 /* socket security operations */ 3903 /* socket security operations */
3904 3904
3905 static int socket_sockcreate_sid(const struct task_security_struct *tsec, 3905 static int socket_sockcreate_sid(const struct task_security_struct *tsec,
3906 u16 secclass, u32 *socksid) 3906 u16 secclass, u32 *socksid)
3907 { 3907 {
3908 if (tsec->sockcreate_sid > SECSID_NULL) { 3908 if (tsec->sockcreate_sid > SECSID_NULL) {
3909 *socksid = tsec->sockcreate_sid; 3909 *socksid = tsec->sockcreate_sid;
3910 return 0; 3910 return 0;
3911 } 3911 }
3912 3912
3913 return security_transition_sid(tsec->sid, tsec->sid, secclass, NULL, 3913 return security_transition_sid(tsec->sid, tsec->sid, secclass, NULL,
3914 socksid); 3914 socksid);
3915 } 3915 }
3916 3916
3917 static int sock_has_perm(struct task_struct *task, struct sock *sk, u32 perms) 3917 static int sock_has_perm(struct task_struct *task, struct sock *sk, u32 perms)
3918 { 3918 {
3919 struct sk_security_struct *sksec = sk->sk_security; 3919 struct sk_security_struct *sksec = sk->sk_security;
3920 struct common_audit_data ad; 3920 struct common_audit_data ad;
3921 struct lsm_network_audit net = {0,}; 3921 struct lsm_network_audit net = {0,};
3922 u32 tsid = task_sid(task); 3922 u32 tsid = task_sid(task);
3923 3923
3924 if (sksec->sid == SECINITSID_KERNEL) 3924 if (sksec->sid == SECINITSID_KERNEL)
3925 return 0; 3925 return 0;
3926 3926
3927 ad.type = LSM_AUDIT_DATA_NET; 3927 ad.type = LSM_AUDIT_DATA_NET;
3928 ad.u.net = &net; 3928 ad.u.net = &net;
3929 ad.u.net->sk = sk; 3929 ad.u.net->sk = sk;
3930 3930
3931 return avc_has_perm(tsid, sksec->sid, sksec->sclass, perms, &ad); 3931 return avc_has_perm(tsid, sksec->sid, sksec->sclass, perms, &ad);
3932 } 3932 }
3933 3933
3934 static int selinux_socket_create(int family, int type, 3934 static int selinux_socket_create(int family, int type,
3935 int protocol, int kern) 3935 int protocol, int kern)
3936 { 3936 {
3937 const struct task_security_struct *tsec = current_security(); 3937 const struct task_security_struct *tsec = current_security();
3938 u32 newsid; 3938 u32 newsid;
3939 u16 secclass; 3939 u16 secclass;
3940 int rc; 3940 int rc;
3941 3941
3942 if (kern) 3942 if (kern)
3943 return 0; 3943 return 0;
3944 3944
3945 secclass = socket_type_to_security_class(family, type, protocol); 3945 secclass = socket_type_to_security_class(family, type, protocol);
3946 rc = socket_sockcreate_sid(tsec, secclass, &newsid); 3946 rc = socket_sockcreate_sid(tsec, secclass, &newsid);
3947 if (rc) 3947 if (rc)
3948 return rc; 3948 return rc;
3949 3949
3950 return avc_has_perm(tsec->sid, newsid, secclass, SOCKET__CREATE, NULL); 3950 return avc_has_perm(tsec->sid, newsid, secclass, SOCKET__CREATE, NULL);
3951 } 3951 }
3952 3952
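A minimal user-space sketch of how the sockcreate SID consulted by socket_sockcreate_sid() gets set before the SOCKET__CREATE check above runs, assuming an SELinux-enabled system; the context string is hypothetical, the write can be denied by policy, and libselinux normally wraps this interface as setsockcreatecon():

/*
 * Request a specific context for subsequently created sockets by
 * writing to /proc/self/attr/sockcreate; socket_sockcreate_sid()
 * then picks this up as tsec->sockcreate_sid.  The context below is
 * hypothetical and must be defined in the loaded policy.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	const char *ctx = "system_u:system_r:myapp_t:s0";	/* hypothetical */
	int attr = open("/proc/self/attr/sockcreate", O_WRONLY);
	int fd;

	if (attr < 0) {
		perror("open sockcreate");
		return 1;
	}
	/* An empty write clears the setting; here we request @ctx. */
	if (write(attr, ctx, strlen(ctx)) < 0)
		perror("write sockcreate");
	close(attr);

	/* selinux_socket_create() now checks @ctx for SOCKET__CREATE. */
	fd = socket(AF_INET, SOCK_STREAM, 0);
	if (fd < 0)
		perror("socket");
	else
		close(fd);
	return 0;
}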
3953 static int selinux_socket_post_create(struct socket *sock, int family, 3953 static int selinux_socket_post_create(struct socket *sock, int family,
3954 int type, int protocol, int kern) 3954 int type, int protocol, int kern)
3955 { 3955 {
3956 const struct task_security_struct *tsec = current_security(); 3956 const struct task_security_struct *tsec = current_security();
3957 struct inode_security_struct *isec = SOCK_INODE(sock)->i_security; 3957 struct inode_security_struct *isec = SOCK_INODE(sock)->i_security;
3958 struct sk_security_struct *sksec; 3958 struct sk_security_struct *sksec;
3959 int err = 0; 3959 int err = 0;
3960 3960
3961 isec->sclass = socket_type_to_security_class(family, type, protocol); 3961 isec->sclass = socket_type_to_security_class(family, type, protocol);
3962 3962
3963 if (kern) 3963 if (kern)
3964 isec->sid = SECINITSID_KERNEL; 3964 isec->sid = SECINITSID_KERNEL;
3965 else { 3965 else {
3966 err = socket_sockcreate_sid(tsec, isec->sclass, &(isec->sid)); 3966 err = socket_sockcreate_sid(tsec, isec->sclass, &(isec->sid));
3967 if (err) 3967 if (err)
3968 return err; 3968 return err;
3969 } 3969 }
3970 3970
3971 isec->initialized = 1; 3971 isec->initialized = 1;
3972 3972
3973 if (sock->sk) { 3973 if (sock->sk) {
3974 sksec = sock->sk->sk_security; 3974 sksec = sock->sk->sk_security;
3975 sksec->sid = isec->sid; 3975 sksec->sid = isec->sid;
3976 sksec->sclass = isec->sclass; 3976 sksec->sclass = isec->sclass;
3977 err = selinux_netlbl_socket_post_create(sock->sk, family); 3977 err = selinux_netlbl_socket_post_create(sock->sk, family);
3978 } 3978 }
3979 3979
3980 return err; 3980 return err;
3981 } 3981 }
3982 3982
3983 /* Range of port numbers used to automatically bind. 3983 /* Range of port numbers used to automatically bind.
3984 Need to determine whether we should perform a name_bind 3984 Need to determine whether we should perform a name_bind
3985 permission check between the socket and the port number. */ 3985 permission check between the socket and the port number. */
3986 3986
3987 static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen) 3987 static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen)
3988 { 3988 {
3989 struct sock *sk = sock->sk; 3989 struct sock *sk = sock->sk;
3990 u16 family; 3990 u16 family;
3991 int err; 3991 int err;
3992 3992
3993 err = sock_has_perm(current, sk, SOCKET__BIND); 3993 err = sock_has_perm(current, sk, SOCKET__BIND);
3994 if (err) 3994 if (err)
3995 goto out; 3995 goto out;
3996 3996
3997 /* 3997 /*
3998 * If PF_INET or PF_INET6, check name_bind permission for the port. 3998 * If PF_INET or PF_INET6, check name_bind permission for the port.
3999 * Multiple address binding for SCTP is not supported yet: we just 3999 * Multiple address binding for SCTP is not supported yet: we just
4000 * check the first address now. 4000 * check the first address now.
4001 */ 4001 */
4002 family = sk->sk_family; 4002 family = sk->sk_family;
4003 if (family == PF_INET || family == PF_INET6) { 4003 if (family == PF_INET || family == PF_INET6) {
4004 char *addrp; 4004 char *addrp;
4005 struct sk_security_struct *sksec = sk->sk_security; 4005 struct sk_security_struct *sksec = sk->sk_security;
4006 struct common_audit_data ad; 4006 struct common_audit_data ad;
4007 struct lsm_network_audit net = {0,}; 4007 struct lsm_network_audit net = {0,};
4008 struct sockaddr_in *addr4 = NULL; 4008 struct sockaddr_in *addr4 = NULL;
4009 struct sockaddr_in6 *addr6 = NULL; 4009 struct sockaddr_in6 *addr6 = NULL;
4010 unsigned short snum; 4010 unsigned short snum;
4011 u32 sid, node_perm; 4011 u32 sid, node_perm;
4012 4012
4013 if (family == PF_INET) { 4013 if (family == PF_INET) {
4014 addr4 = (struct sockaddr_in *)address; 4014 addr4 = (struct sockaddr_in *)address;
4015 snum = ntohs(addr4->sin_port); 4015 snum = ntohs(addr4->sin_port);
4016 addrp = (char *)&addr4->sin_addr.s_addr; 4016 addrp = (char *)&addr4->sin_addr.s_addr;
4017 } else { 4017 } else {
4018 addr6 = (struct sockaddr_in6 *)address; 4018 addr6 = (struct sockaddr_in6 *)address;
4019 snum = ntohs(addr6->sin6_port); 4019 snum = ntohs(addr6->sin6_port);
4020 addrp = (char *)&addr6->sin6_addr.s6_addr; 4020 addrp = (char *)&addr6->sin6_addr.s6_addr;
4021 } 4021 }
4022 4022
4023 if (snum) { 4023 if (snum) {
4024 int low, high; 4024 int low, high;
4025 4025
4026 inet_get_local_port_range(sock_net(sk), &low, &high); 4026 inet_get_local_port_range(sock_net(sk), &low, &high);
4027 4027
4028 if (snum < max(PROT_SOCK, low) || snum > high) { 4028 if (snum < max(PROT_SOCK, low) || snum > high) {
4029 err = sel_netport_sid(sk->sk_protocol, 4029 err = sel_netport_sid(sk->sk_protocol,
4030 snum, &sid); 4030 snum, &sid);
4031 if (err) 4031 if (err)
4032 goto out; 4032 goto out;
4033 ad.type = LSM_AUDIT_DATA_NET; 4033 ad.type = LSM_AUDIT_DATA_NET;
4034 ad.u.net = &net; 4034 ad.u.net = &net;
4035 ad.u.net->sport = htons(snum); 4035 ad.u.net->sport = htons(snum);
4036 ad.u.net->family = family; 4036 ad.u.net->family = family;
4037 err = avc_has_perm(sksec->sid, sid, 4037 err = avc_has_perm(sksec->sid, sid,
4038 sksec->sclass, 4038 sksec->sclass,
4039 SOCKET__NAME_BIND, &ad); 4039 SOCKET__NAME_BIND, &ad);
4040 if (err) 4040 if (err)
4041 goto out; 4041 goto out;
4042 } 4042 }
4043 } 4043 }
4044 4044
4045 switch (sksec->sclass) { 4045 switch (sksec->sclass) {
4046 case SECCLASS_TCP_SOCKET: 4046 case SECCLASS_TCP_SOCKET:
4047 node_perm = TCP_SOCKET__NODE_BIND; 4047 node_perm = TCP_SOCKET__NODE_BIND;
4048 break; 4048 break;
4049 4049
4050 case SECCLASS_UDP_SOCKET: 4050 case SECCLASS_UDP_SOCKET:
4051 node_perm = UDP_SOCKET__NODE_BIND; 4051 node_perm = UDP_SOCKET__NODE_BIND;
4052 break; 4052 break;
4053 4053
4054 case SECCLASS_DCCP_SOCKET: 4054 case SECCLASS_DCCP_SOCKET:
4055 node_perm = DCCP_SOCKET__NODE_BIND; 4055 node_perm = DCCP_SOCKET__NODE_BIND;
4056 break; 4056 break;
4057 4057
4058 default: 4058 default:
4059 node_perm = RAWIP_SOCKET__NODE_BIND; 4059 node_perm = RAWIP_SOCKET__NODE_BIND;
4060 break; 4060 break;
4061 } 4061 }
4062 4062
4063 err = sel_netnode_sid(addrp, family, &sid); 4063 err = sel_netnode_sid(addrp, family, &sid);
4064 if (err) 4064 if (err)
4065 goto out; 4065 goto out;
4066 4066
4067 ad.type = LSM_AUDIT_DATA_NET; 4067 ad.type = LSM_AUDIT_DATA_NET;
4068 ad.u.net = &net; 4068 ad.u.net = &net;
4069 ad.u.net->sport = htons(snum); 4069 ad.u.net->sport = htons(snum);
4070 ad.u.net->family = family; 4070 ad.u.net->family = family;
4071 4071
4072 if (family == PF_INET) 4072 if (family == PF_INET)
4073 ad.u.net->v4info.saddr = addr4->sin_addr.s_addr; 4073 ad.u.net->v4info.saddr = addr4->sin_addr.s_addr;
4074 else 4074 else
4075 ad.u.net->v6info.saddr = addr6->sin6_addr; 4075 ad.u.net->v6info.saddr = addr6->sin6_addr;
4076 4076
4077 err = avc_has_perm(sksec->sid, sid, 4077 err = avc_has_perm(sksec->sid, sid,
4078 sksec->sclass, node_perm, &ad); 4078 sksec->sclass, node_perm, &ad);
4079 if (err) 4079 if (err)
4080 goto out; 4080 goto out;
4081 } 4081 }
4082 out: 4082 out:
4083 return err; 4083 return err;
4084 } 4084 }
4085 4085
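A minimal user-space sketch of a bind() that takes the name_bind/node_bind path above; port 80 lies below the ephemeral range, so the port SID lookup and the tcp_socket name_bind check apply (whether that port is labeled http_port_t depends on the loaded policy and is an assumption here):

/*
 * Bind a TCP socket to a non-ephemeral port.  selinux_socket_bind()
 * looks up the port SID and checks tcp_socket { name_bind }, then
 * checks node_bind against the bound address.
 */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_in addr;
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_port = htons(80);		/* non-ephemeral: name_bind applies */
	addr.sin_addr.s_addr = htonl(INADDR_ANY);

	/* EACCES here may be an SELinux name_bind/node_bind denial
	 * (or simply a missing CAP_NET_BIND_SERVICE). */
	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		perror("bind");
	close(fd);
	return 0;
}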
4086 static int selinux_socket_connect(struct socket *sock, struct sockaddr *address, int addrlen) 4086 static int selinux_socket_connect(struct socket *sock, struct sockaddr *address, int addrlen)
4087 { 4087 {
4088 struct sock *sk = sock->sk; 4088 struct sock *sk = sock->sk;
4089 struct sk_security_struct *sksec = sk->sk_security; 4089 struct sk_security_struct *sksec = sk->sk_security;
4090 int err; 4090 int err;
4091 4091
4092 err = sock_has_perm(current, sk, SOCKET__CONNECT); 4092 err = sock_has_perm(current, sk, SOCKET__CONNECT);
4093 if (err) 4093 if (err)
4094 return err; 4094 return err;
4095 4095
4096 /* 4096 /*
4097 * If a TCP or DCCP socket, check name_connect permission for the port. 4097 * If a TCP or DCCP socket, check name_connect permission for the port.
4098 */ 4098 */
4099 if (sksec->sclass == SECCLASS_TCP_SOCKET || 4099 if (sksec->sclass == SECCLASS_TCP_SOCKET ||
4100 sksec->sclass == SECCLASS_DCCP_SOCKET) { 4100 sksec->sclass == SECCLASS_DCCP_SOCKET) {
4101 struct common_audit_data ad; 4101 struct common_audit_data ad;
4102 struct lsm_network_audit net = {0,}; 4102 struct lsm_network_audit net = {0,};
4103 struct sockaddr_in *addr4 = NULL; 4103 struct sockaddr_in *addr4 = NULL;
4104 struct sockaddr_in6 *addr6 = NULL; 4104 struct sockaddr_in6 *addr6 = NULL;
4105 unsigned short snum; 4105 unsigned short snum;
4106 u32 sid, perm; 4106 u32 sid, perm;
4107 4107
4108 if (sk->sk_family == PF_INET) { 4108 if (sk->sk_family == PF_INET) {
4109 addr4 = (struct sockaddr_in *)address; 4109 addr4 = (struct sockaddr_in *)address;
4110 if (addrlen < sizeof(struct sockaddr_in)) 4110 if (addrlen < sizeof(struct sockaddr_in))
4111 return -EINVAL; 4111 return -EINVAL;
4112 snum = ntohs(addr4->sin_port); 4112 snum = ntohs(addr4->sin_port);
4113 } else { 4113 } else {
4114 addr6 = (struct sockaddr_in6 *)address; 4114 addr6 = (struct sockaddr_in6 *)address;
4115 if (addrlen < SIN6_LEN_RFC2133) 4115 if (addrlen < SIN6_LEN_RFC2133)
4116 return -EINVAL; 4116 return -EINVAL;
4117 snum = ntohs(addr6->sin6_port); 4117 snum = ntohs(addr6->sin6_port);
4118 } 4118 }
4119 4119
4120 err = sel_netport_sid(sk->sk_protocol, snum, &sid); 4120 err = sel_netport_sid(sk->sk_protocol, snum, &sid);
4121 if (err) 4121 if (err)
4122 goto out; 4122 goto out;
4123 4123
4124 perm = (sksec->sclass == SECCLASS_TCP_SOCKET) ? 4124 perm = (sksec->sclass == SECCLASS_TCP_SOCKET) ?
4125 TCP_SOCKET__NAME_CONNECT : DCCP_SOCKET__NAME_CONNECT; 4125 TCP_SOCKET__NAME_CONNECT : DCCP_SOCKET__NAME_CONNECT;
4126 4126
4127 ad.type = LSM_AUDIT_DATA_NET; 4127 ad.type = LSM_AUDIT_DATA_NET;
4128 ad.u.net = &net; 4128 ad.u.net = &net;
4129 ad.u.net->dport = htons(snum); 4129 ad.u.net->dport = htons(snum);
4130 ad.u.net->family = sk->sk_family; 4130 ad.u.net->family = sk->sk_family;
4131 err = avc_has_perm(sksec->sid, sid, sksec->sclass, perm, &ad); 4131 err = avc_has_perm(sksec->sid, sid, sksec->sclass, perm, &ad);
4132 if (err) 4132 if (err)
4133 goto out; 4133 goto out;
4134 } 4134 }
4135 4135
4136 err = selinux_netlbl_socket_connect(sk, address); 4136 err = selinux_netlbl_socket_connect(sk, address);
4137 4137
4138 out: 4138 out:
4139 return err; 4139 return err;
4140 } 4140 }
4141 4141
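For the connect() side, a comparable sketch; the destination address and port are placeholders (192.0.2.1 is a documentation address), and an SELinux tcp_socket name_connect denial is reported to the caller as EACCES:

/*
 * connect() on a TCP socket runs through the name_connect branch
 * above: sel_netport_sid() resolves the destination port's SID and
 * tcp_socket { name_connect } is checked against it.
 */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_in dst;
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	memset(&dst, 0, sizeof(dst));
	dst.sin_family = AF_INET;
	dst.sin_port = htons(443);			/* checked via name_connect */
	inet_pton(AF_INET, "192.0.2.1", &dst.sin_addr);	/* placeholder address */

	/* An SELinux denial surfaces as -EACCES from connect(). */
	if (connect(fd, (struct sockaddr *)&dst, sizeof(dst)) < 0)
		perror("connect");
	close(fd);
	return 0;
}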
4142 static int selinux_socket_listen(struct socket *sock, int backlog) 4142 static int selinux_socket_listen(struct socket *sock, int backlog)
4143 { 4143 {
4144 return sock_has_perm(current, sock->sk, SOCKET__LISTEN); 4144 return sock_has_perm(current, sock->sk, SOCKET__LISTEN);
4145 } 4145 }
4146 4146
4147 static int selinux_socket_accept(struct socket *sock, struct socket *newsock) 4147 static int selinux_socket_accept(struct socket *sock, struct socket *newsock)
4148 { 4148 {
4149 int err; 4149 int err;
4150 struct inode_security_struct *isec; 4150 struct inode_security_struct *isec;
4151 struct inode_security_struct *newisec; 4151 struct inode_security_struct *newisec;
4152 4152
4153 err = sock_has_perm(current, sock->sk, SOCKET__ACCEPT); 4153 err = sock_has_perm(current, sock->sk, SOCKET__ACCEPT);
4154 if (err) 4154 if (err)
4155 return err; 4155 return err;
4156 4156
4157 newisec = SOCK_INODE(newsock)->i_security; 4157 newisec = SOCK_INODE(newsock)->i_security;
4158 4158
4159 isec = SOCK_INODE(sock)->i_security; 4159 isec = SOCK_INODE(sock)->i_security;
4160 newisec->sclass = isec->sclass; 4160 newisec->sclass = isec->sclass;
4161 newisec->sid = isec->sid; 4161 newisec->sid = isec->sid;
4162 newisec->initialized = 1; 4162 newisec->initialized = 1;
4163 4163
4164 return 0; 4164 return 0;
4165 } 4165 }
4166 4166
4167 static int selinux_socket_sendmsg(struct socket *sock, struct msghdr *msg, 4167 static int selinux_socket_sendmsg(struct socket *sock, struct msghdr *msg,
4168 int size) 4168 int size)
4169 { 4169 {
4170 return sock_has_perm(current, sock->sk, SOCKET__WRITE); 4170 return sock_has_perm(current, sock->sk, SOCKET__WRITE);
4171 } 4171 }
4172 4172
4173 static int selinux_socket_recvmsg(struct socket *sock, struct msghdr *msg, 4173 static int selinux_socket_recvmsg(struct socket *sock, struct msghdr *msg,
4174 int size, int flags) 4174 int size, int flags)
4175 { 4175 {
4176 return sock_has_perm(current, sock->sk, SOCKET__READ); 4176 return sock_has_perm(current, sock->sk, SOCKET__READ);
4177 } 4177 }
4178 4178
4179 static int selinux_socket_getsockname(struct socket *sock) 4179 static int selinux_socket_getsockname(struct socket *sock)
4180 { 4180 {
4181 return sock_has_perm(current, sock->sk, SOCKET__GETATTR); 4181 return sock_has_perm(current, sock->sk, SOCKET__GETATTR);
4182 } 4182 }
4183 4183
4184 static int selinux_socket_getpeername(struct socket *sock) 4184 static int selinux_socket_getpeername(struct socket *sock)
4185 { 4185 {
4186 return sock_has_perm(current, sock->sk, SOCKET__GETATTR); 4186 return sock_has_perm(current, sock->sk, SOCKET__GETATTR);
4187 } 4187 }
4188 4188
4189 static int selinux_socket_setsockopt(struct socket *sock, int level, int optname) 4189 static int selinux_socket_setsockopt(struct socket *sock, int level, int optname)
4190 { 4190 {
4191 int err; 4191 int err;
4192 4192
4193 err = sock_has_perm(current, sock->sk, SOCKET__SETOPT); 4193 err = sock_has_perm(current, sock->sk, SOCKET__SETOPT);
4194 if (err) 4194 if (err)
4195 return err; 4195 return err;
4196 4196
4197 return selinux_netlbl_socket_setsockopt(sock, level, optname); 4197 return selinux_netlbl_socket_setsockopt(sock, level, optname);
4198 } 4198 }
4199 4199
4200 static int selinux_socket_getsockopt(struct socket *sock, int level, 4200 static int selinux_socket_getsockopt(struct socket *sock, int level,
4201 int optname) 4201 int optname)
4202 { 4202 {
4203 return sock_has_perm(current, sock->sk, SOCKET__GETOPT); 4203 return sock_has_perm(current, sock->sk, SOCKET__GETOPT);
4204 } 4204 }
4205 4205
4206 static int selinux_socket_shutdown(struct socket *sock, int how) 4206 static int selinux_socket_shutdown(struct socket *sock, int how)
4207 { 4207 {
4208 return sock_has_perm(current, sock->sk, SOCKET__SHUTDOWN); 4208 return sock_has_perm(current, sock->sk, SOCKET__SHUTDOWN);
4209 } 4209 }
4210 4210
4211 static int selinux_socket_unix_stream_connect(struct sock *sock, 4211 static int selinux_socket_unix_stream_connect(struct sock *sock,
4212 struct sock *other, 4212 struct sock *other,
4213 struct sock *newsk) 4213 struct sock *newsk)
4214 { 4214 {
4215 struct sk_security_struct *sksec_sock = sock->sk_security; 4215 struct sk_security_struct *sksec_sock = sock->sk_security;
4216 struct sk_security_struct *sksec_other = other->sk_security; 4216 struct sk_security_struct *sksec_other = other->sk_security;
4217 struct sk_security_struct *sksec_new = newsk->sk_security; 4217 struct sk_security_struct *sksec_new = newsk->sk_security;
4218 struct common_audit_data ad; 4218 struct common_audit_data ad;
4219 struct lsm_network_audit net = {0,}; 4219 struct lsm_network_audit net = {0,};
4220 int err; 4220 int err;
4221 4221
4222 ad.type = LSM_AUDIT_DATA_NET; 4222 ad.type = LSM_AUDIT_DATA_NET;
4223 ad.u.net = &net; 4223 ad.u.net = &net;
4224 ad.u.net->sk = other; 4224 ad.u.net->sk = other;
4225 4225
4226 err = avc_has_perm(sksec_sock->sid, sksec_other->sid, 4226 err = avc_has_perm(sksec_sock->sid, sksec_other->sid,
4227 sksec_other->sclass, 4227 sksec_other->sclass,
4228 UNIX_STREAM_SOCKET__CONNECTTO, &ad); 4228 UNIX_STREAM_SOCKET__CONNECTTO, &ad);
4229 if (err) 4229 if (err)
4230 return err; 4230 return err;
4231 4231
4232 /* server child socket */ 4232 /* server child socket */
4233 sksec_new->peer_sid = sksec_sock->sid; 4233 sksec_new->peer_sid = sksec_sock->sid;
4234 err = security_sid_mls_copy(sksec_other->sid, sksec_sock->sid, 4234 err = security_sid_mls_copy(sksec_other->sid, sksec_sock->sid,
4235 &sksec_new->sid); 4235 &sksec_new->sid);
4236 if (err) 4236 if (err)
4237 return err; 4237 return err;
4238 4238
4239 /* connecting socket */ 4239 /* connecting socket */
4240 sksec_sock->peer_sid = sksec_new->sid; 4240 sksec_sock->peer_sid = sksec_new->sid;
4241 4241
4242 return 0; 4242 return 0;
4243 } 4243 }
4244 4244
4245 static int selinux_socket_unix_may_send(struct socket *sock, 4245 static int selinux_socket_unix_may_send(struct socket *sock,
4246 struct socket *other) 4246 struct socket *other)
4247 { 4247 {
4248 struct sk_security_struct *ssec = sock->sk->sk_security; 4248 struct sk_security_struct *ssec = sock->sk->sk_security;
4249 struct sk_security_struct *osec = other->sk->sk_security; 4249 struct sk_security_struct *osec = other->sk->sk_security;
4250 struct common_audit_data ad; 4250 struct common_audit_data ad;
4251 struct lsm_network_audit net = {0,}; 4251 struct lsm_network_audit net = {0,};
4252 4252
4253 ad.type = LSM_AUDIT_DATA_NET; 4253 ad.type = LSM_AUDIT_DATA_NET;
4254 ad.u.net = &net; 4254 ad.u.net = &net;
4255 ad.u.net->sk = other->sk; 4255 ad.u.net->sk = other->sk;
4256 4256
4257 return avc_has_perm(ssec->sid, osec->sid, osec->sclass, SOCKET__SENDTO, 4257 return avc_has_perm(ssec->sid, osec->sid, osec->sclass, SOCKET__SENDTO,
4258 &ad); 4258 &ad);
4259 } 4259 }
4260 4260
4261 static int selinux_inet_sys_rcv_skb(int ifindex, char *addrp, u16 family, 4261 static int selinux_inet_sys_rcv_skb(int ifindex, char *addrp, u16 family,
4262 u32 peer_sid, 4262 u32 peer_sid,
4263 struct common_audit_data *ad) 4263 struct common_audit_data *ad)
4264 { 4264 {
4265 int err; 4265 int err;
4266 u32 if_sid; 4266 u32 if_sid;
4267 u32 node_sid; 4267 u32 node_sid;
4268 4268
4269 err = sel_netif_sid(ifindex, &if_sid); 4269 err = sel_netif_sid(ifindex, &if_sid);
4270 if (err) 4270 if (err)
4271 return err; 4271 return err;
4272 err = avc_has_perm(peer_sid, if_sid, 4272 err = avc_has_perm(peer_sid, if_sid,
4273 SECCLASS_NETIF, NETIF__INGRESS, ad); 4273 SECCLASS_NETIF, NETIF__INGRESS, ad);
4274 if (err) 4274 if (err)
4275 return err; 4275 return err;
4276 4276
4277 err = sel_netnode_sid(addrp, family, &node_sid); 4277 err = sel_netnode_sid(addrp, family, &node_sid);
4278 if (err) 4278 if (err)
4279 return err; 4279 return err;
4280 return avc_has_perm(peer_sid, node_sid, 4280 return avc_has_perm(peer_sid, node_sid,
4281 SECCLASS_NODE, NODE__RECVFROM, ad); 4281 SECCLASS_NODE, NODE__RECVFROM, ad);
4282 } 4282 }
4283 4283
4284 static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb, 4284 static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb,
4285 u16 family) 4285 u16 family)
4286 { 4286 {
4287 int err = 0; 4287 int err = 0;
4288 struct sk_security_struct *sksec = sk->sk_security; 4288 struct sk_security_struct *sksec = sk->sk_security;
4289 u32 sk_sid = sksec->sid; 4289 u32 sk_sid = sksec->sid;
4290 struct common_audit_data ad; 4290 struct common_audit_data ad;
4291 struct lsm_network_audit net = {0,}; 4291 struct lsm_network_audit net = {0,};
4292 char *addrp; 4292 char *addrp;
4293 4293
4294 ad.type = LSM_AUDIT_DATA_NET; 4294 ad.type = LSM_AUDIT_DATA_NET;
4295 ad.u.net = &net; 4295 ad.u.net = &net;
4296 ad.u.net->netif = skb->skb_iif; 4296 ad.u.net->netif = skb->skb_iif;
4297 ad.u.net->family = family; 4297 ad.u.net->family = family;
4298 err = selinux_parse_skb(skb, &ad, &addrp, 1, NULL); 4298 err = selinux_parse_skb(skb, &ad, &addrp, 1, NULL);
4299 if (err) 4299 if (err)
4300 return err; 4300 return err;
4301 4301
4302 if (selinux_secmark_enabled()) { 4302 if (selinux_secmark_enabled()) {
4303 err = avc_has_perm(sk_sid, skb->secmark, SECCLASS_PACKET, 4303 err = avc_has_perm(sk_sid, skb->secmark, SECCLASS_PACKET,
4304 PACKET__RECV, &ad); 4304 PACKET__RECV, &ad);
4305 if (err) 4305 if (err)
4306 return err; 4306 return err;
4307 } 4307 }
4308 4308
4309 err = selinux_netlbl_sock_rcv_skb(sksec, skb, family, &ad); 4309 err = selinux_netlbl_sock_rcv_skb(sksec, skb, family, &ad);
4310 if (err) 4310 if (err)
4311 return err; 4311 return err;
4312 err = selinux_xfrm_sock_rcv_skb(sksec->sid, skb, &ad); 4312 err = selinux_xfrm_sock_rcv_skb(sksec->sid, skb, &ad);
4313 4313
4314 return err; 4314 return err;
4315 } 4315 }
4316 4316
4317 static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) 4317 static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
4318 { 4318 {
4319 int err; 4319 int err;
4320 struct sk_security_struct *sksec = sk->sk_security; 4320 struct sk_security_struct *sksec = sk->sk_security;
4321 u16 family = sk->sk_family; 4321 u16 family = sk->sk_family;
4322 u32 sk_sid = sksec->sid; 4322 u32 sk_sid = sksec->sid;
4323 struct common_audit_data ad; 4323 struct common_audit_data ad;
4324 struct lsm_network_audit net = {0,}; 4324 struct lsm_network_audit net = {0,};
4325 char *addrp; 4325 char *addrp;
4326 u8 secmark_active; 4326 u8 secmark_active;
4327 u8 peerlbl_active; 4327 u8 peerlbl_active;
4328 4328
4329 if (family != PF_INET && family != PF_INET6) 4329 if (family != PF_INET && family != PF_INET6)
4330 return 0; 4330 return 0;
4331 4331
4332 /* Handle mapped IPv4 packets arriving via IPv6 sockets */ 4332 /* Handle mapped IPv4 packets arriving via IPv6 sockets */
4333 if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP)) 4333 if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP))
4334 family = PF_INET; 4334 family = PF_INET;
4335 4335
4336 /* If any sort of compatibility mode is enabled then handoff processing 4336 /* If any sort of compatibility mode is enabled then handoff processing
4337 * to the selinux_sock_rcv_skb_compat() function to deal with the 4337 * to the selinux_sock_rcv_skb_compat() function to deal with the
4338 * special handling. We do this in an attempt to keep this function 4338 * special handling. We do this in an attempt to keep this function
4339 * as fast and as clean as possible. */ 4339 * as fast and as clean as possible. */
4340 if (!selinux_policycap_netpeer) 4340 if (!selinux_policycap_netpeer)
4341 return selinux_sock_rcv_skb_compat(sk, skb, family); 4341 return selinux_sock_rcv_skb_compat(sk, skb, family);
4342 4342
4343 secmark_active = selinux_secmark_enabled(); 4343 secmark_active = selinux_secmark_enabled();
4344 peerlbl_active = selinux_peerlbl_enabled(); 4344 peerlbl_active = selinux_peerlbl_enabled();
4345 if (!secmark_active && !peerlbl_active) 4345 if (!secmark_active && !peerlbl_active)
4346 return 0; 4346 return 0;
4347 4347
4348 ad.type = LSM_AUDIT_DATA_NET; 4348 ad.type = LSM_AUDIT_DATA_NET;
4349 ad.u.net = &net; 4349 ad.u.net = &net;
4350 ad.u.net->netif = skb->skb_iif; 4350 ad.u.net->netif = skb->skb_iif;
4351 ad.u.net->family = family; 4351 ad.u.net->family = family;
4352 err = selinux_parse_skb(skb, &ad, &addrp, 1, NULL); 4352 err = selinux_parse_skb(skb, &ad, &addrp, 1, NULL);
4353 if (err) 4353 if (err)
4354 return err; 4354 return err;
4355 4355
4356 if (peerlbl_active) { 4356 if (peerlbl_active) {
4357 u32 peer_sid; 4357 u32 peer_sid;
4358 4358
4359 err = selinux_skb_peerlbl_sid(skb, family, &peer_sid); 4359 err = selinux_skb_peerlbl_sid(skb, family, &peer_sid);
4360 if (err) 4360 if (err)
4361 return err; 4361 return err;
4362 err = selinux_inet_sys_rcv_skb(skb->skb_iif, addrp, family, 4362 err = selinux_inet_sys_rcv_skb(skb->skb_iif, addrp, family,
4363 peer_sid, &ad); 4363 peer_sid, &ad);
4364 if (err) { 4364 if (err) {
4365 selinux_netlbl_err(skb, err, 0); 4365 selinux_netlbl_err(skb, err, 0);
4366 return err; 4366 return err;
4367 } 4367 }
4368 err = avc_has_perm(sk_sid, peer_sid, SECCLASS_PEER, 4368 err = avc_has_perm(sk_sid, peer_sid, SECCLASS_PEER,
4369 PEER__RECV, &ad); 4369 PEER__RECV, &ad);
4370 if (err) { 4370 if (err) {
4371 selinux_netlbl_err(skb, err, 0); 4371 selinux_netlbl_err(skb, err, 0);
4372 return err; 4372 return err;
4373 } 4373 }
4374 } 4374 }
4375 4375
4376 if (secmark_active) { 4376 if (secmark_active) {
4377 err = avc_has_perm(sk_sid, skb->secmark, SECCLASS_PACKET, 4377 err = avc_has_perm(sk_sid, skb->secmark, SECCLASS_PACKET,
4378 PACKET__RECV, &ad); 4378 PACKET__RECV, &ad);
4379 if (err) 4379 if (err)
4380 return err; 4380 return err;
4381 } 4381 }
4382 4382
4383 return err; 4383 return err;
4384 } 4384 }
4385 4385
4386 static int selinux_socket_getpeersec_stream(struct socket *sock, char __user *optval, 4386 static int selinux_socket_getpeersec_stream(struct socket *sock, char __user *optval,
4387 int __user *optlen, unsigned len) 4387 int __user *optlen, unsigned len)
4388 { 4388 {
4389 int err = 0; 4389 int err = 0;
4390 char *scontext; 4390 char *scontext;
4391 u32 scontext_len; 4391 u32 scontext_len;
4392 struct sk_security_struct *sksec = sock->sk->sk_security; 4392 struct sk_security_struct *sksec = sock->sk->sk_security;
4393 u32 peer_sid = SECSID_NULL; 4393 u32 peer_sid = SECSID_NULL;
4394 4394
4395 if (sksec->sclass == SECCLASS_UNIX_STREAM_SOCKET || 4395 if (sksec->sclass == SECCLASS_UNIX_STREAM_SOCKET ||
4396 sksec->sclass == SECCLASS_TCP_SOCKET) 4396 sksec->sclass == SECCLASS_TCP_SOCKET)
4397 peer_sid = sksec->peer_sid; 4397 peer_sid = sksec->peer_sid;
4398 if (peer_sid == SECSID_NULL) 4398 if (peer_sid == SECSID_NULL)
4399 return -ENOPROTOOPT; 4399 return -ENOPROTOOPT;
4400 4400
4401 err = security_sid_to_context(peer_sid, &scontext, &scontext_len); 4401 err = security_sid_to_context(peer_sid, &scontext, &scontext_len);
4402 if (err) 4402 if (err)
4403 return err; 4403 return err;
4404 4404
4405 if (scontext_len > len) { 4405 if (scontext_len > len) {
4406 err = -ERANGE; 4406 err = -ERANGE;
4407 goto out_len; 4407 goto out_len;
4408 } 4408 }
4409 4409
4410 if (copy_to_user(optval, scontext, scontext_len)) 4410 if (copy_to_user(optval, scontext, scontext_len))
4411 err = -EFAULT; 4411 err = -EFAULT;
4412 4412
4413 out_len: 4413 out_len:
4414 if (put_user(scontext_len, optlen)) 4414 if (put_user(scontext_len, optlen))
4415 err = -EFAULT; 4415 err = -EFAULT;
4416 kfree(scontext); 4416 kfree(scontext);
4417 return err; 4417 return err;
4418 } 4418 }
4419 4419
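The hook above backs the SO_PEERSEC socket option. A minimal sketch of reading the peer label from user space follows; the socket path is hypothetical, and the fallback define is the asm-generic value in case the libc headers do not expose it. As in the code above, sockets without a usable peer SID report ENOPROTOOPT:

/*
 * Read the peer security context of a connected stream socket via
 * getsockopt(SO_PEERSEC), which is serviced by
 * selinux_socket_getpeersec_stream().
 */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>

#ifndef SO_PEERSEC
#define SO_PEERSEC	31	/* asm-generic value */
#endif

static void print_peer_label(int fd)
{
	char label[256];
	socklen_t len = sizeof(label);

	if (getsockopt(fd, SOL_SOCKET, SO_PEERSEC, label, &len) == 0)
		printf("peer label: %.*s\n", (int)len, label);
	else
		perror("getsockopt(SO_PEERSEC)");
}

int main(void)
{
	struct sockaddr_un addr = { .sun_family = AF_UNIX };
	int fd = socket(AF_UNIX, SOCK_STREAM, 0);

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	/* Hypothetical path; any listening AF_UNIX stream socket will do. */
	strncpy(addr.sun_path, "/run/myapp.sock", sizeof(addr.sun_path) - 1);
	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) == 0)
		print_peer_label(fd);
	else
		perror("connect");
	close(fd);
	return 0;
}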
4420 static int selinux_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid) 4420 static int selinux_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid)
4421 { 4421 {
4422 u32 peer_secid = SECSID_NULL; 4422 u32 peer_secid = SECSID_NULL;
4423 u16 family; 4423 u16 family;
4424 4424
4425 if (skb && skb->protocol == htons(ETH_P_IP)) 4425 if (skb && skb->protocol == htons(ETH_P_IP))
4426 family = PF_INET; 4426 family = PF_INET;
4427 else if (skb && skb->protocol == htons(ETH_P_IPV6)) 4427 else if (skb && skb->protocol == htons(ETH_P_IPV6))
4428 family = PF_INET6; 4428 family = PF_INET6;
4429 else if (sock) 4429 else if (sock)
4430 family = sock->sk->sk_family; 4430 family = sock->sk->sk_family;
4431 else 4431 else
4432 goto out; 4432 goto out;
4433 4433
4434 if (sock && family == PF_UNIX) 4434 if (sock && family == PF_UNIX)
4435 selinux_inode_getsecid(SOCK_INODE(sock), &peer_secid); 4435 selinux_inode_getsecid(SOCK_INODE(sock), &peer_secid);
4436 else if (skb) 4436 else if (skb)
4437 selinux_skb_peerlbl_sid(skb, family, &peer_secid); 4437 selinux_skb_peerlbl_sid(skb, family, &peer_secid);
4438 4438
4439 out: 4439 out:
4440 *secid = peer_secid; 4440 *secid = peer_secid;
4441 if (peer_secid == SECSID_NULL) 4441 if (peer_secid == SECSID_NULL)
4442 return -EINVAL; 4442 return -EINVAL;
4443 return 0; 4443 return 0;
4444 } 4444 }
4445 4445
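The datagram variant feeds SCM_SECURITY ancillary data when SO_PASSSEC is enabled on the receiver. A minimal receive-side sketch; the socket path is hypothetical and the fallback defines are the uapi values, supplied only in case libc does not expose them:

/*
 * Receive one datagram together with the sender's security label,
 * which the kernel resolves via selinux_socket_getpeersec_dgram().
 */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/un.h>
#include <unistd.h>

#ifndef SO_PASSSEC
#define SO_PASSSEC	34	/* asm-generic value */
#endif
#ifndef SCM_SECURITY
#define SCM_SECURITY	0x03
#endif

static void recv_one_labeled_dgram(int fd)
{
	char data[512], cbuf[256];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cmsg;
	int one = 1;

	if (setsockopt(fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one)) < 0)
		perror("setsockopt(SO_PASSSEC)");

	/* Blocks until a datagram arrives. */
	if (recvmsg(fd, &msg, 0) < 0) {
		perror("recvmsg");
		return;
	}
	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
		if (cmsg->cmsg_level == SOL_SOCKET &&
		    cmsg->cmsg_type == SCM_SECURITY)
			printf("sender label: %.*s\n",
			       (int)(cmsg->cmsg_len - CMSG_LEN(0)),
			       (char *)CMSG_DATA(cmsg));
}

int main(void)
{
	struct sockaddr_un addr = { .sun_family = AF_UNIX };
	int fd = socket(AF_UNIX, SOCK_DGRAM, 0);

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	/* Hypothetical path for the receiving socket. */
	strncpy(addr.sun_path, "/tmp/labeled.sock", sizeof(addr.sun_path) - 1);
	unlink(addr.sun_path);
	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		perror("bind");
		return 1;
	}
	recv_one_labeled_dgram(fd);
	close(fd);
	return 0;
}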
4446 static int selinux_sk_alloc_security(struct sock *sk, int family, gfp_t priority) 4446 static int selinux_sk_alloc_security(struct sock *sk, int family, gfp_t priority)
4447 { 4447 {
4448 struct sk_security_struct *sksec; 4448 struct sk_security_struct *sksec;
4449 4449
4450 sksec = kzalloc(sizeof(*sksec), priority); 4450 sksec = kzalloc(sizeof(*sksec), priority);
4451 if (!sksec) 4451 if (!sksec)
4452 return -ENOMEM; 4452 return -ENOMEM;
4453 4453
4454 sksec->peer_sid = SECINITSID_UNLABELED; 4454 sksec->peer_sid = SECINITSID_UNLABELED;
4455 sksec->sid = SECINITSID_UNLABELED; 4455 sksec->sid = SECINITSID_UNLABELED;
4456 selinux_netlbl_sk_security_reset(sksec); 4456 selinux_netlbl_sk_security_reset(sksec);
4457 sk->sk_security = sksec; 4457 sk->sk_security = sksec;
4458 4458
4459 return 0; 4459 return 0;
4460 } 4460 }
4461 4461
4462 static void selinux_sk_free_security(struct sock *sk) 4462 static void selinux_sk_free_security(struct sock *sk)
4463 { 4463 {
4464 struct sk_security_struct *sksec = sk->sk_security; 4464 struct sk_security_struct *sksec = sk->sk_security;
4465 4465
4466 sk->sk_security = NULL; 4466 sk->sk_security = NULL;
4467 selinux_netlbl_sk_security_free(sksec); 4467 selinux_netlbl_sk_security_free(sksec);
4468 kfree(sksec); 4468 kfree(sksec);
4469 } 4469 }
4470 4470
4471 static void selinux_sk_clone_security(const struct sock *sk, struct sock *newsk) 4471 static void selinux_sk_clone_security(const struct sock *sk, struct sock *newsk)
4472 { 4472 {
4473 struct sk_security_struct *sksec = sk->sk_security; 4473 struct sk_security_struct *sksec = sk->sk_security;
4474 struct sk_security_struct *newsksec = newsk->sk_security; 4474 struct sk_security_struct *newsksec = newsk->sk_security;
4475 4475
4476 newsksec->sid = sksec->sid; 4476 newsksec->sid = sksec->sid;
4477 newsksec->peer_sid = sksec->peer_sid; 4477 newsksec->peer_sid = sksec->peer_sid;
4478 newsksec->sclass = sksec->sclass; 4478 newsksec->sclass = sksec->sclass;
4479 4479
4480 selinux_netlbl_sk_security_reset(newsksec); 4480 selinux_netlbl_sk_security_reset(newsksec);
4481 } 4481 }
4482 4482
4483 static void selinux_sk_getsecid(struct sock *sk, u32 *secid) 4483 static void selinux_sk_getsecid(struct sock *sk, u32 *secid)
4484 { 4484 {
4485 if (!sk) 4485 if (!sk)
4486 *secid = SECINITSID_ANY_SOCKET; 4486 *secid = SECINITSID_ANY_SOCKET;
4487 else { 4487 else {
4488 struct sk_security_struct *sksec = sk->sk_security; 4488 struct sk_security_struct *sksec = sk->sk_security;
4489 4489
4490 *secid = sksec->sid; 4490 *secid = sksec->sid;
4491 } 4491 }
4492 } 4492 }
4493 4493
4494 static void selinux_sock_graft(struct sock *sk, struct socket *parent) 4494 static void selinux_sock_graft(struct sock *sk, struct socket *parent)
4495 { 4495 {
4496 struct inode_security_struct *isec = SOCK_INODE(parent)->i_security; 4496 struct inode_security_struct *isec = SOCK_INODE(parent)->i_security;
4497 struct sk_security_struct *sksec = sk->sk_security; 4497 struct sk_security_struct *sksec = sk->sk_security;
4498 4498
4499 if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6 || 4499 if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6 ||
4500 sk->sk_family == PF_UNIX) 4500 sk->sk_family == PF_UNIX)
4501 isec->sid = sksec->sid; 4501 isec->sid = sksec->sid;
4502 sksec->sclass = isec->sclass; 4502 sksec->sclass = isec->sclass;
4503 } 4503 }
4504 4504
4505 static int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb, 4505 static int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb,
4506 struct request_sock *req) 4506 struct request_sock *req)
4507 { 4507 {
4508 struct sk_security_struct *sksec = sk->sk_security; 4508 struct sk_security_struct *sksec = sk->sk_security;
4509 int err; 4509 int err;
4510 u16 family = req->rsk_ops->family; 4510 u16 family = req->rsk_ops->family;
4511 u32 connsid; 4511 u32 connsid;
4512 u32 peersid; 4512 u32 peersid;
4513 4513
4514 err = selinux_skb_peerlbl_sid(skb, family, &peersid); 4514 err = selinux_skb_peerlbl_sid(skb, family, &peersid);
4515 if (err) 4515 if (err)
4516 return err; 4516 return err;
4517 err = selinux_conn_sid(sksec->sid, peersid, &connsid); 4517 err = selinux_conn_sid(sksec->sid, peersid, &connsid);
4518 if (err) 4518 if (err)
4519 return err; 4519 return err;
4520 req->secid = connsid; 4520 req->secid = connsid;
4521 req->peer_secid = peersid; 4521 req->peer_secid = peersid;
4522 4522
4523 return selinux_netlbl_inet_conn_request(req, family); 4523 return selinux_netlbl_inet_conn_request(req, family);
4524 } 4524 }
4525 4525
4526 static void selinux_inet_csk_clone(struct sock *newsk, 4526 static void selinux_inet_csk_clone(struct sock *newsk,
4527 const struct request_sock *req) 4527 const struct request_sock *req)
4528 { 4528 {
4529 struct sk_security_struct *newsksec = newsk->sk_security; 4529 struct sk_security_struct *newsksec = newsk->sk_security;
4530 4530
4531 newsksec->sid = req->secid; 4531 newsksec->sid = req->secid;
4532 newsksec->peer_sid = req->peer_secid; 4532 newsksec->peer_sid = req->peer_secid;
4533 /* NOTE: Ideally, we should also get the isec->sid for the 4533 /* NOTE: Ideally, we should also get the isec->sid for the
4534 new socket in sync, but we don't have the isec available yet. 4534 new socket in sync, but we don't have the isec available yet.
4535 So we will wait until sock_graft to do it, by which 4535 So we will wait until sock_graft to do it, by which
4536 time it will have been created and available. */ 4536 time it will have been created and available. */
4537 4537
4538 /* We don't need to take any sort of lock here as we are the only 4538 /* We don't need to take any sort of lock here as we are the only
4539 * thread with access to newsksec */ 4539 * thread with access to newsksec */
4540 selinux_netlbl_inet_csk_clone(newsk, req->rsk_ops->family); 4540 selinux_netlbl_inet_csk_clone(newsk, req->rsk_ops->family);
4541 } 4541 }
4542 4542
4543 static void selinux_inet_conn_established(struct sock *sk, struct sk_buff *skb) 4543 static void selinux_inet_conn_established(struct sock *sk, struct sk_buff *skb)
4544 { 4544 {
4545 u16 family = sk->sk_family; 4545 u16 family = sk->sk_family;
4546 struct sk_security_struct *sksec = sk->sk_security; 4546 struct sk_security_struct *sksec = sk->sk_security;
4547 4547
4548 /* handle mapped IPv4 packets arriving via IPv6 sockets */ 4548 /* handle mapped IPv4 packets arriving via IPv6 sockets */
4549 if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP)) 4549 if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP))
4550 family = PF_INET; 4550 family = PF_INET;
4551 4551
4552 selinux_skb_peerlbl_sid(skb, family, &sksec->peer_sid); 4552 selinux_skb_peerlbl_sid(skb, family, &sksec->peer_sid);
4553 } 4553 }
4554 4554
4555 static void selinux_skb_owned_by(struct sk_buff *skb, struct sock *sk) 4555 static void selinux_skb_owned_by(struct sk_buff *skb, struct sock *sk)
4556 { 4556 {
4557 skb_set_owner_w(skb, sk); 4557 skb_set_owner_w(skb, sk);
4558 } 4558 }
4559 4559
4560 static int selinux_secmark_relabel_packet(u32 sid) 4560 static int selinux_secmark_relabel_packet(u32 sid)
4561 { 4561 {
4562 const struct task_security_struct *__tsec; 4562 const struct task_security_struct *__tsec;
4563 u32 tsid; 4563 u32 tsid;
4564 4564
4565 __tsec = current_security(); 4565 __tsec = current_security();
4566 tsid = __tsec->sid; 4566 tsid = __tsec->sid;
4567 4567
4568 return avc_has_perm(tsid, sid, SECCLASS_PACKET, PACKET__RELABELTO, NULL); 4568 return avc_has_perm(tsid, sid, SECCLASS_PACKET, PACKET__RELABELTO, NULL);
4569 } 4569 }
4570 4570
4571 static void selinux_secmark_refcount_inc(void) 4571 static void selinux_secmark_refcount_inc(void)
4572 { 4572 {
4573 atomic_inc(&selinux_secmark_refcount); 4573 atomic_inc(&selinux_secmark_refcount);
4574 } 4574 }
4575 4575
4576 static void selinux_secmark_refcount_dec(void) 4576 static void selinux_secmark_refcount_dec(void)
4577 { 4577 {
4578 atomic_dec(&selinux_secmark_refcount); 4578 atomic_dec(&selinux_secmark_refcount);
4579 } 4579 }
4580 4580
4581 static void selinux_req_classify_flow(const struct request_sock *req, 4581 static void selinux_req_classify_flow(const struct request_sock *req,
4582 struct flowi *fl) 4582 struct flowi *fl)
4583 { 4583 {
4584 fl->flowi_secid = req->secid; 4584 fl->flowi_secid = req->secid;
4585 } 4585 }
4586 4586
4587 static int selinux_tun_dev_alloc_security(void **security) 4587 static int selinux_tun_dev_alloc_security(void **security)
4588 { 4588 {
4589 struct tun_security_struct *tunsec; 4589 struct tun_security_struct *tunsec;
4590 4590
4591 tunsec = kzalloc(sizeof(*tunsec), GFP_KERNEL); 4591 tunsec = kzalloc(sizeof(*tunsec), GFP_KERNEL);
4592 if (!tunsec) 4592 if (!tunsec)
4593 return -ENOMEM; 4593 return -ENOMEM;
4594 tunsec->sid = current_sid(); 4594 tunsec->sid = current_sid();
4595 4595
4596 *security = tunsec; 4596 *security = tunsec;
4597 return 0; 4597 return 0;
4598 } 4598 }
4599 4599
4600 static void selinux_tun_dev_free_security(void *security) 4600 static void selinux_tun_dev_free_security(void *security)
4601 { 4601 {
4602 kfree(security); 4602 kfree(security);
4603 } 4603 }
4604 4604
4605 static int selinux_tun_dev_create(void) 4605 static int selinux_tun_dev_create(void)
4606 { 4606 {
4607 u32 sid = current_sid(); 4607 u32 sid = current_sid();
4608 4608
4609 /* we aren't taking into account the "sockcreate" SID since the socket 4609 /* we aren't taking into account the "sockcreate" SID since the socket
4610 * that is being created here is not a socket in the traditional sense, 4610 * that is being created here is not a socket in the traditional sense,
4611 * instead it is a private sock, accessible only to the kernel, and 4611 * instead it is a private sock, accessible only to the kernel, and
4612 * representing a wide range of network traffic spanning multiple 4612 * representing a wide range of network traffic spanning multiple
4613 * connections unlike traditional sockets - check the TUN driver to 4613 * connections unlike traditional sockets - check the TUN driver to
4614 * get a better understanding of why this socket is special */ 4614 * get a better understanding of why this socket is special */
4615 4615
4616 return avc_has_perm(sid, sid, SECCLASS_TUN_SOCKET, TUN_SOCKET__CREATE, 4616 return avc_has_perm(sid, sid, SECCLASS_TUN_SOCKET, TUN_SOCKET__CREATE,
4617 NULL); 4617 NULL);
4618 } 4618 }
4619 4619
4620 static int selinux_tun_dev_attach_queue(void *security) 4620 static int selinux_tun_dev_attach_queue(void *security)
4621 { 4621 {
4622 struct tun_security_struct *tunsec = security; 4622 struct tun_security_struct *tunsec = security;
4623 4623
4624 return avc_has_perm(current_sid(), tunsec->sid, SECCLASS_TUN_SOCKET, 4624 return avc_has_perm(current_sid(), tunsec->sid, SECCLASS_TUN_SOCKET,
4625 TUN_SOCKET__ATTACH_QUEUE, NULL); 4625 TUN_SOCKET__ATTACH_QUEUE, NULL);
4626 } 4626 }
4627 4627
4628 static int selinux_tun_dev_attach(struct sock *sk, void *security) 4628 static int selinux_tun_dev_attach(struct sock *sk, void *security)
4629 { 4629 {
4630 struct tun_security_struct *tunsec = security; 4630 struct tun_security_struct *tunsec = security;
4631 struct sk_security_struct *sksec = sk->sk_security; 4631 struct sk_security_struct *sksec = sk->sk_security;
4632 4632
4633 /* we don't currently perform any NetLabel based labeling here and it 4633 /* we don't currently perform any NetLabel based labeling here and it
4634 * isn't clear that we would want to do so anyway; while we could apply 4634 * isn't clear that we would want to do so anyway; while we could apply
4635 * labeling without the support of the TUN user the resulting labeled 4635 * labeling without the support of the TUN user the resulting labeled
4636 * traffic from the other end of the connection would almost certainly 4636 * traffic from the other end of the connection would almost certainly
4637 * cause confusion to the TUN user that had no idea network labeling 4637 * cause confusion to the TUN user that had no idea network labeling
4638 * protocols were being used */ 4638 * protocols were being used */
4639 4639
4640 sksec->sid = tunsec->sid; 4640 sksec->sid = tunsec->sid;
4641 sksec->sclass = SECCLASS_TUN_SOCKET; 4641 sksec->sclass = SECCLASS_TUN_SOCKET;
4642 4642
4643 return 0; 4643 return 0;
4644 } 4644 }
4645 4645
4646 static int selinux_tun_dev_open(void *security) 4646 static int selinux_tun_dev_open(void *security)
4647 { 4647 {
4648 struct tun_security_struct *tunsec = security; 4648 struct tun_security_struct *tunsec = security;
4649 u32 sid = current_sid(); 4649 u32 sid = current_sid();
4650 int err; 4650 int err;
4651 4651
4652 err = avc_has_perm(sid, tunsec->sid, SECCLASS_TUN_SOCKET, 4652 err = avc_has_perm(sid, tunsec->sid, SECCLASS_TUN_SOCKET,
4653 TUN_SOCKET__RELABELFROM, NULL); 4653 TUN_SOCKET__RELABELFROM, NULL);
4654 if (err) 4654 if (err)
4655 return err; 4655 return err;
4656 err = avc_has_perm(sid, sid, SECCLASS_TUN_SOCKET, 4656 err = avc_has_perm(sid, sid, SECCLASS_TUN_SOCKET,
4657 TUN_SOCKET__RELABELTO, NULL); 4657 TUN_SOCKET__RELABELTO, NULL);
4658 if (err) 4658 if (err)
4659 return err; 4659 return err;
4660 tunsec->sid = sid; 4660 tunsec->sid = sid;
4661 4661
4662 return 0; 4662 return 0;
4663 } 4663 }
4664 4664
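The TUN hooks above are reached through the TUN driver's TUNSETIFF path. A minimal user-space sketch; the interface name is a placeholder, and the caller additionally needs CAP_NET_ADMIN:

/*
 * Create/attach a TUN device.  The TUN driver invokes
 * selinux_tun_dev_create()/selinux_tun_dev_attach() on this path; a
 * denial of the tun_socket permissions is reported as -EACCES from
 * the ioctl (a missing CAP_NET_ADMIN as -EPERM).
 */
#include <fcntl.h>
#include <linux/if_tun.h>
#include <net/if.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	struct ifreq ifr;
	int fd = open("/dev/net/tun", O_RDWR);

	if (fd < 0) {
		perror("open /dev/net/tun");
		return 1;
	}
	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
	strncpy(ifr.ifr_name, "tun-demo0", IFNAMSIZ - 1);	/* placeholder name */

	if (ioctl(fd, TUNSETIFF, &ifr) < 0)
		perror("ioctl(TUNSETIFF)");
	close(fd);
	return 0;
}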
4665 static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb) 4665 static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb)
4666 { 4666 {
4667 int err = 0; 4667 int err = 0;
4668 u32 perm; 4668 u32 perm;
4669 struct nlmsghdr *nlh; 4669 struct nlmsghdr *nlh;
4670 struct sk_security_struct *sksec = sk->sk_security; 4670 struct sk_security_struct *sksec = sk->sk_security;
4671 4671
4672 if (skb->len < NLMSG_HDRLEN) { 4672 if (skb->len < NLMSG_HDRLEN) {
4673 err = -EINVAL; 4673 err = -EINVAL;
4674 goto out; 4674 goto out;
4675 } 4675 }
4676 nlh = nlmsg_hdr(skb); 4676 nlh = nlmsg_hdr(skb);
4677 4677
4678 err = selinux_nlmsg_lookup(sksec->sclass, nlh->nlmsg_type, &perm); 4678 err = selinux_nlmsg_lookup(sksec->sclass, nlh->nlmsg_type, &perm);
4679 if (err) { 4679 if (err) {
4680 if (err == -EINVAL) { 4680 if (err == -EINVAL) {
4681 audit_log(current->audit_context, GFP_KERNEL, AUDIT_SELINUX_ERR, 4681 audit_log(current->audit_context, GFP_KERNEL, AUDIT_SELINUX_ERR,
4682 "SELinux: unrecognized netlink message" 4682 "SELinux: unrecognized netlink message"
4683 " type=%hu for sclass=%hu\n", 4683 " type=%hu for sclass=%hu\n",
4684 nlh->nlmsg_type, sksec->sclass); 4684 nlh->nlmsg_type, sksec->sclass);
4685 if (!selinux_enforcing || security_get_allow_unknown()) 4685 if (!selinux_enforcing || security_get_allow_unknown())
4686 err = 0; 4686 err = 0;
4687 } 4687 }
4688 4688
4689 /* Ignore */ 4689 /* Ignore */
4690 if (err == -ENOENT) 4690 if (err == -ENOENT)
4691 err = 0; 4691 err = 0;
4692 goto out; 4692 goto out;
4693 } 4693 }
4694 4694
4695 err = sock_has_perm(current, sk, perm); 4695 err = sock_has_perm(current, sk, perm);
4696 out: 4696 out:
4697 return err; 4697 return err;
4698 } 4698 }
4699 4699
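A minimal user-space sketch of a netlink request that passes through selinux_nlmsg_perm(): selinux_nlmsg_lookup() maps RTM_GETLINK on a NETLINK_ROUTE socket to the nlmsg_read permission (per the nlmsgtab mapping), which sock_has_perm() then checks; a denial is reported as EACCES on send:

/*
 * Send an RTM_GETLINK dump request on a NETLINK_ROUTE socket.
 */
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct {
		struct nlmsghdr nlh;
		struct rtgenmsg gen;
	} req;
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(req.gen));
	req.nlh.nlmsg_type = RTM_GETLINK;	/* checked as nlmsg_read */
	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	req.gen.rtgen_family = AF_UNSPEC;

	/* A denial from selinux_nlmsg_perm() surfaces as -EACCES here. */
	if (send(fd, &req, req.nlh.nlmsg_len, 0) < 0)
		perror("send");
	close(fd);
	return 0;
}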
4700 #ifdef CONFIG_NETFILTER 4700 #ifdef CONFIG_NETFILTER
4701 4701
4702 static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex, 4702 static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex,
4703 u16 family) 4703 u16 family)
4704 { 4704 {
4705 int err; 4705 int err;
4706 char *addrp; 4706 char *addrp;
4707 u32 peer_sid; 4707 u32 peer_sid;
4708 struct common_audit_data ad; 4708 struct common_audit_data ad;
4709 struct lsm_network_audit net = {0,}; 4709 struct lsm_network_audit net = {0,};
4710 u8 secmark_active; 4710 u8 secmark_active;
4711 u8 netlbl_active; 4711 u8 netlbl_active;
4712 u8 peerlbl_active; 4712 u8 peerlbl_active;
4713 4713
4714 if (!selinux_policycap_netpeer) 4714 if (!selinux_policycap_netpeer)
4715 return NF_ACCEPT; 4715 return NF_ACCEPT;
4716 4716
4717 secmark_active = selinux_secmark_enabled(); 4717 secmark_active = selinux_secmark_enabled();
4718 netlbl_active = netlbl_enabled(); 4718 netlbl_active = netlbl_enabled();
4719 peerlbl_active = selinux_peerlbl_enabled(); 4719 peerlbl_active = selinux_peerlbl_enabled();
4720 if (!secmark_active && !peerlbl_active) 4720 if (!secmark_active && !peerlbl_active)
4721 return NF_ACCEPT; 4721 return NF_ACCEPT;
4722 4722
4723 if (selinux_skb_peerlbl_sid(skb, family, &peer_sid) != 0) 4723 if (selinux_skb_peerlbl_sid(skb, family, &peer_sid) != 0)
4724 return NF_DROP; 4724 return NF_DROP;
4725 4725
4726 ad.type = LSM_AUDIT_DATA_NET; 4726 ad.type = LSM_AUDIT_DATA_NET;
4727 ad.u.net = &net; 4727 ad.u.net = &net;
4728 ad.u.net->netif = ifindex; 4728 ad.u.net->netif = ifindex;
4729 ad.u.net->family = family; 4729 ad.u.net->family = family;
4730 if (selinux_parse_skb(skb, &ad, &addrp, 1, NULL) != 0) 4730 if (selinux_parse_skb(skb, &ad, &addrp, 1, NULL) != 0)
4731 return NF_DROP; 4731 return NF_DROP;
4732 4732
4733 if (peerlbl_active) { 4733 if (peerlbl_active) {
4734 err = selinux_inet_sys_rcv_skb(ifindex, addrp, family, 4734 err = selinux_inet_sys_rcv_skb(ifindex, addrp, family,
4735 peer_sid, &ad); 4735 peer_sid, &ad);
4736 if (err) { 4736 if (err) {
4737 selinux_netlbl_err(skb, err, 1); 4737 selinux_netlbl_err(skb, err, 1);
4738 return NF_DROP; 4738 return NF_DROP;
4739 } 4739 }
4740 } 4740 }
4741 4741
4742 if (secmark_active) 4742 if (secmark_active)
4743 if (avc_has_perm(peer_sid, skb->secmark, 4743 if (avc_has_perm(peer_sid, skb->secmark,
4744 SECCLASS_PACKET, PACKET__FORWARD_IN, &ad)) 4744 SECCLASS_PACKET, PACKET__FORWARD_IN, &ad))
4745 return NF_DROP; 4745 return NF_DROP;
4746 4746
4747 if (netlbl_active) 4747 if (netlbl_active)
4748 /* we do this in the FORWARD path and not the POST_ROUTING 4748 /* we do this in the FORWARD path and not the POST_ROUTING
4749 * path because we want to make sure we apply the necessary 4749 * path because we want to make sure we apply the necessary
4750 * labeling before IPsec is applied so we can leverage AH 4750 * labeling before IPsec is applied so we can leverage AH
4751 * protection */ 4751 * protection */
4752 if (selinux_netlbl_skbuff_setsid(skb, family, peer_sid) != 0) 4752 if (selinux_netlbl_skbuff_setsid(skb, family, peer_sid) != 0)
4753 return NF_DROP; 4753 return NF_DROP;
4754 4754
4755 return NF_ACCEPT; 4755 return NF_ACCEPT;
4756 } 4756 }
4757 4757
4758 static unsigned int selinux_ipv4_forward(const struct nf_hook_ops *ops, 4758 static unsigned int selinux_ipv4_forward(const struct nf_hook_ops *ops,
4759 struct sk_buff *skb, 4759 struct sk_buff *skb,
4760 const struct net_device *in, 4760 const struct net_device *in,
4761 const struct net_device *out, 4761 const struct net_device *out,
4762 int (*okfn)(struct sk_buff *)) 4762 int (*okfn)(struct sk_buff *))
4763 { 4763 {
4764 return selinux_ip_forward(skb, in->ifindex, PF_INET); 4764 return selinux_ip_forward(skb, in->ifindex, PF_INET);
4765 } 4765 }
4766 4766
4767 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 4767 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
4768 static unsigned int selinux_ipv6_forward(const struct nf_hook_ops *ops, 4768 static unsigned int selinux_ipv6_forward(const struct nf_hook_ops *ops,
4769 struct sk_buff *skb, 4769 struct sk_buff *skb,
4770 const struct net_device *in, 4770 const struct net_device *in,
4771 const struct net_device *out, 4771 const struct net_device *out,
4772 int (*okfn)(struct sk_buff *)) 4772 int (*okfn)(struct sk_buff *))
4773 { 4773 {
4774 return selinux_ip_forward(skb, in->ifindex, PF_INET6); 4774 return selinux_ip_forward(skb, in->ifindex, PF_INET6);
4775 } 4775 }
4776 #endif /* IPV6 */ 4776 #endif /* IPV6 */
4777 4777
4778 static unsigned int selinux_ip_output(struct sk_buff *skb, 4778 static unsigned int selinux_ip_output(struct sk_buff *skb,
4779 u16 family) 4779 u16 family)
4780 { 4780 {
4781 struct sock *sk; 4781 struct sock *sk;
4782 u32 sid; 4782 u32 sid;
4783 4783
4784 if (!netlbl_enabled()) 4784 if (!netlbl_enabled())
4785 return NF_ACCEPT; 4785 return NF_ACCEPT;
4786 4786
4787 /* we do this in the LOCAL_OUT path and not the POST_ROUTING path 4787 /* we do this in the LOCAL_OUT path and not the POST_ROUTING path
4788 * because we want to make sure we apply the necessary labeling 4788 * because we want to make sure we apply the necessary labeling
4789 * before IPsec is applied so we can leverage AH protection */ 4789 * before IPsec is applied so we can leverage AH protection */
4790 sk = skb->sk; 4790 sk = skb->sk;
4791 if (sk) { 4791 if (sk) {
4792 struct sk_security_struct *sksec; 4792 struct sk_security_struct *sksec;
4793 4793
4794 if (sk->sk_state == TCP_LISTEN) 4794 if (sk->sk_state == TCP_LISTEN)
4795 /* if the socket is in the listening state then this 4795 /* if the socket is in the listening state then this
4796 * packet is a SYN-ACK packet which means it needs to 4796 * packet is a SYN-ACK packet which means it needs to
4797 * be labeled based on the connection/request_sock and 4797 * be labeled based on the connection/request_sock and
4798 * not the parent socket. unfortunately, we can't 4798 * not the parent socket. unfortunately, we can't
4799 * lookup the request_sock yet as it isn't queued on 4799 * lookup the request_sock yet as it isn't queued on
4800 * the parent socket until after the SYN-ACK is sent. 4800 * the parent socket until after the SYN-ACK is sent.
4801 * the "solution" is to simply pass the packet as-is 4801 * the "solution" is to simply pass the packet as-is
4802 * as any IP option based labeling should be copied 4802 * as any IP option based labeling should be copied
4803 * from the initial connection request (in the IP 4803 * from the initial connection request (in the IP
4804 * layer). it is far from ideal, but until we get a 4804 * layer). it is far from ideal, but until we get a
4805 * security label in the packet itself this is the 4805 * security label in the packet itself this is the
4806 * best we can do. */ 4806 * best we can do. */
4807 return NF_ACCEPT; 4807 return NF_ACCEPT;
4808 4808
4809 /* standard practice, label using the parent socket */ 4809 /* standard practice, label using the parent socket */
4810 sksec = sk->sk_security; 4810 sksec = sk->sk_security;
4811 sid = sksec->sid; 4811 sid = sksec->sid;
4812 } else 4812 } else
4813 sid = SECINITSID_KERNEL; 4813 sid = SECINITSID_KERNEL;
4814 if (selinux_netlbl_skbuff_setsid(skb, family, sid) != 0) 4814 if (selinux_netlbl_skbuff_setsid(skb, family, sid) != 0)
4815 return NF_DROP; 4815 return NF_DROP;
4816 4816
4817 return NF_ACCEPT; 4817 return NF_ACCEPT;
4818 } 4818 }
4819 4819
4820 static unsigned int selinux_ipv4_output(const struct nf_hook_ops *ops, 4820 static unsigned int selinux_ipv4_output(const struct nf_hook_ops *ops,
4821 struct sk_buff *skb, 4821 struct sk_buff *skb,
4822 const struct net_device *in, 4822 const struct net_device *in,
4823 const struct net_device *out, 4823 const struct net_device *out,
4824 int (*okfn)(struct sk_buff *)) 4824 int (*okfn)(struct sk_buff *))
4825 { 4825 {
4826 return selinux_ip_output(skb, PF_INET); 4826 return selinux_ip_output(skb, PF_INET);
4827 } 4827 }
4828 4828
4829 static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb, 4829 static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb,
4830 int ifindex, 4830 int ifindex,
4831 u16 family) 4831 u16 family)
4832 { 4832 {
4833 struct sock *sk = skb->sk; 4833 struct sock *sk = skb->sk;
4834 struct sk_security_struct *sksec; 4834 struct sk_security_struct *sksec;
4835 struct common_audit_data ad; 4835 struct common_audit_data ad;
4836 struct lsm_network_audit net = {0,}; 4836 struct lsm_network_audit net = {0,};
4837 char *addrp; 4837 char *addrp;
4838 u8 proto; 4838 u8 proto;
4839 4839
4840 if (sk == NULL) 4840 if (sk == NULL)
4841 return NF_ACCEPT; 4841 return NF_ACCEPT;
4842 sksec = sk->sk_security; 4842 sksec = sk->sk_security;
4843 4843
4844 ad.type = LSM_AUDIT_DATA_NET; 4844 ad.type = LSM_AUDIT_DATA_NET;
4845 ad.u.net = &net; 4845 ad.u.net = &net;
4846 ad.u.net->netif = ifindex; 4846 ad.u.net->netif = ifindex;
4847 ad.u.net->family = family; 4847 ad.u.net->family = family;
4848 if (selinux_parse_skb(skb, &ad, &addrp, 0, &proto)) 4848 if (selinux_parse_skb(skb, &ad, &addrp, 0, &proto))
4849 return NF_DROP; 4849 return NF_DROP;
4850 4850
4851 if (selinux_secmark_enabled()) 4851 if (selinux_secmark_enabled())
4852 if (avc_has_perm(sksec->sid, skb->secmark, 4852 if (avc_has_perm(sksec->sid, skb->secmark,
4853 SECCLASS_PACKET, PACKET__SEND, &ad)) 4853 SECCLASS_PACKET, PACKET__SEND, &ad))
4854 return NF_DROP_ERR(-ECONNREFUSED); 4854 return NF_DROP_ERR(-ECONNREFUSED);
4855 4855
4856 if (selinux_xfrm_postroute_last(sksec->sid, skb, &ad, proto)) 4856 if (selinux_xfrm_postroute_last(sksec->sid, skb, &ad, proto))
4857 return NF_DROP_ERR(-ECONNREFUSED); 4857 return NF_DROP_ERR(-ECONNREFUSED);
4858 4858
4859 return NF_ACCEPT; 4859 return NF_ACCEPT;
4860 } 4860 }
4861 4861
4862 static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex, 4862 static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex,
4863 u16 family) 4863 u16 family)
4864 { 4864 {
4865 u32 secmark_perm; 4865 u32 secmark_perm;
4866 u32 peer_sid; 4866 u32 peer_sid;
4867 struct sock *sk; 4867 struct sock *sk;
4868 struct common_audit_data ad; 4868 struct common_audit_data ad;
4869 struct lsm_network_audit net = {0,}; 4869 struct lsm_network_audit net = {0,};
4870 char *addrp; 4870 char *addrp;
4871 u8 secmark_active; 4871 u8 secmark_active;
4872 u8 peerlbl_active; 4872 u8 peerlbl_active;
4873 4873
4874 /* If any sort of compatibility mode is enabled then hand off processing 4874 /* If any sort of compatibility mode is enabled then hand off processing
4875 * to the selinux_ip_postroute_compat() function to deal with the 4875 * to the selinux_ip_postroute_compat() function to deal with the
4876 * special handling. We do this in an attempt to keep this function 4876 * special handling. We do this in an attempt to keep this function
4877 * as fast and as clean as possible. */ 4877 * as fast and as clean as possible. */
4878 if (!selinux_policycap_netpeer) 4878 if (!selinux_policycap_netpeer)
4879 return selinux_ip_postroute_compat(skb, ifindex, family); 4879 return selinux_ip_postroute_compat(skb, ifindex, family);
4880 4880
4881 secmark_active = selinux_secmark_enabled(); 4881 secmark_active = selinux_secmark_enabled();
4882 peerlbl_active = selinux_peerlbl_enabled(); 4882 peerlbl_active = selinux_peerlbl_enabled();
4883 if (!secmark_active && !peerlbl_active) 4883 if (!secmark_active && !peerlbl_active)
4884 return NF_ACCEPT; 4884 return NF_ACCEPT;
4885 4885
4886 sk = skb->sk; 4886 sk = skb->sk;
4887 4887
4888 #ifdef CONFIG_XFRM 4888 #ifdef CONFIG_XFRM
4889 /* If skb->dst->xfrm is non-NULL then the packet is undergoing an IPsec 4889 /* If skb->dst->xfrm is non-NULL then the packet is undergoing an IPsec
4890 * packet transformation so allow the packet to pass without any checks 4890 * packet transformation so allow the packet to pass without any checks
4891 * since we'll have another chance to perform access control checks 4891 * since we'll have another chance to perform access control checks
4892 * when the packet is on its final way out. 4892 * when the packet is on its final way out.
4893 * NOTE: there appear to be some IPv6 multicast cases where skb->dst 4893 * NOTE: there appear to be some IPv6 multicast cases where skb->dst
4894 * is NULL, in this case go ahead and apply access control. 4894 * is NULL, in this case go ahead and apply access control.
4895 * NOTE: if this is a local socket (skb->sk != NULL) that is in the 4895 * NOTE: if this is a local socket (skb->sk != NULL) that is in the
4896 * TCP listening state we cannot wait until the XFRM processing 4896 * TCP listening state we cannot wait until the XFRM processing
4897 * is done as we will miss out on the SA label if we do; 4897 * is done as we will miss out on the SA label if we do;
4898 * unfortunately, this means more work, but it is only once per 4898 * unfortunately, this means more work, but it is only once per
4899 * connection. */ 4899 * connection. */
4900 if (skb_dst(skb) != NULL && skb_dst(skb)->xfrm != NULL && 4900 if (skb_dst(skb) != NULL && skb_dst(skb)->xfrm != NULL &&
4901 !(sk != NULL && sk->sk_state == TCP_LISTEN)) 4901 !(sk != NULL && sk->sk_state == TCP_LISTEN))
4902 return NF_ACCEPT; 4902 return NF_ACCEPT;
4903 #endif 4903 #endif
4904 4904
4905 if (sk == NULL) { 4905 if (sk == NULL) {
4906 /* Without an associated socket the packet is either coming 4906 /* Without an associated socket the packet is either coming
4907 * from the kernel or it is being forwarded; check the packet 4907 * from the kernel or it is being forwarded; check the packet
4908 * to determine which and if the packet is being forwarded 4908 * to determine which and if the packet is being forwarded
4909 * query the packet directly to determine the security label. */ 4909 * query the packet directly to determine the security label. */
4910 if (skb->skb_iif) { 4910 if (skb->skb_iif) {
4911 secmark_perm = PACKET__FORWARD_OUT; 4911 secmark_perm = PACKET__FORWARD_OUT;
4912 if (selinux_skb_peerlbl_sid(skb, family, &peer_sid)) 4912 if (selinux_skb_peerlbl_sid(skb, family, &peer_sid))
4913 return NF_DROP; 4913 return NF_DROP;
4914 } else { 4914 } else {
4915 secmark_perm = PACKET__SEND; 4915 secmark_perm = PACKET__SEND;
4916 peer_sid = SECINITSID_KERNEL; 4916 peer_sid = SECINITSID_KERNEL;
4917 } 4917 }
4918 } else if (sk->sk_state == TCP_LISTEN) { 4918 } else if (sk->sk_state == TCP_LISTEN) {
4919 /* Locally generated packet but the associated socket is in the 4919 /* Locally generated packet but the associated socket is in the
4920 * listening state which means this is a SYN-ACK packet. In 4920 * listening state which means this is a SYN-ACK packet. In
4921 * this particular case the correct security label is assigned 4921 * this particular case the correct security label is assigned
4922 * to the connection/request_sock but unfortunately we can't 4922 * to the connection/request_sock but unfortunately we can't
4923 * query the request_sock as it isn't queued on the parent 4923 * query the request_sock as it isn't queued on the parent
4924 * socket until after the SYN-ACK packet is sent; the only 4924 * socket until after the SYN-ACK packet is sent; the only
4925 * viable choice is to regenerate the label like we do in 4925 * viable choice is to regenerate the label like we do in
4926 * selinux_inet_conn_request(). See also selinux_ip_output() 4926 * selinux_inet_conn_request(). See also selinux_ip_output()
4927 * for similar problems. */ 4927 * for similar problems. */
4928 u32 skb_sid; 4928 u32 skb_sid;
4929 struct sk_security_struct *sksec = sk->sk_security; 4929 struct sk_security_struct *sksec = sk->sk_security;
4930 if (selinux_skb_peerlbl_sid(skb, family, &skb_sid)) 4930 if (selinux_skb_peerlbl_sid(skb, family, &skb_sid))
4931 return NF_DROP; 4931 return NF_DROP;
4932 /* At this point, if the returned skb peerlbl is SECSID_NULL 4932 /* At this point, if the returned skb peerlbl is SECSID_NULL
4933 * and the packet has been through at least one XFRM 4933 * and the packet has been through at least one XFRM
4934 * transformation then we must be dealing with the "final" 4934 * transformation then we must be dealing with the "final"
4935 * form of labeled IPsec packet; since we've already applied 4935 * form of labeled IPsec packet; since we've already applied
4936 * all of our access controls on this packet we can safely 4936 * all of our access controls on this packet we can safely
4937 * pass the packet. */ 4937 * pass the packet. */
4938 if (skb_sid == SECSID_NULL) { 4938 if (skb_sid == SECSID_NULL) {
4939 switch (family) { 4939 switch (family) {
4940 case PF_INET: 4940 case PF_INET:
4941 if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) 4941 if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED)
4942 return NF_ACCEPT; 4942 return NF_ACCEPT;
4943 break; 4943 break;
4944 case PF_INET6: 4944 case PF_INET6:
4945 if (IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) 4945 if (IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED)
4946 return NF_ACCEPT; 4946 return NF_ACCEPT;
4947 default: 4947 default:
4948 return NF_DROP_ERR(-ECONNREFUSED); 4948 return NF_DROP_ERR(-ECONNREFUSED);
4949 } 4949 }
4950 } 4950 }
4951 if (selinux_conn_sid(sksec->sid, skb_sid, &peer_sid)) 4951 if (selinux_conn_sid(sksec->sid, skb_sid, &peer_sid))
4952 return NF_DROP; 4952 return NF_DROP;
4953 secmark_perm = PACKET__SEND; 4953 secmark_perm = PACKET__SEND;
4954 } else { 4954 } else {
4955 /* Locally generated packet, fetch the security label from the 4955 /* Locally generated packet, fetch the security label from the
4956 * associated socket. */ 4956 * associated socket. */
4957 struct sk_security_struct *sksec = sk->sk_security; 4957 struct sk_security_struct *sksec = sk->sk_security;
4958 peer_sid = sksec->sid; 4958 peer_sid = sksec->sid;
4959 secmark_perm = PACKET__SEND; 4959 secmark_perm = PACKET__SEND;
4960 } 4960 }
4961 4961
4962 ad.type = LSM_AUDIT_DATA_NET; 4962 ad.type = LSM_AUDIT_DATA_NET;
4963 ad.u.net = &net; 4963 ad.u.net = &net;
4964 ad.u.net->netif = ifindex; 4964 ad.u.net->netif = ifindex;
4965 ad.u.net->family = family; 4965 ad.u.net->family = family;
4966 if (selinux_parse_skb(skb, &ad, &addrp, 0, NULL)) 4966 if (selinux_parse_skb(skb, &ad, &addrp, 0, NULL))
4967 return NF_DROP; 4967 return NF_DROP;
4968 4968
4969 if (secmark_active) 4969 if (secmark_active)
4970 if (avc_has_perm(peer_sid, skb->secmark, 4970 if (avc_has_perm(peer_sid, skb->secmark,
4971 SECCLASS_PACKET, secmark_perm, &ad)) 4971 SECCLASS_PACKET, secmark_perm, &ad))
4972 return NF_DROP_ERR(-ECONNREFUSED); 4972 return NF_DROP_ERR(-ECONNREFUSED);
4973 4973
4974 if (peerlbl_active) { 4974 if (peerlbl_active) {
4975 u32 if_sid; 4975 u32 if_sid;
4976 u32 node_sid; 4976 u32 node_sid;
4977 4977
4978 if (sel_netif_sid(ifindex, &if_sid)) 4978 if (sel_netif_sid(ifindex, &if_sid))
4979 return NF_DROP; 4979 return NF_DROP;
4980 if (avc_has_perm(peer_sid, if_sid, 4980 if (avc_has_perm(peer_sid, if_sid,
4981 SECCLASS_NETIF, NETIF__EGRESS, &ad)) 4981 SECCLASS_NETIF, NETIF__EGRESS, &ad))
4982 return NF_DROP_ERR(-ECONNREFUSED); 4982 return NF_DROP_ERR(-ECONNREFUSED);
4983 4983
4984 if (sel_netnode_sid(addrp, family, &node_sid)) 4984 if (sel_netnode_sid(addrp, family, &node_sid))
4985 return NF_DROP; 4985 return NF_DROP;
4986 if (avc_has_perm(peer_sid, node_sid, 4986 if (avc_has_perm(peer_sid, node_sid,
4987 SECCLASS_NODE, NODE__SENDTO, &ad)) 4987 SECCLASS_NODE, NODE__SENDTO, &ad))
4988 return NF_DROP_ERR(-ECONNREFUSED); 4988 return NF_DROP_ERR(-ECONNREFUSED);
4989 } 4989 }
4990 4990
4991 return NF_ACCEPT; 4991 return NF_ACCEPT;
4992 } 4992 }
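
The two functions that follow (selinux_ipv4_postroute() and, under IPv6, selinux_ipv6_postroute()) merely adapt the netfilter hook calling convention to selinux_ip_postroute(). As a hedged aside that is not part of this diff: in kernels of this era such a hook is wired into the POST_ROUTING chain through a struct nf_hook_ops table passed to nf_register_hooks(). The sketch below uses hypothetical names and assumes the netfilter headers this file already pulls in; the real registration table in hooks.c may differ in name and contents.

	/* Illustrative sketch only -- not part of the commit shown here.
	 * Hypothetical registration of the IPv4 postroute hook. */
	static struct nf_hook_ops example_selinux_nf_ops[] = {
		{
			.hook		= selinux_ipv4_postroute,
			.owner		= THIS_MODULE,
			.pf		= NFPROTO_IPV4,
			.hooknum	= NF_INET_POST_ROUTING,
			.priority	= NF_IP_PRI_SELINUX_LAST, /* run after other filters */
		},
	};

	static int __init example_selinux_nf_init(void)
	{
		/* Attach every entry in the table to its netfilter chain. */
		return nf_register_hooks(example_selinux_nf_ops,
					 ARRAY_SIZE(example_selinux_nf_ops));
	}

With a registration of this shape in place, every locally generated or forwarded IPv4 packet passes through selinux_ipv4_postroute() just before transmission, which is what makes the egress checks above possible.
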
4993 4993
4994 static unsigned int selinux_ipv4_postroute(const struct nf_hook_ops *ops, 4994 static unsigned int selinux_ipv4_postroute(const struct nf_hook_ops *ops,
4995 struct sk_buff *skb, 4995 struct sk_buff *skb,
4996 const struct net_device *in, 4996 const struct net_device *in,
4997 const struct net_device *out, 4997 const struct net_device *out,
4998 int (*okfn)(struct sk_buff *)) 4998 int (*okfn)(struct sk_buff *))
4999 { 4999 {
5000 return selinux_ip_postroute(skb, out->ifindex, PF_INET); 5000 return selinux_ip_postroute(skb, out->ifindex, PF_INET);
5001 } 5001 }
5002 5002
5003 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 5003 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
5004 static unsigned int selinux_ipv6_postroute(const struct nf_hook_ops *ops, 5004 static unsigned int selinux_ipv6_postroute(const struct nf_hook_ops *ops,
5005 struct sk_buff *skb, 5005 struct sk_buff *skb,
5006 const struct net_device *in, 5006 const struct net_device *in,
5007 const struct net_device *out, 5007 const struct net_device *out,
5008 int (*okfn)(struct sk_buff *)) 5008 int (*okfn)(struct sk_buff *))
5009 { 5009 {
5010 return selinux_ip_postroute(skb, out->ifindex, PF_INET6); 5010 return selinux_ip_postroute(skb, out->ifindex, PF_INET6);
5011 } 5011 }
5012 #endif /* IPV6 */ 5012 #endif /* IPV6 */
5013 5013
5014 #endif /* CONFIG_NETFILTER */ 5014 #endif /* CONFIG_NETFILTER */
5015 5015
5016 static int selinux_netlink_send(struct sock *sk, struct sk_buff *skb) 5016 static int selinux_netlink_send(struct sock *sk, struct sk_buff *skb)
5017 { 5017 {
5018 int err; 5018 int err;
5019 5019
5020 err = cap_netlink_send(sk, skb); 5020 err = cap_netlink_send(sk, skb);
5021 if (err) 5021 if (err)
5022 return err; 5022 return err;
5023 5023
5024 return selinux_nlmsg_perm(sk, skb); 5024 return selinux_nlmsg_perm(sk, skb);
5025 } 5025 }
5026 5026
5027 static int ipc_alloc_security(struct task_struct *task, 5027 static int ipc_alloc_security(struct task_struct *task,
5028 struct kern_ipc_perm *perm, 5028 struct kern_ipc_perm *perm,
5029 u16 sclass) 5029 u16 sclass)
5030 { 5030 {
5031 struct ipc_security_struct *isec; 5031 struct ipc_security_struct *isec;
5032 u32 sid; 5032 u32 sid;
5033 5033
5034 isec = kzalloc(sizeof(struct ipc_security_struct), GFP_KERNEL); 5034 isec = kzalloc(sizeof(struct ipc_security_struct), GFP_KERNEL);
5035 if (!isec) 5035 if (!isec)
5036 return -ENOMEM; 5036 return -ENOMEM;
5037 5037
5038 sid = task_sid(task); 5038 sid = task_sid(task);
5039 isec->sclass = sclass; 5039 isec->sclass = sclass;
5040 isec->sid = sid; 5040 isec->sid = sid;
5041 perm->security = isec; 5041 perm->security = isec;
5042 5042
5043 return 0; 5043 return 0;
5044 } 5044 }
5045 5045
5046 static void ipc_free_security(struct kern_ipc_perm *perm) 5046 static void ipc_free_security(struct kern_ipc_perm *perm)
5047 { 5047 {
5048 struct ipc_security_struct *isec = perm->security; 5048 struct ipc_security_struct *isec = perm->security;
5049 perm->security = NULL; 5049 perm->security = NULL;
5050 kfree(isec); 5050 kfree(isec);
5051 } 5051 }
5052 5052
5053 static int msg_msg_alloc_security(struct msg_msg *msg) 5053 static int msg_msg_alloc_security(struct msg_msg *msg)
5054 { 5054 {
5055 struct msg_security_struct *msec; 5055 struct msg_security_struct *msec;
5056 5056
5057 msec = kzalloc(sizeof(struct msg_security_struct), GFP_KERNEL); 5057 msec = kzalloc(sizeof(struct msg_security_struct), GFP_KERNEL);
5058 if (!msec) 5058 if (!msec)
5059 return -ENOMEM; 5059 return -ENOMEM;
5060 5060
5061 msec->sid = SECINITSID_UNLABELED; 5061 msec->sid = SECINITSID_UNLABELED;
5062 msg->security = msec; 5062 msg->security = msec;
5063 5063
5064 return 0; 5064 return 0;
5065 } 5065 }
5066 5066
5067 static void msg_msg_free_security(struct msg_msg *msg) 5067 static void msg_msg_free_security(struct msg_msg *msg)
5068 { 5068 {
5069 struct msg_security_struct *msec = msg->security; 5069 struct msg_security_struct *msec = msg->security;
5070 5070
5071 msg->security = NULL; 5071 msg->security = NULL;
5072 kfree(msec); 5072 kfree(msec);
5073 } 5073 }
5074 5074
5075 static int ipc_has_perm(struct kern_ipc_perm *ipc_perms, 5075 static int ipc_has_perm(struct kern_ipc_perm *ipc_perms,
5076 u32 perms) 5076 u32 perms)
5077 { 5077 {
5078 struct ipc_security_struct *isec; 5078 struct ipc_security_struct *isec;
5079 struct common_audit_data ad; 5079 struct common_audit_data ad;
5080 u32 sid = current_sid(); 5080 u32 sid = current_sid();
5081 5081
5082 isec = ipc_perms->security; 5082 isec = ipc_perms->security;
5083 5083
5084 ad.type = LSM_AUDIT_DATA_IPC; 5084 ad.type = LSM_AUDIT_DATA_IPC;
5085 ad.u.ipc_id = ipc_perms->key; 5085 ad.u.ipc_id = ipc_perms->key;
5086 5086
5087 return avc_has_perm(sid, isec->sid, isec->sclass, perms, &ad); 5087 return avc_has_perm(sid, isec->sid, isec->sclass, perms, &ad);
5088 } 5088 }
5089 5089
5090 static int selinux_msg_msg_alloc_security(struct msg_msg *msg) 5090 static int selinux_msg_msg_alloc_security(struct msg_msg *msg)
5091 { 5091 {
5092 return msg_msg_alloc_security(msg); 5092 return msg_msg_alloc_security(msg);
5093 } 5093 }
5094 5094
5095 static void selinux_msg_msg_free_security(struct msg_msg *msg) 5095 static void selinux_msg_msg_free_security(struct msg_msg *msg)
5096 { 5096 {
5097 msg_msg_free_security(msg); 5097 msg_msg_free_security(msg);
5098 } 5098 }
5099 5099
5100 /* message queue security operations */ 5100 /* message queue security operations */
5101 static int selinux_msg_queue_alloc_security(struct msg_queue *msq) 5101 static int selinux_msg_queue_alloc_security(struct msg_queue *msq)
5102 { 5102 {
5103 struct ipc_security_struct *isec; 5103 struct ipc_security_struct *isec;
5104 struct common_audit_data ad; 5104 struct common_audit_data ad;
5105 u32 sid = current_sid(); 5105 u32 sid = current_sid();
5106 int rc; 5106 int rc;
5107 5107
5108 rc = ipc_alloc_security(current, &msq->q_perm, SECCLASS_MSGQ); 5108 rc = ipc_alloc_security(current, &msq->q_perm, SECCLASS_MSGQ);
5109 if (rc) 5109 if (rc)
5110 return rc; 5110 return rc;
5111 5111
5112 isec = msq->q_perm.security; 5112 isec = msq->q_perm.security;
5113 5113
5114 ad.type = LSM_AUDIT_DATA_IPC; 5114 ad.type = LSM_AUDIT_DATA_IPC;
5115 ad.u.ipc_id = msq->q_perm.key; 5115 ad.u.ipc_id = msq->q_perm.key;
5116 5116
5117 rc = avc_has_perm(sid, isec->sid, SECCLASS_MSGQ, 5117 rc = avc_has_perm(sid, isec->sid, SECCLASS_MSGQ,
5118 MSGQ__CREATE, &ad); 5118 MSGQ__CREATE, &ad);
5119 if (rc) { 5119 if (rc) {
5120 ipc_free_security(&msq->q_perm); 5120 ipc_free_security(&msq->q_perm);
5121 return rc; 5121 return rc;
5122 } 5122 }
5123 return 0; 5123 return 0;
5124 } 5124 }
5125 5125
5126 static void selinux_msg_queue_free_security(struct msg_queue *msq) 5126 static void selinux_msg_queue_free_security(struct msg_queue *msq)
5127 { 5127 {
5128 ipc_free_security(&msq->q_perm); 5128 ipc_free_security(&msq->q_perm);
5129 } 5129 }
5130 5130
5131 static int selinux_msg_queue_associate(struct msg_queue *msq, int msqflg) 5131 static int selinux_msg_queue_associate(struct msg_queue *msq, int msqflg)
5132 { 5132 {
5133 struct ipc_security_struct *isec; 5133 struct ipc_security_struct *isec;
5134 struct common_audit_data ad; 5134 struct common_audit_data ad;
5135 u32 sid = current_sid(); 5135 u32 sid = current_sid();
5136 5136
5137 isec = msq->q_perm.security; 5137 isec = msq->q_perm.security;
5138 5138
5139 ad.type = LSM_AUDIT_DATA_IPC; 5139 ad.type = LSM_AUDIT_DATA_IPC;
5140 ad.u.ipc_id = msq->q_perm.key; 5140 ad.u.ipc_id = msq->q_perm.key;
5141 5141
5142 return avc_has_perm(sid, isec->sid, SECCLASS_MSGQ, 5142 return avc_has_perm(sid, isec->sid, SECCLASS_MSGQ,
5143 MSGQ__ASSOCIATE, &ad); 5143 MSGQ__ASSOCIATE, &ad);
5144 } 5144 }
5145 5145
5146 static int selinux_msg_queue_msgctl(struct msg_queue *msq, int cmd) 5146 static int selinux_msg_queue_msgctl(struct msg_queue *msq, int cmd)
5147 { 5147 {
5148 int err; 5148 int err;
5149 int perms; 5149 int perms;
5150 5150
5151 switch (cmd) { 5151 switch (cmd) {
5152 case IPC_INFO: 5152 case IPC_INFO:
5153 case MSG_INFO: 5153 case MSG_INFO:
5154 /* No specific object, just general system-wide information. */ 5154 /* No specific object, just general system-wide information. */
5155 return task_has_system(current, SYSTEM__IPC_INFO); 5155 return task_has_system(current, SYSTEM__IPC_INFO);
5156 case IPC_STAT: 5156 case IPC_STAT:
5157 case MSG_STAT: 5157 case MSG_STAT:
5158 perms = MSGQ__GETATTR | MSGQ__ASSOCIATE; 5158 perms = MSGQ__GETATTR | MSGQ__ASSOCIATE;
5159 break; 5159 break;
5160 case IPC_SET: 5160 case IPC_SET:
5161 perms = MSGQ__SETATTR; 5161 perms = MSGQ__SETATTR;
5162 break; 5162 break;
5163 case IPC_RMID: 5163 case IPC_RMID:
5164 perms = MSGQ__DESTROY; 5164 perms = MSGQ__DESTROY;
5165 break; 5165 break;
5166 default: 5166 default:
5167 return 0; 5167 return 0;
5168 } 5168 }
5169 5169
5170 err = ipc_has_perm(&msq->q_perm, perms); 5170 err = ipc_has_perm(&msq->q_perm, perms);
5171 return err; 5171 return err;
5172 } 5172 }
5173 5173
5174 static int selinux_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg, int msqflg) 5174 static int selinux_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg, int msqflg)
5175 { 5175 {
5176 struct ipc_security_struct *isec; 5176 struct ipc_security_struct *isec;
5177 struct msg_security_struct *msec; 5177 struct msg_security_struct *msec;
5178 struct common_audit_data ad; 5178 struct common_audit_data ad;
5179 u32 sid = current_sid(); 5179 u32 sid = current_sid();
5180 int rc; 5180 int rc;
5181 5181
5182 isec = msq->q_perm.security; 5182 isec = msq->q_perm.security;
5183 msec = msg->security; 5183 msec = msg->security;
5184 5184
5185 /* 5185 /*
5186 * First time through, need to assign label to the message 5186 * First time through, need to assign label to the message
5187 */ 5187 */
5188 if (msec->sid == SECINITSID_UNLABELED) { 5188 if (msec->sid == SECINITSID_UNLABELED) {
5189 /* 5189 /*
5190 * Compute new sid based on current process and 5190 * Compute new sid based on current process and
5191 * message queue this message will be stored in 5191 * message queue this message will be stored in
5192 */ 5192 */
5193 rc = security_transition_sid(sid, isec->sid, SECCLASS_MSG, 5193 rc = security_transition_sid(sid, isec->sid, SECCLASS_MSG,
5194 NULL, &msec->sid); 5194 NULL, &msec->sid);
5195 if (rc) 5195 if (rc)
5196 return rc; 5196 return rc;
5197 } 5197 }
5198 5198
5199 ad.type = LSM_AUDIT_DATA_IPC; 5199 ad.type = LSM_AUDIT_DATA_IPC;
5200 ad.u.ipc_id = msq->q_perm.key; 5200 ad.u.ipc_id = msq->q_perm.key;
5201 5201
5202 /* Can this process write to the queue? */ 5202 /* Can this process write to the queue? */
5203 rc = avc_has_perm(sid, isec->sid, SECCLASS_MSGQ, 5203 rc = avc_has_perm(sid, isec->sid, SECCLASS_MSGQ,
5204 MSGQ__WRITE, &ad); 5204 MSGQ__WRITE, &ad);
5205 if (!rc) 5205 if (!rc)
5206 /* Can this process send the message */ 5206 /* Can this process send the message */
5207 rc = avc_has_perm(sid, msec->sid, SECCLASS_MSG, 5207 rc = avc_has_perm(sid, msec->sid, SECCLASS_MSG,
5208 MSG__SEND, &ad); 5208 MSG__SEND, &ad);
5209 if (!rc) 5209 if (!rc)
5210 /* Can the message be put in the queue? */ 5210 /* Can the message be put in the queue? */
5211 rc = avc_has_perm(msec->sid, isec->sid, SECCLASS_MSGQ, 5211 rc = avc_has_perm(msec->sid, isec->sid, SECCLASS_MSGQ,
5212 MSGQ__ENQUEUE, &ad); 5212 MSGQ__ENQUEUE, &ad);
5213 5213
5214 return rc; 5214 return rc;
5215 } 5215 }
5216 5216
5217 static int selinux_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg, 5217 static int selinux_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg,
5218 struct task_struct *target, 5218 struct task_struct *target,
5219 long type, int mode) 5219 long type, int mode)
5220 { 5220 {
5221 struct ipc_security_struct *isec; 5221 struct ipc_security_struct *isec;
5222 struct msg_security_struct *msec; 5222 struct msg_security_struct *msec;
5223 struct common_audit_data ad; 5223 struct common_audit_data ad;
5224 u32 sid = task_sid(target); 5224 u32 sid = task_sid(target);
5225 int rc; 5225 int rc;
5226 5226
5227 isec = msq->q_perm.security; 5227 isec = msq->q_perm.security;
5228 msec = msg->security; 5228 msec = msg->security;
5229 5229
5230 ad.type = LSM_AUDIT_DATA_IPC; 5230 ad.type = LSM_AUDIT_DATA_IPC;
5231 ad.u.ipc_id = msq->q_perm.key; 5231 ad.u.ipc_id = msq->q_perm.key;
5232 5232
5233 rc = avc_has_perm(sid, isec->sid, 5233 rc = avc_has_perm(sid, isec->sid,
5234 SECCLASS_MSGQ, MSGQ__READ, &ad); 5234 SECCLASS_MSGQ, MSGQ__READ, &ad);
5235 if (!rc) 5235 if (!rc)
5236 rc = avc_has_perm(sid, msec->sid, 5236 rc = avc_has_perm(sid, msec->sid,
5237 SECCLASS_MSG, MSG__RECEIVE, &ad); 5237 SECCLASS_MSG, MSG__RECEIVE, &ad);
5238 return rc; 5238 return rc;
5239 } 5239 }
5240 5240
5241 /* Shared Memory security operations */ 5241 /* Shared Memory security operations */
5242 static int selinux_shm_alloc_security(struct shmid_kernel *shp) 5242 static int selinux_shm_alloc_security(struct shmid_kernel *shp)
5243 { 5243 {
5244 struct ipc_security_struct *isec; 5244 struct ipc_security_struct *isec;
5245 struct common_audit_data ad; 5245 struct common_audit_data ad;
5246 u32 sid = current_sid(); 5246 u32 sid = current_sid();
5247 int rc; 5247 int rc;
5248 5248
5249 rc = ipc_alloc_security(current, &shp->shm_perm, SECCLASS_SHM); 5249 rc = ipc_alloc_security(current, &shp->shm_perm, SECCLASS_SHM);
5250 if (rc) 5250 if (rc)
5251 return rc; 5251 return rc;
5252 5252
5253 isec = shp->shm_perm.security; 5253 isec = shp->shm_perm.security;
5254 5254
5255 ad.type = LSM_AUDIT_DATA_IPC; 5255 ad.type = LSM_AUDIT_DATA_IPC;
5256 ad.u.ipc_id = shp->shm_perm.key; 5256 ad.u.ipc_id = shp->shm_perm.key;
5257 5257
5258 rc = avc_has_perm(sid, isec->sid, SECCLASS_SHM, 5258 rc = avc_has_perm(sid, isec->sid, SECCLASS_SHM,
5259 SHM__CREATE, &ad); 5259 SHM__CREATE, &ad);
5260 if (rc) { 5260 if (rc) {
5261 ipc_free_security(&shp->shm_perm); 5261 ipc_free_security(&shp->shm_perm);
5262 return rc; 5262 return rc;
5263 } 5263 }
5264 return 0; 5264 return 0;
5265 } 5265 }
5266 5266
5267 static void selinux_shm_free_security(struct shmid_kernel *shp) 5267 static void selinux_shm_free_security(struct shmid_kernel *shp)
5268 { 5268 {
5269 ipc_free_security(&shp->shm_perm); 5269 ipc_free_security(&shp->shm_perm);
5270 } 5270 }
5271 5271
5272 static int selinux_shm_associate(struct shmid_kernel *shp, int shmflg) 5272 static int selinux_shm_associate(struct shmid_kernel *shp, int shmflg)
5273 { 5273 {
5274 struct ipc_security_struct *isec; 5274 struct ipc_security_struct *isec;
5275 struct common_audit_data ad; 5275 struct common_audit_data ad;
5276 u32 sid = current_sid(); 5276 u32 sid = current_sid();
5277 5277
5278 isec = shp->shm_perm.security; 5278 isec = shp->shm_perm.security;
5279 5279
5280 ad.type = LSM_AUDIT_DATA_IPC; 5280 ad.type = LSM_AUDIT_DATA_IPC;
5281 ad.u.ipc_id = shp->shm_perm.key; 5281 ad.u.ipc_id = shp->shm_perm.key;
5282 5282
5283 return avc_has_perm(sid, isec->sid, SECCLASS_SHM, 5283 return avc_has_perm(sid, isec->sid, SECCLASS_SHM,
5284 SHM__ASSOCIATE, &ad); 5284 SHM__ASSOCIATE, &ad);
5285 } 5285 }
5286 5286
5287 /* Note, at this point, shp is locked down */ 5287 /* Note, at this point, shp is locked down */
5288 static int selinux_shm_shmctl(struct shmid_kernel *shp, int cmd) 5288 static int selinux_shm_shmctl(struct shmid_kernel *shp, int cmd)
5289 { 5289 {
5290 int perms; 5290 int perms;
5291 int err; 5291 int err;
5292 5292
5293 switch (cmd) { 5293 switch (cmd) {
5294 case IPC_INFO: 5294 case IPC_INFO:
5295 case SHM_INFO: 5295 case SHM_INFO:
5296 /* No specific object, just general system-wide information. */ 5296 /* No specific object, just general system-wide information. */
5297 return task_has_system(current, SYSTEM__IPC_INFO); 5297 return task_has_system(current, SYSTEM__IPC_INFO);
5298 case IPC_STAT: 5298 case IPC_STAT:
5299 case SHM_STAT: 5299 case SHM_STAT:
5300 perms = SHM__GETATTR | SHM__ASSOCIATE; 5300 perms = SHM__GETATTR | SHM__ASSOCIATE;
5301 break; 5301 break;
5302 case IPC_SET: 5302 case IPC_SET:
5303 perms = SHM__SETATTR; 5303 perms = SHM__SETATTR;
5304 break; 5304 break;
5305 case SHM_LOCK: 5305 case SHM_LOCK:
5306 case SHM_UNLOCK: 5306 case SHM_UNLOCK:
5307 perms = SHM__LOCK; 5307 perms = SHM__LOCK;
5308 break; 5308 break;
5309 case IPC_RMID: 5309 case IPC_RMID:
5310 perms = SHM__DESTROY; 5310 perms = SHM__DESTROY;
5311 break; 5311 break;
5312 default: 5312 default:
5313 return 0; 5313 return 0;
5314 } 5314 }
5315 5315
5316 err = ipc_has_perm(&shp->shm_perm, perms); 5316 err = ipc_has_perm(&shp->shm_perm, perms);
5317 return err; 5317 return err;
5318 } 5318 }
5319 5319
5320 static int selinux_shm_shmat(struct shmid_kernel *shp, 5320 static int selinux_shm_shmat(struct shmid_kernel *shp,
5321 char __user *shmaddr, int shmflg) 5321 char __user *shmaddr, int shmflg)
5322 { 5322 {
5323 u32 perms; 5323 u32 perms;
5324 5324
5325 if (shmflg & SHM_RDONLY) 5325 if (shmflg & SHM_RDONLY)
5326 perms = SHM__READ; 5326 perms = SHM__READ;
5327 else 5327 else
5328 perms = SHM__READ | SHM__WRITE; 5328 perms = SHM__READ | SHM__WRITE;
5329 5329
5330 return ipc_has_perm(&shp->shm_perm, perms); 5330 return ipc_has_perm(&shp->shm_perm, perms);
5331 } 5331 }
5332 5332
5333 /* Semaphore security operations */ 5333 /* Semaphore security operations */
5334 static int selinux_sem_alloc_security(struct sem_array *sma) 5334 static int selinux_sem_alloc_security(struct sem_array *sma)
5335 { 5335 {
5336 struct ipc_security_struct *isec; 5336 struct ipc_security_struct *isec;
5337 struct common_audit_data ad; 5337 struct common_audit_data ad;
5338 u32 sid = current_sid(); 5338 u32 sid = current_sid();
5339 int rc; 5339 int rc;
5340 5340
5341 rc = ipc_alloc_security(current, &sma->sem_perm, SECCLASS_SEM); 5341 rc = ipc_alloc_security(current, &sma->sem_perm, SECCLASS_SEM);
5342 if (rc) 5342 if (rc)
5343 return rc; 5343 return rc;
5344 5344
5345 isec = sma->sem_perm.security; 5345 isec = sma->sem_perm.security;
5346 5346
5347 ad.type = LSM_AUDIT_DATA_IPC; 5347 ad.type = LSM_AUDIT_DATA_IPC;
5348 ad.u.ipc_id = sma->sem_perm.key; 5348 ad.u.ipc_id = sma->sem_perm.key;
5349 5349
5350 rc = avc_has_perm(sid, isec->sid, SECCLASS_SEM, 5350 rc = avc_has_perm(sid, isec->sid, SECCLASS_SEM,
5351 SEM__CREATE, &ad); 5351 SEM__CREATE, &ad);
5352 if (rc) { 5352 if (rc) {
5353 ipc_free_security(&sma->sem_perm); 5353 ipc_free_security(&sma->sem_perm);
5354 return rc; 5354 return rc;
5355 } 5355 }
5356 return 0; 5356 return 0;
5357 } 5357 }
5358 5358
5359 static void selinux_sem_free_security(struct sem_array *sma) 5359 static void selinux_sem_free_security(struct sem_array *sma)
5360 { 5360 {
5361 ipc_free_security(&sma->sem_perm); 5361 ipc_free_security(&sma->sem_perm);
5362 } 5362 }
5363 5363
5364 static int selinux_sem_associate(struct sem_array *sma, int semflg) 5364 static int selinux_sem_associate(struct sem_array *sma, int semflg)
5365 { 5365 {
5366 struct ipc_security_struct *isec; 5366 struct ipc_security_struct *isec;
5367 struct common_audit_data ad; 5367 struct common_audit_data ad;
5368 u32 sid = current_sid(); 5368 u32 sid = current_sid();
5369 5369
5370 isec = sma->sem_perm.security; 5370 isec = sma->sem_perm.security;
5371 5371
5372 ad.type = LSM_AUDIT_DATA_IPC; 5372 ad.type = LSM_AUDIT_DATA_IPC;
5373 ad.u.ipc_id = sma->sem_perm.key; 5373 ad.u.ipc_id = sma->sem_perm.key;
5374 5374
5375 return avc_has_perm(sid, isec->sid, SECCLASS_SEM, 5375 return avc_has_perm(sid, isec->sid, SECCLASS_SEM,
5376 SEM__ASSOCIATE, &ad); 5376 SEM__ASSOCIATE, &ad);
5377 } 5377 }
5378 5378
5379 /* Note, at this point, sma is locked down */ 5379 /* Note, at this point, sma is locked down */
5380 static int selinux_sem_semctl(struct sem_array *sma, int cmd) 5380 static int selinux_sem_semctl(struct sem_array *sma, int cmd)
5381 { 5381 {
5382 int err; 5382 int err;
5383 u32 perms; 5383 u32 perms;
5384 5384
5385 switch (cmd) { 5385 switch (cmd) {
5386 case IPC_INFO: 5386 case IPC_INFO:
5387 case SEM_INFO: 5387 case SEM_INFO:
5388 /* No specific object, just general system-wide information. */ 5388 /* No specific object, just general system-wide information. */
5389 return task_has_system(current, SYSTEM__IPC_INFO); 5389 return task_has_system(current, SYSTEM__IPC_INFO);
5390 case GETPID: 5390 case GETPID:
5391 case GETNCNT: 5391 case GETNCNT:
5392 case GETZCNT: 5392 case GETZCNT:
5393 perms = SEM__GETATTR; 5393 perms = SEM__GETATTR;
5394 break; 5394 break;
5395 case GETVAL: 5395 case GETVAL:
5396 case GETALL: 5396 case GETALL:
5397 perms = SEM__READ; 5397 perms = SEM__READ;
5398 break; 5398 break;
5399 case SETVAL: 5399 case SETVAL:
5400 case SETALL: 5400 case SETALL:
5401 perms = SEM__WRITE; 5401 perms = SEM__WRITE;
5402 break; 5402 break;
5403 case IPC_RMID: 5403 case IPC_RMID:
5404 perms = SEM__DESTROY; 5404 perms = SEM__DESTROY;
5405 break; 5405 break;
5406 case IPC_SET: 5406 case IPC_SET:
5407 perms = SEM__SETATTR; 5407 perms = SEM__SETATTR;
5408 break; 5408 break;
5409 case IPC_STAT: 5409 case IPC_STAT:
5410 case SEM_STAT: 5410 case SEM_STAT:
5411 perms = SEM__GETATTR | SEM__ASSOCIATE; 5411 perms = SEM__GETATTR | SEM__ASSOCIATE;
5412 break; 5412 break;
5413 default: 5413 default:
5414 return 0; 5414 return 0;
5415 } 5415 }
5416 5416
5417 err = ipc_has_perm(&sma->sem_perm, perms); 5417 err = ipc_has_perm(&sma->sem_perm, perms);
5418 return err; 5418 return err;
5419 } 5419 }
5420 5420
5421 static int selinux_sem_semop(struct sem_array *sma, 5421 static int selinux_sem_semop(struct sem_array *sma,
5422 struct sembuf *sops, unsigned nsops, int alter) 5422 struct sembuf *sops, unsigned nsops, int alter)
5423 { 5423 {
5424 u32 perms; 5424 u32 perms;
5425 5425
5426 if (alter) 5426 if (alter)
5427 perms = SEM__READ | SEM__WRITE; 5427 perms = SEM__READ | SEM__WRITE;
5428 else 5428 else
5429 perms = SEM__READ; 5429 perms = SEM__READ;
5430 5430
5431 return ipc_has_perm(&sma->sem_perm, perms); 5431 return ipc_has_perm(&sma->sem_perm, perms);
5432 } 5432 }
5433 5433
5434 static int selinux_ipc_permission(struct kern_ipc_perm *ipcp, short flag) 5434 static int selinux_ipc_permission(struct kern_ipc_perm *ipcp, short flag)
5435 { 5435 {
5436 u32 av = 0; 5436 u32 av = 0;
5437 5437
5438 av = 0; 5438 av = 0;
5439 if (flag & S_IRUGO) 5439 if (flag & S_IRUGO)
5440 av |= IPC__UNIX_READ; 5440 av |= IPC__UNIX_READ;
5441 if (flag & S_IWUGO) 5441 if (flag & S_IWUGO)
5442 av |= IPC__UNIX_WRITE; 5442 av |= IPC__UNIX_WRITE;
5443 5443
5444 if (av == 0) 5444 if (av == 0)
5445 return 0; 5445 return 0;
5446 5446
5447 return ipc_has_perm(ipcp, av); 5447 return ipc_has_perm(ipcp, av);
5448 } 5448 }
5449 5449
5450 static void selinux_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid) 5450 static void selinux_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid)
5451 { 5451 {
5452 struct ipc_security_struct *isec = ipcp->security; 5452 struct ipc_security_struct *isec = ipcp->security;
5453 *secid = isec->sid; 5453 *secid = isec->sid;
5454 } 5454 }
5455 5455
5456 static void selinux_d_instantiate(struct dentry *dentry, struct inode *inode) 5456 static void selinux_d_instantiate(struct dentry *dentry, struct inode *inode)
5457 { 5457 {
5458 if (inode) 5458 if (inode)
5459 inode_doinit_with_dentry(inode, dentry); 5459 inode_doinit_with_dentry(inode, dentry);
5460 } 5460 }
5461 5461
5462 static int selinux_getprocattr(struct task_struct *p, 5462 static int selinux_getprocattr(struct task_struct *p,
5463 char *name, char **value) 5463 char *name, char **value)
5464 { 5464 {
5465 const struct task_security_struct *__tsec; 5465 const struct task_security_struct *__tsec;
5466 u32 sid; 5466 u32 sid;
5467 int error; 5467 int error;
5468 unsigned len; 5468 unsigned len;
5469 5469
5470 if (current != p) { 5470 if (current != p) {
5471 error = current_has_perm(p, PROCESS__GETATTR); 5471 error = current_has_perm(p, PROCESS__GETATTR);
5472 if (error) 5472 if (error)
5473 return error; 5473 return error;
5474 } 5474 }
5475 5475
5476 rcu_read_lock(); 5476 rcu_read_lock();
5477 __tsec = __task_cred(p)->security; 5477 __tsec = __task_cred(p)->security;
5478 5478
5479 if (!strcmp(name, "current")) 5479 if (!strcmp(name, "current"))
5480 sid = __tsec->sid; 5480 sid = __tsec->sid;
5481 else if (!strcmp(name, "prev")) 5481 else if (!strcmp(name, "prev"))
5482 sid = __tsec->osid; 5482 sid = __tsec->osid;
5483 else if (!strcmp(name, "exec")) 5483 else if (!strcmp(name, "exec"))
5484 sid = __tsec->exec_sid; 5484 sid = __tsec->exec_sid;
5485 else if (!strcmp(name, "fscreate")) 5485 else if (!strcmp(name, "fscreate"))
5486 sid = __tsec->create_sid; 5486 sid = __tsec->create_sid;
5487 else if (!strcmp(name, "keycreate")) 5487 else if (!strcmp(name, "keycreate"))
5488 sid = __tsec->keycreate_sid; 5488 sid = __tsec->keycreate_sid;
5489 else if (!strcmp(name, "sockcreate")) 5489 else if (!strcmp(name, "sockcreate"))
5490 sid = __tsec->sockcreate_sid; 5490 sid = __tsec->sockcreate_sid;
5491 else 5491 else
5492 goto invalid; 5492 goto invalid;
5493 rcu_read_unlock(); 5493 rcu_read_unlock();
5494 5494
5495 if (!sid) 5495 if (!sid)
5496 return 0; 5496 return 0;
5497 5497
5498 error = security_sid_to_context(sid, value, &len); 5498 error = security_sid_to_context(sid, value, &len);
5499 if (error) 5499 if (error)
5500 return error; 5500 return error;
5501 return len; 5501 return len;
5502 5502
5503 invalid: 5503 invalid:
5504 rcu_read_unlock(); 5504 rcu_read_unlock();
5505 return -EINVAL; 5505 return -EINVAL;
5506 } 5506 }
5507 5507
5508 static int selinux_setprocattr(struct task_struct *p, 5508 static int selinux_setprocattr(struct task_struct *p,
5509 char *name, void *value, size_t size) 5509 char *name, void *value, size_t size)
5510 { 5510 {
5511 struct task_security_struct *tsec; 5511 struct task_security_struct *tsec;
5512 struct task_struct *tracer; 5512 struct task_struct *tracer;
5513 struct cred *new; 5513 struct cred *new;
5514 u32 sid = 0, ptsid; 5514 u32 sid = 0, ptsid;
5515 int error; 5515 int error;
5516 char *str = value; 5516 char *str = value;
5517 5517
5518 if (current != p) { 5518 if (current != p) {
5519 /* SELinux only allows a process to change its own 5519 /* SELinux only allows a process to change its own
5520 security attributes. */ 5520 security attributes. */
5521 return -EACCES; 5521 return -EACCES;
5522 } 5522 }
5523 5523
5524 /* 5524 /*
5525 * Basic control over ability to set these attributes at all. 5525 * Basic control over ability to set these attributes at all.
5526 * current == p, but we'll pass them separately in case the 5526 * current == p, but we'll pass them separately in case the
5527 * above restriction is ever removed. 5527 * above restriction is ever removed.
5528 */ 5528 */
5529 if (!strcmp(name, "exec")) 5529 if (!strcmp(name, "exec"))
5530 error = current_has_perm(p, PROCESS__SETEXEC); 5530 error = current_has_perm(p, PROCESS__SETEXEC);
5531 else if (!strcmp(name, "fscreate")) 5531 else if (!strcmp(name, "fscreate"))
5532 error = current_has_perm(p, PROCESS__SETFSCREATE); 5532 error = current_has_perm(p, PROCESS__SETFSCREATE);
5533 else if (!strcmp(name, "keycreate")) 5533 else if (!strcmp(name, "keycreate"))
5534 error = current_has_perm(p, PROCESS__SETKEYCREATE); 5534 error = current_has_perm(p, PROCESS__SETKEYCREATE);
5535 else if (!strcmp(name, "sockcreate")) 5535 else if (!strcmp(name, "sockcreate"))
5536 error = current_has_perm(p, PROCESS__SETSOCKCREATE); 5536 error = current_has_perm(p, PROCESS__SETSOCKCREATE);
5537 else if (!strcmp(name, "current")) 5537 else if (!strcmp(name, "current"))
5538 error = current_has_perm(p, PROCESS__SETCURRENT); 5538 error = current_has_perm(p, PROCESS__SETCURRENT);
5539 else 5539 else
5540 error = -EINVAL; 5540 error = -EINVAL;
5541 if (error) 5541 if (error)
5542 return error; 5542 return error;
5543 5543
5544 /* Obtain a SID for the context, if one was specified. */ 5544 /* Obtain a SID for the context, if one was specified. */
5545 if (size && str[1] && str[1] != '\n') { 5545 if (size && str[1] && str[1] != '\n') {
5546 if (str[size-1] == '\n') { 5546 if (str[size-1] == '\n') {
5547 str[size-1] = 0; 5547 str[size-1] = 0;
5548 size--; 5548 size--;
5549 } 5549 }
5550 error = security_context_to_sid(value, size, &sid, GFP_KERNEL); 5550 error = security_context_to_sid(value, size, &sid, GFP_KERNEL);
5551 if (error == -EINVAL && !strcmp(name, "fscreate")) { 5551 if (error == -EINVAL && !strcmp(name, "fscreate")) {
5552 if (!capable(CAP_MAC_ADMIN)) { 5552 if (!capable(CAP_MAC_ADMIN)) {
5553 struct audit_buffer *ab; 5553 struct audit_buffer *ab;
5554 size_t audit_size; 5554 size_t audit_size;
5555 5555
5556 /* We strip a nul only if it is at the end, otherwise the 5556 /* We strip a nul only if it is at the end, otherwise the
5557 * context contains a nul and we should audit that */ 5557 * context contains a nul and we should audit that */
5558 if (str[size - 1] == '\0') 5558 if (str[size - 1] == '\0')
5559 audit_size = size - 1; 5559 audit_size = size - 1;
5560 else 5560 else
5561 audit_size = size; 5561 audit_size = size;
5562 ab = audit_log_start(current->audit_context, GFP_ATOMIC, AUDIT_SELINUX_ERR); 5562 ab = audit_log_start(current->audit_context, GFP_ATOMIC, AUDIT_SELINUX_ERR);
5563 audit_log_format(ab, "op=fscreate invalid_context="); 5563 audit_log_format(ab, "op=fscreate invalid_context=");
5564 audit_log_n_untrustedstring(ab, value, audit_size); 5564 audit_log_n_untrustedstring(ab, value, audit_size);
5565 audit_log_end(ab); 5565 audit_log_end(ab);
5566 5566
5567 return error; 5567 return error;
5568 } 5568 }
5569 error = security_context_to_sid_force(value, size, 5569 error = security_context_to_sid_force(value, size,
5570 &sid); 5570 &sid);
5571 } 5571 }
5572 if (error) 5572 if (error)
5573 return error; 5573 return error;
5574 } 5574 }
5575 5575
5576 new = prepare_creds(); 5576 new = prepare_creds();
5577 if (!new) 5577 if (!new)
5578 return -ENOMEM; 5578 return -ENOMEM;
5579 5579
5580 /* Permission checking based on the specified context is 5580 /* Permission checking based on the specified context is
5581 performed during the actual operation (execve, 5581 performed during the actual operation (execve,
5582 open/mkdir/...), when we know the full context of the 5582 open/mkdir/...), when we know the full context of the
5583 operation. See selinux_bprm_set_creds for the execve 5583 operation. See selinux_bprm_set_creds for the execve
5584 checks and may_create for the file creation checks. The 5584 checks and may_create for the file creation checks. The
5585 operation will then fail if the context is not permitted. */ 5585 operation will then fail if the context is not permitted. */
5586 tsec = new->security; 5586 tsec = new->security;
5587 if (!strcmp(name, "exec")) { 5587 if (!strcmp(name, "exec")) {
5588 tsec->exec_sid = sid; 5588 tsec->exec_sid = sid;
5589 } else if (!strcmp(name, "fscreate")) { 5589 } else if (!strcmp(name, "fscreate")) {
5590 tsec->create_sid = sid; 5590 tsec->create_sid = sid;
5591 } else if (!strcmp(name, "keycreate")) { 5591 } else if (!strcmp(name, "keycreate")) {
5592 error = may_create_key(sid, p); 5592 error = may_create_key(sid, p);
5593 if (error) 5593 if (error)
5594 goto abort_change; 5594 goto abort_change;
5595 tsec->keycreate_sid = sid; 5595 tsec->keycreate_sid = sid;
5596 } else if (!strcmp(name, "sockcreate")) { 5596 } else if (!strcmp(name, "sockcreate")) {
5597 tsec->sockcreate_sid = sid; 5597 tsec->sockcreate_sid = sid;
5598 } else if (!strcmp(name, "current")) { 5598 } else if (!strcmp(name, "current")) {
5599 error = -EINVAL; 5599 error = -EINVAL;
5600 if (sid == 0) 5600 if (sid == 0)
5601 goto abort_change; 5601 goto abort_change;
5602 5602
5603 /* Only allow single threaded processes to change context */ 5603 /* Only allow single threaded processes to change context */
5604 error = -EPERM; 5604 error = -EPERM;
5605 if (!current_is_single_threaded()) { 5605 if (!current_is_single_threaded()) {
5606 error = security_bounded_transition(tsec->sid, sid); 5606 error = security_bounded_transition(tsec->sid, sid);
5607 if (error) 5607 if (error)
5608 goto abort_change; 5608 goto abort_change;
5609 } 5609 }
5610 5610
5611 /* Check permissions for the transition. */ 5611 /* Check permissions for the transition. */
5612 error = avc_has_perm(tsec->sid, sid, SECCLASS_PROCESS, 5612 error = avc_has_perm(tsec->sid, sid, SECCLASS_PROCESS,
5613 PROCESS__DYNTRANSITION, NULL); 5613 PROCESS__DYNTRANSITION, NULL);
5614 if (error) 5614 if (error)
5615 goto abort_change; 5615 goto abort_change;
5616 5616
5617 /* Check for ptracing, and update the task SID if ok. 5617 /* Check for ptracing, and update the task SID if ok.
5618 Otherwise, leave SID unchanged and fail. */ 5618 Otherwise, leave SID unchanged and fail. */
5619 ptsid = 0; 5619 ptsid = 0;
5620 rcu_read_lock(); 5620 rcu_read_lock();
5621 tracer = ptrace_parent(p); 5621 tracer = ptrace_parent(p);
5622 if (tracer) 5622 if (tracer)
5623 ptsid = task_sid(tracer); 5623 ptsid = task_sid(tracer);
5624 rcu_read_unlock(); 5624 rcu_read_unlock();
5625 5625
5626 if (tracer) { 5626 if (tracer) {
5627 error = avc_has_perm(ptsid, sid, SECCLASS_PROCESS, 5627 error = avc_has_perm(ptsid, sid, SECCLASS_PROCESS,
5628 PROCESS__PTRACE, NULL); 5628 PROCESS__PTRACE, NULL);
5629 if (error) 5629 if (error)
5630 goto abort_change; 5630 goto abort_change;
5631 } 5631 }
5632 5632
5633 tsec->sid = sid; 5633 tsec->sid = sid;
5634 } else { 5634 } else {
5635 error = -EINVAL; 5635 error = -EINVAL;
5636 goto abort_change; 5636 goto abort_change;
5637 } 5637 }
5638 5638
5639 commit_creds(new); 5639 commit_creds(new);
5640 return size; 5640 return size;
5641 5641
5642 abort_change: 5642 abort_change:
5643 abort_creds(new); 5643 abort_creds(new);
5644 return error; 5644 return error;
5645 } 5645 }
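
selinux_getprocattr() and selinux_setprocattr() are the LSM backends for the /proc/<pid>/attr/* files (current, prev, exec, fscreate, keycreate, sockcreate). As an illustrative userspace sketch that is not part of this diff, and assuming the usual procfs layout and a policy that actually defines the example context, a process could read its own label and request a file-creation context roughly like this:

	/* Illustrative userspace sketch -- not part of the kernel code above. */
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		char ctx[4096];
		int fd = open("/proc/self/attr/current", O_RDONLY); /* selinux_getprocattr("current") */
		if (fd < 0)
			return 1;
		ssize_t n = read(fd, ctx, sizeof(ctx) - 1);
		close(fd);
		if (n < 0)
			return 1;
		ctx[n] = '\0';
		printf("current context: %s\n", ctx);

		/* Hypothetical context string; the write fails unless the
		 * loaded policy knows it and grants process setfscreate. */
		const char *create_ctx = "system_u:object_r:tmp_t:s0";
		fd = open("/proc/self/attr/fscreate", O_WRONLY); /* selinux_setprocattr("fscreate") */
		if (fd < 0)
			return 1;
		if (write(fd, create_ctx, strlen(create_ctx)) < 0)
			perror("fscreate");
		close(fd);
		return 0;
	}

Writing "current" instead of "fscreate" goes through the stricter dyntransition path above, which is why a multi-threaded caller is limited to bounded transitions and a traced caller additionally needs the tracer's ptrace permission before tsec->sid is updated.
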
5646 5646
5647 static int selinux_ismaclabel(const char *name) 5647 static int selinux_ismaclabel(const char *name)
5648 { 5648 {
5649 return (strcmp(name, XATTR_SELINUX_SUFFIX) == 0); 5649 return (strcmp(name, XATTR_SELINUX_SUFFIX) == 0);
5650 } 5650 }
5651 5651
5652 static int selinux_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) 5652 static int selinux_secid_to_secctx(u32 secid, char **secdata, u32 *seclen)
5653 { 5653 {
5654 return security_sid_to_context(secid, secdata, seclen); 5654 return security_sid_to_context(secid, secdata, seclen);
5655 } 5655 }
5656 5656
5657 static int selinux_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid) 5657 static int selinux_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid)
5658 { 5658 {
5659 return security_context_to_sid(secdata, seclen, secid, GFP_KERNEL); 5659 return security_context_to_sid(secdata, seclen, secid, GFP_KERNEL);
5660 } 5660 }
5661 5661
5662 static void selinux_release_secctx(char *secdata, u32 seclen) 5662 static void selinux_release_secctx(char *secdata, u32 seclen)
5663 { 5663 {
5664 kfree(secdata); 5664 kfree(secdata);
5665 } 5665 }
5666 5666
5667 /* 5667 /*
5668 * called with inode->i_mutex locked 5668 * called with inode->i_mutex locked
5669 */ 5669 */
5670 static int selinux_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen) 5670 static int selinux_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen)
5671 { 5671 {
5672 return selinux_inode_setsecurity(inode, XATTR_SELINUX_SUFFIX, ctx, ctxlen, 0); 5672 return selinux_inode_setsecurity(inode, XATTR_SELINUX_SUFFIX, ctx, ctxlen, 0);
5673 } 5673 }
5674 5674
5675 /* 5675 /*
5676 * called with inode->i_mutex locked 5676 * called with inode->i_mutex locked
5677 */ 5677 */
5678 static int selinux_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen) 5678 static int selinux_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
5679 { 5679 {
5680 return __vfs_setxattr_noperm(dentry, XATTR_NAME_SELINUX, ctx, ctxlen, 0); 5680 return __vfs_setxattr_noperm(dentry, XATTR_NAME_SELINUX, ctx, ctxlen, 0);
5681 } 5681 }
5682 5682
5683 static int selinux_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen) 5683 static int selinux_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
5684 { 5684 {
5685 int len = 0; 5685 int len = 0;
5686 len = selinux_inode_getsecurity(inode, XATTR_SELINUX_SUFFIX, 5686 len = selinux_inode_getsecurity(inode, XATTR_SELINUX_SUFFIX,
5687 ctx, true); 5687 ctx, true);
5688 if (len < 0) 5688 if (len < 0)
5689 return len; 5689 return len;
5690 *ctxlen = len; 5690 *ctxlen = len;
5691 return 0; 5691 return 0;
5692 } 5692 }
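
The secctx helpers above convert between SIDs and context strings and read or write an inode's label through its security.selinux extended attribute. As an illustrative aside that is not part of this diff (the target path is just an example), the same label is visible from userspace through the generic xattr syscalls:

	/* Illustrative userspace sketch -- not part of the kernel code above. */
	#include <stdio.h>
	#include <sys/xattr.h>

	int main(void)
	{
		char label[256];
		ssize_t n = getxattr("/etc/passwd", "security.selinux",
				     label, sizeof(label) - 1);
		if (n < 0) {
			perror("getxattr");
			return 1;
		}
		label[n] = '\0';
		printf("label: %s\n", label);
		return 0;
	}

This is the same "security.selinux" name that selinux_inode_setsecctx() writes via __vfs_setxattr_noperm() and that selinux_inode_getsecctx() reads back through selinux_inode_getsecurity().
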
5693 #ifdef CONFIG_KEYS 5693 #ifdef CONFIG_KEYS
5694 5694
5695 static int selinux_key_alloc(struct key *k, const struct cred *cred, 5695 static int selinux_key_alloc(struct key *k, const struct cred *cred,
5696 unsigned long flags) 5696 unsigned long flags)
5697 { 5697 {
5698 const struct task_security_struct *tsec; 5698 const struct task_security_struct *tsec;
5699 struct key_security_struct *ksec; 5699 struct key_security_struct *ksec;
5700 5700
5701 ksec = kzalloc(sizeof(struct key_security_struct), GFP_KERNEL); 5701 ksec = kzalloc(sizeof(struct key_security_struct), GFP_KERNEL);
5702 if (!ksec) 5702 if (!ksec)
5703 return -ENOMEM; 5703 return -ENOMEM;
5704 5704
5705 tsec = cred->security; 5705 tsec = cred->security;
5706 if (tsec->keycreate_sid) 5706 if (tsec->keycreate_sid)
5707 ksec->sid = tsec->keycreate_sid; 5707 ksec->sid = tsec->keycreate_sid;
5708 else 5708 else
5709 ksec->sid = tsec->sid; 5709 ksec->sid = tsec->sid;
5710 5710
5711 k->security = ksec; 5711 k->security = ksec;
5712 return 0; 5712 return 0;
5713 } 5713 }
5714 5714
5715 static void selinux_key_free(struct key *k) 5715 static void selinux_key_free(struct key *k)
5716 { 5716 {
5717 struct key_security_struct *ksec = k->security; 5717 struct key_security_struct *ksec = k->security;
5718 5718
5719 k->security = NULL; 5719 k->security = NULL;
5720 kfree(ksec); 5720 kfree(ksec);
5721 } 5721 }
5722 5722
5723 static int selinux_key_permission(key_ref_t key_ref, 5723 static int selinux_key_permission(key_ref_t key_ref,
5724 const struct cred *cred, 5724 const struct cred *cred,
5725 key_perm_t perm) 5725 key_perm_t perm)
5726 { 5726 {
5727 struct key *key; 5727 struct key *key;
5728 struct key_security_struct *ksec; 5728 struct key_security_struct *ksec;
5729 u32 sid; 5729 u32 sid;
5730 5730
5731 /* if no specific permissions are requested, we skip the 5731 /* if no specific permissions are requested, we skip the
5732 permission check. No serious, additional covert channels 5732 permission check. No serious, additional covert channels
5733 appear to be created. */ 5733 appear to be created. */
5734 if (perm == 0) 5734 if (perm == 0)
5735 return 0; 5735 return 0;
5736 5736
5737 sid = cred_sid(cred); 5737 sid = cred_sid(cred);
5738 5738
5739 key = key_ref_to_ptr(key_ref); 5739 key = key_ref_to_ptr(key_ref);
5740 ksec = key->security; 5740 ksec = key->security;
5741 5741
5742 return avc_has_perm(sid, ksec->sid, SECCLASS_KEY, perm, NULL); 5742 return avc_has_perm(sid, ksec->sid, SECCLASS_KEY, perm, NULL);
5743 } 5743 }
5744 5744
5745 static int selinux_key_getsecurity(struct key *key, char **_buffer) 5745 static int selinux_key_getsecurity(struct key *key, char **_buffer)
5746 { 5746 {
5747 struct key_security_struct *ksec = key->security; 5747 struct key_security_struct *ksec = key->security;
5748 char *context = NULL; 5748 char *context = NULL;
5749 unsigned len; 5749 unsigned len;
5750 int rc; 5750 int rc;
5751 5751
5752 rc = security_sid_to_context(ksec->sid, &context, &len); 5752 rc = security_sid_to_context(ksec->sid, &context, &len);
5753 if (!rc) 5753 if (!rc)
5754 rc = len; 5754 rc = len;
5755 *_buffer = context; 5755 *_buffer = context;
5756 return rc; 5756 return rc;
5757 } 5757 }
5758 5758
5759 #endif 5759 #endif
5760 5760
5761 static struct security_operations selinux_ops = { 5761 static struct security_operations selinux_ops = {
5762 .name = "selinux", 5762 .name = "selinux",
5763 5763
5764 .ptrace_access_check = selinux_ptrace_access_check, 5764 .ptrace_access_check = selinux_ptrace_access_check,
5765 .ptrace_traceme = selinux_ptrace_traceme, 5765 .ptrace_traceme = selinux_ptrace_traceme,
5766 .capget = selinux_capget, 5766 .capget = selinux_capget,
5767 .capset = selinux_capset, 5767 .capset = selinux_capset,
5768 .capable = selinux_capable, 5768 .capable = selinux_capable,
5769 .quotactl = selinux_quotactl, 5769 .quotactl = selinux_quotactl,
5770 .quota_on = selinux_quota_on, 5770 .quota_on = selinux_quota_on,
5771 .syslog = selinux_syslog, 5771 .syslog = selinux_syslog,
5772 .vm_enough_memory = selinux_vm_enough_memory, 5772 .vm_enough_memory = selinux_vm_enough_memory,
5773 5773
5774 .netlink_send = selinux_netlink_send, 5774 .netlink_send = selinux_netlink_send,
5775 5775
5776 .bprm_set_creds = selinux_bprm_set_creds, 5776 .bprm_set_creds = selinux_bprm_set_creds,
5777 .bprm_committing_creds = selinux_bprm_committing_creds, 5777 .bprm_committing_creds = selinux_bprm_committing_creds,
5778 .bprm_committed_creds = selinux_bprm_committed_creds, 5778 .bprm_committed_creds = selinux_bprm_committed_creds,
5779 .bprm_secureexec = selinux_bprm_secureexec, 5779 .bprm_secureexec = selinux_bprm_secureexec,
5780 5780
5781 .sb_alloc_security = selinux_sb_alloc_security, 5781 .sb_alloc_security = selinux_sb_alloc_security,
5782 .sb_free_security = selinux_sb_free_security, 5782 .sb_free_security = selinux_sb_free_security,
5783 .sb_copy_data = selinux_sb_copy_data, 5783 .sb_copy_data = selinux_sb_copy_data,
5784 .sb_remount = selinux_sb_remount, 5784 .sb_remount = selinux_sb_remount,
5785 .sb_kern_mount = selinux_sb_kern_mount, 5785 .sb_kern_mount = selinux_sb_kern_mount,
5786 .sb_show_options = selinux_sb_show_options, 5786 .sb_show_options = selinux_sb_show_options,
5787 .sb_statfs = selinux_sb_statfs, 5787 .sb_statfs = selinux_sb_statfs,
5788 .sb_mount = selinux_mount, 5788 .sb_mount = selinux_mount,
5789 .sb_umount = selinux_umount, 5789 .sb_umount = selinux_umount,
5790 .sb_set_mnt_opts = selinux_set_mnt_opts, 5790 .sb_set_mnt_opts = selinux_set_mnt_opts,
5791 .sb_clone_mnt_opts = selinux_sb_clone_mnt_opts, 5791 .sb_clone_mnt_opts = selinux_sb_clone_mnt_opts,
5792 .sb_parse_opts_str = selinux_parse_opts_str, 5792 .sb_parse_opts_str = selinux_parse_opts_str,
5793 5793
5794 .dentry_init_security = selinux_dentry_init_security, 5794 .dentry_init_security = selinux_dentry_init_security,
5795 5795
5796 .inode_alloc_security = selinux_inode_alloc_security, 5796 .inode_alloc_security = selinux_inode_alloc_security,
5797 .inode_free_security = selinux_inode_free_security, 5797 .inode_free_security = selinux_inode_free_security,
5798 .inode_init_security = selinux_inode_init_security, 5798 .inode_init_security = selinux_inode_init_security,
5799 .inode_create = selinux_inode_create, 5799 .inode_create = selinux_inode_create,
5800 .inode_link = selinux_inode_link, 5800 .inode_link = selinux_inode_link,
5801 .inode_unlink = selinux_inode_unlink, 5801 .inode_unlink = selinux_inode_unlink,
5802 .inode_symlink = selinux_inode_symlink, 5802 .inode_symlink = selinux_inode_symlink,
5803 .inode_mkdir = selinux_inode_mkdir, 5803 .inode_mkdir = selinux_inode_mkdir,
5804 .inode_rmdir = selinux_inode_rmdir, 5804 .inode_rmdir = selinux_inode_rmdir,
5805 .inode_mknod = selinux_inode_mknod, 5805 .inode_mknod = selinux_inode_mknod,
5806 .inode_rename = selinux_inode_rename, 5806 .inode_rename = selinux_inode_rename,
5807 .inode_readlink = selinux_inode_readlink, 5807 .inode_readlink = selinux_inode_readlink,
5808 .inode_follow_link = selinux_inode_follow_link, 5808 .inode_follow_link = selinux_inode_follow_link,
5809 .inode_permission = selinux_inode_permission, 5809 .inode_permission = selinux_inode_permission,
5810 .inode_setattr = selinux_inode_setattr, 5810 .inode_setattr = selinux_inode_setattr,
5811 .inode_getattr = selinux_inode_getattr, 5811 .inode_getattr = selinux_inode_getattr,
5812 .inode_setxattr = selinux_inode_setxattr, 5812 .inode_setxattr = selinux_inode_setxattr,
5813 .inode_post_setxattr = selinux_inode_post_setxattr, 5813 .inode_post_setxattr = selinux_inode_post_setxattr,
5814 .inode_getxattr = selinux_inode_getxattr,
5815 .inode_listxattr = selinux_inode_listxattr,
5816 .inode_removexattr = selinux_inode_removexattr,
5817 .inode_getsecurity = selinux_inode_getsecurity,
5818 .inode_setsecurity = selinux_inode_setsecurity,
5819 .inode_listsecurity = selinux_inode_listsecurity,
5820 .inode_getsecid = selinux_inode_getsecid,
5821
5822 .file_permission = selinux_file_permission,
5823 .file_alloc_security = selinux_file_alloc_security,
5824 .file_free_security = selinux_file_free_security,
5825 .file_ioctl = selinux_file_ioctl,
5826 .mmap_file = selinux_mmap_file,
5827 .mmap_addr = selinux_mmap_addr,
5828 .file_mprotect = selinux_file_mprotect,
5829 .file_lock = selinux_file_lock,
5830 .file_fcntl = selinux_file_fcntl,
5831 .file_set_fowner = selinux_file_set_fowner,
5832 .file_send_sigiotask = selinux_file_send_sigiotask,
5833 .file_receive = selinux_file_receive,
5834
5835 .file_open = selinux_file_open,
5836
5837 .task_create = selinux_task_create,
5838 .cred_alloc_blank = selinux_cred_alloc_blank,
5839 .cred_free = selinux_cred_free,
5840 .cred_prepare = selinux_cred_prepare,
5841 .cred_transfer = selinux_cred_transfer,
5842 .kernel_act_as = selinux_kernel_act_as,
5843 .kernel_create_files_as = selinux_kernel_create_files_as,
5844 .kernel_module_request = selinux_kernel_module_request,
5845 .task_setpgid = selinux_task_setpgid,
5846 .task_getpgid = selinux_task_getpgid,
5847 .task_getsid = selinux_task_getsid,
5848 .task_getsecid = selinux_task_getsecid,
5849 .task_setnice = selinux_task_setnice,
5850 .task_setioprio = selinux_task_setioprio,
5851 .task_getioprio = selinux_task_getioprio,
5852 .task_setrlimit = selinux_task_setrlimit,
5853 .task_setscheduler = selinux_task_setscheduler,
5854 .task_getscheduler = selinux_task_getscheduler,
5855 .task_movememory = selinux_task_movememory,
5856 .task_kill = selinux_task_kill,
5857 .task_wait = selinux_task_wait,
5858 .task_to_inode = selinux_task_to_inode,
5859
5860 .ipc_permission = selinux_ipc_permission,
5861 .ipc_getsecid = selinux_ipc_getsecid,
5862
5863 .msg_msg_alloc_security = selinux_msg_msg_alloc_security,
5864 .msg_msg_free_security = selinux_msg_msg_free_security,
5865
5866 .msg_queue_alloc_security = selinux_msg_queue_alloc_security,
5867 .msg_queue_free_security = selinux_msg_queue_free_security,
5868 .msg_queue_associate = selinux_msg_queue_associate,
5869 .msg_queue_msgctl = selinux_msg_queue_msgctl,
5870 .msg_queue_msgsnd = selinux_msg_queue_msgsnd,
5871 .msg_queue_msgrcv = selinux_msg_queue_msgrcv,
5872
5873 .shm_alloc_security = selinux_shm_alloc_security,
5874 .shm_free_security = selinux_shm_free_security,
5875 .shm_associate = selinux_shm_associate,
5876 .shm_shmctl = selinux_shm_shmctl,
5877 .shm_shmat = selinux_shm_shmat,
5878
5879 .sem_alloc_security = selinux_sem_alloc_security,
5880 .sem_free_security = selinux_sem_free_security,
5881 .sem_associate = selinux_sem_associate,
5882 .sem_semctl = selinux_sem_semctl,
5883 .sem_semop = selinux_sem_semop,
5884
5885 .d_instantiate = selinux_d_instantiate,
5886
5887 .getprocattr = selinux_getprocattr,
5888 .setprocattr = selinux_setprocattr,
5889
5890 .ismaclabel = selinux_ismaclabel,
5891 .secid_to_secctx = selinux_secid_to_secctx,
5892 .secctx_to_secid = selinux_secctx_to_secid,
5893 .release_secctx = selinux_release_secctx,
5894 .inode_notifysecctx = selinux_inode_notifysecctx,
5895 .inode_setsecctx = selinux_inode_setsecctx,
5896 .inode_getsecctx = selinux_inode_getsecctx,
5897
5898 .unix_stream_connect = selinux_socket_unix_stream_connect,
5899 .unix_may_send = selinux_socket_unix_may_send,
5900
5901 .socket_create = selinux_socket_create,
5902 .socket_post_create = selinux_socket_post_create,
5903 .socket_bind = selinux_socket_bind,
5904 .socket_connect = selinux_socket_connect,
5905 .socket_listen = selinux_socket_listen,
5906 .socket_accept = selinux_socket_accept,
5907 .socket_sendmsg = selinux_socket_sendmsg,
5908 .socket_recvmsg = selinux_socket_recvmsg,
5909 .socket_getsockname = selinux_socket_getsockname,
5910 .socket_getpeername = selinux_socket_getpeername,
5911 .socket_getsockopt = selinux_socket_getsockopt,
5912 .socket_setsockopt = selinux_socket_setsockopt,
5913 .socket_shutdown = selinux_socket_shutdown,
5914 .socket_sock_rcv_skb = selinux_socket_sock_rcv_skb,
5915 .socket_getpeersec_stream = selinux_socket_getpeersec_stream,
5916 .socket_getpeersec_dgram = selinux_socket_getpeersec_dgram,
5917 .sk_alloc_security = selinux_sk_alloc_security,
5918 .sk_free_security = selinux_sk_free_security,
5919 .sk_clone_security = selinux_sk_clone_security,
5920 .sk_getsecid = selinux_sk_getsecid,
5921 .sock_graft = selinux_sock_graft,
5922 .inet_conn_request = selinux_inet_conn_request,
5923 .inet_csk_clone = selinux_inet_csk_clone,
5924 .inet_conn_established = selinux_inet_conn_established,
5925 .secmark_relabel_packet = selinux_secmark_relabel_packet,
5926 .secmark_refcount_inc = selinux_secmark_refcount_inc,
5927 .secmark_refcount_dec = selinux_secmark_refcount_dec,
5928 .req_classify_flow = selinux_req_classify_flow,
5929 .tun_dev_alloc_security = selinux_tun_dev_alloc_security,
5930 .tun_dev_free_security = selinux_tun_dev_free_security,
5931 .tun_dev_create = selinux_tun_dev_create,
5932 .tun_dev_attach_queue = selinux_tun_dev_attach_queue,
5933 .tun_dev_attach = selinux_tun_dev_attach,
5934 .tun_dev_open = selinux_tun_dev_open,
5935 .skb_owned_by = selinux_skb_owned_by,
5936
5937 #ifdef CONFIG_SECURITY_NETWORK_XFRM
5938 .xfrm_policy_alloc_security = selinux_xfrm_policy_alloc,
5939 .xfrm_policy_clone_security = selinux_xfrm_policy_clone,
5940 .xfrm_policy_free_security = selinux_xfrm_policy_free,
5941 .xfrm_policy_delete_security = selinux_xfrm_policy_delete,
5942 .xfrm_state_alloc = selinux_xfrm_state_alloc,
5943 .xfrm_state_alloc_acquire = selinux_xfrm_state_alloc_acquire,
5944 .xfrm_state_free_security = selinux_xfrm_state_free,
5945 .xfrm_state_delete_security = selinux_xfrm_state_delete,
5946 .xfrm_policy_lookup = selinux_xfrm_policy_lookup,
5947 .xfrm_state_pol_flow_match = selinux_xfrm_state_pol_flow_match,
5948 .xfrm_decode_session = selinux_xfrm_decode_session,
5949 #endif
5950
5951 #ifdef CONFIG_KEYS
5952 .key_alloc = selinux_key_alloc,
5953 .key_free = selinux_key_free,
5954 .key_permission = selinux_key_permission,
5955 .key_getsecurity = selinux_key_getsecurity,
5956 #endif
5957
5958 #ifdef CONFIG_AUDIT
5959 .audit_rule_init = selinux_audit_rule_init,
5960 .audit_rule_known = selinux_audit_rule_known,
5961 .audit_rule_match = selinux_audit_rule_match,
5962 .audit_rule_free = selinux_audit_rule_free,
5963 #endif
5964 };
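
The table above closes the full set of hook implementations SELinux hands to the LSM framework. As a rough illustration of how such an ops table is consumed, the sketch below shows the dispatch pattern only: the framework keeps a pointer to whichever module's table it accepted at registration time and forwards each hook call through it. The example_* names are hypothetical; this is not the actual security/security.c code.

/*
 * Illustrative sketch only -- hypothetical names, not the real
 * security/security.c implementation.
 */
#include <linux/fs.h>

struct example_security_ops {
	int (*file_lock)(struct file *file, unsigned int cmd);
};

/* Points at whichever module's ops table was accepted at registration. */
static struct example_security_ops *example_ops;

int example_security_file_lock(struct file *file, unsigned int cmd)
{
	/* The VFS calls this entry point; it forwards to the registered module. */
	return example_ops->file_lock(file, cmd);
}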
5965
5966 static __init int selinux_init(void)
5967 {
5968 if (!security_module_enable(&selinux_ops)) {
5969 selinux_enabled = 0;
5970 return 0;
5971 }
5972
5973 if (!selinux_enabled) {
5974 printk(KERN_INFO "SELinux: Disabled at boot.\n");
5975 return 0;
5976 }
5977
5978 printk(KERN_INFO "SELinux: Initializing.\n");
5979
5980 /* Set the security state for the initial task. */
5981 cred_init_security();
5982
5983 default_noexec = !(VM_DATA_DEFAULT_FLAGS & VM_EXEC);
5984
5985 sel_inode_cache = kmem_cache_create("selinux_inode_security",
5986 sizeof(struct inode_security_struct),
5987 0, SLAB_PANIC, NULL);
5988 avc_init();
5989
5990 if (register_security(&selinux_ops))
5991 panic("SELinux: Unable to register with kernel.\n");
5992
5993 if (selinux_enforcing)
5994 printk(KERN_DEBUG "SELinux: Starting in enforcing mode\n");
5995 else
5996 printk(KERN_DEBUG "SELinux: Starting in permissive mode\n");
5997
5998 return 0;
5999 }
6000
6001 static void delayed_superblock_init(struct super_block *sb, void *unused)
6002 {
6003 superblock_doinit(sb, NULL);
6004 }
6005
6006 void selinux_complete_init(void)
6007 {
6008 printk(KERN_DEBUG "SELinux: Completing initialization.\n");
6009
6010 /* Set up any superblocks initialized prior to the policy load. */
6011 printk(KERN_DEBUG "SELinux: Setting up existing superblocks.\n");
6012 iterate_supers(delayed_superblock_init, NULL);
6013 }
6014
6015 /* SELinux requires early initialization in order to label
6016 all processes and objects when they are created. */
6017 security_initcall(selinux_init);
6018
6019 #if defined(CONFIG_NETFILTER)
6020
6021 static struct nf_hook_ops selinux_ipv4_ops[] = {
6022 {
6023 .hook = selinux_ipv4_postroute,
6024 .owner = THIS_MODULE,
6025 .pf = NFPROTO_IPV4,
6026 .hooknum = NF_INET_POST_ROUTING,
6027 .priority = NF_IP_PRI_SELINUX_LAST,
6028 },
6029 {
6030 .hook = selinux_ipv4_forward,
6031 .owner = THIS_MODULE,
6032 .pf = NFPROTO_IPV4,
6033 .hooknum = NF_INET_FORWARD,
6034 .priority = NF_IP_PRI_SELINUX_FIRST,
6035 },
6036 {
6037 .hook = selinux_ipv4_output,
6038 .owner = THIS_MODULE,
6039 .pf = NFPROTO_IPV4,
6040 .hooknum = NF_INET_LOCAL_OUT,
6041 .priority = NF_IP_PRI_SELINUX_FIRST,
6042 }
6043 };
6044
6045 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
6046
6047 static struct nf_hook_ops selinux_ipv6_ops[] = {
6048 {
6049 .hook = selinux_ipv6_postroute,
6050 .owner = THIS_MODULE,
6051 .pf = NFPROTO_IPV6,
6052 .hooknum = NF_INET_POST_ROUTING,
6053 .priority = NF_IP6_PRI_SELINUX_LAST,
6054 },
6055 {
6056 .hook = selinux_ipv6_forward,
6057 .owner = THIS_MODULE,
6058 .pf = NFPROTO_IPV6,
6059 .hooknum = NF_INET_FORWARD,
6060 .priority = NF_IP6_PRI_SELINUX_FIRST,
6061 }
6062 };
6063
6064 #endif /* IPV6 */
6065
6066 static int __init selinux_nf_ip_init(void)
6067 {
6068 int err = 0;
6069
6070 if (!selinux_enabled)
6071 goto out;
6072
6073 printk(KERN_DEBUG "SELinux: Registering netfilter hooks\n");
6074
6075 err = nf_register_hooks(selinux_ipv4_ops, ARRAY_SIZE(selinux_ipv4_ops));
6076 if (err)
6077 panic("SELinux: nf_register_hooks for IPv4: error %d\n", err);
6078
6079 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
6080 err = nf_register_hooks(selinux_ipv6_ops, ARRAY_SIZE(selinux_ipv6_ops));
6081 if (err)
6082 panic("SELinux: nf_register_hooks for IPv6: error %d\n", err);
6083 #endif /* IPV6 */
6084
6085 out:
6086 return err;
6087 }
6088
6089 __initcall(selinux_nf_ip_init);
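
For comparison with the selinux_ipv4_ops / selinux_ipv6_ops registration above, here is a minimal sketch of the same pattern for a single hypothetical IPv4 hook. The example_* names are illustrative only, and the nf_hookfn prototype is assumed to be the one in use in this kernel era; treat it as a sketch, not a drop-in module.

/*
 * Illustrative sketch only: one IPv4 hook registered the same way as
 * selinux_nf_ip_init() registers its arrays.  Hypothetical example_* names;
 * the hook prototype below is an assumption for this kernel generation.
 */
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>

static unsigned int example_ipv4_output(const struct nf_hook_ops *ops,
					struct sk_buff *skb,
					const struct net_device *in,
					const struct net_device *out,
					int (*okfn)(struct sk_buff *))
{
	/* Inspect or label the packet here; NF_ACCEPT lets it continue. */
	return NF_ACCEPT;
}

static struct nf_hook_ops example_ipv4_ops[] = {
	{
		.hook		= example_ipv4_output,
		.owner		= THIS_MODULE,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_LOCAL_OUT,
		.priority	= NF_IP_PRI_FIRST,
	},
};

static int __init example_nf_init(void)
{
	/* Registers every entry in the array, as selinux_nf_ip_init() does. */
	return nf_register_hooks(example_ipv4_ops, ARRAY_SIZE(example_ipv4_ops));
}
__initcall(example_nf_init);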
6090
6091 #ifdef CONFIG_SECURITY_SELINUX_DISABLE
6092 static void selinux_nf_ip_exit(void)
6093 {
6094 printk(KERN_DEBUG "SELinux: Unregistering netfilter hooks\n");
6095
6096 nf_unregister_hooks(selinux_ipv4_ops, ARRAY_SIZE(selinux_ipv4_ops));
6097 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
6098 nf_unregister_hooks(selinux_ipv6_ops, ARRAY_SIZE(selinux_ipv6_ops));
6099 #endif /* IPV6 */
6100 }
6101 #endif
6102
6103 #else /* CONFIG_NETFILTER */
6104
6105 #ifdef CONFIG_SECURITY_SELINUX_DISABLE
6106 #define selinux_nf_ip_exit()
6107 #endif
6108
6109 #endif /* CONFIG_NETFILTER */
6110
6111 #ifdef CONFIG_SECURITY_SELINUX_DISABLE
6112 static int selinux_disabled;
6113
6114 int selinux_disable(void)
6115 {
6116 if (ss_initialized) {
6117 /* Not permitted after initial policy load. */
6118 return -EINVAL;
6119 }
6120
6121 if (selinux_disabled) {
6122 /* Only do this once. */
6123 return -EINVAL;
6124 }
6125
6126 printk(KERN_INFO "SELinux: Disabled at runtime.\n");
6127
6128 selinux_disabled = 1;
6129 selinux_enabled = 0;
6130
6131 reset_security_ops();
6132
6133 /* Try to destroy the avc node cache */
6134 avc_disable();
6135
6136 /* Unregister netfilter hooks. */
6137 selinux_nf_ip_exit();
6138
6139 /* Unregister selinuxfs. */
6140 exit_sel_fs();
6141
6142 return 0;
6143 }
6144 #endif
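
As a usage note on the runtime-disable path above: selinux_disable() refuses to run after the initial policy load (ss_initialized) or a second time, and it is normally reached from userspace by writing "1" to the selinuxfs disable node early in boot. A minimal userspace sketch, assuming selinuxfs is mounted at /sys/fs/selinux and using a hypothetical helper name:

/* Illustrative userspace sketch; helper name is hypothetical. */
#include <fcntl.h>
#include <unistd.h>

int disable_selinux_at_boot(void)
{
	int fd = open("/sys/fs/selinux/disable", O_WRONLY);

	if (fd < 0)
		return -1;	/* selinuxfs absent, or SELinux not built in */
	/* The kernel side of this write ends up invoking selinux_disable(). */
	if (write(fd, "1", 1) != 1) {
		close(fd);
		return -1;
	}
	return close(fd);
}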