Commit 2c27c65ed0696f0b5df2dad2cf6462d72164d547

Authored by Christoph Hellwig
Committed by Al Viro
1 parent db78b877f7

check ATTR_SIZE contraints in inode_change_ok

Make sure we check the truncate constraints early on in ->setattr by adding
those checks to inode_change_ok.  Also clean up and document inode_change_ok
to make this obvious.

As a fallout we don't have to call inode_newsize_ok from simple_setsize and
simplify it down to a truncate_setsize which doesn't return an error.  This
simplifies a lot of setattr implementations and means we use truncate_setsize
almost everywhere.  Get rid of fat_setsize now that it's trivial and mark
ext2_setsize static to make the calling convention obvious.

Keep the inode_newsize_ok in vmtruncate for now as all callers need an
audit for its removal anyway.

Note: setattr code in ecryptfs doesn't call inode_change_ok at all and
needs a deeper audit, but that is left for later.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

Showing 21 changed files with 108 additions and 156 deletions Side-by-side Diff

... ... @@ -333,10 +333,7 @@
333 333  
334 334 /* XXX: this is missing some actual on-disk truncation.. */
335 335 if (ia_valid & ATTR_SIZE)
336   - error = simple_setsize(inode, attr->ia_size);
337   -
338   - if (error)
339   - goto out;
  336 + truncate_setsize(inode, attr->ia_size);
340 337  
341 338 if (ia_valid & ATTR_MTIME) {
342 339 inode->i_mtime = attr->ia_mtime;
... ... @@ -14,35 +14,53 @@
14 14 #include <linux/fcntl.h>
15 15 #include <linux/security.h>
16 16  
17   -/* Taken over from the old code... */
18   -
19   -/* POSIX UID/GID verification for setting inode attributes. */
  17 +/**
  18 + * inode_change_ok - check if attribute changes to an inode are allowed
  19 + * @inode: inode to check
  20 + * @attr: attributes to change
  21 + *
  22 + * Check if we are allowed to change the attributes contained in @attr
  23 + * in the given inode. This includes the normal unix access permission
  24 + * checks, as well as checks for rlimits and others.
  25 + *
  26 + * Should be called as the first thing in ->setattr implementations,
  27 + * possibly after taking additional locks.
  28 + */
20 29 int inode_change_ok(const struct inode *inode, struct iattr *attr)
21 30 {
22   - int retval = -EPERM;
23 31 unsigned int ia_valid = attr->ia_valid;
24 32  
  33 + /*
  34 + * First check size constraints. These can't be overriden using
  35 + * ATTR_FORCE.
  36 + */
  37 + if (ia_valid & ATTR_SIZE) {
  38 + int error = inode_newsize_ok(inode, attr->ia_size);
  39 + if (error)
  40 + return error;
  41 + }
  42 +
25 43 /* If force is set do it anyway. */
26 44 if (ia_valid & ATTR_FORCE)
27   - goto fine;
  45 + return 0;
28 46  
29 47 /* Make sure a caller can chown. */
30 48 if ((ia_valid & ATTR_UID) &&
31 49 (current_fsuid() != inode->i_uid ||
32 50 attr->ia_uid != inode->i_uid) && !capable(CAP_CHOWN))
33   - goto error;
  51 + return -EPERM;
34 52  
35 53 /* Make sure caller can chgrp. */
36 54 if ((ia_valid & ATTR_GID) &&
37 55 (current_fsuid() != inode->i_uid ||
38 56 (!in_group_p(attr->ia_gid) && attr->ia_gid != inode->i_gid)) &&
39 57 !capable(CAP_CHOWN))
40   - goto error;
  58 + return -EPERM;
41 59  
42 60 /* Make sure a caller can chmod. */
43 61 if (ia_valid & ATTR_MODE) {
44 62 if (!is_owner_or_cap(inode))
45   - goto error;
  63 + return -EPERM;
46 64 /* Also check the setgid bit! */
47 65 if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
48 66 inode->i_gid) && !capable(CAP_FSETID))
49 67  
... ... @@ -52,12 +70,10 @@
52 70 /* Check for setting the inode time. */
53 71 if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) {
54 72 if (!is_owner_or_cap(inode))
55   - goto error;
  73 + return -EPERM;
56 74 }
57   -fine:
58   - retval = 0;
59   -error:
60   - return retval;
  75 +
  76 + return 0;
61 77 }
62 78 EXPORT_SYMBOL(inode_change_ok);
63 79  
... ... @@ -113,7 +129,7 @@
113 129 *
114 130 * setattr_copy updates the inode's metadata with that specified
115 131 * in attr. Noticably missing is inode size update, which is more complex
116   - * as it requires pagecache updates. See simple_setsize.
  132 + * as it requires pagecache updates.
117 133 *
118 134 * The inode is not marked as dirty after this operation. The rationale is
119 135 * that for "simple" filesystems, the struct inode is the inode storage.
... ... @@ -804,10 +804,20 @@
804 804 size_t num_zeros = (PAGE_CACHE_SIZE
805 805 - (ia->ia_size & ~PAGE_CACHE_MASK));
806 806  
  807 +
  808 + /*
  809 + * XXX(truncate) this should really happen at the begginning
  810 + * of ->setattr. But the code is too messy to that as part
  811 + * of a larger patch. ecryptfs is also totally missing out
  812 + * on the inode_change_ok check at the beginning of
  813 + * ->setattr while would include this.
  814 + */
  815 + rc = inode_newsize_ok(inode, ia->ia_size);
  816 + if (rc)
  817 + goto out;
  818 +
807 819 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
808   - rc = simple_setsize(inode, ia->ia_size);
809   - if (rc)
810   - goto out;
  820 + truncate_setsize(inode, ia->ia_size);
811 821 lower_ia->ia_size = ia->ia_size;
812 822 lower_ia->ia_valid |= ATTR_SIZE;
813 823 goto out;
... ... @@ -830,7 +840,7 @@
830 840 goto out;
831 841 }
832 842 }
833   - simple_setsize(inode, ia->ia_size);
  843 + truncate_setsize(inode, ia->ia_size);
834 844 rc = ecryptfs_write_inode_size_to_metadata(inode);
835 845 if (rc) {
836 846 printk(KERN_ERR "Problem with "
... ... @@ -1156,15 +1156,10 @@
1156 1156 __ext2_truncate_blocks(inode, offset);
1157 1157 }
1158 1158  
1159   -int ext2_setsize(struct inode *inode, loff_t newsize)
  1159 +static int ext2_setsize(struct inode *inode, loff_t newsize)
1160 1160 {
1161   - loff_t oldsize;
1162 1161 int error;
1163 1162  
1164   - error = inode_newsize_ok(inode, newsize);
1165   - if (error)
1166   - return error;
1167   -
1168 1163 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
1169 1164 S_ISLNK(inode->i_mode)))
1170 1165 return -EINVAL;
... ... @@ -1184,10 +1179,7 @@
1184 1179 if (error)
1185 1180 return error;
1186 1181  
1187   - oldsize = inode->i_size;
1188   - i_size_write(inode, newsize);
1189   - truncate_pagecache(inode, oldsize, newsize);
1190   -
  1182 + truncate_setsize(inode, newsize);
1191 1183 __ext2_truncate_blocks(inode, newsize);
1192 1184  
1193 1185 inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
... ... @@ -306,7 +306,6 @@
306 306 extern const struct file_operations fat_file_operations;
307 307 extern const struct inode_operations fat_file_inode_operations;
308 308 extern int fat_setattr(struct dentry * dentry, struct iattr * attr);
309   -extern int fat_setsize(struct inode *inode, loff_t offset);
310 309 extern void fat_truncate_blocks(struct inode *inode, loff_t offset);
311 310 extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry,
312 311 struct kstat *stat);
... ... @@ -364,18 +364,6 @@
364 364 return 0;
365 365 }
366 366  
367   -int fat_setsize(struct inode *inode, loff_t offset)
368   -{
369   - int error;
370   -
371   - error = simple_setsize(inode, offset);
372   - if (error)
373   - return error;
374   - fat_truncate_blocks(inode, offset);
375   -
376   - return error;
377   -}
378   -
379 367 #define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
380 368 /* valid file mode bits */
381 369 #define FAT_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXUGO)
... ... @@ -441,9 +429,8 @@
441 429 }
442 430  
443 431 if (attr->ia_valid & ATTR_SIZE) {
444   - error = fat_setsize(inode, attr->ia_size);
445   - if (error)
446   - goto out;
  432 + truncate_setsize(inode, attr->ia_size);
  433 + fat_truncate_blocks(inode, attr->ia_size);
447 434 }
448 435  
449 436 setattr_copy(inode, attr);
... ... @@ -1280,12 +1280,8 @@
1280 1280 if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc)
1281 1281 return 0;
1282 1282  
1283   - if (attr->ia_valid & ATTR_SIZE) {
1284   - err = inode_newsize_ok(inode, attr->ia_size);
1285   - if (err)
1286   - return err;
  1283 + if (attr->ia_valid & ATTR_SIZE)
1287 1284 is_truncate = true;
1288   - }
1289 1285  
1290 1286 req = fuse_get_req(fc);
1291 1287 if (IS_ERR(req))
... ... @@ -702,12 +702,12 @@
702 702 page_cache_release(page);
703 703  
704 704 /*
705   - * XXX(hch): the call below should probably be replaced with
  705 + * XXX(truncate): the call below should probably be replaced with
706 706 * a call to the gfs2-specific truncate blocks helper to actually
707 707 * release disk blocks..
708 708 */
709 709 if (pos + len > ip->i_inode.i_size)
710   - simple_setsize(&ip->i_inode, ip->i_inode.i_size);
  710 + truncate_setsize(&ip->i_inode, ip->i_inode.i_size);
711 711 out_endtrans:
712 712 gfs2_trans_end(sdp);
713 713 out_trans_fail:
... ... @@ -1072,7 +1072,7 @@
1072 1072 }
1073 1073  
1074 1074 /*
1075   - * XXX: should be changed to have proper ordering by opencoding simple_setsize
  1075 + * XXX(truncate): the truncate_setsize calls should be moved to the end.
1076 1076 */
1077 1077 static int setattr_size(struct inode *inode, struct iattr *attr)
1078 1078 {
1079 1079  
... ... @@ -1084,10 +1084,8 @@
1084 1084 error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
1085 1085 if (error)
1086 1086 return error;
1087   - error = simple_setsize(inode, attr->ia_size);
  1087 + truncate_setsize(inode, attr->ia_size);
1088 1088 gfs2_trans_end(sdp);
1089   - if (error)
1090   - return error;
1091 1089 }
1092 1090  
1093 1091 error = gfs2_truncatei(ip, attr->ia_size);
... ... @@ -169,13 +169,13 @@
169 169 mutex_unlock(&f->sem);
170 170 jffs2_complete_reservation(c);
171 171  
172   - /* We have to do the simple_setsize() without f->sem held, since
  172 + /* We have to do the truncate_setsize() without f->sem held, since
173 173 some pages may be locked and waiting for it in readpage().
174 174 We are protected from a simultaneous write() extending i_size
175 175 back past iattr->ia_size, because do_truncate() holds the
176 176 generic inode semaphore. */
177 177 if (ivalid & ATTR_SIZE && inode->i_size > iattr->ia_size) {
178   - simple_setsize(inode, iattr->ia_size);
  178 + truncate_setsize(inode, iattr->ia_size);
179 179 inode->i_blocks = (inode->i_size + 511) >> 9;
180 180 }
181 181  
... ... @@ -327,49 +327,6 @@
327 327 }
328 328  
329 329 /**
330   - * simple_setsize - handle core mm and vfs requirements for file size change
331   - * @inode: inode
332   - * @newsize: new file size
333   - *
334   - * Returns 0 on success, -error on failure.
335   - *
336   - * simple_setsize must be called with inode_mutex held.
337   - *
338   - * simple_setsize will check that the requested new size is OK (see
339   - * inode_newsize_ok), and then will perform the necessary i_size update
340   - * and pagecache truncation (if necessary). It will be typically be called
341   - * from the filesystem's setattr function when ATTR_SIZE is passed in.
342   - *
343   - * The inode itself must have correct permissions and attributes to allow
344   - * i_size to be changed, this function then just checks that the new size
345   - * requested is valid.
346   - *
347   - * In the case of simple in-memory filesystems with inodes stored solely
348   - * in the inode cache, and file data in the pagecache, nothing more needs
349   - * to be done to satisfy a truncate request. Filesystems with on-disk
350   - * blocks for example will need to free them in the case of truncate, in
351   - * that case it may be easier not to use simple_setsize (but each of its
352   - * components will likely be required at some point to update pagecache
353   - * and inode etc).
354   - */
355   -int simple_setsize(struct inode *inode, loff_t newsize)
356   -{
357   - loff_t oldsize;
358   - int error;
359   -
360   - error = inode_newsize_ok(inode, newsize);
361   - if (error)
362   - return error;
363   -
364   - oldsize = inode->i_size;
365   - i_size_write(inode, newsize);
366   - truncate_pagecache(inode, oldsize, newsize);
367   -
368   - return error;
369   -}
370   -EXPORT_SYMBOL(simple_setsize);
371   -
372   -/**
373 330 * simple_setattr - setattr for simple filesystem
374 331 * @dentry: dentry
375 332 * @iattr: iattr structure
... ... @@ -394,12 +351,8 @@
394 351 if (error)
395 352 return error;
396 353  
397   - if (iattr->ia_valid & ATTR_SIZE) {
398   - error = simple_setsize(inode, iattr->ia_size);
399   - if (error)
400   - return error;
401   - }
402   -
  354 + if (iattr->ia_valid & ATTR_SIZE)
  355 + truncate_setsize(inode, iattr->ia_size);
403 356 setattr_copy(inode, iattr);
404 357 mark_inode_dirty(inode);
405 358 return 0;
... ... @@ -1233,7 +1233,7 @@
1233 1233 }
1234 1234  
1235 1235 /*
1236   - * This will intentionally not wind up calling simple_setsize(),
  1236 + * This will intentionally not wind up calling truncate_setsize(),
1237 1237 * since all the work for a size change has been done above.
1238 1238 * Otherwise, we could get into problems with truncate as
1239 1239 * ip_alloc_sem is used there to protect against i_size
1240 1240  
... ... @@ -2308,12 +2308,12 @@
2308 2308 * blocks outside i_size. Trim these off again.
2309 2309 * Don't need i_size_read because we hold i_mutex.
2310 2310 *
2311   - * XXX(hch): this looks buggy because ocfs2 did not
  2311 + * XXX(truncate): this looks buggy because ocfs2 did not
2312 2312 * actually implement ->truncate. Take a look at
2313 2313 * the new truncate sequence and update this accordingly
2314 2314 */
2315 2315 if (*ppos + count > inode->i_size)
2316   - simple_setsize(inode, inode->i_size);
  2316 + truncate_setsize(inode, inode->i_size);
2317 2317 ret = written;
2318 2318 goto out_dio;
2319 2319 }
fs/ramfs/file-nommu.c
... ... @@ -146,9 +146,8 @@
146 146 return ret;
147 147 }
148 148  
149   - ret = simple_setsize(inode, newsize);
150   -
151   - return ret;
  149 + truncate_setsize(inode, newsize);
  150 + return 0;
152 151 }
153 152  
154 153 /*****************************************************************************/
... ... @@ -714,9 +714,7 @@
714 714 error = server->ops->truncate(inode, attr->ia_size);
715 715 if (error)
716 716 goto out;
717   - error = simple_setsize(inode, attr->ia_size);
718   - if (error)
719   - goto out;
  717 + truncate_setsize(inode, attr->ia_size);
720 718 refresh = 1;
721 719 }
722 720  
... ... @@ -967,14 +967,15 @@
967 967 * the page locked, and it locks @ui_mutex. However, write-back does take inode
968 968 * @i_mutex, which means other VFS operations may be run on this inode at the
969 969 * same time. And the problematic one is truncation to smaller size, from where
970   - * we have to call 'simple_setsize()', which first changes @inode->i_size, then
  970 + * we have to call 'truncate_setsize()', which first changes @inode->i_size, then
971 971 * drops the truncated pages. And while dropping the pages, it takes the page
972   - * lock. This means that 'do_truncation()' cannot call 'simple_setsize()' with
  972 + * lock. This means that 'do_truncation()' cannot call 'truncate_setsize()' with
973 973 * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This
974 974 * means that @inode->i_size is changed while @ui_mutex is unlocked.
975 975 *
976   - * XXX: with the new truncate the above is not true anymore, the simple_setsize
977   - * calls can be replaced with the individual components.
  976 + * XXX(truncate): with the new truncate sequence this is not true anymore,
  977 + * and the calls to truncate_setsize can be move around freely. They should
  978 + * be moved to the very end of the truncate sequence.
978 979 *
979 980 * But in 'ubifs_writepage()' we have to guarantee that we do not write beyond
980 981 * inode size. How do we do this if @inode->i_size may became smaller while we
... ... @@ -1128,9 +1129,7 @@
1128 1129 budgeted = 0;
1129 1130 }
1130 1131  
1131   - err = simple_setsize(inode, new_size);
1132   - if (err)
1133   - goto out_budg;
  1132 + truncate_setsize(inode, new_size);
1134 1133  
1135 1134 if (offset) {
1136 1135 pgoff_t index = new_size >> PAGE_CACHE_SHIFT;
1137 1136  
... ... @@ -1217,16 +1216,14 @@
1217 1216  
1218 1217 if (attr->ia_valid & ATTR_SIZE) {
1219 1218 dbg_gen("size %lld -> %lld", inode->i_size, new_size);
1220   - err = simple_setsize(inode, new_size);
1221   - if (err)
1222   - goto out;
  1219 + truncate_setsize(inode, new_size);
1223 1220 }
1224 1221  
1225 1222 mutex_lock(&ui->ui_mutex);
1226 1223 if (attr->ia_valid & ATTR_SIZE) {
1227 1224 /* Truncation changes inode [mc]time */
1228 1225 inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
1229   - /* 'simple_setsize()' changed @i_size, update @ui_size */
  1226 + /* 'truncate_setsize()' changed @i_size, update @ui_size */
1230 1227 ui->ui_size = inode->i_size;
1231 1228 }
1232 1229  
... ... @@ -1247,10 +1244,6 @@
1247 1244 ubifs_release_budget(c, &req);
1248 1245 if (IS_SYNC(inode))
1249 1246 err = inode->i_sb->s_op->write_inode(inode, NULL);
1250   - return err;
1251   -
1252   -out:
1253   - ubifs_release_budget(c, &req);
1254 1247 return err;
1255 1248 }
1256 1249  
... ... @@ -379,7 +379,7 @@
379 379 * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses
380 380 * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot
381 381 * make sure @inode->i_size is always changed under @ui_mutex, because it
382   - * cannot call 'simple_setsize()' with @ui_mutex locked, because it would deadlock
  382 + * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would deadlock
383 383 * with 'ubifs_writepage()' (see file.c). All the other inode fields are
384 384 * changed under @ui_mutex, so they do not need "shadow" fields. Note, one
385 385 * could consider to rework locking and base it on "shadow" fields.
... ... @@ -500,11 +500,6 @@
500 500 return err;
501 501 }
502 502  
503   -/*
504   - * TODO:
505   - * - truncate case should use proper ordering instead of using
506   - * simple_setsize
507   - */
508 503 int ufs_setattr(struct dentry *dentry, struct iattr *attr)
509 504 {
510 505 struct inode *inode = dentry->d_inode;
... ... @@ -518,9 +513,9 @@
518 513 if (ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) {
519 514 loff_t old_i_size = inode->i_size;
520 515  
521   - error = simple_setsize(inode, attr->ia_size);
522   - if (error)
523   - return error;
  516 + /* XXX(truncate): truncate_setsize should be called last */
  517 + truncate_setsize(inode, attr->ia_size);
  518 +
524 519 error = ufs_truncate(inode, old_i_size);
525 520 if (error)
526 521 return error;
... ... @@ -2355,7 +2355,6 @@
2355 2355 extern int simple_unlink(struct inode *, struct dentry *);
2356 2356 extern int simple_rmdir(struct inode *, struct dentry *);
2357 2357 extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
2358   -extern int simple_setsize(struct inode *, loff_t);
2359 2358 extern int noop_fsync(struct file *, int);
2360 2359 extern int simple_empty(struct dentry *);
2361 2360 extern int simple_readpage(struct file *file, struct page *page);
... ... @@ -815,6 +815,7 @@
815 815 }
816 816  
817 817 extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new);
  818 +extern void truncate_setsize(struct inode *inode, loff_t newsize);
818 819 extern int vmtruncate(struct inode *inode, loff_t offset);
819 820 extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end);
820 821  
... ... @@ -805,11 +805,10 @@
805 805 }
806 806 }
807 807  
808   - error = simple_setsize(inode, newsize);
  808 + /* XXX(truncate): truncate_setsize should be called last */
  809 + truncate_setsize(inode, newsize);
809 810 if (page)
810 811 page_cache_release(page);
811   - if (error)
812   - return error;
813 812 shmem_truncate_range(inode, newsize, (loff_t)-1);
814 813 }
815 814  
... ... @@ -541,29 +541,49 @@
541 541 EXPORT_SYMBOL(truncate_pagecache);
542 542  
543 543 /**
  544 + * truncate_setsize - update inode and pagecache for a new file size
  545 + * @inode: inode
  546 + * @newsize: new file size
  547 + *
  548 + * truncate_setsize updastes i_size update and performs pagecache
  549 + * truncation (if necessary) for a file size updates. It will be
  550 + * typically be called from the filesystem's setattr function when
  551 + * ATTR_SIZE is passed in.
  552 + *
  553 + * Must be called with inode_mutex held and after all filesystem
  554 + * specific block truncation has been performed.
  555 + */
  556 +void truncate_setsize(struct inode *inode, loff_t newsize)
  557 +{
  558 + loff_t oldsize;
  559 +
  560 + oldsize = inode->i_size;
  561 + i_size_write(inode, newsize);
  562 +
  563 + truncate_pagecache(inode, oldsize, newsize);
  564 +}
  565 +EXPORT_SYMBOL(truncate_setsize);
  566 +
  567 +/**
544 568 * vmtruncate - unmap mappings "freed" by truncate() syscall
545 569 * @inode: inode of the file used
546 570 * @offset: file offset to start truncating
547 571 *
548   - * NOTE! We have to be ready to update the memory sharing
549   - * between the file and the memory map for a potential last
550   - * incomplete page. Ugly, but necessary.
551   - *
552   - * This function is deprecated and simple_setsize or truncate_pagecache
553   - * should be used instead.
  572 + * This function is deprecated and truncate_setsize or truncate_pagecache
  573 + * should be used instead, together with filesystem specific block truncation.
554 574 */
555 575 int vmtruncate(struct inode *inode, loff_t offset)
556 576 {
557 577 int error;
558 578  
559   - error = simple_setsize(inode, offset);
  579 + error = inode_newsize_ok(inode, offset);
560 580 if (error)
561 581 return error;
562 582  
  583 + truncate_setsize(inode, offset);
563 584 if (inode->i_op->truncate)
564 585 inode->i_op->truncate(inode);
565   -
566   - return error;
  586 + return 0;
567 587 }
568 588 EXPORT_SYMBOL(vmtruncate);