Commit 6c5daf012c9155aafd2c7973e4278766c30dfad0

Authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6:
  truncate: use new helpers
  truncate: new helpers
  fs: fix overflow in sys_mount() for in-kernel calls
  fs: Make unload_nls() NULL pointer safe
  freeze_bdev: grab active reference to frozen superblocks
  freeze_bdev: kill bd_mount_sem
  exofs: remove BKL from super operations
  fs/romfs: correct error-handling code
  vfs: seq_file: add helpers for data filling
  vfs: remove redundant position check in do_sendfile
  vfs: change sb->s_maxbytes to a loff_t
  vfs: explicitly cast s_maxbytes in fiemap_check_ranges
  libfs: return error code on failed attr set
  seq_file: return a negative error code when seq_path_root() fails.
  vfs: optimize touch_time() too
  vfs: optimization for touch_atime()
  vfs: split generic_forget_inode() so that hugetlbfs does not have to copy it
  fs/inode.c: add dev-id and inode number for debugging in init_special_inode()
  libfs: make simple_read_from_buffer conventional

Showing 41 changed files

Documentation/vm/locking
... ... @@ -80,7 +80,7 @@
80 80 mm start up ... this is a loose form of stability on mm_users. For
81 81 example, it is used in copy_mm to protect against a racing tlb_gather_mmu
82 82 single address space optimization, so that the zap_page_range (from
83   -vmtruncate) does not lose sending ipi's to cloned threads that might
  83 +truncate) does not lose sending ipi's to cloned threads that might
84 84 be spawned underneath it and go to user mode to drag in pte's into tlbs.
85 85  
86 86 swap_lock
fs/attr.c
... ... @@ -18,7 +18,7 @@
18 18 /* Taken over from the old code... */
19 19  
20 20 /* POSIX UID/GID verification for setting inode attributes. */
21   -int inode_change_ok(struct inode *inode, struct iattr *attr)
  21 +int inode_change_ok(const struct inode *inode, struct iattr *attr)
22 22 {
23 23 int retval = -EPERM;
24 24 unsigned int ia_valid = attr->ia_valid;
25 25  
... ... @@ -60,8 +60,50 @@
60 60 error:
61 61 return retval;
62 62 }
63   -
64 63 EXPORT_SYMBOL(inode_change_ok);
  64 +
  65 +/**
  66 + * inode_newsize_ok - may this inode be truncated to a given size
  67 + * @inode: the inode to be truncated
  68 + * @offset: the new size to assign to the inode
  69 + * @Returns: 0 on success, -ve errno on failure
  70 + *
  71 + * inode_newsize_ok will check filesystem limits and ulimits to check that the
  72 + * new inode size is within limits. inode_newsize_ok will also send SIGXFSZ
  73 + * when necessary. Caller must not proceed with inode size change if failure is
  74 + * returned. @inode must be a file (not directory), with appropriate
  75 + * permissions to allow truncate (inode_newsize_ok does NOT check these
  76 + * conditions).
  77 + *
  78 + * inode_newsize_ok must be called with i_mutex held.
  79 + */
  80 +int inode_newsize_ok(const struct inode *inode, loff_t offset)
  81 +{
  82 + if (inode->i_size < offset) {
  83 + unsigned long limit;
  84 +
  85 + limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
  86 + if (limit != RLIM_INFINITY && offset > limit)
  87 + goto out_sig;
  88 + if (offset > inode->i_sb->s_maxbytes)
  89 + goto out_big;
  90 + } else {
  91 + /*
  92 + * truncation of in-use swapfiles is disallowed - it would
  93 + * cause subsequent swapout to scribble on the now-freed
  94 + * blocks.
  95 + */
  96 + if (IS_SWAPFILE(inode))
  97 + return -ETXTBSY;
  98 + }
  99 +
  100 + return 0;
  101 +out_sig:
  102 + send_sig(SIGXFSZ, current, 0);
  103 +out_big:
  104 + return -EFBIG;
  105 +}
  106 +EXPORT_SYMBOL(inode_newsize_ok);
65 107  
66 108 int inode_setattr(struct inode * inode, struct iattr * attr)
67 109 {
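
For illustration, a minimal sketch of how a setattr path can use the new helper (hypothetical function name; the fuse conversion further down does exactly this for ATTR_SIZE):

    /* Hypothetical setattr fragment: validate a size change before
     * committing to it. */
    static int example_check_size(struct inode *inode, struct iattr *attr)
    {
            if (!(attr->ia_valid & ATTR_SIZE))
                    return 0;
            /* checks RLIMIT_FSIZE (sending SIGXFSZ) and sb->s_maxbytes
             * on expansion, and disallows shrinking an in-use swapfile */
            return inode_newsize_ok(inode, attr->ia_size);
    }
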
fs/befs/linuxvfs.c
... ... @@ -737,12 +737,7 @@
737 737 {
738 738 kfree(BEFS_SB(sb)->mount_opts.iocharset);
739 739 BEFS_SB(sb)->mount_opts.iocharset = NULL;
740   -
741   - if (BEFS_SB(sb)->nls) {
742   - unload_nls(BEFS_SB(sb)->nls);
743   - BEFS_SB(sb)->nls = NULL;
744   - }
745   -
  740 + unload_nls(BEFS_SB(sb)->nls);
746 741 kfree(sb->s_fs_info);
747 742 sb->s_fs_info = NULL;
748 743 }
fs/block_dev.c
... ... @@ -216,8 +216,6 @@
216 216 * freeze_bdev -- lock a filesystem and force it into a consistent state
217 217 * @bdev: blockdevice to lock
218 218 *
219   - * This takes the block device bd_mount_sem to make sure no new mounts
220   - * happen on bdev until thaw_bdev() is called.
221 219 * If a superblock is found on this device, we take the s_umount semaphore
222 220 * on it to make sure nobody unmounts until the snapshot creation is done.
223 221 * The reference counter (bd_fsfreeze_count) guarantees that only the last
... ... @@ -232,46 +230,55 @@
232 230 int error = 0;
233 231  
234 232 mutex_lock(&bdev->bd_fsfreeze_mutex);
235   - if (bdev->bd_fsfreeze_count > 0) {
236   - bdev->bd_fsfreeze_count++;
  233 + if (++bdev->bd_fsfreeze_count > 1) {
  234 + /*
  235 + * We don't even need to grab a reference - the first call
  236 +	 * to freeze_bdev grabs an active reference and only the last
  237 + * thaw_bdev drops it.
  238 + */
237 239 sb = get_super(bdev);
  240 + drop_super(sb);
238 241 mutex_unlock(&bdev->bd_fsfreeze_mutex);
239 242 return sb;
240 243 }
241   - bdev->bd_fsfreeze_count++;
242 244  
243   - down(&bdev->bd_mount_sem);
244   - sb = get_super(bdev);
245   - if (sb && !(sb->s_flags & MS_RDONLY)) {
246   - sb->s_frozen = SB_FREEZE_WRITE;
247   - smp_wmb();
  245 + sb = get_active_super(bdev);
  246 + if (!sb)
  247 + goto out;
  248 + if (sb->s_flags & MS_RDONLY) {
  249 + deactivate_locked_super(sb);
  250 + mutex_unlock(&bdev->bd_fsfreeze_mutex);
  251 + return sb;
  252 + }
248 253  
249   - sync_filesystem(sb);
  254 + sb->s_frozen = SB_FREEZE_WRITE;
  255 + smp_wmb();
250 256  
251   - sb->s_frozen = SB_FREEZE_TRANS;
252   - smp_wmb();
  257 + sync_filesystem(sb);
253 258  
254   - sync_blockdev(sb->s_bdev);
  259 + sb->s_frozen = SB_FREEZE_TRANS;
  260 + smp_wmb();
255 261  
256   - if (sb->s_op->freeze_fs) {
257   - error = sb->s_op->freeze_fs(sb);
258   - if (error) {
259   - printk(KERN_ERR
260   - "VFS:Filesystem freeze failed\n");
261   - sb->s_frozen = SB_UNFROZEN;
262   - drop_super(sb);
263   - up(&bdev->bd_mount_sem);
264   - bdev->bd_fsfreeze_count--;
265   - mutex_unlock(&bdev->bd_fsfreeze_mutex);
266   - return ERR_PTR(error);
267   - }
  262 + sync_blockdev(sb->s_bdev);
  263 +
  264 + if (sb->s_op->freeze_fs) {
  265 + error = sb->s_op->freeze_fs(sb);
  266 + if (error) {
  267 + printk(KERN_ERR
  268 + "VFS:Filesystem freeze failed\n");
  269 + sb->s_frozen = SB_UNFROZEN;
  270 + deactivate_locked_super(sb);
  271 + bdev->bd_fsfreeze_count--;
  272 + mutex_unlock(&bdev->bd_fsfreeze_mutex);
  273 + return ERR_PTR(error);
268 274 }
269 275 }
  276 + up_write(&sb->s_umount);
270 277  
  278 + out:
271 279 sync_blockdev(bdev);
272 280 mutex_unlock(&bdev->bd_fsfreeze_mutex);
273   -
274   - return sb; /* thaw_bdev releases s->s_umount and bd_mount_sem */
  281 + return sb; /* thaw_bdev releases s->s_umount */
275 282 }
276 283 EXPORT_SYMBOL(freeze_bdev);
277 284  
... ... @@ -284,44 +291,44 @@
284 291 */
285 292 int thaw_bdev(struct block_device *bdev, struct super_block *sb)
286 293 {
287   - int error = 0;
  294 + int error = -EINVAL;
288 295  
289 296 mutex_lock(&bdev->bd_fsfreeze_mutex);
290   - if (!bdev->bd_fsfreeze_count) {
291   - mutex_unlock(&bdev->bd_fsfreeze_mutex);
292   - return -EINVAL;
293   - }
  297 + if (!bdev->bd_fsfreeze_count)
  298 + goto out_unlock;
294 299  
295   - bdev->bd_fsfreeze_count--;
296   - if (bdev->bd_fsfreeze_count > 0) {
297   - if (sb)
298   - drop_super(sb);
299   - mutex_unlock(&bdev->bd_fsfreeze_mutex);
300   - return 0;
301   - }
  300 + error = 0;
  301 + if (--bdev->bd_fsfreeze_count > 0)
  302 + goto out_unlock;
302 303  
303   - if (sb) {
304   - BUG_ON(sb->s_bdev != bdev);
305   - if (!(sb->s_flags & MS_RDONLY)) {
306   - if (sb->s_op->unfreeze_fs) {
307   - error = sb->s_op->unfreeze_fs(sb);
308   - if (error) {
309   - printk(KERN_ERR
310   - "VFS:Filesystem thaw failed\n");
311   - sb->s_frozen = SB_FREEZE_TRANS;
312   - bdev->bd_fsfreeze_count++;
313   - mutex_unlock(&bdev->bd_fsfreeze_mutex);
314   - return error;
315   - }
316   - }
317   - sb->s_frozen = SB_UNFROZEN;
318   - smp_wmb();
319   - wake_up(&sb->s_wait_unfrozen);
  304 + if (!sb)
  305 + goto out_unlock;
  306 +
  307 + BUG_ON(sb->s_bdev != bdev);
  308 + down_write(&sb->s_umount);
  309 + if (sb->s_flags & MS_RDONLY)
  310 + goto out_deactivate;
  311 +
  312 + if (sb->s_op->unfreeze_fs) {
  313 + error = sb->s_op->unfreeze_fs(sb);
  314 + if (error) {
  315 + printk(KERN_ERR
  316 + "VFS:Filesystem thaw failed\n");
  317 + sb->s_frozen = SB_FREEZE_TRANS;
  318 + bdev->bd_fsfreeze_count++;
  319 + mutex_unlock(&bdev->bd_fsfreeze_mutex);
  320 + return error;
320 321 }
321   - drop_super(sb);
322 322 }
323 323  
324   - up(&bdev->bd_mount_sem);
  324 + sb->s_frozen = SB_UNFROZEN;
  325 + smp_wmb();
  326 + wake_up(&sb->s_wait_unfrozen);
  327 +
  328 +out_deactivate:
  329 + if (sb)
  330 + deactivate_locked_super(sb);
  331 +out_unlock:
325 332 mutex_unlock(&bdev->bd_fsfreeze_mutex);
326 333 return 0;
327 334 }
... ... @@ -430,7 +437,6 @@
430 437  
431 438 memset(bdev, 0, sizeof(*bdev));
432 439 mutex_init(&bdev->bd_mutex);
433   - sema_init(&bdev->bd_mount_sem, 1);
434 440 INIT_LIST_HEAD(&bdev->bd_inodes);
435 441 INIT_LIST_HEAD(&bdev->bd_list);
436 442 #ifdef CONFIG_SYSFS
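
A hedged sketch of the intended calling pattern after this change (take_device_snapshot is made up):

    static int example_snapshot(struct block_device *bdev)
    {
            struct super_block *sb;

            sb = freeze_bdev(bdev);         /* NULL if nothing is mounted */
            if (IS_ERR(sb))
                    return PTR_ERR(sb);
            take_device_snapshot(bdev);     /* hypothetical */
            return thaw_bdev(bdev, sb);     /* drops the active reference */
    }
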
fs/buffer.c
... ... @@ -2239,16 +2239,10 @@
2239 2239 struct address_space *mapping = inode->i_mapping;
2240 2240 struct page *page;
2241 2241 void *fsdata;
2242   - unsigned long limit;
2243 2242 int err;
2244 2243  
2245   - err = -EFBIG;
2246   - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
2247   - if (limit != RLIM_INFINITY && size > (loff_t)limit) {
2248   - send_sig(SIGXFSZ, current, 0);
2249   - goto out;
2250   - }
2251   - if (size > inode->i_sb->s_maxbytes)
  2244 + err = inode_newsize_ok(inode, size);
  2245 + if (err)
2252 2246 goto out;
2253 2247  
2254 2248 err = pagecache_write_begin(NULL, mapping, size, 0,
fs/cifs/cifsfs.c
... ... @@ -185,8 +185,7 @@
185 185 cifs_sb->mountdata = NULL;
186 186 }
187 187 #endif
188   - if (cifs_sb->local_nls)
189   - unload_nls(cifs_sb->local_nls);
  188 + unload_nls(cifs_sb->local_nls);
190 189 kfree(cifs_sb);
191 190 }
192 191 return rc;
fs/cifs/inode.c
... ... @@ -1557,57 +1557,24 @@
1557 1557  
1558 1558 static int cifs_vmtruncate(struct inode *inode, loff_t offset)
1559 1559 {
1560   - struct address_space *mapping = inode->i_mapping;
1561   - unsigned long limit;
  1560 + loff_t oldsize;
  1561 + int err;
1562 1562  
1563 1563 spin_lock(&inode->i_lock);
1564   - if (inode->i_size < offset)
1565   - goto do_expand;
1566   - /*
1567   - * truncation of in-use swapfiles is disallowed - it would cause
1568   - * subsequent swapout to scribble on the now-freed blocks.
1569   - */
1570   - if (IS_SWAPFILE(inode)) {
  1564 + err = inode_newsize_ok(inode, offset);
  1565 + if (err) {
1571 1566 spin_unlock(&inode->i_lock);
1572   - goto out_busy;
  1567 + goto out;
1573 1568 }
1574   - i_size_write(inode, offset);
1575   - spin_unlock(&inode->i_lock);
1576   - /*
1577   - * unmap_mapping_range is called twice, first simply for efficiency
1578   - * so that truncate_inode_pages does fewer single-page unmaps. However
1579   - * after this first call, and before truncate_inode_pages finishes,
1580   - * it is possible for private pages to be COWed, which remain after
1581   - * truncate_inode_pages finishes, hence the second unmap_mapping_range
1582   - * call must be made for correctness.
1583   - */
1584   - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
1585   - truncate_inode_pages(mapping, offset);
1586   - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
1587   - goto out_truncate;
1588 1569  
1589   -do_expand:
1590   - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
1591   - if (limit != RLIM_INFINITY && offset > limit) {
1592   - spin_unlock(&inode->i_lock);
1593   - goto out_sig;
1594   - }
1595   - if (offset > inode->i_sb->s_maxbytes) {
1596   - spin_unlock(&inode->i_lock);
1597   - goto out_big;
1598   - }
  1570 + oldsize = inode->i_size;
1599 1571 i_size_write(inode, offset);
1600 1572 spin_unlock(&inode->i_lock);
1601   -out_truncate:
  1573 + truncate_pagecache(inode, oldsize, offset);
1602 1574 if (inode->i_op->truncate)
1603 1575 inode->i_op->truncate(inode);
1604   - return 0;
1605   -out_sig:
1606   - send_sig(SIGXFSZ, current, 0);
1607   -out_big:
1608   - return -EFBIG;
1609   -out_busy:
1610   - return -ETXTBSY;
  1576 +out:
  1577 + return err;
1611 1578 }
1612 1579  
1613 1580 static int
fs/compat.c
... ... @@ -768,13 +768,13 @@
768 768 char __user * type, unsigned long flags,
769 769 void __user * data)
770 770 {
771   - unsigned long type_page;
  771 + char *kernel_type;
772 772 unsigned long data_page;
773   - unsigned long dev_page;
  773 + char *kernel_dev;
774 774 char *dir_page;
775 775 int retval;
776 776  
777   - retval = copy_mount_options (type, &type_page);
  777 + retval = copy_mount_string(type, &kernel_type);
778 778 if (retval < 0)
779 779 goto out;
780 780  
... ... @@ -783,38 +783,38 @@
783 783 if (IS_ERR(dir_page))
784 784 goto out1;
785 785  
786   - retval = copy_mount_options (dev_name, &dev_page);
  786 + retval = copy_mount_string(dev_name, &kernel_dev);
787 787 if (retval < 0)
788 788 goto out2;
789 789  
790   - retval = copy_mount_options (data, &data_page);
  790 + retval = copy_mount_options(data, &data_page);
791 791 if (retval < 0)
792 792 goto out3;
793 793  
794 794 retval = -EINVAL;
795 795  
796   - if (type_page && data_page) {
797   - if (!strcmp((char *)type_page, SMBFS_NAME)) {
  796 + if (kernel_type && data_page) {
  797 + if (!strcmp(kernel_type, SMBFS_NAME)) {
798 798 do_smb_super_data_conv((void *)data_page);
799   - } else if (!strcmp((char *)type_page, NCPFS_NAME)) {
  799 + } else if (!strcmp(kernel_type, NCPFS_NAME)) {
800 800 do_ncp_super_data_conv((void *)data_page);
801   - } else if (!strcmp((char *)type_page, NFS4_NAME)) {
  801 + } else if (!strcmp(kernel_type, NFS4_NAME)) {
802 802 if (do_nfs4_super_data_conv((void *) data_page))
803 803 goto out4;
804 804 }
805 805 }
806 806  
807   - retval = do_mount((char*)dev_page, dir_page, (char*)type_page,
  807 + retval = do_mount(kernel_dev, dir_page, kernel_type,
808 808 flags, (void*)data_page);
809 809  
810 810 out4:
811 811 free_page(data_page);
812 812 out3:
813   - free_page(dev_page);
  813 + kfree(kernel_dev);
814 814 out2:
815 815 putname(dir_page);
816 816 out1:
817   - free_page(type_page);
  817 + kfree(kernel_type);
818 818 out:
819 819 return retval;
820 820 }
fs/exofs/super.c
... ... @@ -214,7 +214,6 @@
214 214 }
215 215  
216 216 lock_super(sb);
217   - lock_kernel();
218 217 sbi = sb->s_fs_info;
219 218 fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
220 219 fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles);
... ... @@ -245,7 +244,6 @@
245 244 out:
246 245 if (or)
247 246 osd_end_request(or);
248   - unlock_kernel();
249 247 unlock_super(sb);
250 248 kfree(fscb);
251 249 return ret;
... ... @@ -268,8 +266,6 @@
268 266 int num_pend;
269 267 struct exofs_sb_info *sbi = sb->s_fs_info;
270 268  
271   - lock_kernel();
272   -
273 269 if (sb->s_dirt)
274 270 exofs_write_super(sb);
275 271  
... ... @@ -286,8 +282,6 @@
286 282 osduld_put_device(sbi->s_dev);
287 283 kfree(sb->s_fs_info);
288 284 sb->s_fs_info = NULL;
289   -
290   - unlock_kernel();
291 285 }
292 286  
293 287 /*
fs/fat/inode.c
... ... @@ -470,19 +470,11 @@
470 470  
471 471 iput(sbi->fat_inode);
472 472  
473   - if (sbi->nls_disk) {
474   - unload_nls(sbi->nls_disk);
475   - sbi->nls_disk = NULL;
476   - sbi->options.codepage = fat_default_codepage;
477   - }
478   - if (sbi->nls_io) {
479   - unload_nls(sbi->nls_io);
480   - sbi->nls_io = NULL;
481   - }
482   - if (sbi->options.iocharset != fat_default_iocharset) {
  473 + unload_nls(sbi->nls_disk);
  474 + unload_nls(sbi->nls_io);
  475 +
  476 + if (sbi->options.iocharset != fat_default_iocharset)
483 477 kfree(sbi->options.iocharset);
484   - sbi->options.iocharset = fat_default_iocharset;
485   - }
486 478  
487 479 sb->s_fs_info = NULL;
488 480 kfree(sbi);
fs/fuse/dir.c
... ... @@ -1276,14 +1276,9 @@
1276 1276 return 0;
1277 1277  
1278 1278 if (attr->ia_valid & ATTR_SIZE) {
1279   - unsigned long limit;
1280   - if (IS_SWAPFILE(inode))
1281   - return -ETXTBSY;
1282   - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
1283   - if (limit != RLIM_INFINITY && attr->ia_size > (loff_t) limit) {
1284   - send_sig(SIGXFSZ, current, 0);
1285   - return -EFBIG;
1286   - }
  1279 + err = inode_newsize_ok(inode, attr->ia_size);
  1280 + if (err)
  1281 + return err;
1287 1282 is_truncate = true;
1288 1283 }
1289 1284  
... ... @@ -1350,8 +1345,7 @@
1350 1345 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
1351 1346 */
1352 1347 if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
1353   - if (outarg.attr.size < oldsize)
1354   - fuse_truncate(inode->i_mapping, outarg.attr.size);
  1348 + truncate_pagecache(inode, oldsize, outarg.attr.size);
1355 1349 invalidate_inode_pages2(inode->i_mapping);
1356 1350 }
1357 1351  
fs/fuse/fuse_i.h
... ... @@ -606,8 +606,6 @@
606 606 void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
607 607 u64 attr_valid);
608 608  
609   -void fuse_truncate(struct address_space *mapping, loff_t offset);
610   -
611 609 /**
612 610 * Initialize the client device
613 611 */
fs/fuse/inode.c
... ... @@ -140,14 +140,6 @@
140 140 return 0;
141 141 }
142 142  
143   -void fuse_truncate(struct address_space *mapping, loff_t offset)
144   -{
145   - /* See vmtruncate() */
146   - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
147   - truncate_inode_pages(mapping, offset);
148   - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
149   -}
150   -
151 143 void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
152 144 u64 attr_valid)
153 145 {
... ... @@ -205,8 +197,7 @@
205 197 spin_unlock(&fc->lock);
206 198  
207 199 if (S_ISREG(inode->i_mode) && oldsize != attr->size) {
208   - if (attr->size < oldsize)
209   - fuse_truncate(inode->i_mapping, attr->size);
  200 + truncate_pagecache(inode, oldsize, attr->size);
210 201 invalidate_inode_pages2(inode->i_mapping);
211 202 }
212 203 }
fs/hfs/mdb.c
... ... @@ -344,10 +344,8 @@
344 344 brelse(HFS_SB(sb)->mdb_bh);
345 345 brelse(HFS_SB(sb)->alt_mdb_bh);
346 346  
347   - if (HFS_SB(sb)->nls_io)
348   - unload_nls(HFS_SB(sb)->nls_io);
349   - if (HFS_SB(sb)->nls_disk)
350   - unload_nls(HFS_SB(sb)->nls_disk);
  347 + unload_nls(HFS_SB(sb)->nls_io);
  348 + unload_nls(HFS_SB(sb)->nls_disk);
351 349  
352 350 free_pages((unsigned long)HFS_SB(sb)->bitmap, PAGE_SIZE < 8192 ? 1 : 0);
353 351 kfree(HFS_SB(sb));
fs/hfsplus/super.c
... ... @@ -229,8 +229,7 @@
229 229 iput(HFSPLUS_SB(sb).alloc_file);
230 230 iput(HFSPLUS_SB(sb).hidden_dir);
231 231 brelse(HFSPLUS_SB(sb).s_vhbh);
232   - if (HFSPLUS_SB(sb).nls)
233   - unload_nls(HFSPLUS_SB(sb).nls);
  232 + unload_nls(HFSPLUS_SB(sb).nls);
234 233 kfree(sb->s_fs_info);
235 234 sb->s_fs_info = NULL;
236 235  
... ... @@ -464,8 +463,7 @@
464 463  
465 464 cleanup:
466 465 hfsplus_put_super(sb);
467   - if (nls)
468   - unload_nls(nls);
  466 + unload_nls(nls);
469 467 return err;
470 468 }
471 469  
fs/hugetlbfs/inode.c
... ... @@ -380,36 +380,11 @@
380 380  
381 381 static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock)
382 382 {
383   - struct super_block *sb = inode->i_sb;
384   -
385   - if (!hlist_unhashed(&inode->i_hash)) {
386   - if (!(inode->i_state & (I_DIRTY|I_SYNC)))
387   - list_move(&inode->i_list, &inode_unused);
388   - inodes_stat.nr_unused++;
389   - if (!sb || (sb->s_flags & MS_ACTIVE)) {
390   - spin_unlock(&inode_lock);
391   - return;
392   - }
393   - inode->i_state |= I_WILL_FREE;
394   - spin_unlock(&inode_lock);
395   - /*
396   - * write_inode_now is a noop as we set BDI_CAP_NO_WRITEBACK
397   - * in our backing_dev_info.
398   - */
399   - write_inode_now(inode, 1);
400   - spin_lock(&inode_lock);
401   - inode->i_state &= ~I_WILL_FREE;
402   - inodes_stat.nr_unused--;
403   - hlist_del_init(&inode->i_hash);
  383 + if (generic_detach_inode(inode)) {
  384 + truncate_hugepages(inode, 0);
  385 + clear_inode(inode);
  386 + destroy_inode(inode);
404 387 }
405   - list_del_init(&inode->i_list);
406   - list_del_init(&inode->i_sb_list);
407   - inode->i_state |= I_FREEING;
408   - inodes_stat.nr_inodes--;
409   - spin_unlock(&inode_lock);
410   - truncate_hugepages(inode, 0);
411   - clear_inode(inode);
412   - destroy_inode(inode);
413 388 }
414 389  
415 390 static void hugetlbfs_drop_inode(struct inode *inode)
fs/inode.c
... ... @@ -1241,7 +1241,16 @@
1241 1241 }
1242 1242 EXPORT_SYMBOL(generic_delete_inode);
1243 1243  
1244   -static void generic_forget_inode(struct inode *inode)
  1244 +/**
  1245 + * generic_detach_inode - remove inode from inode lists
  1246 + * @inode: inode to remove
  1247 + *
  1248 + * Remove inode from inode lists, write it if it's dirty. This is just an
  1249 + * internal VFS helper exported for hugetlbfs. Do not use!
  1250 + *
  1251 + * Returns 1 if inode should be completely destroyed.
  1252 + */
  1253 +int generic_detach_inode(struct inode *inode)
1245 1254 {
1246 1255 struct super_block *sb = inode->i_sb;
1247 1256  
... ... @@ -1251,7 +1260,7 @@
1251 1260 inodes_stat.nr_unused++;
1252 1261 if (sb->s_flags & MS_ACTIVE) {
1253 1262 spin_unlock(&inode_lock);
1254   - return;
  1263 + return 0;
1255 1264 }
1256 1265 WARN_ON(inode->i_state & I_NEW);
1257 1266 inode->i_state |= I_WILL_FREE;
... ... @@ -1269,6 +1278,14 @@
1269 1278 inode->i_state |= I_FREEING;
1270 1279 inodes_stat.nr_inodes--;
1271 1280 spin_unlock(&inode_lock);
  1281 + return 1;
  1282 +}
  1283 +EXPORT_SYMBOL_GPL(generic_detach_inode);
  1284 +
  1285 +static void generic_forget_inode(struct inode *inode)
  1286 +{
  1287 + if (!generic_detach_inode(inode))
  1288 + return;
1272 1289 if (inode->i_data.nrpages)
1273 1290 truncate_inode_pages(&inode->i_data, 0);
1274 1291 clear_inode(inode);
... ... @@ -1399,31 +1416,31 @@
1399 1416 struct inode *inode = dentry->d_inode;
1400 1417 struct timespec now;
1401 1418  
1402   - if (mnt_want_write(mnt))
1403   - return;
1404 1419 if (inode->i_flags & S_NOATIME)
1405   - goto out;
  1420 + return;
1406 1421 if (IS_NOATIME(inode))
1407   - goto out;
  1422 + return;
1408 1423 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
1409   - goto out;
  1424 + return;
1410 1425  
1411 1426 if (mnt->mnt_flags & MNT_NOATIME)
1412   - goto out;
  1427 + return;
1413 1428 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1414   - goto out;
  1429 + return;
1415 1430  
1416 1431 now = current_fs_time(inode->i_sb);
1417 1432  
1418 1433 if (!relatime_need_update(mnt, inode, now))
1419   - goto out;
  1434 + return;
1420 1435  
1421 1436 if (timespec_equal(&inode->i_atime, &now))
1422   - goto out;
  1437 + return;
1423 1438  
  1439 + if (mnt_want_write(mnt))
  1440 + return;
  1441 +
1424 1442 inode->i_atime = now;
1425 1443 mark_inode_dirty_sync(inode);
1426   -out:
1427 1444 mnt_drop_write(mnt);
1428 1445 }
1429 1446 EXPORT_SYMBOL(touch_atime);
... ... @@ -1444,34 +1461,37 @@
1444 1461 {
1445 1462 struct inode *inode = file->f_path.dentry->d_inode;
1446 1463 struct timespec now;
1447   - int sync_it = 0;
1448   - int err;
  1464 + enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0;
1449 1465  
  1466 + /* First try to exhaust all avenues to not sync */
1450 1467 if (IS_NOCMTIME(inode))
1451 1468 return;
1452 1469  
1453   - err = mnt_want_write_file(file);
1454   - if (err)
1455   - return;
1456   -
1457 1470 now = current_fs_time(inode->i_sb);
1458   - if (!timespec_equal(&inode->i_mtime, &now)) {
1459   - inode->i_mtime = now;
1460   - sync_it = 1;
1461   - }
  1471 + if (!timespec_equal(&inode->i_mtime, &now))
  1472 + sync_it = S_MTIME;
1462 1473  
1463   - if (!timespec_equal(&inode->i_ctime, &now)) {
1464   - inode->i_ctime = now;
1465   - sync_it = 1;
1466   - }
  1474 + if (!timespec_equal(&inode->i_ctime, &now))
  1475 + sync_it |= S_CTIME;
1467 1476  
1468   - if (IS_I_VERSION(inode)) {
1469   - inode_inc_iversion(inode);
1470   - sync_it = 1;
1471   - }
  1477 + if (IS_I_VERSION(inode))
  1478 + sync_it |= S_VERSION;
1472 1479  
1473   - if (sync_it)
1474   - mark_inode_dirty_sync(inode);
  1480 + if (!sync_it)
  1481 + return;
  1482 +
  1483 + /* Finally allowed to write? Takes lock. */
  1484 + if (mnt_want_write_file(file))
  1485 + return;
  1486 +
  1487 + /* Only change inode inside the lock region */
  1488 + if (sync_it & S_VERSION)
  1489 + inode_inc_iversion(inode);
  1490 + if (sync_it & S_CTIME)
  1491 + inode->i_ctime = now;
  1492 + if (sync_it & S_MTIME)
  1493 + inode->i_mtime = now;
  1494 + mark_inode_dirty_sync(inode);
1475 1495 mnt_drop_write(file->f_path.mnt);
1476 1496 }
1477 1497 EXPORT_SYMBOL(file_update_time);
... ... @@ -1599,8 +1619,9 @@
1599 1619 else if (S_ISSOCK(mode))
1600 1620 inode->i_fop = &bad_sock_fops;
1601 1621 else
1602   - printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o)\n",
1603   - mode);
  1622 + printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
  1623 + " inode %s:%lu\n", mode, inode->i_sb->s_id,
  1624 + inode->i_ino);
1604 1625 }
1605 1626 EXPORT_SYMBOL(init_special_inode);
fs/internal.h
... ... @@ -57,6 +57,7 @@
57 57 * namespace.c
58 58 */
59 59 extern int copy_mount_options(const void __user *, unsigned long *);
  60 +extern int copy_mount_string(const void __user *, char **);
60 61  
61 62 extern void free_vfsmnt(struct vfsmount *);
62 63 extern struct vfsmount *alloc_vfsmnt(const char *);
fs/ioctl.c
... ... @@ -162,20 +162,21 @@
162 162 static int fiemap_check_ranges(struct super_block *sb,
163 163 u64 start, u64 len, u64 *new_len)
164 164 {
  165 + u64 maxbytes = (u64) sb->s_maxbytes;
  166 +
165 167 *new_len = len;
166 168  
167 169 if (len == 0)
168 170 return -EINVAL;
169 171  
170   - if (start > sb->s_maxbytes)
  172 + if (start > maxbytes)
171 173 return -EFBIG;
172 174  
173 175 /*
174 176 * Shrink request scope to what the fs can actually handle.
175 177 */
176   - if ((len > sb->s_maxbytes) ||
177   - (sb->s_maxbytes - len) < start)
178   - *new_len = sb->s_maxbytes - start;
  178 + if (len > maxbytes || (maxbytes - len) < start)
  179 + *new_len = maxbytes - start;
179 180  
180 181 return 0;
181 182 }
fs/isofs/inode.c
... ... @@ -46,10 +46,7 @@
46 46 #ifdef CONFIG_JOLIET
47 47 lock_kernel();
48 48  
49   - if (sbi->s_nls_iocharset) {
50   - unload_nls(sbi->s_nls_iocharset);
51   - sbi->s_nls_iocharset = NULL;
52   - }
  49 + unload_nls(sbi->s_nls_iocharset);
53 50  
54 51 unlock_kernel();
55 52 #endif
... ... @@ -912,8 +909,7 @@
912 909 printk(KERN_WARNING "%s: get root inode failed\n", __func__);
913 910 out_no_inode:
914 911 #ifdef CONFIG_JOLIET
915   - if (sbi->s_nls_iocharset)
916   - unload_nls(sbi->s_nls_iocharset);
  912 + unload_nls(sbi->s_nls_iocharset);
917 913 #endif
918 914 goto out_freesbi;
919 915 out_no_read:
fs/jfs/super.c
... ... @@ -178,13 +178,11 @@
178 178 rc = jfs_umount(sb);
179 179 if (rc)
180 180 jfs_err("jfs_umount failed with return code %d", rc);
181   - if (sbi->nls_tab)
182   - unload_nls(sbi->nls_tab);
183   - sbi->nls_tab = NULL;
184 181  
  182 + unload_nls(sbi->nls_tab);
  183 +
185 184 truncate_inode_pages(sbi->direct_inode->i_mapping, 0);
186 185 iput(sbi->direct_inode);
187   - sbi->direct_inode = NULL;
188 186  
189 187 kfree(sbi);
190 188  
... ... @@ -347,8 +345,7 @@
347 345  
348 346 if (nls_map != (void *) -1) {
349 347 /* Discard old (if remount) */
350   - if (sbi->nls_tab)
351   - unload_nls(sbi->nls_tab);
  348 + unload_nls(sbi->nls_tab);
352 349 sbi->nls_tab = nls_map;
353 350 }
354 351 return 1;
fs/libfs.c
... ... @@ -527,14 +527,18 @@
527 527 const void *from, size_t available)
528 528 {
529 529 loff_t pos = *ppos;
  530 + size_t ret;
  531 +
530 532 if (pos < 0)
531 533 return -EINVAL;
532   - if (pos >= available)
  534 + if (pos >= available || !count)
533 535 return 0;
534 536 if (count > available - pos)
535 537 count = available - pos;
536   - if (copy_to_user(to, from + pos, count))
  538 + ret = copy_to_user(to, from + pos, count);
  539 + if (ret == count)
537 540 return -EFAULT;
  541 + count -= ret;
538 542 *ppos = pos + count;
539 543 return count;
540 544 }
... ... @@ -735,10 +739,11 @@
735 739 if (copy_from_user(attr->set_buf, buf, size))
736 740 goto out;
737 741  
738   - ret = len; /* claim we got the whole input */
739 742 attr->set_buf[size] = '\0';
740 743 val = simple_strtol(attr->set_buf, NULL, 0);
741   - attr->set(attr->data, val);
  744 + ret = attr->set(attr->data, val);
  745 + if (ret == 0)
  746 + ret = len; /* on success, claim we got the whole input */
742 747 out:
743 748 mutex_unlock(&attr->mutex);
744 749 return ret;
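
A short sketch of a caller that benefits from the partial-copy fix (hypothetical read op for a procfs/debugfs-style file):

    static ssize_t example_read(struct file *file, char __user *ubuf,
                                size_t count, loff_t *ppos)
    {
            static const char msg[] = "hello\n";

            /* a partially faulting copy_to_user now yields the bytes
             * that did copy, rather than failing the whole read */
            return simple_read_from_buffer(ubuf, count, ppos, msg,
                                           sizeof(msg) - 1);
    }
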
fs/namespace.c
... ... @@ -1640,7 +1640,7 @@
1640 1640 {
1641 1641 struct vfsmount *mnt;
1642 1642  
1643   - if (!type || !memchr(type, 0, PAGE_SIZE))
  1643 + if (!type)
1644 1644 return -EINVAL;
1645 1645  
1646 1646 /* we need capabilities... */
... ... @@ -1871,6 +1871,23 @@
1871 1871 return 0;
1872 1872 }
1873 1873  
  1874 +int copy_mount_string(const void __user *data, char **where)
  1875 +{
  1876 + char *tmp;
  1877 +
  1878 + if (!data) {
  1879 + *where = NULL;
  1880 + return 0;
  1881 + }
  1882 +
  1883 + tmp = strndup_user(data, PAGE_SIZE);
  1884 + if (IS_ERR(tmp))
  1885 + return PTR_ERR(tmp);
  1886 +
  1887 + *where = tmp;
  1888 + return 0;
  1889 +}
  1890 +
1874 1891 /*
1875 1892 * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
1876 1893 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
... ... @@ -1900,8 +1917,6 @@
1900 1917  
1901 1918 if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
1902 1919 return -EINVAL;
1903   - if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))
1904   - return -EINVAL;
1905 1920  
1906 1921 if (data_page)
1907 1922 ((char *)data_page)[PAGE_SIZE - 1] = 0;
... ... @@ -2070,40 +2085,42 @@
2070 2085 SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
2071 2086 char __user *, type, unsigned long, flags, void __user *, data)
2072 2087 {
2073   - int retval;
  2088 + int ret;
  2089 + char *kernel_type;
  2090 + char *kernel_dir;
  2091 + char *kernel_dev;
2074 2092 unsigned long data_page;
2075   - unsigned long type_page;
2076   - unsigned long dev_page;
2077   - char *dir_page;
2078 2093  
2079   - retval = copy_mount_options(type, &type_page);
2080   - if (retval < 0)
2081   - return retval;
  2094 + ret = copy_mount_string(type, &kernel_type);
  2095 + if (ret < 0)
  2096 + goto out_type;
2082 2097  
2083   - dir_page = getname(dir_name);
2084   - retval = PTR_ERR(dir_page);
2085   - if (IS_ERR(dir_page))
2086   - goto out1;
  2098 + kernel_dir = getname(dir_name);
  2099 + if (IS_ERR(kernel_dir)) {
  2100 + ret = PTR_ERR(kernel_dir);
  2101 + goto out_dir;
  2102 + }
2087 2103  
2088   - retval = copy_mount_options(dev_name, &dev_page);
2089   - if (retval < 0)
2090   - goto out2;
  2104 + ret = copy_mount_string(dev_name, &kernel_dev);
  2105 + if (ret < 0)
  2106 + goto out_dev;
2091 2107  
2092   - retval = copy_mount_options(data, &data_page);
2093   - if (retval < 0)
2094   - goto out3;
  2108 + ret = copy_mount_options(data, &data_page);
  2109 + if (ret < 0)
  2110 + goto out_data;
2095 2111  
2096   - retval = do_mount((char *)dev_page, dir_page, (char *)type_page,
2097   - flags, (void *)data_page);
2098   - free_page(data_page);
  2112 + ret = do_mount(kernel_dev, kernel_dir, kernel_type, flags,
  2113 + (void *) data_page);
2099 2114  
2100   -out3:
2101   - free_page(dev_page);
2102   -out2:
2103   - putname(dir_page);
2104   -out1:
2105   - free_page(type_page);
2106   - return retval;
  2115 + free_page(data_page);
  2116 +out_data:
  2117 + kfree(kernel_dev);
  2118 +out_dev:
  2119 + putname(kernel_dir);
  2120 +out_dir:
  2121 + kfree(kernel_type);
  2122 +out_type:
  2123 + return ret;
2107 2124 }
2108 2125  
2109 2126 /*
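
The overflow fix amounts to treating device and type names as bounded C strings instead of raw page copies. A sketch of the caller-visible contract (hypothetical caller inside fs/):

    char *kernel_type;
    int err;

    err = copy_mount_string(type, &kernel_type);
    if (err)                /* -EFAULT/-ENOMEM/-EINVAL via strndup_user() */
            return err;
    /* a NULL user pointer yields kernel_type == NULL */
    do_something_with(kernel_type);     /* hypothetical */
    kfree(kernel_type);                 /* kfree(NULL) is a no-op */
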
fs/ncpfs/inode.c
... ... @@ -746,16 +746,8 @@
746 746  
747 747 #ifdef CONFIG_NCPFS_NLS
748 748 /* unload the NLS charsets */
749   - if (server->nls_vol)
750   - {
751   - unload_nls(server->nls_vol);
752   - server->nls_vol = NULL;
753   - }
754   - if (server->nls_io)
755   - {
756   - unload_nls(server->nls_io);
757   - server->nls_io = NULL;
758   - }
  749 + unload_nls(server->nls_vol);
  750 + unload_nls(server->nls_io);
759 751 #endif /* CONFIG_NCPFS_NLS */
760 752  
761 753 if (server->info_filp)
fs/ncpfs/ioctl.c
... ... @@ -223,10 +223,8 @@
223 223 oldset_io = server->nls_io;
224 224 server->nls_io = iocharset;
225 225  
226   - if (oldset_cp)
227   - unload_nls(oldset_cp);
228   - if (oldset_io)
229   - unload_nls(oldset_io);
  226 + unload_nls(oldset_cp);
  227 + unload_nls(oldset_io);
230 228  
231 229 return 0;
232 230 }
fs/nfs/inode.c
... ... @@ -458,49 +458,21 @@
458 458 */
459 459 static int nfs_vmtruncate(struct inode * inode, loff_t offset)
460 460 {
461   - if (i_size_read(inode) < offset) {
462   - unsigned long limit;
  461 + loff_t oldsize;
  462 + int err;
463 463  
464   - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
465   - if (limit != RLIM_INFINITY && offset > limit)
466   - goto out_sig;
467   - if (offset > inode->i_sb->s_maxbytes)
468   - goto out_big;
469   - spin_lock(&inode->i_lock);
470   - i_size_write(inode, offset);
471   - spin_unlock(&inode->i_lock);
472   - } else {
473   - struct address_space *mapping = inode->i_mapping;
  464 + err = inode_newsize_ok(inode, offset);
  465 + if (err)
  466 + goto out;
474 467  
475   - /*
476   - * truncation of in-use swapfiles is disallowed - it would
477   - * cause subsequent swapout to scribble on the now-freed
478   - * blocks.
479   - */
480   - if (IS_SWAPFILE(inode))
481   - return -ETXTBSY;
482   - spin_lock(&inode->i_lock);
483   - i_size_write(inode, offset);
484   - spin_unlock(&inode->i_lock);
  468 + spin_lock(&inode->i_lock);
  469 + oldsize = inode->i_size;
  470 + i_size_write(inode, offset);
  471 + spin_unlock(&inode->i_lock);
485 472  
486   - /*
487   - * unmap_mapping_range is called twice, first simply for
488   - * efficiency so that truncate_inode_pages does fewer
489   - * single-page unmaps. However after this first call, and
490   - * before truncate_inode_pages finishes, it is possible for
491   - * private pages to be COWed, which remain after
492   - * truncate_inode_pages finishes, hence the second
493   - * unmap_mapping_range call must be made for correctness.
494   - */
495   - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
496   - truncate_inode_pages(mapping, offset);
497   - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
498   - }
499   - return 0;
500   -out_sig:
501   - send_sig(SIGXFSZ, current, 0);
502   -out_big:
503   - return -EFBIG;
  473 + truncate_pagecache(inode, oldsize, offset);
  474 +out:
  475 + return err;
504 476 }
505 477  
506 478 /**
fs/nls/nls_base.c
... ... @@ -270,7 +270,8 @@
270 270  
271 271 void unload_nls(struct nls_table *nls)
272 272 {
273   - module_put(nls->owner);
  273 + if (nls)
  274 + module_put(nls->owner);
274 275 }
275 276  
276 277 static const wchar_t charset2uni[256] = {
fs/ntfs/super.c
... ... @@ -201,8 +201,7 @@
201 201 v, old_nls->charset);
202 202 nls_map = old_nls;
203 203 } else /* nls_map */ {
204   - if (old_nls)
205   - unload_nls(old_nls);
  204 + unload_nls(old_nls);
206 205 }
207 206 } else if (!strcmp(p, "utf8")) {
208 207 bool val = false;
... ... @@ -2427,10 +2426,9 @@
2427 2426 ntfs_free(vol->upcase);
2428 2427 vol->upcase = NULL;
2429 2428 }
2430   - if (vol->nls_map) {
2431   - unload_nls(vol->nls_map);
2432   - vol->nls_map = NULL;
2433   - }
  2429 +
  2430 + unload_nls(vol->nls_map);
  2431 +
2434 2432 sb->s_fs_info = NULL;
2435 2433 kfree(vol);
2436 2434  
fs/ramfs/file-nommu.c
... ... @@ -69,15 +69,12 @@
69 69 /* make various checks */
70 70 order = get_order(newsize);
71 71 if (unlikely(order >= MAX_ORDER))
72   - goto too_big;
  72 + return -EFBIG;
73 73  
74   - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
75   - if (limit != RLIM_INFINITY && newsize > limit)
76   - goto fsize_exceeded;
  74 + ret = inode_newsize_ok(inode, newsize);
  75 + if (ret)
  76 + return ret;
77 77  
78   - if (newsize > inode->i_sb->s_maxbytes)
79   - goto too_big;
80   -
81 78 i_size_write(inode, newsize);
82 79  
83 80 /* allocate enough contiguous pages to be able to satisfy the
... ... @@ -118,12 +115,7 @@
118 115  
119 116 return 0;
120 117  
121   - fsize_exceeded:
122   - send_sig(SIGXFSZ, current, 0);
123   - too_big:
124   - return -EFBIG;
125   -
126   - add_error:
  118 +add_error:
127 119 while (loop < npages)
128 120 __free_page(pages + loop++);
129 121 return ret;
fs/read_write.c
... ... @@ -839,9 +839,6 @@
839 839 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
840 840  
841 841 pos = *ppos;
842   - retval = -EINVAL;
843   - if (unlikely(pos < 0))
844   - goto fput_out;
845 842 if (unlikely(pos + count > max)) {
846 843 retval = -EOVERFLOW;
847 844 if (pos >= max)
fs/seq_file.c
... ... @@ -429,20 +429,21 @@
429 429 */
430 430 int seq_path(struct seq_file *m, struct path *path, char *esc)
431 431 {
432   - if (m->count < m->size) {
433   - char *s = m->buf + m->count;
434   - char *p = d_path(path, s, m->size - m->count);
  432 + char *buf;
  433 + size_t size = seq_get_buf(m, &buf);
  434 + int res = -1;
  435 +
  436 + if (size) {
  437 + char *p = d_path(path, buf, size);
435 438 if (!IS_ERR(p)) {
436   - s = mangle_path(s, p, esc);
437   - if (s) {
438   - p = m->buf + m->count;
439   - m->count = s - m->buf;
440   - return s - p;
441   - }
  439 + char *end = mangle_path(buf, p, esc);
  440 + if (end)
  441 + res = end - buf;
442 442 }
443 443 }
444   - m->count = m->size;
445   - return -1;
  444 + seq_commit(m, res);
  445 +
  446 + return res;
446 447 }
447 448 EXPORT_SYMBOL(seq_path);
448 449  
... ... @@ -454,26 +455,28 @@
454 455 int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
455 456 char *esc)
456 457 {
457   - int err = -ENAMETOOLONG;
458   - if (m->count < m->size) {
459   - char *s = m->buf + m->count;
  458 + char *buf;
  459 + size_t size = seq_get_buf(m, &buf);
  460 + int res = -ENAMETOOLONG;
  461 +
  462 + if (size) {
460 463 char *p;
461 464  
462 465 spin_lock(&dcache_lock);
463   - p = __d_path(path, root, s, m->size - m->count);
  466 + p = __d_path(path, root, buf, size);
464 467 spin_unlock(&dcache_lock);
465   - err = PTR_ERR(p);
  468 + res = PTR_ERR(p);
466 469 if (!IS_ERR(p)) {
467   - s = mangle_path(s, p, esc);
468   - if (s) {
469   - p = m->buf + m->count;
470   - m->count = s - m->buf;
471   - return 0;
472   - }
  470 + char *end = mangle_path(buf, p, esc);
  471 + if (end)
  472 + res = end - buf;
  473 + else
  474 + res = -ENAMETOOLONG;
473 475 }
474 476 }
475   - m->count = m->size;
476   - return err;
  477 + seq_commit(m, res);
  478 +
  479 + return res < 0 ? res : 0;
477 480 }
478 481  
479 482 /*
... ... @@ -481,20 +484,21 @@
481 484 */
482 485 int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
483 486 {
484   - if (m->count < m->size) {
485   - char *s = m->buf + m->count;
486   - char *p = dentry_path(dentry, s, m->size - m->count);
  487 + char *buf;
  488 + size_t size = seq_get_buf(m, &buf);
  489 + int res = -1;
  490 +
  491 + if (size) {
  492 + char *p = dentry_path(dentry, buf, size);
487 493 if (!IS_ERR(p)) {
488   - s = mangle_path(s, p, esc);
489   - if (s) {
490   - p = m->buf + m->count;
491   - m->count = s - m->buf;
492   - return s - p;
493   - }
  494 + char *end = mangle_path(buf, p, esc);
  495 + if (end)
  496 + res = end - buf;
494 497 }
495 498 }
496   - m->count = m->size;
497   - return -1;
  499 + seq_commit(m, res);
  500 +
  501 + return res;
498 502 }
499 503  
500 504 int seq_bitmap(struct seq_file *m, const unsigned long *bits,
fs/smbfs/inode.c
... ... @@ -459,14 +459,8 @@
459 459 static void
460 460 smb_unload_nls(struct smb_sb_info *server)
461 461 {
462   - if (server->remote_nls) {
463   - unload_nls(server->remote_nls);
464   - server->remote_nls = NULL;
465   - }
466   - if (server->local_nls) {
467   - unload_nls(server->local_nls);
468   - server->local_nls = NULL;
469   - }
  462 + unload_nls(server->remote_nls);
  463 + unload_nls(server->local_nls);
470 464 }
471 465  
472 466 static void
fs/super.c
... ... @@ -465,6 +465,48 @@
465 465 }
466 466  
467 467 EXPORT_SYMBOL(get_super);
  468 +
  469 +/**
  470 + * get_active_super - get an active reference to the superblock of a device
  471 + * @bdev: device to get the superblock for
  472 + *
  473 + * Scans the superblock list and finds the superblock of the file system
  474 + * mounted on the device given. Returns the superblock with an active
  475 + * reference and s_umount held exclusively or %NULL if none was found.
  476 + */
  477 +struct super_block *get_active_super(struct block_device *bdev)
  478 +{
  479 + struct super_block *sb;
  480 +
  481 + if (!bdev)
  482 + return NULL;
  483 +
  484 + spin_lock(&sb_lock);
  485 + list_for_each_entry(sb, &super_blocks, s_list) {
  486 + if (sb->s_bdev != bdev)
  487 + continue;
  488 +
  489 + sb->s_count++;
  490 + spin_unlock(&sb_lock);
  491 + down_write(&sb->s_umount);
  492 + if (sb->s_root) {
  493 + spin_lock(&sb_lock);
  494 + if (sb->s_count > S_BIAS) {
  495 + atomic_inc(&sb->s_active);
  496 + sb->s_count--;
  497 + spin_unlock(&sb_lock);
  498 + return sb;
  499 + }
  500 + spin_unlock(&sb_lock);
  501 + }
  502 + up_write(&sb->s_umount);
  503 + put_super(sb);
  504 + yield();
  505 + spin_lock(&sb_lock);
  506 + }
  507 + spin_unlock(&sb_lock);
  508 + return NULL;
  509 +}
468 510  
469 511 struct super_block * user_get_super(dev_t dev)
470 512 {
... ... @@ -527,11 +569,15 @@
527 569 {
528 570 int retval;
529 571 int remount_rw;
530   -
  572 +
  573 + if (sb->s_frozen != SB_UNFROZEN)
  574 + return -EBUSY;
  575 +
531 576 #ifdef CONFIG_BLOCK
532 577 if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev))
533 578 return -EACCES;
534 579 #endif
  580 +
535 581 if (flags & MS_RDONLY)
536 582 acct_auto_close(sb);
537 583 shrink_dcache_sb(sb);
... ... @@ -743,9 +789,14 @@
743 789 * will protect the lockfs code from trying to start a snapshot
744 790 * while we are mounting
745 791 */
746   - down(&bdev->bd_mount_sem);
  792 + mutex_lock(&bdev->bd_fsfreeze_mutex);
  793 + if (bdev->bd_fsfreeze_count > 0) {
  794 + mutex_unlock(&bdev->bd_fsfreeze_mutex);
  795 + error = -EBUSY;
  796 + goto error_bdev;
  797 + }
747 798 s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
748   - up(&bdev->bd_mount_sem);
  799 + mutex_unlock(&bdev->bd_fsfreeze_mutex);
749 800 if (IS_ERR(s))
750 801 goto error_s;
751 802  
... ... @@ -891,6 +942,16 @@
891 942 error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata);
892 943 if (error)
893 944 goto out_sb;
  945 +
  946 + /*
  947 + * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
  948 + * but s_maxbytes was an unsigned long long for many releases. Throw
  949 + * this warning for a little while to try and catch filesystems that
  950 + * violate this rule. This warning should be either removed or
  951 + * converted to a BUG() in 2.6.34.
  952 + */
  953 + WARN((mnt->mnt_sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
  954 + "negative value (%lld)\n", type->name, mnt->mnt_sb->s_maxbytes);
894 955  
895 956 mnt->mnt_mountpoint = mnt->mnt_root;
896 957 mnt->mnt_parent = mnt;
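
Given the new WARN, fill_super implementations are expected to keep s_maxbytes non-negative; the old habit of writing ~0ULL into it now shows up as a negative loff_t. A minimal sketch (hypothetical filesystem):

    static int example_fill_super(struct super_block *sb, void *data, int silent)
    {
            sb->s_blocksize = 1024;                 /* hypothetical */
            sb->s_blocksize_bits = 10;
            sb->s_maxbytes = MAX_LFS_FILESIZE;      /* largest safe value */
            return 0;
    }
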
include/linux/fs.h
... ... @@ -641,7 +641,6 @@
641 641 struct super_block * bd_super;
642 642 int bd_openers;
643 643 struct mutex bd_mutex; /* open/close mutex */
644   - struct semaphore bd_mount_sem;
645 644 struct list_head bd_inodes;
646 645 void * bd_holder;
647 646 int bd_holders;
... ... @@ -1316,7 +1315,7 @@
1316 1315 unsigned long s_blocksize;
1317 1316 unsigned char s_blocksize_bits;
1318 1317 unsigned char s_dirt;
1319   - unsigned long long s_maxbytes; /* Max file size */
  1318 + loff_t s_maxbytes; /* Max file size */
1320 1319 struct file_system_type *s_type;
1321 1320 const struct super_operations *s_op;
1322 1321 const struct dquot_operations *dq_op;
... ... @@ -2157,6 +2156,7 @@
2157 2156 extern int inode_needs_sync(struct inode *inode);
2158 2157 extern void generic_delete_inode(struct inode *inode);
2159 2158 extern void generic_drop_inode(struct inode *inode);
  2159 +extern int generic_detach_inode(struct inode *inode);
2160 2160  
2161 2161 extern struct inode *ilookup5_nowait(struct super_block *sb,
2162 2162 unsigned long hashval, int (*test)(struct inode *, void *),
... ... @@ -2335,6 +2335,7 @@
2335 2335 extern void put_filesystem(struct file_system_type *fs);
2336 2336 extern struct file_system_type *get_fs_type(const char *name);
2337 2337 extern struct super_block *get_super(struct block_device *);
  2338 +extern struct super_block *get_active_super(struct block_device *bdev);
2338 2339 extern struct super_block *user_get_super(dev_t);
2339 2340 extern void drop_super(struct super_block *sb);
2340 2341  
... ... @@ -2382,7 +2383,8 @@
2382 2383 #define buffer_migrate_page NULL
2383 2384 #endif
2384 2385  
2385   -extern int inode_change_ok(struct inode *, struct iattr *);
  2386 +extern int inode_change_ok(const struct inode *, struct iattr *);
  2387 +extern int inode_newsize_ok(const struct inode *, loff_t offset);
2386 2388 extern int __must_check inode_setattr(struct inode *, struct iattr *);
2387 2389  
2388 2390 extern void file_update_time(struct file *file);
include/linux/mm.h
... ... @@ -792,8 +792,9 @@
792 792 unmap_mapping_range(mapping, holebegin, holelen, 0);
793 793 }
794 794  
795   -extern int vmtruncate(struct inode * inode, loff_t offset);
796   -extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end);
  795 +extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new);
  796 +extern int vmtruncate(struct inode *inode, loff_t offset);
  797 +extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end);
797 798  
798 799 int truncate_inode_page(struct address_space *mapping, struct page *page);
799 800 int generic_error_remove_page(struct address_space *mapping, struct page *page);
include/linux/seq_file.h
... ... @@ -35,6 +35,44 @@
35 35  
36 36 #define SEQ_SKIP 1
37 37  
  38 +/**
  39 + * seq_get_buf - get buffer to write arbitrary data to
  40 + * @m: the seq_file handle
  41 + * @bufp: the beginning of the buffer is stored here
  42 + *
  43 + * Return the number of bytes available in the buffer, or zero if
  44 + * there's no space.
  45 + */
  46 +static inline size_t seq_get_buf(struct seq_file *m, char **bufp)
  47 +{
  48 + BUG_ON(m->count > m->size);
  49 + if (m->count < m->size)
  50 + *bufp = m->buf + m->count;
  51 + else
  52 + *bufp = NULL;
  53 +
  54 + return m->size - m->count;
  55 +}
  56 +
  57 +/**
  58 + * seq_commit - commit data to the buffer
  59 + * @m: the seq_file handle
  60 + * @num: the number of bytes to commit
  61 + *
  62 + * Commit @num bytes of data written to a buffer previously acquired
  63 + * by seq_get_buf. To signal an error condition, or that the data
  64 + * didn't fit in the available space, pass a negative @num value.
  65 + */
  66 +static inline void seq_commit(struct seq_file *m, int num)
  67 +{
  68 + if (num < 0) {
  69 + m->count = m->size;
  70 + } else {
  71 + BUG_ON(m->count + num > m->size);
  72 + m->count += num;
  73 + }
  74 +}
  75 +
38 76 char *mangle_path(char *s, char *p, char *esc);
39 77 int seq_open(struct file *, const struct seq_operations *);
40 78 ssize_t seq_read(struct file *, char __user *, size_t, loff_t *);
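
The helpers formalize the fill pattern that seq_path() and friends now use. A sketch of a show() writing straight into the buffer (hypothetical op; the value is made up):

    static int example_show(struct seq_file *m, void *v)
    {
            char *buf;
            size_t size = seq_get_buf(m, &buf);
            int len = -1;

            if (size) {
                    len = snprintf(buf, size, "value=%d\n", 42);
                    if ((size_t)len >= size)
                            len = -1;       /* truncated */
            }
            /* a negative len marks overflow, making seq_file retry
             * with a larger buffer */
            seq_commit(m, len);
            return 0;
    }
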
mm/filemap.c
... ... @@ -58,7 +58,7 @@
58 58 /*
59 59 * Lock ordering:
60 60 *
61   - * ->i_mmap_lock (vmtruncate)
  61 + * ->i_mmap_lock (truncate_pagecache)
62 62 * ->private_lock (__free_pte->__set_page_dirty_buffers)
63 63 * ->swap_lock (exclusive_swap_page, others)
64 64 * ->mapping->tree_lock
mm/memory.c
... ... @@ -297,7 +297,8 @@
297 297 unsigned long addr = vma->vm_start;
298 298  
299 299 /*
300   - * Hide vma from rmap and vmtruncate before freeing pgtables
  300 + * Hide vma from rmap and truncate_pagecache before freeing
  301 + * pgtables
301 302 */
302 303 anon_vma_unlink(vma);
303 304 unlink_file_vma(vma);
... ... @@ -2408,7 +2409,7 @@
2408 2409 * @mapping: the address space containing mmaps to be unmapped.
2409 2410 * @holebegin: byte in first page to unmap, relative to the start of
2410 2411 * the underlying file. This will be rounded down to a PAGE_SIZE
2411   - * boundary. Note that this is different from vmtruncate(), which
  2412 + * boundary. Note that this is different from truncate_pagecache(), which
2412 2413 * must keep the partial page. In contrast, we must get rid of
2413 2414 * partial pages.
2414 2415 * @holelen: size of prospective hole in bytes. This will be rounded
... ... @@ -2458,63 +2459,6 @@
2458 2459 spin_unlock(&mapping->i_mmap_lock);
2459 2460 }
2460 2461 EXPORT_SYMBOL(unmap_mapping_range);
2461   -
2462   -/**
2463   - * vmtruncate - unmap mappings "freed" by truncate() syscall
2464   - * @inode: inode of the file used
2465   - * @offset: file offset to start truncating
2466   - *
2467   - * NOTE! We have to be ready to update the memory sharing
2468   - * between the file and the memory map for a potential last
2469   - * incomplete page. Ugly, but necessary.
2470   - */
2471   -int vmtruncate(struct inode * inode, loff_t offset)
2472   -{
2473   - if (inode->i_size < offset) {
2474   - unsigned long limit;
2475   -
2476   - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
2477   - if (limit != RLIM_INFINITY && offset > limit)
2478   - goto out_sig;
2479   - if (offset > inode->i_sb->s_maxbytes)
2480   - goto out_big;
2481   - i_size_write(inode, offset);
2482   - } else {
2483   - struct address_space *mapping = inode->i_mapping;
2484   -
2485   - /*
2486   - * truncation of in-use swapfiles is disallowed - it would
2487   - * cause subsequent swapout to scribble on the now-freed
2488   - * blocks.
2489   - */
2490   - if (IS_SWAPFILE(inode))
2491   - return -ETXTBSY;
2492   - i_size_write(inode, offset);
2493   -
2494   - /*
2495   - * unmap_mapping_range is called twice, first simply for
2496   - * efficiency so that truncate_inode_pages does fewer
2497   - * single-page unmaps. However after this first call, and
2498   - * before truncate_inode_pages finishes, it is possible for
2499   - * private pages to be COWed, which remain after
2500   - * truncate_inode_pages finishes, hence the second
2501   - * unmap_mapping_range call must be made for correctness.
2502   - */
2503   - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
2504   - truncate_inode_pages(mapping, offset);
2505   - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
2506   - }
2507   -
2508   - if (inode->i_op->truncate)
2509   - inode->i_op->truncate(inode);
2510   - return 0;
2511   -
2512   -out_sig:
2513   - send_sig(SIGXFSZ, current, 0);
2514   -out_big:
2515   - return -EFBIG;
2516   -}
2517   -EXPORT_SYMBOL(vmtruncate);
2518 2462  
2519 2463 int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
2520 2464 {
mm/mremap.c
... ... @@ -86,8 +86,8 @@
86 86 if (vma->vm_file) {
87 87 /*
88 88 * Subtle point from Rajesh Venkatasubramanian: before
89   - * moving file-based ptes, we must lock vmtruncate out,
90   - * since it might clean the dst vma before the src vma,
  89 + * moving file-based ptes, we must lock truncate_pagecache
  90 + * out, since it might clean the dst vma before the src vma,
91 91 * and we propagate stale pages into the dst afterward.
92 92 */
93 93 mapping = vma->vm_file->f_mapping;
mm/nommu.c
... ... @@ -83,46 +83,6 @@
83 83 };
84 84  
85 85 /*
86   - * Handle all mappings that got truncated by a "truncate()"
87   - * system call.
88   - *
89   - * NOTE! We have to be ready to update the memory sharing
90   - * between the file and the memory map for a potential last
91   - * incomplete page. Ugly, but necessary.
92   - */
93   -int vmtruncate(struct inode *inode, loff_t offset)
94   -{
95   - struct address_space *mapping = inode->i_mapping;
96   - unsigned long limit;
97   -
98   - if (inode->i_size < offset)
99   - goto do_expand;
100   - i_size_write(inode, offset);
101   -
102   - truncate_inode_pages(mapping, offset);
103   - goto out_truncate;
104   -
105   -do_expand:
106   - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
107   - if (limit != RLIM_INFINITY && offset > limit)
108   - goto out_sig;
109   - if (offset > inode->i_sb->s_maxbytes)
110   - goto out;
111   - i_size_write(inode, offset);
112   -
113   -out_truncate:
114   - if (inode->i_op->truncate)
115   - inode->i_op->truncate(inode);
116   - return 0;
117   -out_sig:
118   - send_sig(SIGXFSZ, current, 0);
119   -out:
120   - return -EFBIG;
121   -}
122   -
123   -EXPORT_SYMBOL(vmtruncate);
124   -
125   -/*
126 86 * Return the total memory allocated for this pointer, not
127 87 * just what the caller asked for.
128 88 *
mm/truncate.c
... ... @@ -497,4 +497,68 @@
497 497 return invalidate_inode_pages2_range(mapping, 0, -1);
498 498 }
499 499 EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
  500 +
  501 +/**
  502 + * truncate_pagecache - unmap and remove pagecache that has been truncated
  503 + * @inode: inode
  504 + * @old: old file offset
  505 + * @new: new file offset
  506 + *
  507 + * inode's new i_size must already be written before truncate_pagecache
  508 + * is called.
  509 + *
  510 + * This function should typically be called before the filesystem
  511 + * releases resources associated with the freed range (eg. deallocates
  512 + * blocks). This way, pagecache will always stay logically coherent
  513 + * with on-disk format, and the filesystem would not have to deal with
  514 + * situations such as writepage being called for a page that has already
  515 + * had its underlying blocks deallocated.
  516 + */
  517 +void truncate_pagecache(struct inode *inode, loff_t old, loff_t new)
  518 +{
  519 + if (new < old) {
  520 + struct address_space *mapping = inode->i_mapping;
  521 +
  522 + /*
  523 + * unmap_mapping_range is called twice, first simply for
  524 + * efficiency so that truncate_inode_pages does fewer
  525 + * single-page unmaps. However after this first call, and
  526 + * before truncate_inode_pages finishes, it is possible for
  527 + * private pages to be COWed, which remain after
  528 + * truncate_inode_pages finishes, hence the second
  529 + * unmap_mapping_range call must be made for correctness.
  530 + */
  531 + unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
  532 + truncate_inode_pages(mapping, new);
  533 + unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
  534 + }
  535 +}
  536 +EXPORT_SYMBOL(truncate_pagecache);
  537 +
  538 +/**
  539 + * vmtruncate - unmap mappings "freed" by truncate() syscall
  540 + * @inode: inode of the file used
  541 + * @offset: file offset to start truncating
  542 + *
  543 + * NOTE! We have to be ready to update the memory sharing
  544 + * between the file and the memory map for a potential last
  545 + * incomplete page. Ugly, but necessary.
  546 + */
  547 +int vmtruncate(struct inode *inode, loff_t offset)
  548 +{
  549 + loff_t oldsize;
  550 + int error;
  551 +
  552 + error = inode_newsize_ok(inode, offset);
  553 + if (error)
  554 + return error;
  555 + oldsize = inode->i_size;
  556 + i_size_write(inode, offset);
  557 + truncate_pagecache(inode, oldsize, offset);
  558 + if (inode->i_op->truncate)
  559 + inode->i_op->truncate(inode);
  560 +
  561 + return error;
  562 +}
  563 +EXPORT_SYMBOL(vmtruncate);
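
With vmtruncate() reduced to the new helpers, the converted callers all share one sequence (cf. nfs_vmtruncate and cifs_vmtruncate above); a hedged sketch with a hypothetical name:

    static int example_setsize(struct inode *inode, loff_t newsize)
    {
            loff_t oldsize;
            int error;

            error = inode_newsize_ok(inode, newsize);       /* limits, SIGXFSZ */
            if (error)
                    return error;

            oldsize = inode->i_size;
            i_size_write(inode, newsize);
            truncate_pagecache(inode, oldsize, newsize);    /* unmap + drop pages */
            return 0;
    }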