Commit 25d9e2d15286281ec834b829a4aaf8969011f1cd
Committed by
Al Viro (viro@zeniv.linux.org.uk, per the Signed-off-by line below)
1 parent
eca6f534e6
Exists in
master
and in
4 other branches
truncate: new helpers
Introduce new truncate helpers truncate_pagecache and inode_newsize_ok. vmtruncate is also consolidated from mm/memory.c and mm/nommu.c into mm/truncate.c. Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Nick Piggin <npiggin@suse.de> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Showing 9 changed files with 120 additions and 108 deletions Side-by-side Diff
Documentation/vm/locking
... | ... | @@ -80,7 +80,7 @@ |
80 | 80 | mm start up ... this is a loose form of stability on mm_users. For |
81 | 81 | example, it is used in copy_mm to protect against a racing tlb_gather_mmu |
82 | 82 | single address space optimization, so that the zap_page_range (from |
83 | -vmtruncate) does not lose sending ipi's to cloned threads that might | |
83 | +truncate) does not lose sending ipi's to cloned threads that might | |
84 | 84 | be spawned underneath it and go to user mode to drag in pte's into tlbs. |
85 | 85 | |
86 | 86 | swap_lock |
fs/attr.c
... | ... | @@ -18,7 +18,7 @@ |
18 | 18 | /* Taken over from the old code... */ |
19 | 19 | |
20 | 20 | /* POSIX UID/GID verification for setting inode attributes. */ |
21 | -int inode_change_ok(struct inode *inode, struct iattr *attr) | |
21 | +int inode_change_ok(const struct inode *inode, struct iattr *attr) | |
22 | 22 | { |
23 | 23 | int retval = -EPERM; |
24 | 24 | unsigned int ia_valid = attr->ia_valid; |
25 | 25 | |
... | ... | @@ -60,8 +60,50 @@ |
60 | 60 | error: |
61 | 61 | return retval; |
62 | 62 | } |
63 | - | |
64 | 63 | EXPORT_SYMBOL(inode_change_ok); |
64 | + | |
65 | +/** | |
66 | + * inode_newsize_ok - may this inode be truncated to a given size | |
67 | + * @inode: the inode to be truncated | |
68 | + * @offset: the new size to assign to the inode | |
69 | + * @Returns: 0 on success, -ve errno on failure | |
70 | + * | |
71 | + * inode_newsize_ok will check filesystem limits and ulimits to check that the | |
72 | + * new inode size is within limits. inode_newsize_ok will also send SIGXFSZ | |
73 | + * when necessary. Caller must not proceed with inode size change if failure is | |
74 | + * returned. @inode must be a file (not directory), with appropriate | |
75 | + * permissions to allow truncate (inode_newsize_ok does NOT check these | |
76 | + * conditions). | |
77 | + * | |
78 | + * inode_newsize_ok must be called with i_mutex held. | |
79 | + */ | |
80 | +int inode_newsize_ok(const struct inode *inode, loff_t offset) | |
81 | +{ | |
82 | + if (inode->i_size < offset) { | |
83 | + unsigned long limit; | |
84 | + | |
85 | + limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; | |
86 | + if (limit != RLIM_INFINITY && offset > limit) | |
87 | + goto out_sig; | |
88 | + if (offset > inode->i_sb->s_maxbytes) | |
89 | + goto out_big; | |
90 | + } else { | |
91 | + /* | |
92 | + * truncation of in-use swapfiles is disallowed - it would | |
93 | + * cause subsequent swapout to scribble on the now-freed | |
94 | + * blocks. | |
95 | + */ | |
96 | + if (IS_SWAPFILE(inode)) | |
97 | + return -ETXTBSY; | |
98 | + } | |
99 | + | |
100 | + return 0; | |
101 | +out_sig: | |
102 | + send_sig(SIGXFSZ, current, 0); | |
103 | +out_big: | |
104 | + return -EFBIG; | |
105 | +} | |
106 | +EXPORT_SYMBOL(inode_newsize_ok); | |
65 | 107 | |
66 | 108 | int inode_setattr(struct inode * inode, struct iattr * attr) |
67 | 109 | { |
include/linux/fs.h
... | ... | @@ -2382,7 +2382,8 @@ |
2382 | 2382 | #define buffer_migrate_page NULL |
2383 | 2383 | #endif |
2384 | 2384 | |
2385 | -extern int inode_change_ok(struct inode *, struct iattr *); | |
2385 | +extern int inode_change_ok(const struct inode *, struct iattr *); | |
2386 | +extern int inode_newsize_ok(const struct inode *, loff_t offset); | |
2386 | 2387 | extern int __must_check inode_setattr(struct inode *, struct iattr *); |
2387 | 2388 | |
2388 | 2389 | extern void file_update_time(struct file *file); |
include/linux/mm.h
... | ... | @@ -791,8 +791,9 @@ |
791 | 791 | unmap_mapping_range(mapping, holebegin, holelen, 0); |
792 | 792 | } |
793 | 793 | |
794 | -extern int vmtruncate(struct inode * inode, loff_t offset); | |
795 | -extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end); | |
794 | +extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new); | |
795 | +extern int vmtruncate(struct inode *inode, loff_t offset); | |
796 | +extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end); | |
796 | 797 | |
797 | 798 | #ifdef CONFIG_MMU |
798 | 799 | extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, |
mm/filemap.c
mm/memory.c
... | ... | @@ -297,7 +297,8 @@ |
297 | 297 | unsigned long addr = vma->vm_start; |
298 | 298 | |
299 | 299 | /* |
300 | - * Hide vma from rmap and vmtruncate before freeing pgtables | |
300 | + * Hide vma from rmap and truncate_pagecache before freeing | |
301 | + * pgtables | |
301 | 302 | */ |
302 | 303 | anon_vma_unlink(vma); |
303 | 304 | unlink_file_vma(vma); |
... | ... | @@ -2407,7 +2408,7 @@ |
2407 | 2408 | * @mapping: the address space containing mmaps to be unmapped. |
2408 | 2409 | * @holebegin: byte in first page to unmap, relative to the start of |
2409 | 2410 | * the underlying file. This will be rounded down to a PAGE_SIZE |
2410 | - * boundary. Note that this is different from vmtruncate(), which | |
2411 | + * boundary. Note that this is different from truncate_pagecache(), which | |
2411 | 2412 | * must keep the partial page. In contrast, we must get rid of |
2412 | 2413 | * partial pages. |
2413 | 2414 | * @holelen: size of prospective hole in bytes. This will be rounded |
... | ... | @@ -2457,63 +2458,6 @@ |
2457 | 2458 | spin_unlock(&mapping->i_mmap_lock); |
2458 | 2459 | } |
2459 | 2460 | EXPORT_SYMBOL(unmap_mapping_range); |
2460 | - | |
2461 | -/** | |
2462 | - * vmtruncate - unmap mappings "freed" by truncate() syscall | |
2463 | - * @inode: inode of the file used | |
2464 | - * @offset: file offset to start truncating | |
2465 | - * | |
2466 | - * NOTE! We have to be ready to update the memory sharing | |
2467 | - * between the file and the memory map for a potential last | |
2468 | - * incomplete page. Ugly, but necessary. | |
2469 | - */ | |
2470 | -int vmtruncate(struct inode * inode, loff_t offset) | |
2471 | -{ | |
2472 | - if (inode->i_size < offset) { | |
2473 | - unsigned long limit; | |
2474 | - | |
2475 | - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; | |
2476 | - if (limit != RLIM_INFINITY && offset > limit) | |
2477 | - goto out_sig; | |
2478 | - if (offset > inode->i_sb->s_maxbytes) | |
2479 | - goto out_big; | |
2480 | - i_size_write(inode, offset); | |
2481 | - } else { | |
2482 | - struct address_space *mapping = inode->i_mapping; | |
2483 | - | |
2484 | - /* | |
2485 | - * truncation of in-use swapfiles is disallowed - it would | |
2486 | - * cause subsequent swapout to scribble on the now-freed | |
2487 | - * blocks. | |
2488 | - */ | |
2489 | - if (IS_SWAPFILE(inode)) | |
2490 | - return -ETXTBSY; | |
2491 | - i_size_write(inode, offset); | |
2492 | - | |
2493 | - /* | |
2494 | - * unmap_mapping_range is called twice, first simply for | |
2495 | - * efficiency so that truncate_inode_pages does fewer | |
2496 | - * single-page unmaps. However after this first call, and | |
2497 | - * before truncate_inode_pages finishes, it is possible for | |
2498 | - * private pages to be COWed, which remain after | |
2499 | - * truncate_inode_pages finishes, hence the second | |
2500 | - * unmap_mapping_range call must be made for correctness. | |
2501 | - */ | |
2502 | - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); | |
2503 | - truncate_inode_pages(mapping, offset); | |
2504 | - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); | |
2505 | - } | |
2506 | - | |
2507 | - if (inode->i_op->truncate) | |
2508 | - inode->i_op->truncate(inode); | |
2509 | - return 0; | |
2510 | - | |
2511 | -out_sig: | |
2512 | - send_sig(SIGXFSZ, current, 0); | |
2513 | -out_big: | |
2514 | - return -EFBIG; | |
2515 | -} | |
2516 | -EXPORT_SYMBOL(vmtruncate); | |
2517 | 2461 | |
2518 | 2462 | int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end) |
2519 | 2463 | { |
mm/mremap.c
... | ... | @@ -86,8 +86,8 @@ |
86 | 86 | if (vma->vm_file) { |
87 | 87 | /* |
88 | 88 | * Subtle point from Rajesh Venkatasubramanian: before |
89 | - * moving file-based ptes, we must lock vmtruncate out, | |
90 | - * since it might clean the dst vma before the src vma, | |
89 | + * moving file-based ptes, we must lock truncate_pagecache | |
90 | + * out, since it might clean the dst vma before the src vma, | |
91 | 91 | * and we propagate stale pages into the dst afterward. |
92 | 92 | */ |
93 | 93 | mapping = vma->vm_file->f_mapping; |
mm/nommu.c
... | ... | @@ -83,46 +83,6 @@ |
83 | 83 | }; |
84 | 84 | |
85 | 85 | /* |
86 | - * Handle all mappings that got truncated by a "truncate()" | |
87 | - * system call. | |
88 | - * | |
89 | - * NOTE! We have to be ready to update the memory sharing | |
90 | - * between the file and the memory map for a potential last | |
91 | - * incomplete page. Ugly, but necessary. | |
92 | - */ | |
93 | -int vmtruncate(struct inode *inode, loff_t offset) | |
94 | -{ | |
95 | - struct address_space *mapping = inode->i_mapping; | |
96 | - unsigned long limit; | |
97 | - | |
98 | - if (inode->i_size < offset) | |
99 | - goto do_expand; | |
100 | - i_size_write(inode, offset); | |
101 | - | |
102 | - truncate_inode_pages(mapping, offset); | |
103 | - goto out_truncate; | |
104 | - | |
105 | -do_expand: | |
106 | - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; | |
107 | - if (limit != RLIM_INFINITY && offset > limit) | |
108 | - goto out_sig; | |
109 | - if (offset > inode->i_sb->s_maxbytes) | |
110 | - goto out; | |
111 | - i_size_write(inode, offset); | |
112 | - | |
113 | -out_truncate: | |
114 | - if (inode->i_op->truncate) | |
115 | - inode->i_op->truncate(inode); | |
116 | - return 0; | |
117 | -out_sig: | |
118 | - send_sig(SIGXFSZ, current, 0); | |
119 | -out: | |
120 | - return -EFBIG; | |
121 | -} | |
122 | - | |
123 | -EXPORT_SYMBOL(vmtruncate); | |
124 | - | |
125 | -/* | |
126 | 86 | * Return the total memory allocated for this pointer, not |
127 | 87 | * just what the caller asked for. |
128 | 88 | * |
mm/truncate.c
... | ... | @@ -465,4 +465,68 @@ |
465 | 465 | return invalidate_inode_pages2_range(mapping, 0, -1); |
466 | 466 | } |
467 | 467 | EXPORT_SYMBOL_GPL(invalidate_inode_pages2); |
468 | + | |
469 | +/** | |
470 | + * truncate_pagecache - unmap and remove pagecache that has been truncated | |
471 | + * @inode: inode | |
472 | + * @old: old file offset | |
473 | + * @new: new file offset | |
474 | + * | |
475 | + * inode's new i_size must already be written before truncate_pagecache | |
476 | + * is called. | |
477 | + * | |
478 | + * This function should typically be called before the filesystem | |
479 | + * releases resources associated with the freed range (eg. deallocates | |
480 | + * blocks). This way, pagecache will always stay logically coherent | |
481 | + * with on-disk format, and the filesystem would not have to deal with | |
482 | + * situations such as writepage being called for a page that has already | |
483 | + * had its underlying blocks deallocated. | |
484 | + */ | |
485 | +void truncate_pagecache(struct inode *inode, loff_t old, loff_t new) | |
486 | +{ | |
487 | + if (new < old) { | |
488 | + struct address_space *mapping = inode->i_mapping; | |
489 | + | |
490 | + /* | |
491 | + * unmap_mapping_range is called twice, first simply for | |
492 | + * efficiency so that truncate_inode_pages does fewer | |
493 | + * single-page unmaps. However after this first call, and | |
494 | + * before truncate_inode_pages finishes, it is possible for | |
495 | + * private pages to be COWed, which remain after | |
496 | + * truncate_inode_pages finishes, hence the second | |
497 | + * unmap_mapping_range call must be made for correctness. | |
498 | + */ | |
499 | + unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1); | |
500 | + truncate_inode_pages(mapping, new); | |
501 | + unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1); | |
502 | + } | |
503 | +} | |
504 | +EXPORT_SYMBOL(truncate_pagecache); | |
505 | + | |
506 | +/** | |
507 | + * vmtruncate - unmap mappings "freed" by truncate() syscall | |
508 | + * @inode: inode of the file used | |
509 | + * @offset: file offset to start truncating | |
510 | + * | |
511 | + * NOTE! We have to be ready to update the memory sharing | |
512 | + * between the file and the memory map for a potential last | |
513 | + * incomplete page. Ugly, but necessary. | |
514 | + */ | |
515 | +int vmtruncate(struct inode *inode, loff_t offset) | |
516 | +{ | |
517 | + loff_t oldsize; | |
518 | + int error; | |
519 | + | |
520 | + error = inode_newsize_ok(inode, offset); | |
521 | + if (error) | |
522 | + return error; | |
523 | + oldsize = inode->i_size; | |
524 | + i_size_write(inode, offset); | |
525 | + truncate_pagecache(inode, oldsize, offset); | |
526 | + if (inode->i_op->truncate) | |
527 | + inode->i_op->truncate(inode); | |
528 | + | |
529 | + return error; | |
530 | +} | |
531 | +EXPORT_SYMBOL(vmtruncate); |