Commit 91b0abe36a7b2b3b02d7500925a5f8455334f0e5
Committed by
Linus Torvalds
1 parent
0cd6144aad
Exists in
master
and in
13 other branches
mm + fs: store shadow entries in page cache
Reclaim will be leaving shadow entries in the page cache radix tree upon evicting the real page. As those pages are found from the LRU, an iput() can lead to the inode being freed concurrently. At this point, reclaim must no longer install shadow pages because the inode freeing code needs to ensure the page tree is really empty.

Add an address_space flag, AS_EXITING, that the inode freeing code sets under the tree lock before doing the final truncate. Reclaim will check for this flag before installing shadow pages.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Minchan Kim <minchan@kernel.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Bob Liu <bob.liu@oracle.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jan Kara <jack@suse.cz>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Luigi Semenzato <semenzato@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Metin Doslu <metin@citusdata.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Ozgun Erdogan <ozgun@citusdata.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Roman Gushchin <klamm@yandex-team.ru>
Cc: Ryan Mallon <rmallon@gmail.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 50 changed files with 147 additions and 65 deletions Side-by-side Diff
- Documentation/filesystems/porting
- drivers/staging/lustre/lustre/llite/llite_lib.c
- fs/9p/vfs_inode.c
- fs/affs/inode.c
- fs/afs/inode.c
- fs/bfs/inode.c
- fs/block_dev.c
- fs/btrfs/inode.c
- fs/cifs/cifsfs.c
- fs/coda/inode.c
- fs/ecryptfs/super.c
- fs/exofs/inode.c
- fs/ext2/inode.c
- fs/ext3/inode.c
- fs/ext4/inode.c
- fs/f2fs/inode.c
- fs/fat/inode.c
- fs/freevxfs/vxfs_inode.c
- fs/fuse/inode.c
- fs/gfs2/super.c
- fs/hfs/inode.c
- fs/hfsplus/super.c
- fs/hostfs/hostfs_kern.c
- fs/hpfs/inode.c
- fs/inode.c
- fs/jffs2/fs.c
- fs/jfs/inode.c
- fs/kernfs/inode.c
- fs/logfs/readwrite.c
- fs/minix/inode.c
- fs/ncpfs/inode.c
- fs/nfs/inode.c
- fs/nfs/nfs4super.c
- fs/nilfs2/inode.c
- fs/ntfs/inode.c
- fs/ocfs2/inode.c
- fs/omfs/inode.c
- fs/proc/inode.c
- fs/reiserfs/inode.c
- fs/sysv/inode.c
- fs/ubifs/super.c
- fs/udf/inode.c
- fs/ufs/inode.c
- fs/xfs/xfs_super.c
- include/linux/fs.h
- include/linux/mm.h
- include/linux/pagemap.h
- mm/filemap.c
- mm/truncate.c
- mm/vmscan.c
Documentation/filesystems/porting
... | ... | @@ -295,9 +295,9 @@ |
295 | 295 | ->clear_inode() and ->delete_inode() are gone; ->evict_inode() should |
296 | 296 | be used instead. It gets called whenever the inode is evicted, whether it has |
297 | 297 | remaining links or not. Caller does *not* evict the pagecache or inode-associated |
298 | -metadata buffers; getting rid of those is responsibility of method, as it had | |
299 | -been for ->delete_inode(). Caller makes sure async writeback cannot be running | |
300 | -for the inode while (or after) ->evict_inode() is called. | |
298 | +metadata buffers; the method has to use truncate_inode_pages_final() to get rid | |
299 | +of those. Caller makes sure async writeback cannot be running for the inode while | |
300 | +(or after) ->evict_inode() is called. | |
301 | 301 | |
302 | 302 | ->drop_inode() returns int now; it's called on final iput() with |
303 | 303 | inode->i_lock held and it returns true if filesystems wants the inode to be |
drivers/staging/lustre/lustre/llite/llite_lib.c
... | ... | @@ -1877,7 +1877,7 @@ |
1877 | 1877 | cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, |
1878 | 1878 | CL_FSYNC_DISCARD, 1); |
1879 | 1879 | |
1880 | - truncate_inode_pages(&inode->i_data, 0); | |
1880 | + truncate_inode_pages_final(&inode->i_data); | |
1881 | 1881 | |
1882 | 1882 | /* Workaround for LU-118 */ |
1883 | 1883 | if (inode->i_data.nrpages) { |
fs/9p/vfs_inode.c
fs/affs/inode.c
... | ... | @@ -259,7 +259,7 @@ |
259 | 259 | { |
260 | 260 | unsigned long cache_page; |
261 | 261 | pr_debug("AFFS: evict_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink); |
262 | - truncate_inode_pages(&inode->i_data, 0); | |
262 | + truncate_inode_pages_final(&inode->i_data); | |
263 | 263 | |
264 | 264 | if (!inode->i_nlink) { |
265 | 265 | inode->i_size = 0; |
fs/afs/inode.c
fs/bfs/inode.c
fs/block_dev.c
... | ... | @@ -83,7 +83,7 @@ |
83 | 83 | { |
84 | 84 | struct address_space *mapping = bdev->bd_inode->i_mapping; |
85 | 85 | |
86 | - if (mapping->nrpages == 0) | |
86 | + if (mapping->nrpages == 0 && mapping->nrshadows == 0) | |
87 | 87 | return; |
88 | 88 | |
89 | 89 | invalidate_bh_lrus(); |
... | ... | @@ -419,7 +419,7 @@ |
419 | 419 | { |
420 | 420 | struct block_device *bdev = &BDEV_I(inode)->bdev; |
421 | 421 | struct list_head *p; |
422 | - truncate_inode_pages(&inode->i_data, 0); | |
422 | + truncate_inode_pages_final(&inode->i_data); | |
423 | 423 | invalidate_inode_buffers(inode); /* is it needed here? */ |
424 | 424 | clear_inode(inode); |
425 | 425 | spin_lock(&bdev_lock); |
fs/btrfs/inode.c
... | ... | @@ -4593,7 +4593,7 @@ |
4593 | 4593 | struct rb_node *node; |
4594 | 4594 | |
4595 | 4595 | ASSERT(inode->i_state & I_FREEING); |
4596 | - truncate_inode_pages(&inode->i_data, 0); | |
4596 | + truncate_inode_pages_final(&inode->i_data); | |
4597 | 4597 | |
4598 | 4598 | write_lock(&map_tree->lock); |
4599 | 4599 | while (!RB_EMPTY_ROOT(&map_tree->map)) { |
fs/cifs/cifsfs.c
fs/coda/inode.c
fs/ecryptfs/super.c
fs/exofs/inode.c
... | ... | @@ -1486,7 +1486,7 @@ |
1486 | 1486 | struct ore_io_state *ios; |
1487 | 1487 | int ret; |
1488 | 1488 | |
1489 | - truncate_inode_pages(&inode->i_data, 0); | |
1489 | + truncate_inode_pages_final(&inode->i_data); | |
1490 | 1490 | |
1491 | 1491 | /* TODO: should do better here */ |
1492 | 1492 | if (inode->i_nlink || is_bad_inode(inode)) |
fs/ext2/inode.c
fs/ext3/inode.c
... | ... | @@ -228,7 +228,7 @@ |
228 | 228 | log_wait_commit(journal, commit_tid); |
229 | 229 | filemap_write_and_wait(&inode->i_data); |
230 | 230 | } |
231 | - truncate_inode_pages(&inode->i_data, 0); | |
231 | + truncate_inode_pages_final(&inode->i_data); | |
232 | 232 | |
233 | 233 | ext3_discard_reservation(inode); |
234 | 234 | rsv = ei->i_block_alloc_info; |
fs/ext4/inode.c
... | ... | @@ -215,7 +215,7 @@ |
215 | 215 | jbd2_complete_transaction(journal, commit_tid); |
216 | 216 | filemap_write_and_wait(&inode->i_data); |
217 | 217 | } |
218 | - truncate_inode_pages(&inode->i_data, 0); | |
218 | + truncate_inode_pages_final(&inode->i_data); | |
219 | 219 | |
220 | 220 | WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count)); |
221 | 221 | goto no_delete; |
... | ... | @@ -226,7 +226,7 @@ |
226 | 226 | |
227 | 227 | if (ext4_should_order_data(inode)) |
228 | 228 | ext4_begin_ordered_truncate(inode, 0); |
229 | - truncate_inode_pages(&inode->i_data, 0); | |
229 | + truncate_inode_pages_final(&inode->i_data); | |
230 | 230 | |
231 | 231 | WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count)); |
232 | 232 | if (is_bad_inode(inode)) |
fs/f2fs/inode.c
... | ... | @@ -260,7 +260,7 @@ |
260 | 260 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
261 | 261 | |
262 | 262 | trace_f2fs_evict_inode(inode); |
263 | - truncate_inode_pages(&inode->i_data, 0); | |
263 | + truncate_inode_pages_final(&inode->i_data); | |
264 | 264 | |
265 | 265 | if (inode->i_ino == F2FS_NODE_INO(sbi) || |
266 | 266 | inode->i_ino == F2FS_META_INO(sbi)) |
fs/fat/inode.c
fs/freevxfs/vxfs_inode.c
fs/fuse/inode.c
... | ... | @@ -123,7 +123,7 @@ |
123 | 123 | |
124 | 124 | static void fuse_evict_inode(struct inode *inode) |
125 | 125 | { |
126 | - truncate_inode_pages(&inode->i_data, 0); | |
126 | + truncate_inode_pages_final(&inode->i_data); | |
127 | 127 | clear_inode(inode); |
128 | 128 | if (inode->i_sb->s_flags & MS_ACTIVE) { |
129 | 129 | struct fuse_conn *fc = get_fuse_conn(inode); |
fs/gfs2/super.c
... | ... | @@ -1558,7 +1558,7 @@ |
1558 | 1558 | fs_warn(sdp, "gfs2_evict_inode: %d\n", error); |
1559 | 1559 | out: |
1560 | 1560 | /* Case 3 starts here */ |
1561 | - truncate_inode_pages(&inode->i_data, 0); | |
1561 | + truncate_inode_pages_final(&inode->i_data); | |
1562 | 1562 | gfs2_rs_delete(ip, NULL); |
1563 | 1563 | gfs2_ordered_del_inode(ip); |
1564 | 1564 | clear_inode(inode); |
fs/hfs/inode.c
... | ... | @@ -547,7 +547,7 @@ |
547 | 547 | |
548 | 548 | void hfs_evict_inode(struct inode *inode) |
549 | 549 | { |
550 | - truncate_inode_pages(&inode->i_data, 0); | |
550 | + truncate_inode_pages_final(&inode->i_data); | |
551 | 551 | clear_inode(inode); |
552 | 552 | if (HFS_IS_RSRC(inode) && HFS_I(inode)->rsrc_inode) { |
553 | 553 | HFS_I(HFS_I(inode)->rsrc_inode)->rsrc_inode = NULL; |
fs/hfsplus/super.c
... | ... | @@ -161,7 +161,7 @@ |
161 | 161 | static void hfsplus_evict_inode(struct inode *inode) |
162 | 162 | { |
163 | 163 | hfs_dbg(INODE, "hfsplus_evict_inode: %lu\n", inode->i_ino); |
164 | - truncate_inode_pages(&inode->i_data, 0); | |
164 | + truncate_inode_pages_final(&inode->i_data); | |
165 | 165 | clear_inode(inode); |
166 | 166 | if (HFSPLUS_IS_RSRC(inode)) { |
167 | 167 | HFSPLUS_I(HFSPLUS_I(inode)->rsrc_inode)->rsrc_inode = NULL; |
fs/hostfs/hostfs_kern.c
... | ... | @@ -230,7 +230,7 @@ |
230 | 230 | |
231 | 231 | static void hostfs_evict_inode(struct inode *inode) |
232 | 232 | { |
233 | - truncate_inode_pages(&inode->i_data, 0); | |
233 | + truncate_inode_pages_final(&inode->i_data); | |
234 | 234 | clear_inode(inode); |
235 | 235 | if (HOSTFS_I(inode)->fd != -1) { |
236 | 236 | close_file(&HOSTFS_I(inode)->fd); |
fs/hpfs/inode.c
fs/inode.c
... | ... | @@ -503,6 +503,7 @@ |
503 | 503 | */ |
504 | 504 | spin_lock_irq(&inode->i_data.tree_lock); |
505 | 505 | BUG_ON(inode->i_data.nrpages); |
506 | + BUG_ON(inode->i_data.nrshadows); | |
506 | 507 | spin_unlock_irq(&inode->i_data.tree_lock); |
507 | 508 | BUG_ON(!list_empty(&inode->i_data.private_list)); |
508 | 509 | BUG_ON(!(inode->i_state & I_FREEING)); |
... | ... | @@ -548,8 +549,7 @@ |
548 | 549 | if (op->evict_inode) { |
549 | 550 | op->evict_inode(inode); |
550 | 551 | } else { |
551 | - if (inode->i_data.nrpages) | |
552 | - truncate_inode_pages(&inode->i_data, 0); | |
552 | + truncate_inode_pages_final(&inode->i_data); | |
553 | 553 | clear_inode(inode); |
554 | 554 | } |
555 | 555 | if (S_ISBLK(inode->i_mode) && inode->i_bdev) |
fs/jffs2/fs.c
... | ... | @@ -242,7 +242,7 @@ |
242 | 242 | |
243 | 243 | jffs2_dbg(1, "%s(): ino #%lu mode %o\n", |
244 | 244 | __func__, inode->i_ino, inode->i_mode); |
245 | - truncate_inode_pages(&inode->i_data, 0); | |
245 | + truncate_inode_pages_final(&inode->i_data); | |
246 | 246 | clear_inode(inode); |
247 | 247 | jffs2_do_clear_inode(c, f); |
248 | 248 | } |
fs/jfs/inode.c
... | ... | @@ -154,7 +154,7 @@ |
154 | 154 | dquot_initialize(inode); |
155 | 155 | |
156 | 156 | if (JFS_IP(inode)->fileset == FILESYSTEM_I) { |
157 | - truncate_inode_pages(&inode->i_data, 0); | |
157 | + truncate_inode_pages_final(&inode->i_data); | |
158 | 158 | |
159 | 159 | if (test_cflag(COMMIT_Freewmap, inode)) |
160 | 160 | jfs_free_zero_link(inode); |
... | ... | @@ -168,7 +168,7 @@ |
168 | 168 | dquot_free_inode(inode); |
169 | 169 | } |
170 | 170 | } else { |
171 | - truncate_inode_pages(&inode->i_data, 0); | |
171 | + truncate_inode_pages_final(&inode->i_data); | |
172 | 172 | } |
173 | 173 | clear_inode(inode); |
174 | 174 | dquot_drop(inode); |
fs/kernfs/inode.c
fs/logfs/readwrite.c
... | ... | @@ -2180,7 +2180,7 @@ |
2180 | 2180 | do_delete_inode(inode); |
2181 | 2181 | } |
2182 | 2182 | } |
2183 | - truncate_inode_pages(&inode->i_data, 0); | |
2183 | + truncate_inode_pages_final(&inode->i_data); | |
2184 | 2184 | clear_inode(inode); |
2185 | 2185 | |
2186 | 2186 | /* Cheaper version of write_inode. All changes are concealed in |
fs/minix/inode.c
fs/ncpfs/inode.c
fs/nfs/inode.c
fs/nfs/nfs4super.c
fs/nilfs2/inode.c
... | ... | @@ -783,16 +783,14 @@ |
783 | 783 | int ret; |
784 | 784 | |
785 | 785 | if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) { |
786 | - if (inode->i_data.nrpages) | |
787 | - truncate_inode_pages(&inode->i_data, 0); | |
786 | + truncate_inode_pages_final(&inode->i_data); | |
788 | 787 | clear_inode(inode); |
789 | 788 | nilfs_clear_inode(inode); |
790 | 789 | return; |
791 | 790 | } |
792 | 791 | nilfs_transaction_begin(sb, &ti, 0); /* never fails */ |
793 | 792 | |
794 | - if (inode->i_data.nrpages) | |
795 | - truncate_inode_pages(&inode->i_data, 0); | |
793 | + truncate_inode_pages_final(&inode->i_data); | |
796 | 794 | |
797 | 795 | /* TODO: some of the following operations may fail. */ |
798 | 796 | nilfs_truncate_bmap(ii, 0); |
fs/ntfs/inode.c
fs/ocfs2/inode.c
... | ... | @@ -964,7 +964,7 @@ |
964 | 964 | (unsigned long long)OCFS2_I(inode)->ip_blkno, sync_data); |
965 | 965 | if (sync_data) |
966 | 966 | filemap_write_and_wait(inode->i_mapping); |
967 | - truncate_inode_pages(&inode->i_data, 0); | |
967 | + truncate_inode_pages_final(&inode->i_data); | |
968 | 968 | } |
969 | 969 | |
970 | 970 | static void ocfs2_delete_inode(struct inode *inode) |
... | ... | @@ -1181,7 +1181,7 @@ |
1181 | 1181 | (OCFS2_I(inode)->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)) { |
1182 | 1182 | ocfs2_delete_inode(inode); |
1183 | 1183 | } else { |
1184 | - truncate_inode_pages(&inode->i_data, 0); | |
1184 | + truncate_inode_pages_final(&inode->i_data); | |
1185 | 1185 | } |
1186 | 1186 | ocfs2_clear_inode(inode); |
1187 | 1187 | } |
fs/omfs/inode.c
fs/proc/inode.c
fs/reiserfs/inode.c
fs/sysv/inode.c
fs/ubifs/super.c
... | ... | @@ -351,7 +351,7 @@ |
351 | 351 | dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode); |
352 | 352 | ubifs_assert(!atomic_read(&inode->i_count)); |
353 | 353 | |
354 | - truncate_inode_pages(&inode->i_data, 0); | |
354 | + truncate_inode_pages_final(&inode->i_data); | |
355 | 355 | |
356 | 356 | if (inode->i_nlink) |
357 | 357 | goto done; |
fs/udf/inode.c
... | ... | @@ -146,8 +146,8 @@ |
146 | 146 | want_delete = 1; |
147 | 147 | udf_setsize(inode, 0); |
148 | 148 | udf_update_inode(inode, IS_SYNC(inode)); |
149 | - } else | |
150 | - truncate_inode_pages(&inode->i_data, 0); | |
149 | + } | |
150 | + truncate_inode_pages_final(&inode->i_data); | |
151 | 151 | invalidate_inode_buffers(inode); |
152 | 152 | clear_inode(inode); |
153 | 153 | if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB && |
fs/ufs/inode.c
... | ... | @@ -885,7 +885,7 @@ |
885 | 885 | if (!inode->i_nlink && !is_bad_inode(inode)) |
886 | 886 | want_delete = 1; |
887 | 887 | |
888 | - truncate_inode_pages(&inode->i_data, 0); | |
888 | + truncate_inode_pages_final(&inode->i_data); | |
889 | 889 | if (want_delete) { |
890 | 890 | loff_t old_i_size; |
891 | 891 | /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/ |
fs/xfs/xfs_super.c
include/linux/fs.h
... | ... | @@ -419,6 +419,7 @@ |
419 | 419 | struct mutex i_mmap_mutex; /* protect tree, count, list */ |
420 | 420 | /* Protected by tree_lock together with the radix tree */ |
421 | 421 | unsigned long nrpages; /* number of total pages */ |
422 | + unsigned long nrshadows; /* number of shadow entries */ | |
422 | 423 | pgoff_t writeback_index;/* writeback starts here */ |
423 | 424 | const struct address_space_operations *a_ops; /* methods */ |
424 | 425 | unsigned long flags; /* error bits/gfp mask */ |
include/linux/mm.h
... | ... | @@ -1834,6 +1834,7 @@ |
1834 | 1834 | extern void truncate_inode_pages(struct address_space *, loff_t); |
1835 | 1835 | extern void truncate_inode_pages_range(struct address_space *, |
1836 | 1836 | loff_t lstart, loff_t lend); |
1837 | +extern void truncate_inode_pages_final(struct address_space *); | |
1837 | 1838 | |
1838 | 1839 | /* generic vm_area_ops exported for stackable file systems */ |
1839 | 1840 | extern int filemap_fault(struct vm_area_struct *, struct vm_fault *); |
include/linux/pagemap.h
... | ... | @@ -25,6 +25,7 @@ |
25 | 25 | AS_MM_ALL_LOCKS = __GFP_BITS_SHIFT + 2, /* under mm_take_all_locks() */ |
26 | 26 | AS_UNEVICTABLE = __GFP_BITS_SHIFT + 3, /* e.g., ramdisk, SHM_LOCK */ |
27 | 27 | AS_BALLOON_MAP = __GFP_BITS_SHIFT + 4, /* balloon page special map */ |
28 | + AS_EXITING = __GFP_BITS_SHIFT + 5, /* final truncate in progress */ | |
28 | 29 | }; |
29 | 30 | |
30 | 31 | static inline void mapping_set_error(struct address_space *mapping, int error) |
... | ... | @@ -69,6 +70,16 @@ |
69 | 70 | return mapping && test_bit(AS_BALLOON_MAP, &mapping->flags); |
70 | 71 | } |
71 | 72 | |
73 | +static inline void mapping_set_exiting(struct address_space *mapping) | |
74 | +{ | |
75 | + set_bit(AS_EXITING, &mapping->flags); | |
76 | +} | |
77 | + | |
78 | +static inline int mapping_exiting(struct address_space *mapping) | |
79 | +{ | |
80 | + return test_bit(AS_EXITING, &mapping->flags); | |
81 | +} | |
82 | + | |
72 | 83 | static inline gfp_t mapping_gfp_mask(struct address_space * mapping) |
73 | 84 | { |
74 | 85 | return (__force gfp_t)mapping->flags & __GFP_BITS_MASK; |
... | ... | @@ -547,7 +558,7 @@ |
547 | 558 | int add_to_page_cache_lru(struct page *page, struct address_space *mapping, |
548 | 559 | pgoff_t index, gfp_t gfp_mask); |
549 | 560 | extern void delete_from_page_cache(struct page *page); |
550 | -extern void __delete_from_page_cache(struct page *page); | |
561 | +extern void __delete_from_page_cache(struct page *page, void *shadow); | |
551 | 562 | int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask); |
552 | 563 | |
553 | 564 | /* |
mm/filemap.c
... | ... | @@ -107,12 +107,33 @@ |
107 | 107 | * ->tasklist_lock (memory_failure, collect_procs_ao) |
108 | 108 | */ |
109 | 109 | |
110 | +static void page_cache_tree_delete(struct address_space *mapping, | |
111 | + struct page *page, void *shadow) | |
112 | +{ | |
113 | + if (shadow) { | |
114 | + void **slot; | |
115 | + | |
116 | + slot = radix_tree_lookup_slot(&mapping->page_tree, page->index); | |
117 | + radix_tree_replace_slot(slot, shadow); | |
118 | + mapping->nrshadows++; | |
119 | + /* | |
120 | + * Make sure the nrshadows update is committed before | |
121 | + * the nrpages update so that final truncate racing | |
122 | + * with reclaim does not see both counters 0 at the | |
123 | + * same time and miss a shadow entry. | |
124 | + */ | |
125 | + smp_wmb(); | |
126 | + } else | |
127 | + radix_tree_delete(&mapping->page_tree, page->index); | |
128 | + mapping->nrpages--; | |
129 | +} | |
130 | + | |
110 | 131 | /* |
111 | 132 | * Delete a page from the page cache and free it. Caller has to make |
112 | 133 | * sure the page is locked and that nobody else uses it - or that usage |
113 | 134 | * is safe. The caller must hold the mapping's tree_lock. |
114 | 135 | */ |
115 | -void __delete_from_page_cache(struct page *page) | |
136 | +void __delete_from_page_cache(struct page *page, void *shadow) | |
116 | 137 | { |
117 | 138 | struct address_space *mapping = page->mapping; |
118 | 139 | |
119 | 140 | |
... | ... | @@ -127,10 +148,11 @@ |
127 | 148 | else |
128 | 149 | cleancache_invalidate_page(mapping, page); |
129 | 150 | |
130 | - radix_tree_delete(&mapping->page_tree, page->index); | |
151 | + page_cache_tree_delete(mapping, page, shadow); | |
152 | + | |
131 | 153 | page->mapping = NULL; |
132 | 154 | /* Leave page->index set: truncation lookup relies upon it */ |
133 | - mapping->nrpages--; | |
155 | + | |
134 | 156 | __dec_zone_page_state(page, NR_FILE_PAGES); |
135 | 157 | if (PageSwapBacked(page)) |
136 | 158 | __dec_zone_page_state(page, NR_SHMEM); |
... | ... | @@ -166,7 +188,7 @@ |
166 | 188 | |
167 | 189 | freepage = mapping->a_ops->freepage; |
168 | 190 | spin_lock_irq(&mapping->tree_lock); |
169 | - __delete_from_page_cache(page); | |
191 | + __delete_from_page_cache(page, NULL); | |
170 | 192 | spin_unlock_irq(&mapping->tree_lock); |
171 | 193 | mem_cgroup_uncharge_cache_page(page); |
172 | 194 | |
... | ... | @@ -426,7 +448,7 @@ |
426 | 448 | new->index = offset; |
427 | 449 | |
428 | 450 | spin_lock_irq(&mapping->tree_lock); |
429 | - __delete_from_page_cache(old); | |
451 | + __delete_from_page_cache(old, NULL); | |
430 | 452 | error = radix_tree_insert(&mapping->page_tree, offset, new); |
431 | 453 | BUG_ON(error); |
432 | 454 | mapping->nrpages++; |
... | ... | @@ -460,6 +482,7 @@ |
460 | 482 | if (!radix_tree_exceptional_entry(p)) |
461 | 483 | return -EEXIST; |
462 | 484 | radix_tree_replace_slot(slot, page); |
485 | + mapping->nrshadows--; | |
463 | 486 | mapping->nrpages++; |
464 | 487 | return 0; |
465 | 488 | } |
mm/truncate.c
... | ... | @@ -35,7 +35,8 @@ |
35 | 35 | * without the tree itself locked. These unlocked entries |
36 | 36 | * need verification under the tree lock. |
37 | 37 | */ |
38 | - radix_tree_delete_item(&mapping->page_tree, index, entry); | |
38 | + if (radix_tree_delete_item(&mapping->page_tree, index, entry) == entry) | |
39 | + mapping->nrshadows--; | |
39 | 40 | spin_unlock_irq(&mapping->tree_lock); |
40 | 41 | } |
41 | 42 | |
... | ... | @@ -229,7 +230,7 @@ |
229 | 230 | int i; |
230 | 231 | |
231 | 232 | cleancache_invalidate_inode(mapping); |
232 | - if (mapping->nrpages == 0) | |
233 | + if (mapping->nrpages == 0 && mapping->nrshadows == 0) | |
233 | 234 | return; |
234 | 235 | |
235 | 236 | /* Offsets within partial pages */ |
... | ... | @@ -392,6 +393,53 @@ |
392 | 393 | EXPORT_SYMBOL(truncate_inode_pages); |
393 | 394 | |
394 | 395 | /** |
396 | + * truncate_inode_pages_final - truncate *all* pages before inode dies | |
397 | + * @mapping: mapping to truncate | |
398 | + * | |
399 | + * Called under (and serialized by) inode->i_mutex. | |
400 | + * | |
401 | + * Filesystems have to use this in the .evict_inode path to inform the | |
402 | + * VM that this is the final truncate and the inode is going away. | |
403 | + */ | |
404 | +void truncate_inode_pages_final(struct address_space *mapping) | |
405 | +{ | |
406 | + unsigned long nrshadows; | |
407 | + unsigned long nrpages; | |
408 | + | |
409 | + /* | |
410 | + * Page reclaim can not participate in regular inode lifetime | |
411 | + * management (can't call iput()) and thus can race with the | |
412 | + * inode teardown. Tell it when the address space is exiting, | |
413 | + * so that it does not install eviction information after the | |
414 | + * final truncate has begun. | |
415 | + */ | |
416 | + mapping_set_exiting(mapping); | |
417 | + | |
418 | + /* | |
419 | + * When reclaim installs eviction entries, it increases | |
420 | + * nrshadows first, then decreases nrpages. Make sure we see | |
421 | + * this in the right order or we might miss an entry. | |
422 | + */ | |
423 | + nrpages = mapping->nrpages; | |
424 | + smp_rmb(); | |
425 | + nrshadows = mapping->nrshadows; | |
426 | + | |
427 | + if (nrpages || nrshadows) { | |
428 | + /* | |
429 | + * As truncation uses a lockless tree lookup, cycle | |
430 | + * the tree lock to make sure any ongoing tree | |
431 | + * modification that does not see AS_EXITING is | |
432 | + * completed before starting the final truncate. | |
433 | + */ | |
434 | + spin_lock_irq(&mapping->tree_lock); | |
435 | + spin_unlock_irq(&mapping->tree_lock); | |
436 | + | |
437 | + truncate_inode_pages(mapping, 0); | |
438 | + } | |
439 | +} | |
440 | +EXPORT_SYMBOL(truncate_inode_pages_final); | |
441 | + | |
442 | +/** | |
395 | 443 | * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode |
396 | 444 | * @mapping: the address_space which holds the pages to invalidate |
397 | 445 | * @start: the offset 'from' which to invalidate |
... | ... | @@ -484,7 +532,7 @@ |
484 | 532 | goto failed; |
485 | 533 | |
486 | 534 | BUG_ON(page_has_private(page)); |
487 | - __delete_from_page_cache(page); | |
535 | + __delete_from_page_cache(page, NULL); | |
488 | 536 | spin_unlock_irq(&mapping->tree_lock); |
489 | 537 | mem_cgroup_uncharge_cache_page(page); |
490 | 538 |
mm/vmscan.c