
mm/truncate.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * mm/truncate.c - code for taking down pages from address_spaces
 *
 * Copyright (C) 2002, Linus Torvalds
 *
 * 10Sep2002	Andrew Morton
 *		Initial version.
 */

#include <linux/kernel.h>
#include <linux/backing-dev.h>
#include <linux/dax.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/export.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/pagevec.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/buffer_head.h>	/* grr. try_to_release_page,
				   do_invalidatepage */
#include <linux/shmem_fs.h>
#include <linux/cleancache.h>
#include <linux/rmap.h>
#include "internal.h"

/*
 * Regular page slots are stabilized by the page lock even without the tree
 * itself locked.  These unlocked entries need verification under the tree
 * lock.
 */
static inline void __clear_shadow_entry(struct address_space *mapping,
				pgoff_t index, void *entry)
{
	XA_STATE(xas, &mapping->i_pages, index);

	xas_set_update(&xas, workingset_update_node);
	if (xas_load(&xas) != entry)
		return;
	xas_store(&xas, NULL);
	mapping->nrexceptional--;
}

static void clear_shadow_entry(struct address_space *mapping, pgoff_t index,
			       void *entry)
{
	xa_lock_irq(&mapping->i_pages);
	__clear_shadow_entry(mapping, index, entry);
	xa_unlock_irq(&mapping->i_pages);
}
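
/*
 * Example (editor's note, not part of the original file): shadow and
 * DAX "exceptional" entries share the i_pages xarray with real pages,
 * stored as tagged value entries, which is why code throughout this
 * file tests xa_is_value() before treating an entry as a struct page.
 * The helpers named below are hypothetical:
 *
 *	void *entry = xa_load(&mapping->i_pages, index);
 *
 *	if (xa_is_value(entry))
 *		handle_shadow_or_dax_entry(entry);	// not a struct page
 *	else if (entry)
 *		handle_real_page((struct page *)entry);
 */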

/*
 * Unconditionally remove exceptional entries. Usually called from truncate
 * path. Note that the pagevec may be altered by this function by removing
 * exceptional entries similar to what pagevec_remove_exceptionals does.
 */
static void truncate_exceptional_pvec_entries(struct address_space *mapping,
				struct pagevec *pvec, pgoff_t *indices,
				pgoff_t end)
{
	int i, j;
	bool dax, lock;

	/* Handled by shmem itself */
	if (shmem_mapping(mapping))
		return;

	for (j = 0; j < pagevec_count(pvec); j++)
		if (xa_is_value(pvec->pages[j]))
			break;

	if (j == pagevec_count(pvec))
		return;

	dax = dax_mapping(mapping);
	lock = !dax && indices[j] < end;
	if (lock)
		xa_lock_irq(&mapping->i_pages);

	for (i = j; i < pagevec_count(pvec); i++) {
		struct page *page = pvec->pages[i];
		pgoff_t index = indices[i];

		if (!xa_is_value(page)) {
			pvec->pages[j++] = page;
			continue;
		}

		if (index >= end)
			continue;

		if (unlikely(dax)) {
			dax_delete_mapping_entry(mapping, index);
			continue;
		}

		__clear_shadow_entry(mapping, index, page);
	}

	if (lock)
		xa_unlock_irq(&mapping->i_pages);
	pvec->nr = j;
}
  
/*
 * Invalidate exceptional entry if easily possible. This handles exceptional
 * entries for invalidate_inode_pages().
 */
static int invalidate_exceptional_entry(struct address_space *mapping,
					pgoff_t index, void *entry)
{
	/* Handled by shmem itself, or for DAX we do nothing. */
	if (shmem_mapping(mapping) || dax_mapping(mapping))
		return 1;
	clear_shadow_entry(mapping, index, entry);
	return 1;
}

/*
 * Invalidate exceptional entry if clean. This handles exceptional entries for
 * invalidate_inode_pages2() so for DAX it evicts only clean entries.
 */
static int invalidate_exceptional_entry2(struct address_space *mapping,
					 pgoff_t index, void *entry)
{
	/* Handled by shmem itself */
	if (shmem_mapping(mapping))
		return 1;
	if (dax_mapping(mapping))
		return dax_invalidate_mapping_entry_sync(mapping, index);
	clear_shadow_entry(mapping, index, entry);
	return 1;
}

/**
 * do_invalidatepage - invalidate part or all of a page
 * @page: the page which is affected
 * @offset: start of the range to invalidate
 * @length: length of the range to invalidate
 *
 * do_invalidatepage() is called when all or part of the page has become
 * invalidated by a truncate operation.
 *
 * do_invalidatepage() does not have to release all buffers, but it must
 * ensure that no dirty buffer is left outside @offset and that no I/O
 * is underway against any of the blocks which are outside the truncation
 * point, because the caller is about to free (and possibly reuse) those
 * blocks on-disk.
 */
void do_invalidatepage(struct page *page, unsigned int offset,
		       unsigned int length)
{
	void (*invalidatepage)(struct page *, unsigned int, unsigned int);

	invalidatepage = page->mapping->a_ops->invalidatepage;
#ifdef CONFIG_BLOCK
	if (!invalidatepage)
		invalidatepage = block_invalidatepage;
#endif
	if (invalidatepage)
		(*invalidatepage)(page, offset, length);
}
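
/*
 * Example (editor's illustration, not part of mm/truncate.c): a
 * block-based filesystem typically wires ->invalidatepage to
 * block_invalidatepage, which is also the fallback used above when the
 * aops slot is left NULL; "examplefs" is hypothetical:
 *
 *	static const struct address_space_operations examplefs_aops = {
 *		...
 *		.invalidatepage	= block_invalidatepage,
 *	};
 *
 * Filesystems with their own journalling (e.g. ext4) supply a custom
 * implementation instead.
 */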

/*
 * If truncate cannot remove the fs-private metadata from the page, the page
 * becomes orphaned.  It will be left on the LRU and may even be mapped into
 * user pagetables if we're racing with filemap_fault().
 *
 * We need to bail out if page->mapping is no longer equal to the original
 * mapping.  This happens a) when the VM reclaimed the page while we waited on
 * its lock, b) when a concurrent invalidate_mapping_pages got there first and
 * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
 */
static void
truncate_cleanup_page(struct address_space *mapping, struct page *page)
{
	if (page_mapped(page)) {
		unsigned int nr = thp_nr_pages(page);
		unmap_mapping_pages(mapping, page->index, nr, false);
	}

	if (page_has_private(page))
		do_invalidatepage(page, 0, thp_size(page));

	/*
	 * Some filesystems seem to re-dirty the page even after
	 * the VM has canceled the dirty bit (eg ext3 journaling).
	 * Hence dirty accounting check is placed after invalidation.
	 */
	cancel_dirty_page(page);
	ClearPageMappedToDisk(page);
}
  
/*
 * This is for invalidate_mapping_pages().  That function can be called at
 * any time, and is not supposed to throw away dirty pages.  But pages can
 * be marked dirty at any time too, so use remove_mapping which safely
 * discards clean, unused pages.
 *
 * Returns non-zero if the page was successfully invalidated.
 */
static int
invalidate_complete_page(struct address_space *mapping, struct page *page)
{
	int ret;

	if (page->mapping != mapping)
		return 0;

	if (page_has_private(page) && !try_to_release_page(page, 0))
		return 0;

	ret = remove_mapping(mapping, page);

	return ret;
}

int truncate_inode_page(struct address_space *mapping, struct page *page)
{
	VM_BUG_ON_PAGE(PageTail(page), page);

	if (page->mapping != mapping)
		return -EIO;

	truncate_cleanup_page(mapping, page);
	delete_from_page_cache(page);
	return 0;
}

/*
 * Used to get rid of pages on hardware memory corruption.
 */
int generic_error_remove_page(struct address_space *mapping, struct page *page)
{
	if (!mapping)
		return -EINVAL;
	/*
	 * Only punch for normal data pages for now.
	 * Handling other types like directories would need more auditing.
	 */
	if (!S_ISREG(mapping->host->i_mode))
		return -EIO;
	return truncate_inode_page(mapping, page);
}
EXPORT_SYMBOL(generic_error_remove_page);
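
/*
 * Example (editor's illustration, not part of mm/truncate.c): memory
 * failure handling reaches this helper through the aops table, so most
 * filesystems simply plug it in; "examplefs" is hypothetical:
 *
 *	static const struct address_space_operations examplefs_aops = {
 *		...
 *		.error_remove_page = generic_error_remove_page,
 *	};
 */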
  
/*
 * Safely invalidate one page from its pagecache mapping.
 * It only drops clean, unused pages. The page must be locked.
 *
 * Returns 1 if the page is successfully invalidated, otherwise 0.
 */
int invalidate_inode_page(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	if (!mapping)
		return 0;
	if (PageDirty(page) || PageWriteback(page))
		return 0;
	if (page_mapped(page))
		return 0;
	return invalidate_complete_page(mapping, page);
}

/**
 * truncate_inode_pages_range - truncate range of pages specified by start & end byte offsets
 * @mapping: mapping to truncate
 * @lstart: offset from which to truncate
 * @lend: offset to which to truncate (inclusive)
 *
 * Truncate the page cache, removing the pages that are between
 * specified offsets (and zeroing out partial pages
 * if lstart or lend + 1 is not page aligned).
 *
 * Truncate takes two passes - the first pass is nonblocking.  It will not
 * block on page locks and it will not block on writeback.  The second pass
 * will wait.  This is to prevent as much IO as possible in the affected region.
 * The first pass will remove most pages, so the search cost of the second pass
 * is low.
 *
 * We pass down the cache-hot hint to the page freeing code.  Even if the
 * mapping is large, it is probably the case that the final pages are the most
 * recently touched, and freeing happens in ascending file offset order.
 *
 * Note that since ->invalidatepage() accepts a range to invalidate,
 * truncate_inode_pages_range is able to handle cases where lend + 1 is not
 * properly page aligned.
 */
void truncate_inode_pages_range(struct address_space *mapping,
				loff_t lstart, loff_t lend)
{
	pgoff_t		start;		/* inclusive */
	pgoff_t		end;		/* exclusive */
	unsigned int	partial_start;	/* inclusive */
	unsigned int	partial_end;	/* exclusive */
	struct pagevec	pvec;
	pgoff_t		indices[PAGEVEC_SIZE];
	pgoff_t		index;
	int		i;

	if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
		goto out;

	/* Offsets within partial pages */
	partial_start = lstart & (PAGE_SIZE - 1);
	partial_end = (lend + 1) & (PAGE_SIZE - 1);

	/*
	 * 'start' and 'end' always covers the range of pages to be fully
	 * truncated. Partial pages are covered with 'partial_start' at the
	 * start of the range and 'partial_end' at the end of the range.
	 * Note that 'end' is exclusive while 'lend' is inclusive.
	 */
	start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (lend == -1)
		/*
		 * lend == -1 indicates end-of-file so we have to set 'end'
		 * to the highest possible pgoff_t and since the type is
		 * unsigned we're using -1.
		 */
		end = -1;
	else
		end = (lend + 1) >> PAGE_SHIFT;

	pagevec_init(&pvec);
	index = start;
	while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
			min(end - index, (pgoff_t)PAGEVEC_SIZE),
			indices)) {
		/*
		 * Pagevec array has exceptional entries and we may also fail
		 * to lock some pages. So we store pages that can be deleted
		 * in a new pagevec.
		 */
		struct pagevec locked_pvec;

		pagevec_init(&locked_pvec);
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			/* We rely upon deletion not changing page->index */
			index = indices[i];
			if (index >= end)
				break;

			if (xa_is_value(page))
				continue;

			if (!trylock_page(page))
				continue;
			WARN_ON(page_to_index(page) != index);
			if (PageWriteback(page)) {
				unlock_page(page);
				continue;
			}
			if (page->mapping != mapping) {
				unlock_page(page);
				continue;
			}
			pagevec_add(&locked_pvec, page);
		}
		for (i = 0; i < pagevec_count(&locked_pvec); i++)
			truncate_cleanup_page(mapping, locked_pvec.pages[i]);
		delete_from_page_cache_batch(mapping, &locked_pvec);
		for (i = 0; i < pagevec_count(&locked_pvec); i++)
			unlock_page(locked_pvec.pages[i]);
		truncate_exceptional_pvec_entries(mapping, &pvec, indices, end);
		pagevec_release(&pvec);
		cond_resched();
		index++;
	}
	if (partial_start) {
		struct page *page = find_lock_page(mapping, start - 1);
		if (page) {
			unsigned int top = PAGE_SIZE;
			if (start > end) {
				/* Truncation within a single page */
				top = partial_end;
				partial_end = 0;
			}
			wait_on_page_writeback(page);
			zero_user_segment(page, partial_start, top);
			cleancache_invalidate_page(mapping, page);
			if (page_has_private(page))
				do_invalidatepage(page, partial_start,
						  top - partial_start);
			unlock_page(page);
			put_page(page);
		}
	}
	if (partial_end) {
		struct page *page = find_lock_page(mapping, end);
		if (page) {
			wait_on_page_writeback(page);
			zero_user_segment(page, 0, partial_end);
			cleancache_invalidate_page(mapping, page);
			if (page_has_private(page))
				do_invalidatepage(page, 0,
						  partial_end);
			unlock_page(page);
			put_page(page);
		}
	}
	/*
	 * If the truncation happened within a single page no pages
	 * will be released, just zeroed, so we can bail out now.
	 */
	if (start >= end)
		goto out;

	index = start;
	for ( ; ; ) {
		cond_resched();
		if (!pagevec_lookup_entries(&pvec, mapping, index,
			min(end - index, (pgoff_t)PAGEVEC_SIZE), indices)) {
			/* If all gone from start onwards, we're done */
			if (index == start)
				break;
			/* Otherwise restart to make sure all gone */
			index = start;
			continue;
		}
		if (index == start && indices[0] >= end) {
			/* All gone out of hole to be punched, we're done */
			pagevec_remove_exceptionals(&pvec);
			pagevec_release(&pvec);
			break;
		}

		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			/* We rely upon deletion not changing page->index */
			index = indices[i];
			if (index >= end) {
				/* Restart punch to make sure all gone */
				index = start - 1;
				break;
			}

			if (xa_is_value(page))
				continue;

			lock_page(page);
			WARN_ON(page_to_index(page) != index);
			wait_on_page_writeback(page);
			truncate_inode_page(mapping, page);
			unlock_page(page);
		}
		truncate_exceptional_pvec_entries(mapping, &pvec, indices, end);
		pagevec_release(&pvec);
		index++;
	}

out:
	cleancache_invalidate_inode(mapping);
}
EXPORT_SYMBOL(truncate_inode_pages_range);
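
/*
 * Worked example (editor's note, not part of mm/truncate.c), assuming
 * 4096-byte pages: truncate_inode_pages_range(mapping, 1536, 10239)
 * computes
 *
 *	partial_start = 1536 & 4095         = 1536
 *	partial_end   = (10239 + 1) & 4095  = 2048
 *	start         = (1536 + 4095) >> 12 = 1	(first fully-covered page)
 *	end           = (10239 + 1) >> 12   = 2	(exclusive)
 *
 * so page 1 is removed outright, while page 0 survives with bytes
 * 1536..4095 zeroed and page 2 survives with bytes 0..2047 zeroed,
 * their buffers invalidated via do_invalidatepage().
 */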

/**
 * truncate_inode_pages - truncate *all* the pages from an offset
 * @mapping: mapping to truncate
 * @lstart: offset from which to truncate
 *
 * Called under (and serialised by) inode->i_mutex.
 *
 * Note: When this function returns, there can be a page in the process of
 * deletion (inside __delete_from_page_cache()) in the specified range.  Thus
 * mapping->nrpages can be non-zero when this function returns even after
 * truncation of the whole mapping.
 */
void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
{
	truncate_inode_pages_range(mapping, lstart, (loff_t)-1);
}
EXPORT_SYMBOL(truncate_inode_pages);

/**
 * truncate_inode_pages_final - truncate *all* pages before inode dies
 * @mapping: mapping to truncate
 *
 * Called under (and serialized by) inode->i_mutex.
 *
 * Filesystems have to use this in the .evict_inode path to inform the
 * VM that this is the final truncate and the inode is going away.
 */
void truncate_inode_pages_final(struct address_space *mapping)
{
	unsigned long nrexceptional;
	unsigned long nrpages;

	/*
	 * Page reclaim can not participate in regular inode lifetime
	 * management (can't call iput()) and thus can race with the
	 * inode teardown.  Tell it when the address space is exiting,
	 * so that it does not install eviction information after the
	 * final truncate has begun.
	 */
	mapping_set_exiting(mapping);

	/*
	 * When reclaim installs eviction entries, it increases
	 * nrexceptional first, then decreases nrpages.  Make sure we see
	 * this in the right order or we might miss an entry.
	 */
	nrpages = mapping->nrpages;
	smp_rmb();
	nrexceptional = mapping->nrexceptional;

	if (nrpages || nrexceptional) {
		/*
		 * As truncation uses a lockless tree lookup, cycle
		 * the tree lock to make sure any ongoing tree
		 * modification that does not see AS_EXITING is
		 * completed before starting the final truncate.
		 */
		xa_lock_irq(&mapping->i_pages);
		xa_unlock_irq(&mapping->i_pages);
	}

	/*
	 * Cleancache needs notification even if there are no pages or shadow
	 * entries.
	 */
	truncate_inode_pages(mapping, 0);
}
EXPORT_SYMBOL(truncate_inode_pages_final);
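
/*
 * Example (editor's illustration, not part of mm/truncate.c): the
 * .evict_inode pattern described above, with a hypothetical
 * "examplefs":
 *
 *	static void examplefs_evict_inode(struct inode *inode)
 *	{
 *		truncate_inode_pages_final(&inode->i_data);
 *		clear_inode(inode);
 *		// filesystem-specific teardown follows
 *	}
 */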

static unsigned long __invalidate_mapping_pages(struct address_space *mapping,
		pgoff_t start, pgoff_t end, unsigned long *nr_pagevec)
{
	pgoff_t indices[PAGEVEC_SIZE];
	struct pagevec pvec;
	pgoff_t index = start;
	unsigned long ret;
	unsigned long count = 0;
	int i;

	pagevec_init(&pvec);
	while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
			indices)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			/* We rely upon deletion not changing page->index */
			index = indices[i];
			if (index > end)
				break;

			if (xa_is_value(page)) {
				invalidate_exceptional_entry(mapping, index,
							     page);
				continue;
			}

			if (!trylock_page(page))
				continue;

			WARN_ON(page_to_index(page) != index);

			/* Middle of THP: skip */
			if (PageTransTail(page)) {
				unlock_page(page);
				continue;
			} else if (PageTransHuge(page)) {
				index += HPAGE_PMD_NR - 1;
				i += HPAGE_PMD_NR - 1;
				/*
				 * 'end' is in the middle of THP. Don't
				 * invalidate the page as the part outside of
				 * 'end' could be still useful.
				 */
				if (index > end) {
					unlock_page(page);
					continue;
				}

				/* Take a pin outside pagevec */
				get_page(page);

				/*
				 * Drop extra pins before trying to invalidate
				 * the huge page.
				 */
				pagevec_remove_exceptionals(&pvec);
				pagevec_release(&pvec);
			}

			ret = invalidate_inode_page(page);
			unlock_page(page);
			/*
			 * Invalidation is a hint that the page is no longer
			 * of interest, so try to speed up its reclaim.
			 */
			if (!ret) {
				deactivate_file_page(page);
				/* It is likely on the pagevec of a remote CPU */
				if (nr_pagevec)
					(*nr_pagevec)++;
			}

			if (PageTransHuge(page))
				put_page(page);
			count += ret;
		}
		pagevec_remove_exceptionals(&pvec);
		pagevec_release(&pvec);
		cond_resched();
		index++;
	}
	return count;
}

/**
 * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode
 * @mapping: the address_space which holds the pages to invalidate
 * @start: the offset 'from' which to invalidate
 * @end: the offset 'to' which to invalidate (inclusive)
 *
 * This function only removes the unlocked pages; if you want to
 * remove all the pages of one inode, you must call truncate_inode_pages.
 *
 * invalidate_mapping_pages() will not block on IO activity. It will not
 * invalidate pages which are dirty, locked, under writeback or mapped into
 * pagetables.
 *
 * Return: the number of pages that were invalidated
 */
unsigned long invalidate_mapping_pages(struct address_space *mapping,
		pgoff_t start, pgoff_t end)
{
	return __invalidate_mapping_pages(mapping, start, end, NULL);
}
EXPORT_SYMBOL(invalidate_mapping_pages);
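
/*
 * Example (editor's illustration, not part of mm/truncate.c): this is
 * the workhorse behind drop-behind hints such as
 * posix_fadvise(POSIX_FADV_DONTNEED). A caller converts byte offsets to
 * page indices (rounding the first page up so a partially covered page
 * is kept) and lets dirty, locked or mapped pages stay put:
 *
 *	pgoff_t first = DIV_ROUND_UP(offset, PAGE_SIZE);
 *	pgoff_t last = (offset + len - 1) >> PAGE_SHIFT;
 *
 *	if (last >= first)
 *		invalidate_mapping_pages(mapping, first, last);
 */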

/**
 * This helper is similar to the above one, except that it accounts for pages
 * that are likely on a pagevec and counts them in @nr_pagevec, which will be
 * used by the caller.
 */
void invalidate_mapping_pagevec(struct address_space *mapping,
		pgoff_t start, pgoff_t end, unsigned long *nr_pagevec)
{
	__invalidate_mapping_pages(mapping, start, end, nr_pagevec);
}

/*
 * This is like invalidate_complete_page(), except it ignores the page's
 * refcount.  We do this because invalidate_inode_pages2() needs stronger
 * invalidation guarantees, and cannot afford to leave pages behind because
 * shrink_page_list() has a temp ref on them, or because they're transiently
 * sitting in the lru_cache_add() pagevecs.
 */
static int
invalidate_complete_page2(struct address_space *mapping, struct page *page)
{
	unsigned long flags;

	if (page->mapping != mapping)
		return 0;

	if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL))
		return 0;

	xa_lock_irqsave(&mapping->i_pages, flags);
	if (PageDirty(page))
		goto failed;

	BUG_ON(page_has_private(page));
	__delete_from_page_cache(page, NULL);
	xa_unlock_irqrestore(&mapping->i_pages, flags);

	if (mapping->a_ops->freepage)
		mapping->a_ops->freepage(page);

	put_page(page);	/* pagecache ref */
	return 1;
failed:
	xa_unlock_irqrestore(&mapping->i_pages, flags);
	return 0;
}

static int do_launder_page(struct address_space *mapping, struct page *page)
{
	if (!PageDirty(page))
		return 0;
	if (page->mapping != mapping || mapping->a_ops->launder_page == NULL)
		return 0;
	return mapping->a_ops->launder_page(page);
}

/**
 * invalidate_inode_pages2_range - remove range of pages from an address_space
 * @mapping: the address_space
 * @start: the page offset 'from' which to invalidate
 * @end: the page offset 'to' which to invalidate (inclusive)
 *
 * Any pages which are found to be mapped into pagetables are unmapped prior to
 * invalidation.
 *
 * Return: -EBUSY if any pages could not be invalidated.
 */
int invalidate_inode_pages2_range(struct address_space *mapping,
				  pgoff_t start, pgoff_t end)
{
	pgoff_t indices[PAGEVEC_SIZE];
	struct pagevec pvec;
	pgoff_t index;
	int i;
	int ret = 0;
	int ret2 = 0;
	int did_range_unmap = 0;

	if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
		goto out;

	pagevec_init(&pvec);
	index = start;
	while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
			indices)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			/* We rely upon deletion not changing page->index */
			index = indices[i];
			if (index > end)
				break;

			if (xa_is_value(page)) {
				if (!invalidate_exceptional_entry2(mapping,
								   index, page))
					ret = -EBUSY;
				continue;
			}

			lock_page(page);
			WARN_ON(page_to_index(page) != index);
			if (page->mapping != mapping) {
				unlock_page(page);
				continue;
			}
			wait_on_page_writeback(page);
			if (page_mapped(page)) {
				if (!did_range_unmap) {
					/*
					 * Zap the rest of the file in one hit.
					 */
					unmap_mapping_pages(mapping, index,
						(1 + end - index), false);
					did_range_unmap = 1;
				} else {
					/*
					 * Just zap this page
					 */
					unmap_mapping_pages(mapping, index,
								1, false);
				}
			}
			BUG_ON(page_mapped(page));
			ret2 = do_launder_page(mapping, page);
			if (ret2 == 0) {
				if (!invalidate_complete_page2(mapping, page))
					ret2 = -EBUSY;
			}
			if (ret2 < 0)
				ret = ret2;
			unlock_page(page);
		}
		pagevec_remove_exceptionals(&pvec);
		pagevec_release(&pvec);
		cond_resched();
		index++;
	}
	/*
	 * For DAX we invalidate page tables after invalidating page cache.  We
	 * could invalidate page tables while invalidating each entry, however
	 * that would be expensive. And doing range unmapping before doesn't
	 * work as we have no cheap way to find whether a page cache entry
	 * didn't get remapped later.
	 */
	if (dax_mapping(mapping)) {
		unmap_mapping_pages(mapping, start, end - start + 1, false);
	}
out:
	cleancache_invalidate_inode(mapping);
	return ret;
}
EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range);
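
/*
 * Example (editor's illustration, not part of mm/truncate.c): direct
 * I/O writes use this to kill now-stale cached pages over the written
 * range, treating a nonzero return as a (rare) coherency failure;
 * compare generic_file_direct_write() in mm/filemap.c:
 *
 *	written = mapping->a_ops->direct_IO(iocb, from);
 *	if (written > 0)
 *		invalidate_inode_pages2_range(mapping,
 *				pos >> PAGE_SHIFT,
 *				(pos + written - 1) >> PAGE_SHIFT);
 */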
  
/**
 * invalidate_inode_pages2 - remove all pages from an address_space
 * @mapping: the address_space
 *
 * Any pages which are found to be mapped into pagetables are unmapped prior to
 * invalidation.
 *
 * Return: -EBUSY if any pages could not be invalidated.
 */
int invalidate_inode_pages2(struct address_space *mapping)
{
	return invalidate_inode_pages2_range(mapping, 0, -1);
}
EXPORT_SYMBOL_GPL(invalidate_inode_pages2);

/**
 * truncate_pagecache - unmap and remove pagecache that has been truncated
 * @inode: inode
 * @newsize: new file size
 *
 * inode's new i_size must already be written before truncate_pagecache
 * is called.
 *
 * This function should typically be called before the filesystem
 * releases resources associated with the freed range (eg. deallocates
 * blocks). This way, pagecache will always stay logically coherent
 * with on-disk format, and the filesystem would not have to deal with
 * situations such as writepage being called for a page that has already
 * had its underlying blocks deallocated.
 */
void truncate_pagecache(struct inode *inode, loff_t newsize)
{
	struct address_space *mapping = inode->i_mapping;
	loff_t holebegin = round_up(newsize, PAGE_SIZE);

	/*
	 * unmap_mapping_range is called twice, first simply for
	 * efficiency so that truncate_inode_pages does fewer
	 * single-page unmaps.  However after this first call, and
	 * before truncate_inode_pages finishes, it is possible for
	 * private pages to be COWed, which remain after
	 * truncate_inode_pages finishes, hence the second
	 * unmap_mapping_range call must be made for correctness.
	 */
	unmap_mapping_range(mapping, holebegin, 0, 1);
	truncate_inode_pages(mapping, newsize);
	unmap_mapping_range(mapping, holebegin, 0, 1);
}
EXPORT_SYMBOL(truncate_pagecache);
  
/**
 * truncate_setsize - update inode and pagecache for a new file size
 * @inode: inode
 * @newsize: new file size
 *
 * truncate_setsize updates i_size and performs pagecache truncation (if
 * necessary) to @newsize. It will typically be called from the filesystem's
 * setattr function when ATTR_SIZE is passed in.
 *
 * Must be called with a lock serializing truncates and writes (generally
 * i_mutex but e.g. xfs uses a different lock) and before all filesystem
 * specific block truncation has been performed.
 */
void truncate_setsize(struct inode *inode, loff_t newsize)
{
	loff_t oldsize = inode->i_size;

	i_size_write(inode, newsize);
	if (newsize > oldsize)
		pagecache_isize_extended(inode, oldsize, newsize);
	truncate_pagecache(inode, newsize);
}
EXPORT_SYMBOL(truncate_setsize);
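
/*
 * Example (editor's illustration, not part of mm/truncate.c): the
 * typical ->setattr shape for a simple filesystem; "examplefs" is
 * hypothetical:
 *
 *	static int examplefs_setattr(struct dentry *dentry, struct iattr *attr)
 *	{
 *		struct inode *inode = d_inode(dentry);
 *		int error = setattr_prepare(dentry, attr);
 *
 *		if (error)
 *			return error;
 *		if (attr->ia_valid & ATTR_SIZE)
 *			truncate_setsize(inode, attr->ia_size);
 *		setattr_copy(inode, attr);
 *		mark_inode_dirty(inode);
 *		return 0;
 *	}
 */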
  
/**
 * pagecache_isize_extended - update pagecache after extension of i_size
 * @inode:	inode for which i_size was extended
 * @from:	original inode size
 * @to:		new inode size
 *
 * Handle extension of inode size either caused by extending truncate or by
 * write starting after current i_size. We mark the page straddling current
 * i_size RO so that page_mkwrite() is called on the nearest write access to
 * the page.  This way the filesystem can be sure that page_mkwrite() is
 * called on the page before user writes to the page via mmap after the
 * i_size has been changed.
 *
 * The function must be called after i_size is updated so that a page fault
 * coming after we unlock the page will already see the new i_size.
 * The function must be called while we still hold i_mutex - this not only
 * makes sure i_size is stable but also that userspace cannot observe the new
 * i_size value before we are prepared to store mmap writes at new inode size.
 */
void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to)
{
	int bsize = i_blocksize(inode);
	loff_t rounded_from;
	struct page *page;
	pgoff_t index;

	WARN_ON(to > inode->i_size);

	if (from >= to || bsize == PAGE_SIZE)
		return;
	/* Page straddling @from will not have any hole block created? */
	rounded_from = round_up(from, bsize);
	if (to <= rounded_from || !(rounded_from & (PAGE_SIZE - 1)))
		return;

	index = from >> PAGE_SHIFT;
	page = find_lock_page(inode->i_mapping, index);
	/* Page not cached? Nothing to do */
	if (!page)
		return;
	/*
	 * See clear_page_dirty_for_io() for details why set_page_dirty()
	 * is needed.
	 */
	if (page_mkclean(page))
		set_page_dirty(page);
	unlock_page(page);
	put_page(page);
}
EXPORT_SYMBOL(pagecache_isize_extended);
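
/*
 * Worked example (editor's note, not part of mm/truncate.c), assuming
 * 4096-byte pages and a 1024-byte block size: extending i_size from
 * 2600 to 5000 gives
 *
 *	rounded_from = round_up(2600, 1024) = 3072
 *
 * so the page straddling the old size (index 2600 >> PAGE_SHIFT = 0)
 * still has hole blocks in its 3072..4095 tail that are now within
 * i_size. page_mkclean() write-protects that page, forcing
 * page_mkwrite() before any mmap store so the filesystem can allocate
 * those blocks. With bsize == PAGE_SIZE the function bails out early:
 * no page can then straddle i_size with an unallocated tail block.
 */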
  
/**
 * truncate_pagecache_range - unmap and remove pagecache that is hole-punched
 * @inode: inode
 * @lstart: offset of beginning of hole
 * @lend: offset of last byte of hole
 *
 * This function should typically be called before the filesystem
 * releases resources associated with the freed range (eg. deallocates
 * blocks). This way, pagecache will always stay logically coherent
 * with on-disk format, and the filesystem would not have to deal with
 * situations such as writepage being called for a page that has already
 * had its underlying blocks deallocated.
 */
void truncate_pagecache_range(struct inode *inode, loff_t lstart, loff_t lend)
{
	struct address_space *mapping = inode->i_mapping;
	loff_t unmap_start = round_up(lstart, PAGE_SIZE);
	loff_t unmap_end = round_down(1 + lend, PAGE_SIZE) - 1;
	/*
	 * This rounding is currently just for example: unmap_mapping_range
	 * expands its hole outwards, whereas we want it to contract the hole
	 * inwards.  However, existing callers of truncate_pagecache_range are
	 * doing their own page rounding first.  Note that unmap_mapping_range
	 * allows holelen 0 for all, and we allow lend -1 for end of file.
	 */

	/*
	 * Unlike in truncate_pagecache, unmap_mapping_range is called only
	 * once (before truncating pagecache), and without "even_cows" flag:
	 * hole-punching should not remove private COWed pages from the hole.
	 */
	if ((u64)unmap_end > (u64)unmap_start)
		unmap_mapping_range(mapping, unmap_start,
				    1 + unmap_end - unmap_start, 0);
	truncate_inode_pages_range(mapping, lstart, lend);
}
EXPORT_SYMBOL(truncate_pagecache_range);
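
/*
 * Example (editor's illustration, not part of mm/truncate.c): a
 * fallocate(FALLOC_FL_PUNCH_HOLE) implementation typically calls this
 * after doing its own alignment (page-size rounding shown here for
 * simplicity; real filesystems round to their block size) and before
 * freeing the on-disk blocks:
 *
 *	loff_t start = round_up(offset, PAGE_SIZE);
 *	loff_t end = round_down(offset + len, PAGE_SIZE);
 *
 *	if (start < end)
 *		truncate_pagecache_range(inode, start, end - 1);
 *	// now deallocate the on-disk blocks backing [start, end)
 */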