Blame view

mm/truncate.c 19.7 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
  /*
   * mm/truncate.c - code for taking down pages from address_spaces
   *
   * Copyright (C) 2002, Linus Torvalds
   *
e1f8e8744   Francois Cami   Remove Andrew Mor...
6
   * 10Sep2002	Andrew Morton
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
7
8
9
10
   *		Initial version.
   */
  
  #include <linux/kernel.h>
4af3c9cc4   Alexey Dobriyan   Drop some headers...
11
  #include <linux/backing-dev.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
12
  #include <linux/gfp.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
13
  #include <linux/mm.h>
0fd0e6b05   Nick Piggin   [PATCH] page inva...
14
  #include <linux/swap.h>
b95f1b31b   Paul Gortmaker   mm: Map most file...
15
  #include <linux/export.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
16
  #include <linux/pagemap.h>
01f2705da   Nate Diller   fs: convert core ...
17
  #include <linux/highmem.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
18
  #include <linux/pagevec.h>
e08748ce0   Andrew Morton   [PATCH] io-accoun...
19
  #include <linux/task_io_accounting_ops.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
20
  #include <linux/buffer_head.h>	/* grr. try_to_release_page,
aaa4059bc   Jan Kara   [PATCH] ext3: Fix...
21
  				   do_invalidatepage */
c515e1fd3   Dan Magenheimer   mm/fs: add hooks ...
22
  #include <linux/cleancache.h>
ba470de43   Rik van Riel   mmap: handle mloc...
23
  #include "internal.h"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
24

cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
25
  /**
28bc44d7d   Fengguang Wu   do_invalidatepage...
26
   * do_invalidatepage - invalidate part or all of a page
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
   * @page: the page which is affected
   * @offset: the index of the truncation point
   *
   * do_invalidatepage() is called when all or part of the page has become
   * invalidated by a truncate operation.
   *
   * do_invalidatepage() does not have to release all buffers, but it must
   * ensure that no dirty buffer is left outside @offset and that no I/O
   * is underway against any of the blocks which are outside the truncation
   * point, because the caller is about to free (and possibly reuse) those
   * blocks on-disk.
   */
  void do_invalidatepage(struct page *page, unsigned long offset)
  {
  	void (*invalidatepage)(struct page *, unsigned long);
  	/* Start with the invalidatepage operation of the page's own mapping. */
  	invalidatepage = page->mapping->a_ops->invalidatepage;
9361401eb   David Howells   [PATCH] BLOCK: Ma...
43
  #ifdef CONFIG_BLOCK
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
44
45
  	/* The mapping supplies none: fall back to block_invalidatepage. */
  	if (!invalidatepage)
  		invalidatepage = block_invalidatepage;
9361401eb   David Howells   [PATCH] BLOCK: Ma...
46
  #endif
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
47
48
49
  	/*
  	 * Without CONFIG_BLOCK the pointer may still be NULL here, in which
  	 * case nothing is called.
  	 */
  	if (invalidatepage)
  		(*invalidatepage)(page, offset);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
50
51
  /*
   * Handle the one page that is only partly covered by a truncation:
   * zero_user_segment() is applied to bytes [@partial, PAGE_CACHE_SIZE) of
   * @page, cleancache is told to invalidate the page, and, when the page has
   * private data, do_invalidatepage() is called with @partial as the offset.
   * The page itself is not removed from the pagecache here.
   */
  static inline void truncate_partial_page(struct page *page, unsigned partial)
  {
eebd2aa35   Christoph Lameter   Pagecache zeroing...
52
  	zero_user_segment(page, partial, PAGE_CACHE_SIZE);
3167760f8   Dan Magenheimer   mm: cleancache: s...
53
  	cleancache_invalidate_page(page->mapping, page);
266cf658e   David Howells   FS-Cache: Recruit...
54
  	if (page_has_private(page))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
55
56
  		do_invalidatepage(page, partial);
  }
ecdfc9787   Linus Torvalds   Resurrect 'try_to...
57
58
59
60
61
62
63
64
65
66
67
68
69
70
  /*
   * This cancels just the dirty bit on the kernel page itself, it
   * does NOT actually remove dirty bits on any mmap's that may be
   * around. It also leaves the page tagged dirty, so any sync
   * activity will still find it on the dirty lists, and in particular,
   * clear_page_dirty_for_io() will still look at the dirty bits in
   * the VM.
   *
   * Doing this should *normally* only ever be done when a page
   * is truncated, and is not actually mapped anywhere at all. However,
   * fs/buffer.c does this when it notices that somebody has cleaned
   * out all the buffers on a page without actually doing it through
   * the VM. Can you say "ext3 is horribly ugly"? Thought you could.
   */
fba2591bf   Linus Torvalds   VM: Remove "clear...
71
72
  void cancel_dirty_page(struct page *page, unsigned int account_size)
  {
8368e328d   Linus Torvalds   Clean up and expo...
73
74
75
76
  	/*
  	 * Accounting is only undone when this call is the one that actually
  	 * cleared the dirty flag.
  	 */
  	if (TestClearPageDirty(page)) {
  		struct address_space *mapping = page->mapping;
  		/* Counters are touched only for mappings that account dirty pages. */
  		if (mapping && mapping_cap_account_dirty(mapping)) {
  			dec_zone_page_state(page, NR_FILE_DIRTY);
c9e51e418   Peter Zijlstra   mm: count reclaim...
77
78
  			dec_bdi_stat(mapping->backing_dev_info,
  					BDI_RECLAIMABLE);
8368e328d   Linus Torvalds   Clean up and expo...
79
80
81
  			/* An @account_size of 0 skips the per-task cancelled-write report. */
  			if (account_size)
  				task_io_account_cancelled_write(account_size);
  		}
3e67c0987   Andrew Morton   [PATCH] truncate:...
82
  	}
fba2591bf   Linus Torvalds   VM: Remove "clear...
83
  }
8368e328d   Linus Torvalds   Clean up and expo...
84
  EXPORT_SYMBOL(cancel_dirty_page);
fba2591bf   Linus Torvalds   VM: Remove "clear...
85

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
86
87
  /*
   * If truncate cannot remove the fs-private metadata from the page, the page
62e1c5530   Shaohua Li   page migraton: ha...
88
   * becomes orphaned.  It will be left on the LRU and may even be mapped into
54cb8821d   Nick Piggin   mm: merge populat...
89
   * user pagetables if we're racing with filemap_fault().
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
90
91
92
   *
   * We need to bale out if page->mapping is no longer equal to the original
   * mapping.  This happens a) when the VM reclaimed the page while we waited on
fc0ecff69   Andrew Morton   [PATCH] remove in...
93
   * its lock, b) when a concurrent invalidate_mapping_pages got there first and
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
94
95
   * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
   */
750b4987b   Nick Piggin   HWPOISON: Refacto...
96
  static int
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
97
98
99
  truncate_complete_page(struct address_space *mapping, struct page *page)
  {
  	/*
  	 * Returns 0 once the page has been deleted from the pagecache, or
  	 * -EIO when the page no longer belongs to @mapping.
  	 */
  	if (page->mapping != mapping)
750b4987b   Nick Piggin   HWPOISON: Refacto...
100
  		return -EIO;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
101

266cf658e   David Howells   FS-Cache: Recruit...
102
  	/* Offset 0: the whole page is being invalidated. */
  	if (page_has_private(page))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
103
  		do_invalidatepage(page, 0);
a2b345642   Björn Steinbrink   Fix dirty page ac...
104
  	/* Drop the dirty flag, reporting a full page of cancelled write. */
  	cancel_dirty_page(page, PAGE_CACHE_SIZE);
ba470de43   Rik van Riel   mmap: handle mloc...
105
  	clear_page_mlock(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
106
  	ClearPageMappedToDisk(page);
5adc7b518   Minchan Kim   mm: truncate: cha...
107
  	delete_from_page_cache(page);
750b4987b   Nick Piggin   HWPOISON: Refacto...
108
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
109
110
111
  }
  
  /*
fc0ecff69   Andrew Morton   [PATCH] remove in...
112
   * This is for invalidate_mapping_pages().  That function can be called at
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
113
   * any time, and is not supposed to throw away dirty pages.  But pages can
0fd0e6b05   Nick Piggin   [PATCH] page inva...
114
115
   * be marked dirty at any time too, so use remove_mapping which safely
   * discards clean, unused pages.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
116
117
118
119
120
121
   *
   * Returns non-zero if the page was successfully invalidated.
   */
  static int
  invalidate_complete_page(struct address_space *mapping, struct page *page)
  {
0fd0e6b05   Nick Piggin   [PATCH] page inva...
122
  	int ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
123
124
  	/* The page no longer belongs to @mapping: report "not invalidated". */
  	if (page->mapping != mapping)
  		return 0;
266cf658e   David Howells   FS-Cache: Recruit...
125
  	/* Private data that cannot be released keeps the page in place. */
  	if (page_has_private(page) && !try_to_release_page(page, 0))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
126
  		return 0;
ba470de43   Rik van Riel   mmap: handle mloc...
127
  	clear_page_mlock(page);
0fd0e6b05   Nick Piggin   [PATCH] page inva...
128
  	/* The result of remove_mapping() is handed straight back to the caller. */
  	ret = remove_mapping(mapping, page);
0fd0e6b05   Nick Piggin   [PATCH] page inva...
129
130
  
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
131
  }
750b4987b   Nick Piggin   HWPOISON: Refacto...
132
133
134
135
136
137
138
139
140
  int truncate_inode_page(struct address_space *mapping, struct page *page)
  {
  	if (page_mapped(page)) {
  		unmap_mapping_range(mapping,
  				   (loff_t)page->index << PAGE_CACHE_SHIFT,
  				   PAGE_CACHE_SIZE, 0);
  	}
  	return truncate_complete_page(mapping, page);
  }
83f786680   Wu Fengguang   HWPOISON: Add inv...
141
  /*
257187362   Andi Kleen   HWPOISON: Define ...
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
   * Used to get rid of pages on hardware memory corruption.
   */
  int generic_error_remove_page(struct address_space *mapping, struct page *page)
  {
  	int err;

  	/*
  	 * Returns -EINVAL without a mapping, -EIO when the mapping's host is
  	 * not a regular file, otherwise whatever truncate_inode_page() returns.
  	 */
  	if (!mapping) {
  		err = -EINVAL;
  	} else if (!S_ISREG(mapping->host->i_mode)) {
  		/*
  		 * Only punch for normal data pages for now.
  		 * Handling other types like directories would need more
  		 * auditing.
  		 */
  		err = -EIO;
  	} else {
  		err = truncate_inode_page(mapping, page);
  	}

  	return err;
  }
  EXPORT_SYMBOL(generic_error_remove_page);
  
  /*
83f786680   Wu Fengguang   HWPOISON: Add inv...
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
   * Safely invalidate one page from its pagecache mapping.
   * It only drops clean, unused pages. The page must be locked.
   *
   * Returns 1 if the page is successfully invalidated, otherwise 0.
   */
  int invalidate_inode_page(struct page *page)
  {
  	struct address_space *owner = page_mapping(page);

  	/* A page without a mapping has nothing to be invalidated from. */
  	if (!owner)
  		return 0;

  	/* Dirty, under-writeback or mapped pages are left alone. */
  	if (PageDirty(page) || PageWriteback(page) || page_mapped(page))
  		return 0;

  	return invalidate_complete_page(owner, page);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
175
  /**
73c1e2043   Liu Bo   mm: fix comment t...
176
   * truncate_inode_pages_range - truncate range of pages specified by start & end byte offsets
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
177
178
   * @mapping: mapping to truncate
   * @lstart: offset from which to truncate
d7339071f   Hans Reiser   [PATCH] reiser4: ...
179
   * @lend: offset to which to truncate
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
180
   *
d7339071f   Hans Reiser   [PATCH] reiser4: ...
181
182
183
   * Truncate the page cache, removing the pages that lie between the
   * specified offsets (and zeroing out the partial page if lstart is not
   * page aligned).
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
184
185
186
187
188
189
190
   *
   * Truncate takes two passes - the first pass is nonblocking.  It will not
   * block on page locks and it will not block on writeback.  The second pass
   * will wait.  This is to prevent as much IO as possible in the affected region.
   * The first pass will remove most pages, so the search cost of the second pass
   * is low.
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
191
192
193
   * We pass down the cache-hot hint to the page freeing code.  Even if the
   * mapping is large, it is probably the case that the final pages are the most
   * recently touched, and freeing happens in ascending file offset order.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
194
   */
d7339071f   Hans Reiser   [PATCH] reiser4: ...
195
196
  void truncate_inode_pages_range(struct address_space *mapping,
  				loff_t lstart, loff_t lend)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
197
198
199
200
  {
  	/* Index of the first page wholly inside the range (lstart rounded up). */
  	const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
  	/*
  	 * Offset of lstart within its page; non-zero means the page at
  	 * start - 1 is only partly truncated and gets zeroed instead of removed.
  	 */
  	const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
  	struct pagevec pvec;
b85e0effd   Hugh Dickins   mm: consistent tr...
201
202
  	pgoff_t index;
  	pgoff_t end;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
203
  	int i;
3167760f8   Dan Magenheimer   mm: cleancache: s...
204
  	cleancache_invalidate_inode(mapping);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
205
206
  	if (mapping->nrpages == 0)
  		return;
d7339071f   Hans Reiser   [PATCH] reiser4: ...
207
208
  	/* lend must be the last byte of a page; end is that page's index. */
  	BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1));
  	end = (lend >> PAGE_CACHE_SHIFT);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
209
  	pagevec_init(&pvec, 0);
b85e0effd   Hugh Dickins   mm: consistent tr...
210
211
212
  	/*
  	 * Pass 1 (non-blocking): pages that cannot be locked immediately or
  	 * that are under writeback are skipped and left for pass 2.
  	 */
  	index = start;
  	while (index <= end && pagevec_lookup(&pvec, mapping, index,
  			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
e5598f8bf   Hugh Dickins   memcg: more mem_c...
213
  		mem_cgroup_uncharge_start();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
214
215
  		for (i = 0; i < pagevec_count(&pvec); i++) {
  			struct page *page = pvec.pages[i];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
216

b85e0effd   Hugh Dickins   mm: consistent tr...
217
218
219
  			/* We rely upon deletion not changing page->index */
  			index = page->index;
  			if (index > end)
d7339071f   Hans Reiser   [PATCH] reiser4: ...
220
  				break;
d7339071f   Hans Reiser   [PATCH] reiser4: ...
221

529ae9aaa   Nick Piggin   mm: rename page t...
222
  			if (!trylock_page(page))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
223
  				continue;
b85e0effd   Hugh Dickins   mm: consistent tr...
224
  			WARN_ON(page->index != index);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
225
226
227
228
  			if (PageWriteback(page)) {
  				unlock_page(page);
  				continue;
  			}
750b4987b   Nick Piggin   HWPOISON: Refacto...
229
  			truncate_inode_page(mapping, page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
230
231
232
  			unlock_page(page);
  		}
  		pagevec_release(&pvec);
e5598f8bf   Hugh Dickins   memcg: more mem_c...
233
  		mem_cgroup_uncharge_end();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
234
  		cond_resched();
b85e0effd   Hugh Dickins   mm: consistent tr...
235
  		index++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
236
237
238
239
240
241
242
243
244
245
246
  	}
  
  	/*
  	 * The partly truncated page just below start is kept: wait for any
  	 * writeback on it, then zero its tail via truncate_partial_page().
  	 */
  	if (partial) {
  		struct page *page = find_lock_page(mapping, start - 1);
  		if (page) {
  			wait_on_page_writeback(page);
  			truncate_partial_page(page, partial);
  			unlock_page(page);
  			page_cache_release(page);
  		}
  	}
b85e0effd   Hugh Dickins   mm: consistent tr...
247
  	/*
  	 * Pass 2 (blocking): lock each page and wait for its writeback.  The
  	 * scan restarts from start until a lookup from start finds nothing
  	 * left inside the range.
  	 */
  	index = start;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
248
249
  	for ( ; ; ) {
  		cond_resched();
b85e0effd   Hugh Dickins   mm: consistent tr...
250
251
252
  		if (!pagevec_lookup(&pvec, mapping, index,
  			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
  			/*
  			 * Nothing found.  If this lookup began at start the
  			 * range is empty and we are done; otherwise rescan
  			 * from start.
  			 */
  			if (index == start)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
253
  				break;
b85e0effd   Hugh Dickins   mm: consistent tr...
254
  			index = start;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
255
256
  			continue;
  		}
d0823576b   Hugh Dickins   mm: pincer in tru...
257
  		/* The first page found from start already lies beyond end: done. */
  		if (index == start && pvec.pages[0]->index > end) {
d7339071f   Hans Reiser   [PATCH] reiser4: ...
258
259
260
  			pagevec_release(&pvec);
  			break;
  		}
569b846df   KAMEZAWA Hiroyuki   memcg: coalesce u...
261
  		mem_cgroup_uncharge_start();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
262
263
  		for (i = 0; i < pagevec_count(&pvec); i++) {
  			struct page *page = pvec.pages[i];
b85e0effd   Hugh Dickins   mm: consistent tr...
264
265
266
  			/* We rely upon deletion not changing page->index */
  			index = page->index;
  			if (index > end)
d7339071f   Hans Reiser   [PATCH] reiser4: ...
267
  				break;
b85e0effd   Hugh Dickins   mm: consistent tr...
268

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
269
  			lock_page(page);
b85e0effd   Hugh Dickins   mm: consistent tr...
270
  			WARN_ON(page->index != index);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
271
  			wait_on_page_writeback(page);
750b4987b   Nick Piggin   HWPOISON: Refacto...
272
  			truncate_inode_page(mapping, page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
273
274
275
  			unlock_page(page);
  		}
  		pagevec_release(&pvec);
569b846df   KAMEZAWA Hiroyuki   memcg: coalesce u...
276
  		mem_cgroup_uncharge_end();
b85e0effd   Hugh Dickins   mm: consistent tr...
277
  		index++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
278
  	}
3167760f8   Dan Magenheimer   mm: cleancache: s...
279
  	/* cleancache is invalidated again now that the pagecache pass is over. */
  	cleancache_invalidate_inode(mapping);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
280
  }
d7339071f   Hans Reiser   [PATCH] reiser4: ...
281
  EXPORT_SYMBOL(truncate_inode_pages_range);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
282

d7339071f   Hans Reiser   [PATCH] reiser4: ...
283
284
285
286
287
  /**
   * truncate_inode_pages - truncate *all* the pages from an offset
   * @mapping: mapping to truncate
   * @lstart: offset from which to truncate
   *
1b1dcc1b5   Jes Sorensen   [PATCH] mutex sub...
288
   * Called under (and serialised by) inode->i_mutex.
08142579b   Jan Kara   mm: fix assertion...
289
290
291
292
293
   *
   * Note: When this function returns, there can be a page in the process of
   * deletion (inside __delete_from_page_cache()) in the specified range.  Thus
   * mapping->nrpages can be non-zero when this function returns even after
   * truncation of the whole mapping.
d7339071f   Hans Reiser   [PATCH] reiser4: ...
294
295
296
297
298
   */
  void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
  {
  	/*
  	 * An end offset of (loff_t)-1 asks the ranged variant to truncate
  	 * everything from @lstart onwards.
  	 */
  	const loff_t lend = (loff_t)-1;

  	truncate_inode_pages_range(mapping, lstart, lend);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
299
  EXPORT_SYMBOL(truncate_inode_pages);
286973552   Mike Waychison   mm: remove __inva...
300
301
302
303
304
305
306
307
308
309
310
311
312
313
  /**
   * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode
   * @mapping: the address_space which holds the pages to invalidate
   * @start: the offset 'from' which to invalidate
   * @end: the offset 'to' which to invalidate (inclusive)
   *
   * This function only removes the unlocked pages, if you want to
   * remove all the pages of one inode, you must call truncate_inode_pages.
   *
   * invalidate_mapping_pages() will not block on IO activity. It will not
   * invalidate pages which are dirty, locked, under writeback or mapped into
   * pagetables.
   */
  unsigned long invalidate_mapping_pages(struct address_space *mapping,
315601809   Minchan Kim   mm: deactivate in...
314
  		pgoff_t start, pgoff_t end)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
315
316
  {
  	struct pagevec pvec;
b85e0effd   Hugh Dickins   mm: consistent tr...
317
  	pgoff_t index = start;
315601809   Minchan Kim   mm: deactivate in...
318
319
  	/* ret: result for the current page; count: running total of successes. */
  	unsigned long ret;
  	unsigned long count = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
320
  	int i;
31475dd61   Hugh Dickins   mm: a few small u...
321
322
323
324
325
326
327
  	/*
  	 * Note: this function may get called on a shmem/tmpfs mapping:
  	 * pagevec_lookup() might then return 0 prematurely (because it
  	 * got a gangful of swap entries); but it's hardly worth worrying
  	 * about - it can rarely have anything to free from such a mapping
  	 * (most pages are dirty), and already skips over any difficulties.
  	 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
328
  	pagevec_init(&pvec, 0);
b85e0effd   Hugh Dickins   mm: consistent tr...
329
330
  	while (index <= end && pagevec_lookup(&pvec, mapping, index,
  			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
569b846df   KAMEZAWA Hiroyuki   memcg: coalesce u...
331
  		mem_cgroup_uncharge_start();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
332
333
  		for (i = 0; i < pagevec_count(&pvec); i++) {
  			struct page *page = pvec.pages[i];
e0f23603f   NeilBrown   [PATCH] Remove se...
334

b85e0effd   Hugh Dickins   mm: consistent tr...
335
  			/* We rely upon deletion not changing page->index */
e0f23603f   NeilBrown   [PATCH] Remove se...
336
  			index = page->index;
b85e0effd   Hugh Dickins   mm: consistent tr...
337
338
  			if (index > end)
  				break;
e0f23603f   NeilBrown   [PATCH] Remove se...
339

b85e0effd   Hugh Dickins   mm: consistent tr...
340
341
342
  			/* Never block: a page that is already locked is skipped. */
  			if (!trylock_page(page))
  				continue;
  			WARN_ON(page->index != index);
315601809   Minchan Kim   mm: deactivate in...
343
  			ret = invalidate_inode_page(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
344
  			unlock_page(page);
315601809   Minchan Kim   mm: deactivate in...
345
346
347
348
349
350
351
  			/*
  			 * Invalidation is a hint that the page is no longer
  			 * of interest, so when it could not be invalidated
  			 * try to speed up its reclaim instead.
  			 */
  			if (!ret)
  				deactivate_page(page);
  			count += ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
352
353
  		}
  		pagevec_release(&pvec);
569b846df   KAMEZAWA Hiroyuki   memcg: coalesce u...
354
  		mem_cgroup_uncharge_end();
286973552   Mike Waychison   mm: remove __inva...
355
  		cond_resched();
b85e0effd   Hugh Dickins   mm: consistent tr...
356
  		index++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
357
  	}
315601809   Minchan Kim   mm: deactivate in...
358
  	/* Sum of the per-page invalidate_inode_page() results. */
  	return count;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
359
  }
54bc48552   Anton Altaparmakov   [PATCH] Export in...
360
  EXPORT_SYMBOL(invalidate_mapping_pages);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
361

bd4c8ce41   Andrew Morton   [PATCH] invalidat...
362
363
364
365
  /*
   * This is like invalidate_complete_page(), except it ignores the page's
   * refcount.  We do this because invalidate_inode_pages2() needs stronger
   * invalidation guarantees, and cannot afford to leave pages behind because
2706a1b89   Anderson Briglia   vmscan: fix comme...
366
367
   * shrink_page_list() has a temp ref on them, or because they're transiently
   * sitting in the lru_cache_add() pagevecs.
bd4c8ce41   Andrew Morton   [PATCH] invalidat...
368
369
370
371
372
373
   */
  static int
  invalidate_complete_page2(struct address_space *mapping, struct page *page)
  {
  	/* Returns 1 if the page was removed from the pagecache, 0 otherwise. */
  	if (page->mapping != mapping)
  		return 0;
266cf658e   David Howells   FS-Cache: Recruit...
374
  	if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL))
bd4c8ce41   Andrew Morton   [PATCH] invalidat...
375
  		return 0;
19fd62312   Nick Piggin   mm: spinlock tree...
376
  	/* The dirty check and the removal both happen under tree_lock. */
  	spin_lock_irq(&mapping->tree_lock);
bd4c8ce41   Andrew Morton   [PATCH] invalidat...
377
378
  	/* A dirty page is not discarded here; report failure instead. */
  	if (PageDirty(page))
  		goto failed;
ba470de43   Rik van Riel   mmap: handle mloc...
379
  	clear_page_mlock(page);
266cf658e   David Howells   FS-Cache: Recruit...
380
  	BUG_ON(page_has_private(page));
e64a782fe   Minchan Kim   mm: change __remo...
381
  	__delete_from_page_cache(page);
19fd62312   Nick Piggin   mm: spinlock tree...
382
  	spin_unlock_irq(&mapping->tree_lock);
e767e0561   Daisuke Nishimura   memcg: fix deadlo...
383
  	/* Uncharging and the freepage hook run after tree_lock is dropped. */
  	mem_cgroup_uncharge_cache_page(page);
6072d13c4   Linus Torvalds   Call the filesyst...
384
385
386
  
  	if (mapping->a_ops->freepage)
  		mapping->a_ops->freepage(page);
bd4c8ce41   Andrew Morton   [PATCH] invalidat...
387
388
389
  	page_cache_release(page);	/* pagecache ref */
  	return 1;
  failed:
19fd62312   Nick Piggin   mm: spinlock tree...
390
  	spin_unlock_irq(&mapping->tree_lock);
bd4c8ce41   Andrew Morton   [PATCH] invalidat...
391
392
  	return 0;
  }
e3db7691e   Trond Myklebust   [PATCH] NFS: Fix ...
393
394
395
396
397
398
399
400
  /*
   * Call the mapping's launder_page operation for @page, but only when the
   * page is dirty, still belongs to @mapping, and the operation is provided.
   *
   * Returns 0 when no call is made, otherwise the value returned by
   * ->launder_page().
   */
  static int do_launder_page(struct address_space *mapping, struct page *page)
  {
  	int ret = 0;

  	if (PageDirty(page) && page->mapping == mapping &&
  	    mapping->a_ops->launder_page != NULL)
  		ret = mapping->a_ops->launder_page(page);

  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
401
402
  /**
   * invalidate_inode_pages2_range - remove range of pages from an address_space
67be2dd1b   Martin Waitz   [PATCH] DocBook: ...
403
   * @mapping: the address_space
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
404
405
406
407
408
409
   * @start: the page offset 'from' which to invalidate
   * @end: the page offset 'to' which to invalidate (inclusive)
   *
   * Any pages which are found to be mapped into pagetables are unmapped prior to
   * invalidation.
   *
6ccfa806a   Hisashi Hifumi   VFS: fix dio writ...
410
   * Returns -EBUSY if any pages could not be invalidated.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
411
412
413
414
415
   */
  int invalidate_inode_pages2_range(struct address_space *mapping,
  				  pgoff_t start, pgoff_t end)
  {
  	struct pagevec pvec;
b85e0effd   Hugh Dickins   mm: consistent tr...
416
  	pgoff_t index;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
417
418
  	int i;
  	/* ret: last error seen over the whole range (0 if none). */
  	int ret = 0;
0dd1334fa   Hisashi Hifumi   fix invalidate_in...
419
  	/* ret2: outcome for the page currently being processed. */
  	int ret2 = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
420
  	/* Set once the single "rest of the range" unmap has been issued. */
  	int did_range_unmap = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
421

3167760f8   Dan Magenheimer   mm: cleancache: s...
422
  	cleancache_invalidate_inode(mapping);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
423
  	pagevec_init(&pvec, 0);
b85e0effd   Hugh Dickins   mm: consistent tr...
424
425
426
  	index = start;
  	while (index <= end && pagevec_lookup(&pvec, mapping, index,
  			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
569b846df   KAMEZAWA Hiroyuki   memcg: coalesce u...
427
  		mem_cgroup_uncharge_start();
7b965e088   Trond Myklebust   [PATCH] VM: inval...
428
  		for (i = 0; i < pagevec_count(&pvec); i++) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
429
  			struct page *page = pvec.pages[i];
b85e0effd   Hugh Dickins   mm: consistent tr...
430
431
432
433
434
  
  			/* We rely upon deletion not changing page->index */
  			index = page->index;
  			if (index > end)
  				break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
435
436
  
  			lock_page(page);
b85e0effd   Hugh Dickins   mm: consistent tr...
437
  			WARN_ON(page->index != index);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
438
439
440
441
  			/* The page left this mapping before we got its lock: skip it. */
  			if (page->mapping != mapping) {
  				unlock_page(page);
  				continue;
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
442
  			wait_on_page_writeback(page);
d00806b18   Nick Piggin   mm: fix fault vs ...
443
  			if (page_mapped(page)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
444
445
446
447
448
  				if (!did_range_unmap) {
  					/*
  					 * Zap the rest of the file in one hit.
  					 */
  					unmap_mapping_range(mapping,
b85e0effd   Hugh Dickins   mm: consistent tr...
449
450
451
  					   (loff_t)index << PAGE_CACHE_SHIFT,
  					   (loff_t)(1 + end - index)
  							 << PAGE_CACHE_SHIFT,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
452
453
454
455
456
457
458
  					    0);
  					did_range_unmap = 1;
  				} else {
  					/*
  					 * Just zap this page
  					 */
  					unmap_mapping_range(mapping,
b85e0effd   Hugh Dickins   mm: consistent tr...
459
460
  					   (loff_t)index << PAGE_CACHE_SHIFT,
  					   PAGE_CACHE_SIZE, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
461
462
  				}
  			}
d00806b18   Nick Piggin   mm: fix fault vs ...
463
  			BUG_ON(page_mapped(page));
0dd1334fa   Hisashi Hifumi   fix invalidate_in...
464
465
466
  			/*
  			 * Let the mapping's launder_page operation deal with a
  			 * dirty page first.  Only when that reports 0 is the
  			 * page invalidated; failure to invalidate is -EBUSY.
  			 */
  			ret2 = do_launder_page(mapping, page);
  			if (ret2 == 0) {
  				if (!invalidate_complete_page2(mapping, page))
6ccfa806a   Hisashi Hifumi   VFS: fix dio writ...
467
  					ret2 = -EBUSY;
0dd1334fa   Hisashi Hifumi   fix invalidate_in...
468
469
470
  			}
  			/*
  			 * Record the failure but keep going through the range.
  			 * Note that a negative value from launder_page is
  			 * propagated as-is, so the result is not only -EBUSY.
  			 */
  			if (ret2 < 0)
  				ret = ret2;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
471
472
473
  			unlock_page(page);
  		}
  		pagevec_release(&pvec);
569b846df   KAMEZAWA Hiroyuki   memcg: coalesce u...
474
  		mem_cgroup_uncharge_end();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
475
  		cond_resched();
b85e0effd   Hugh Dickins   mm: consistent tr...
476
  		index++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
477
  	}
3167760f8   Dan Magenheimer   mm: cleancache: s...
478
  	cleancache_invalidate_inode(mapping);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
479
480
481
482
483
484
  	return ret;
  }
  EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range);
  
  /**
   * invalidate_inode_pages2 - remove all pages from an address_space
67be2dd1b   Martin Waitz   [PATCH] DocBook: ...
485
   * @mapping: the address_space
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
486
487
488
489
   *
   * Any pages which are found to be mapped into pagetables are unmapped prior to
   * invalidation.
   *
e9de25dda   Peng Tao   mm: fix comments ...
490
   * Returns -EBUSY if any pages could not be invalidated.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
491
492
493
494
495
496
   */
  int invalidate_inode_pages2(struct address_space *mapping)
  {
  	int status;

  	/*
  	 * Whole-mapping case of the ranged variant: start at page 0 and pass
  	 * -1 as the inclusive end index.
  	 * NOTE(review): -1 presumably converts to the largest pgoff_t value;
  	 * confirm that pgoff_t is unsigned.
  	 */
  	status = invalidate_inode_pages2_range(mapping, 0, -1);

  	return status;
  }
  EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
25d9e2d15   npiggin@suse.de   truncate: new hel...
497
498
499
500
  
  /**
   * truncate_pagecache - unmap and remove pagecache that has been truncated
   * @inode: inode
8a549bea5   Hugh Dickins   mm: tidy vmtrunca...
501
502
   * @oldsize: old file size
   * @newsize: new file size
25d9e2d15   npiggin@suse.de   truncate: new hel...
503
504
505
506
507
508
509
510
511
512
513
   *
   * inode's new i_size must already be written before truncate_pagecache
   * is called.
   *
   * This function should typically be called before the filesystem
   * releases resources associated with the freed range (eg. deallocates
   * blocks). This way, pagecache will always stay logically coherent
   * with on-disk format, and the filesystem would not have to deal with
   * situations such as writepage being called for a page that has already
   * had its underlying blocks deallocated.
   */
8a549bea5   Hugh Dickins   mm: tidy vmtrunca...
514
  void truncate_pagecache(struct inode *inode, loff_t oldsize, loff_t newsize)
25d9e2d15   npiggin@suse.de   truncate: new hel...
515
  {
cedabed49   OGAWA Hirofumi   vfs: Fix vmtrunca...
516
  	/* Note: @oldsize is not referenced anywhere in this function body. */
  	struct address_space *mapping = inode->i_mapping;
8a549bea5   Hugh Dickins   mm: tidy vmtrunca...
517
  	/* First page boundary at or after @newsize; unmapping starts there. */
  	loff_t holebegin = round_up(newsize, PAGE_SIZE);
cedabed49   OGAWA Hirofumi   vfs: Fix vmtrunca...
518
519
520
521
522
523
524
525
526
527
  
  	/*
  	 * unmap_mapping_range is called twice, first simply for
  	 * efficiency so that truncate_inode_pages does fewer
  	 * single-page unmaps.  However after this first call, and
  	 * before truncate_inode_pages finishes, it is possible for
  	 * private pages to be COWed, which remain after
  	 * truncate_inode_pages finishes, hence the second
  	 * unmap_mapping_range call must be made for correctness.
  	 */
8a549bea5   Hugh Dickins   mm: tidy vmtrunca...
528
529
530
  	/*
  	 * NOTE(review): the length argument of 0 presumably means "through
  	 * end of file" - confirm against unmap_mapping_range().
  	 */
  	unmap_mapping_range(mapping, holebegin, 0, 1);
  	truncate_inode_pages(mapping, newsize);
  	unmap_mapping_range(mapping, holebegin, 0, 1);
25d9e2d15   npiggin@suse.de   truncate: new hel...
531
532
533
534
  }
  EXPORT_SYMBOL(truncate_pagecache);
  
  /**
2c27c65ed   Christoph Hellwig   check ATTR_SIZE c...
535
536
537
538
   * truncate_setsize - update inode and pagecache for a new file size
   * @inode: inode
   * @newsize: new file size
   *
382e27daa   Jan Kara   mm: fix truncate_...
539
540
541
   * truncate_setsize updates i_size and performs pagecache truncation (if
   * necessary) to @newsize. It will typically be called from the filesystem's
   * setattr function when ATTR_SIZE is passed in.
2c27c65ed   Christoph Hellwig   check ATTR_SIZE c...
542
   *
382e27daa   Jan Kara   mm: fix truncate_...
543
544
   * Must be called with inode_mutex held and before all filesystem specific
   * block truncation has been performed.
2c27c65ed   Christoph Hellwig   check ATTR_SIZE c...
545
546
547
548
549
550
551
552
553
554
555
556
557
   */
  void truncate_setsize(struct inode *inode, loff_t newsize)
  {
  	/* Capture the size in effect before it is overwritten below. */
  	const loff_t prev_size = inode->i_size;

  	/* i_size is updated first, then the pagecache is brought in line. */
  	i_size_write(inode, newsize);
  	truncate_pagecache(inode, prev_size, newsize);
  }
  EXPORT_SYMBOL(truncate_setsize);
  
  /**
25d9e2d15   npiggin@suse.de   truncate: new hel...
558
559
   * vmtruncate - unmap mappings "freed" by truncate() syscall
   * @inode: inode of the file used
8a549bea5   Hugh Dickins   mm: tidy vmtrunca...
560
   * @newsize: file offset to start truncating
25d9e2d15   npiggin@suse.de   truncate: new hel...
561
   *
2c27c65ed   Christoph Hellwig   check ATTR_SIZE c...
562
563
   * This function is deprecated and truncate_setsize or truncate_pagecache
   * should be used instead, together with filesystem specific block truncation.
25d9e2d15   npiggin@suse.de   truncate: new hel...
564
   */
8a549bea5   Hugh Dickins   mm: tidy vmtrunca...
565
  int vmtruncate(struct inode *inode, loff_t newsize)
25d9e2d15   npiggin@suse.de   truncate: new hel...
566
  {
25d9e2d15   npiggin@suse.de   truncate: new hel...
567
  	int error;
8a549bea5   Hugh Dickins   mm: tidy vmtrunca...
568
  	error = inode_newsize_ok(inode, newsize);
25d9e2d15   npiggin@suse.de   truncate: new hel...
569
570
  	if (error)
  		return error;
7bb46a673   npiggin@suse.de   fs: introduce new...
571

8a549bea5   Hugh Dickins   mm: tidy vmtrunca...
572
  	truncate_setsize(inode, newsize);
25d9e2d15   npiggin@suse.de   truncate: new hel...
573
574
  	if (inode->i_op->truncate)
  		inode->i_op->truncate(inode);
2c27c65ed   Christoph Hellwig   check ATTR_SIZE c...
575
  	return 0;
25d9e2d15   npiggin@suse.de   truncate: new hel...
576
577
  }
  EXPORT_SYMBOL(vmtruncate);
5b8ba1019   Hugh Dickins   mm: move vmtrunca...
578

8a549bea5   Hugh Dickins   mm: tidy vmtrunca...
579
  /**
   * vmtruncate_range - unmap a byte range and ask the filesystem to punch it
   * @inode: inode of the file used
   * @lstart: offset of beginning of hole
   * @lend: offset of last byte of hole
   *
   * Under i_mutex, and after inode_dio_wait(), the range starting at @lstart
   * rounded up to PAGE_SIZE is unmapped (including private COWed pages), the
   * inode's ->truncate_range operation is invoked, and the range is unmapped
   * a second time.
   *
   * Returns -ENOSYS if the inode has no ->truncate_range operation,
   * otherwise 0.
   */
  int vmtruncate_range(struct inode *inode, loff_t lstart, loff_t lend)
  {
  	struct address_space *mapping;
  	loff_t unmap_from;
  	loff_t unmap_len;
  
  	/*
  	 * Without a ->truncate_range operation the filesystem cannot
  	 * punch a hole in its blocks, so fail before touching anything.
  	 */
  	if (!inode->i_op->truncate_range)
  		return -ENOSYS;
  
  	mapping = inode->i_mapping;
  	unmap_from = round_up(lstart, PAGE_SIZE);
  	unmap_len = 1 + lend - unmap_from;
  
  	mutex_lock(&inode->i_mutex);
  	inode_dio_wait(inode);
  
  	unmap_mapping_range(mapping, unmap_from, unmap_len, 1);
  	inode->i_op->truncate_range(inode, lstart, lend);
  	/* Second pass: drop private pages that were COWed in the meantime. */
  	unmap_mapping_range(mapping, unmap_from, unmap_len, 1);
  
  	mutex_unlock(&inode->i_mutex);
  
  	return 0;
  }
623e3db9f   Hugh Dickins   mm for fs: add tr...
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
  
  /**
   * truncate_pagecache_range - unmap and remove pagecache that is hole-punched
   * @inode: inode
   * @lstart: offset of beginning of hole
   * @lend: offset of last byte of hole
   *
   * Filesystems should normally call this before they release the resources
   * backing the punched range (e.g. before deallocating blocks).  Doing so
   * keeps the pagecache logically coherent with the on-disk format, so the
   * filesystem never has to cope with, say, writepage being invoked on a
   * page whose underlying blocks are already gone.
   */
  void truncate_pagecache_range(struct inode *inode, loff_t lstart, loff_t lend)
  {
  	struct address_space *mapping = inode->i_mapping;
  	/*
  	 * Shrink the range inwards to whole pages.  For now this rounding
  	 * is only illustrative: unmap_mapping_range grows its hole outwards
  	 * while we want it contracted inwards, but current callers already
  	 * page-round their arguments, and truncate_inode_pages_range
  	 * currently BUGs unless lend is pagealigned-1 (a partial page at the
  	 * start of the hole is handled, one at the end is not).  Note that
  	 * unmap_mapping_range accepts holelen 0 meaning "all", and we accept
  	 * lend -1.
  	 */
  	loff_t first_byte = round_up(lstart, PAGE_SIZE);
  	loff_t last_byte = round_down(1 + lend, PAGE_SIZE) - 1;
  
  	/*
  	 * In contrast to truncate_pagecache, the mapping is unmapped just
  	 * once, ahead of the pagecache truncation, and with "even_cows"
  	 * clear: punching a hole must leave private COWed pages in place.
  	 */
  	if ((u64)first_byte < (u64)last_byte)
  		unmap_mapping_range(mapping, first_byte,
  				    1 + last_byte - first_byte, 0);
  
  	truncate_inode_pages_range(mapping, lstart, lend);
  }
  EXPORT_SYMBOL(truncate_pagecache_range);