Blame view

mm/truncate.c 11.2 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
11
  /*
   * mm/truncate.c - code for taking down pages from address_spaces
   *
   * Copyright (C) 2002, Linus Torvalds
   *
   * 10Sep2002	akpm@zip.com.au
   *		Initial version.
   */
  
  #include <linux/kernel.h>
  #include <linux/mm.h>
0fd0e6b05   Nick Piggin   [PATCH] page inva...
12
  #include <linux/swap.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
13
14
15
16
  #include <linux/module.h>
  #include <linux/pagemap.h>
  #include <linux/pagevec.h>
  #include <linux/buffer_head.h>	/* grr. try_to_release_page,
aaa4059bc   Jan Kara   [PATCH] ext3: Fix...
17
  				   do_invalidatepage */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
18

cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
  /**
   * do_invalidatepage - invalidate part or all of a page
   * @page: the page which is affected
   * @offset: the index of the truncation point
   *
   * do_invalidatepage() is called when all or part of the page has become
   * invalidated by a truncate operation.
   *
   * do_invalidatepage() does not have to release all buffers, but it must
   * ensure that no dirty buffer is left outside @offset and that no I/O
   * is underway against any of the blocks which are outside the truncation
   * point, because the caller is about to free (and possibly reuse) those
   * blocks on-disk.
   */
  void do_invalidatepage(struct page *page, unsigned long offset)
  {
  	void (*invalidatepage)(struct page *, unsigned long);
  	/* Start with the hook supplied by the address_space operations. */
  	invalidatepage = page->mapping->a_ops->invalidatepage;
9361401eb   David Howells   [PATCH] BLOCK: Ma...
37
  #ifdef CONFIG_BLOCK
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
38
39
  	/* With CONFIG_BLOCK, a missing hook falls back to block_invalidatepage. */
  	if (!invalidatepage)
  		invalidatepage = block_invalidatepage;
9361401eb   David Howells   [PATCH] BLOCK: Ma...
40
  #endif
cf9a2ae8d   David Howells   [PATCH] BLOCK: Mo...
41
42
43
  	/* Without CONFIG_BLOCK and without a hook, nothing is called. */
  	if (invalidatepage)
  		(*invalidatepage)(page, offset);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
  /*
   * Handle the page that straddles the truncation point: clear its contents
   * from byte offset @partial up to the end of the page, then, only when the
   * page has PagePrivate set, call do_invalidatepage() with @partial as the
   * offset.
   */
  static inline void truncate_partial_page(struct page *page, unsigned partial)
  {
  	/* Clear everything from @partial to the end of the page. */
  	memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial);
  
  	/* No private data attached: nothing further to invalidate. */
  	if (!PagePrivate(page))
  		return;
  
  	do_invalidatepage(page, partial);
  }
  
  /*
   * If truncate cannot remove the fs-private metadata from the page, the page
   * becomes anonymous.  It will be left on the LRU and may even be mapped into
   * user pagetables if we're racing with filemap_nopage().
   *
   * We need to bale out if page->mapping is no longer equal to the original
   * mapping.  This happens a) when the VM reclaimed the page while we waited on
   * its lock, b) when a concurrent invalidate_inode_pages got there first and
   * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
   */
  static void
  truncate_complete_page(struct address_space *mapping, struct page *page)
  {
  	/*
  	 * Only act while the page still belongs to @mapping; if it has been
  	 * moved or removed in the meantime, do nothing at all.
  	 */
  	if (page->mapping == mapping) {
  		/* Invalidate the whole page (offset 0) if it has private data. */
  		if (PagePrivate(page))
  			do_invalidatepage(page, 0);
  
  		/* Clear the page state flags before removing it from the cache. */
  		clear_page_dirty(page);
  		ClearPageUptodate(page);
  		ClearPageMappedToDisk(page);
  
  		remove_from_page_cache(page);
  		page_cache_release(page);	/* pagecache ref */
  	}
  }
  
  /*
   * This is for invalidate_inode_pages().  That function can be called at
   * any time, and is not supposed to throw away dirty pages.  But pages can
0fd0e6b05   Nick Piggin   [PATCH] page inva...
80
81
   * be marked dirty at any time too, so use remove_mapping which safely
   * discards clean, unused pages.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
82
83
84
85
86
87
   *
   * Returns non-zero if the page was successfully invalidated.
   */
  static int
  invalidate_complete_page(struct address_space *mapping, struct page *page)
  {
0fd0e6b05   Nick Piggin   [PATCH] page inva...
88
  	int ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
89
90
91
92
93
  	/* The page no longer belongs to this mapping: report failure. */
  	if (page->mapping != mapping)
  		return 0;
  
  	/* Private data that cannot be released blocks the invalidation. */
  	if (PagePrivate(page) && !try_to_release_page(page, 0))
  		return 0;
0fd0e6b05   Nick Piggin   [PATCH] page inva...
94
  	/* The result of remove_mapping() is passed back to the caller as is. */
  	ret = remove_mapping(mapping, page);
0fd0e6b05   Nick Piggin   [PATCH] page inva...
95
96
  
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
97
98
99
  }
  
  /**
d7339071f   Hans Reiser   [PATCH] reiser4: ...
100
101
   * truncate_inode_pages_range - truncate range of pages specified by start
   * and end byte offsets
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
102
103
   * @mapping: mapping to truncate
   * @lstart: offset from which to truncate
d7339071f   Hans Reiser   [PATCH] reiser4: ...
104
   * @lend: offset to which to truncate
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
105
   *
d7339071f   Hans Reiser   [PATCH] reiser4: ...
106
107
108
   * Truncate the page cache, removing the pages that are between the
   * specified offsets (and zeroing out the partial page if lstart is not
   * page aligned).
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
109
110
111
112
113
114
115
116
117
118
119
120
121
   *
   * Truncate takes two passes - the first pass is nonblocking.  It will not
   * block on page locks and it will not block on writeback.  The second pass
   * will wait.  This is to prevent as much IO as possible in the affected region.
   * The first pass will remove most pages, so the search cost of the second pass
   * is low.
   *
   * When looking at page->index outside the page lock we need to be careful to
   * copy it into a local to avoid races (it could change at any time).
   *
   * We pass down the cache-hot hint to the page freeing code.  Even if the
   * mapping is large, it is probably the case that the final pages are the most
   * recently touched, and freeing happens in ascending file offset order.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
122
   */
d7339071f   Hans Reiser   [PATCH] reiser4: ...
123
124
  void truncate_inode_pages_range(struct address_space *mapping,
  				loff_t lstart, loff_t lend)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
125
126
  {
  	/* Index of the first page lying wholly at or beyond lstart (rounded up). */
  	const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
d7339071f   Hans Reiser   [PATCH] reiser4: ...
127
  	pgoff_t end;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
128
129
130
131
132
133
134
  	/* Byte offset of lstart within its page; non-zero means a partial page. */
  	const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
  	struct pagevec pvec;
  	pgoff_t next;
  	int i;
  
  	/* Nothing cached in this mapping: nothing to truncate. */
  	if (mapping->nrpages == 0)
  		return;
d7339071f   Hans Reiser   [PATCH] reiser4: ...
135
136
  	/* lend must be the last byte of a page, so the range ends on a page boundary. */
  	BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1));
  	end = (lend >> PAGE_CACHE_SHIFT);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
137
138
  	/*
  	 * First pass: walk the pages from start to end without blocking.
  	 * Pages that are already locked or under writeback are skipped here
  	 * and left for the second pass.
  	 */
  	pagevec_init(&pvec, 0);
  	next = start;
d7339071f   Hans Reiser   [PATCH] reiser4: ...
139
140
  	while (next <= end &&
  	       pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
141
142
143
  		for (i = 0; i < pagevec_count(&pvec); i++) {
  			struct page *page = pvec.pages[i];
  			/* Copy the index into a local before the page is locked. */
  			pgoff_t page_index = page->index;
d7339071f   Hans Reiser   [PATCH] reiser4: ...
144
145
146
147
  			/* Past the end of the range: stop this batch and the outer loop. */
  			if (page_index > end) {
  				next = page_index;
  				break;
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
  			/* Advance the lookup cursor to just past this page. */
  			if (page_index > next)
  				next = page_index;
  			next++;
  			/* Non-zero means the lock was not obtained; do not wait. */
  			if (TestSetPageLocked(page))
  				continue;
  			/* Do not wait for writeback in this pass either. */
  			if (PageWriteback(page)) {
  				unlock_page(page);
  				continue;
  			}
  			truncate_complete_page(mapping, page);
  			unlock_page(page);
  		}
  		pagevec_release(&pvec);
  		cond_resched();
  	}
  
  	/*
  	 * lstart is not page aligned, so page (start - 1) contains lstart.
  	 * Wait for writeback on that page and zero its tail from the
  	 * partial offset onwards.
  	 */
  	if (partial) {
  		struct page *page = find_lock_page(mapping, start - 1);
  		if (page) {
  			wait_on_page_writeback(page);
  			truncate_partial_page(page, partial);
  			unlock_page(page);
  			page_cache_release(page);
  		}
  	}
  
  	/*
  	 * Second pass: this one blocks on page locks and on writeback.
  	 * When a lookup returns nothing, the scan restarts from start; it
  	 * ends once a lookup that began at start returns nothing, or the
  	 * first page returned lies beyond end.
  	 */
  	next = start;
  	for ( ; ; ) {
  		cond_resched();
  		if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
  			if (next == start)
  				break;
  			next = start;
  			continue;
  		}
d7339071f   Hans Reiser   [PATCH] reiser4: ...
183
184
185
186
  		/* The first page found is already past the range: finished. */
  		if (pvec.pages[0]->index > end) {
  			pagevec_release(&pvec);
  			break;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
187
188
  		for (i = 0; i < pagevec_count(&pvec); i++) {
  			struct page *page = pvec.pages[i];
d7339071f   Hans Reiser   [PATCH] reiser4: ...
189
190
  			/* Stop processing this batch at the first page past the range. */
  			if (page->index > end)
  				break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
191
192
193
194
195
196
197
198
199
200
201
  			lock_page(page);
  			wait_on_page_writeback(page);
  			/* Advance the lookup cursor to just past this page. */
  			if (page->index > next)
  				next = page->index;
  			next++;
  			truncate_complete_page(mapping, page);
  			unlock_page(page);
  		}
  		pagevec_release(&pvec);
  	}
  }
d7339071f   Hans Reiser   [PATCH] reiser4: ...
202
  EXPORT_SYMBOL(truncate_inode_pages_range);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
203

d7339071f   Hans Reiser   [PATCH] reiser4: ...
204
205
206
207
208
  /**
   * truncate_inode_pages - truncate *all* the pages from an offset
   * @mapping: mapping to truncate
   * @lstart: offset from which to truncate
   *
1b1dcc1b5   Jes Sorensen   [PATCH] mutex sub...
209
   * Called under (and serialised by) inode->i_mutex.
d7339071f   Hans Reiser   [PATCH] reiser4: ...
210
211
212
213
214
   */
  void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
  {
  	/* (loff_t)-1 is the end offset handed to the range variant. */
  	const loff_t range_end = (loff_t)-1;
  
  	truncate_inode_pages_range(mapping, lstart, range_end);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
  EXPORT_SYMBOL(truncate_inode_pages);
  
  /**
   * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode
   * @mapping: the address_space which holds the pages to invalidate
   * @start: the offset 'from' which to invalidate
   * @end: the offset 'to' which to invalidate (inclusive)
   *
   * This function only removes the unlocked pages, if you want to
   * remove all the pages of one inode, you must call truncate_inode_pages.
   *
   * invalidate_mapping_pages() will not block on IO activity. It will not
   * invalidate pages which are dirty, locked, under writeback or mapped into
   * pagetables.
   */
  unsigned long invalidate_mapping_pages(struct address_space *mapping,
  				pgoff_t start, pgoff_t end)
  {
  	struct pagevec pvec;
  	/* Lookup cursor: index from which the next batch is fetched. */
  	pgoff_t next = start;
  	/* Running sum of the values returned by invalidate_complete_page(). */
  	unsigned long ret = 0;
  	int i;
  
  	pagevec_init(&pvec, 0);
  	while (next <= end &&
  			pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
  		for (i = 0; i < pagevec_count(&pvec); i++) {
  			struct page *page = pvec.pages[i];
e0f23603f   NeilBrown   [PATCH] Remove se...
243
244
  			pgoff_t index;
  			int lock_failed;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
245

e0f23603f   NeilBrown   [PATCH] Remove se...
246
247
248
249
250
251
252
253
254
255
256
  			/* Try to take the page lock without blocking. */
  			lock_failed = TestSetPageLocked(page);
  
  			/*
  			 * We really shouldn't be looking at the ->index of an
  			 * unlocked page.  But we're not allowed to lock these
  			 * pages.  So we rely upon nobody altering the ->index
  			 * of this (pinned-by-us) page.
  			 */
  			index = page->index;
  			if (index > next)
  				next = index;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
257
  			next++;
e0f23603f   NeilBrown   [PATCH] Remove se...
258
259
  			/* The cursor was advanced above, so a locked page is simply skipped. */
  			if (lock_failed)
  				continue;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
260
261
262
263
264
265
266
267
268
269
270
  			/* Dirty pages and pages under writeback are left in place. */
  			if (PageDirty(page) || PageWriteback(page))
  				goto unlock;
  			/* So are pages that are mapped. */
  			if (page_mapped(page))
  				goto unlock;
  			ret += invalidate_complete_page(mapping, page);
  unlock:
  			unlock_page(page);
  			/* Cursor has moved past the end of the range: stop this batch. */
  			if (next > end)
  				break;
  		}
  		pagevec_release(&pvec);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
271
272
273
274
275
276
277
278
  	}
  	return ret;
  }
  
  unsigned long invalidate_inode_pages(struct address_space *mapping)
  {
  	return invalidate_mapping_pages(mapping, 0, ~0UL);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
279
  EXPORT_SYMBOL(invalidate_inode_pages);
bd4c8ce41   Andrew Morton   [PATCH] invalidat...
280
281
282
283
284
285
286
287
288
289
290
291
  /*
   * This is like invalidate_complete_page(), except it ignores the page's
   * refcount.  We do this because invalidate_inode_pages2() needs stronger
   * invalidation guarantees, and cannot afford to leave pages behind because
   * shrink_list() has a temp ref on them, or because they're transiently sitting
   * in the lru_cache_add() pagevecs.
   */
  static int
  invalidate_complete_page2(struct address_space *mapping, struct page *page)
  {
  	/* The page no longer belongs to this mapping: report failure. */
  	if (page->mapping != mapping)
  		return 0;
887ed2f3a   Trond Myklebust   [PATCH] VM: Fix t...
292
  	/* Private data that cannot be released blocks the invalidation. */
  	if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL))
bd4c8ce41   Andrew Morton   [PATCH] invalidat...
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
  		return 0;
  
  	/* The dirty check and the removal both happen under tree_lock. */
  	write_lock_irq(&mapping->tree_lock);
  	if (PageDirty(page))
  		goto failed;
  
  	/* Private data must be gone by the time the page is removed. */
  	BUG_ON(PagePrivate(page));
  	__remove_from_page_cache(page);
  	write_unlock_irq(&mapping->tree_lock);
  	ClearPageUptodate(page);
  	page_cache_release(page);	/* pagecache ref */
  	/* 1: the page was removed from the page cache. */
  	return 1;
  failed:
  	/* 0: the page was dirty and has been left in the page cache. */
  	write_unlock_irq(&mapping->tree_lock);
  	return 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
309
310
  /**
   * invalidate_inode_pages2_range - remove range of pages from an address_space
67be2dd1b   Martin Waitz   [PATCH] DocBook: ...
311
   * @mapping: the address_space
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
   * @start: the page offset 'from' which to invalidate
   * @end: the page offset 'to' which to invalidate (inclusive)
   *
   * Any pages which are found to be mapped into pagetables are unmapped prior to
   * invalidation.
   *
   * Returns -EIO if any pages could not be invalidated.
   */
  int invalidate_inode_pages2_range(struct address_space *mapping,
  				  pgoff_t start, pgoff_t end)
  {
  	struct pagevec pvec;
  	/* Lookup cursor: index from which the next batch is fetched. */
  	pgoff_t next;
  	int i;
  	/* Becomes -EIO as soon as one page cannot be invalidated. */
  	int ret = 0;
  	/* Set once the whole remaining range has been unmapped in one call. */
  	int did_range_unmap = 0;
  	/* Set when next overflows to 0 after the highest possible index. */
  	int wrapped = 0;
  
  	pagevec_init(&pvec, 0);
  	next = start;
  	/*
  	 * Each lookup asks for at most PAGEVEC_SIZE pages and for no more
  	 * than (end - next + 1).  The loop stops on the first failure, on
  	 * index wrap-around, or when a lookup returns nothing.
  	 */
  	while (next <= end && !ret && !wrapped &&
  		pagevec_lookup(&pvec, mapping, next,
  			min(end - next, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
  		for (i = 0; !ret && i < pagevec_count(&pvec); i++) {
  			struct page *page = pvec.pages[i];
  			pgoff_t page_index;
  			/* Dirty state saved so it can be restored on failure. */
  			int was_dirty;
  
  			lock_page(page);
  			/* Skip pages that no longer belong to this mapping. */
  			if (page->mapping != mapping) {
  				unlock_page(page);
  				continue;
  			}
  			page_index = page->index;
  			next = page_index + 1;
  			if (next == 0)
  				wrapped = 1;
  			/* Past the end of the range: stop processing this batch. */
  			if (page_index > end) {
  				unlock_page(page);
  				break;
  			}
  			wait_on_page_writeback(page);
  			/* Repeat the unmap for as long as the page is still mapped. */
  			while (page_mapped(page)) {
  				if (!did_range_unmap) {
  					/*
  					 * Zap the rest of the file in one hit.
  					 */
  					unmap_mapping_range(mapping,
479ef592f   Oleg Drokin   [PATCH] 32bit int...
360
361
  					   (loff_t)page_index<<PAGE_CACHE_SHIFT,
  					   (loff_t)(end - page_index + 1)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
362
363
364
365
366
367
368
369
  							<< PAGE_CACHE_SHIFT,
  					    0);
  					did_range_unmap = 1;
  				} else {
  					/*
  					 * Just zap this page
  					 */
  					unmap_mapping_range(mapping,
479ef592f   Oleg Drokin   [PATCH] 32bit int...
370
  					  (loff_t)page_index<<PAGE_CACHE_SHIFT,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
371
372
373
374
  					  PAGE_CACHE_SIZE, 0);
  				}
  			}
  			/* Clear the dirty flag first, remembering whether it was set. */
  			was_dirty = test_clear_page_dirty(page);
bd4c8ce41   Andrew Morton   [PATCH] invalidat...
375
  			if (!invalidate_complete_page2(mapping, page)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
376
377
378
379
380
381
382
383
384
  				/* Invalidation failed: put the dirty state back. */
  				if (was_dirty)
  					set_page_dirty(page);
  				ret = -EIO;
  			}
  			unlock_page(page);
  		}
  		pagevec_release(&pvec);
  		cond_resched();
  	}
8258d4a57   Andrew Morton   [PATCH] invalidat...
385
  	/* Warn (at most once) when any page could not be invalidated. */
  	WARN_ON_ONCE(ret);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
386
387
388
389
390
391
  	return ret;
  }
  EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range);
  
  /**
   * invalidate_inode_pages2 - remove all pages from an address_space
67be2dd1b   Martin Waitz   [PATCH] DocBook: ...
392
   * @mapping: the address_space
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
393
394
395
396
397
398
399
400
401
402
403
   *
   * Any pages which are found to be mapped into pagetables are unmapped prior to
   * invalidation.
   *
   * Returns -EIO if any pages could not be invalidated.
   */
  int invalidate_inode_pages2(struct address_space *mapping)
  {
  	return invalidate_inode_pages2_range(mapping, 0, -1);
  }
  EXPORT_SYMBOL_GPL(invalidate_inode_pages2);