mm/truncate.c

/*
 * mm/truncate.c - code for taking down pages from address_spaces
 *
 * Copyright (C) 2002, Linus Torvalds
 *
 * 10Sep2002	Andrew Morton
 *		Initial version.
 */

#include <linux/kernel.h>
#include <linux/backing-dev.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/pagevec.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/buffer_head.h>	/* grr. try_to_release_page,
				   do_invalidatepage */
#include "internal.h"

/**
 * do_invalidatepage - invalidate part or all of a page
 * @page: the page which is affected
 * @offset: the index of the truncation point
 *
 * do_invalidatepage() is called when all or part of the page has become
 * invalidated by a truncate operation.
 *
 * do_invalidatepage() does not have to release all buffers, but it must
 * ensure that no dirty buffer is left outside @offset and that no I/O
 * is underway against any of the blocks which are outside the truncation
 * point, because the caller is about to free (and possibly reuse) those
 * blocks on-disk.
 */
void do_invalidatepage(struct page *page, unsigned long offset)
{
	void (*invalidatepage)(struct page *, unsigned long);

	invalidatepage = page->mapping->a_ops->invalidatepage;
#ifdef CONFIG_BLOCK
	if (!invalidatepage)
		invalidatepage = block_invalidatepage;
#endif
	if (invalidatepage)
		(*invalidatepage)(page, offset);
}
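
/*
 * Illustrative sketch (not from this file): a buffer-backed filesystem
 * hooks into do_invalidatepage() via its address_space_operations, e.g.
 *
 *	static const struct address_space_operations ext3_ordered_aops = {
 *		...
 *		.invalidatepage	= ext3_invalidatepage,
 *	};
 *
 * A filesystem that sets no ->invalidatepage falls back to
 * block_invalidatepage() when CONFIG_BLOCK is enabled.
 */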

static inline void truncate_partial_page(struct page *page, unsigned partial)
{
	zero_user_segment(page, partial, PAGE_CACHE_SIZE);
	if (PagePrivate(page))
		do_invalidatepage(page, partial);
}
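
/*
 * Worked example (illustrative): with 4K pages, truncating a file to
 * lstart == 5000 leaves one partial page.  partial = 5000 & (4096 - 1)
 * = 904, so bytes 904..4095 of that page are zeroed and, if the page
 * has private (buffer) state, buffers past offset 904 are invalidated.
 */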

/*
 * This cancels just the dirty bit on the kernel page itself, it
 * does NOT actually remove dirty bits on any mmap's that may be
 * around. It also leaves the page tagged dirty, so any sync
 * activity will still find it on the dirty lists, and in particular,
 * clear_page_dirty_for_io() will still look at the dirty bits in
 * the VM.
 *
 * Doing this should *normally* only ever be done when a page
 * is truncated, and is not actually mapped anywhere at all. However,
 * fs/buffer.c does this when it notices that somebody has cleaned
 * out all the buffers on a page without actually doing it through
 * the VM. Can you say "ext3 is horribly ugly"? Thought you could.
 */
void cancel_dirty_page(struct page *page, unsigned int account_size)
{
	if (TestClearPageDirty(page)) {
		struct address_space *mapping = page->mapping;
		if (mapping && mapping_cap_account_dirty(mapping)) {
			dec_zone_page_state(page, NR_FILE_DIRTY);
			dec_bdi_stat(mapping->backing_dev_info,
					BDI_RECLAIMABLE);
			if (account_size)
				task_io_account_cancelled_write(account_size);
		}
	}
}
EXPORT_SYMBOL(cancel_dirty_page);
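
/*
 * For example, truncate_complete_page() below passes PAGE_CACHE_SIZE so
 * that a full dirty page being truncated is credited back to the task
 * via task_io_account_cancelled_write().
 */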

/*
 * If truncate cannot remove the fs-private metadata from the page, the page
 * becomes orphaned.  It will be left on the LRU and may even be mapped into
 * user pagetables if we're racing with filemap_fault().
 *
 * We need to bail out if page->mapping is no longer equal to the original
 * mapping.  This happens a) when the VM reclaimed the page while we waited on
 * its lock, b) when a concurrent invalidate_mapping_pages got there first and
 * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
 */
static void
truncate_complete_page(struct address_space *mapping, struct page *page)
{
	if (page->mapping != mapping)
		return;

	if (PagePrivate(page))
		do_invalidatepage(page, 0);

	cancel_dirty_page(page, PAGE_CACHE_SIZE);

	clear_page_mlock(page);
	remove_from_page_cache(page);
	ClearPageMappedToDisk(page);
	page_cache_release(page);	/* pagecache ref */
}

/*
 * This is for invalidate_mapping_pages().  That function can be called at
 * any time, and is not supposed to throw away dirty pages.  But pages can
 * be marked dirty at any time too, so use remove_mapping which safely
 * discards clean, unused pages.
 *
 * Returns non-zero if the page was successfully invalidated.
 */
static int
invalidate_complete_page(struct address_space *mapping, struct page *page)
{
	int ret;

	if (page->mapping != mapping)
		return 0;

	if (PagePrivate(page) && !try_to_release_page(page, 0))
		return 0;

	clear_page_mlock(page);
	ret = remove_mapping(mapping, page);

	return ret;
}

/**
 * truncate_inode_pages_range - truncate range of pages specified by start & end byte offsets
 * @mapping: mapping to truncate
 * @lstart: offset from which to truncate
 * @lend: offset to which to truncate
 *
 * Truncate the page cache, removing the pages that are between the
 * specified offsets (and zeroing out the partial page if lstart is not
 * page aligned).
 *
 * Truncate takes two passes - the first pass is nonblocking.  It will not
 * block on page locks and it will not block on writeback.  The second pass
 * will wait.  This is to prevent as much IO as possible in the affected region.
 * The first pass will remove most pages, so the search cost of the second pass
 * is low.
 *
 * When looking at page->index outside the page lock we need to be careful to
 * copy it into a local to avoid races (it could change at any time).
 *
 * We pass down the cache-hot hint to the page freeing code.  Even if the
 * mapping is large, it is probably the case that the final pages are the most
 * recently touched, and freeing happens in ascending file offset order.
 */
void truncate_inode_pages_range(struct address_space *mapping,
				loff_t lstart, loff_t lend)
{
	const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
	pgoff_t end;
	const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
	struct pagevec pvec;
	pgoff_t next;
	int i;

	if (mapping->nrpages == 0)
		return;

	BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1));
	end = (lend >> PAGE_CACHE_SHIFT);

	pagevec_init(&pvec, 0);
	next = start;
	while (next <= end &&
	       pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];
			pgoff_t page_index = page->index;

			if (page_index > end) {
				next = page_index;
				break;
			}

			if (page_index > next)
				next = page_index;
			next++;
			if (!trylock_page(page))
				continue;
			if (PageWriteback(page)) {
				unlock_page(page);
				continue;
			}
			if (page_mapped(page)) {
				unmap_mapping_range(mapping,
				  (loff_t)page_index<<PAGE_CACHE_SHIFT,
				  PAGE_CACHE_SIZE, 0);
			}
			truncate_complete_page(mapping, page);
			unlock_page(page);
		}
		pagevec_release(&pvec);
		cond_resched();
	}

	if (partial) {
		struct page *page = find_lock_page(mapping, start - 1);
		if (page) {
			wait_on_page_writeback(page);
			truncate_partial_page(page, partial);
			unlock_page(page);
			page_cache_release(page);
		}
	}

	next = start;
	for ( ; ; ) {
		cond_resched();
		if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
			if (next == start)
				break;
			next = start;
			continue;
		}
		if (pvec.pages[0]->index > end) {
			pagevec_release(&pvec);
			break;
		}
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			if (page->index > end)
				break;
			lock_page(page);
			wait_on_page_writeback(page);
			if (page_mapped(page)) {
				unmap_mapping_range(mapping,
				  (loff_t)page->index<<PAGE_CACHE_SHIFT,
				  PAGE_CACHE_SIZE, 0);
			}
			if (page->index > next)
				next = page->index;
			next++;
			truncate_complete_page(mapping, page);
			unlock_page(page);
		}
		pagevec_release(&pvec);
	}
}
EXPORT_SYMBOL(truncate_inode_pages_range);
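
/*
 * Note on the interface: the BUG_ON above requires lend to address the
 * last byte of a page (lend & (PAGE_CACHE_SIZE - 1) must equal
 * PAGE_CACHE_SIZE - 1), which is why truncate_inode_pages() below passes
 * (loff_t)-1 to mean "truncate to the end of the file".
 */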

/**
 * truncate_inode_pages - truncate *all* the pages from an offset
 * @mapping: mapping to truncate
 * @lstart: offset from which to truncate
 *
 * Called under (and serialised by) inode->i_mutex.
 */
void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
{
	truncate_inode_pages_range(mapping, lstart, (loff_t)-1);
}
EXPORT_SYMBOL(truncate_inode_pages);
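
/*
 * Illustrative usage (a sketch, not from this file): a filesystem
 * shrinking an inode typically updates i_size and then, with
 * inode->i_mutex held, calls something like
 *
 *	truncate_inode_pages(inode->i_mapping, inode->i_size);
 *
 * to drop the pagecache beyond the new end of file.
 */

/*
 * Worker shared by the invalidate_mapping_pages() variants: walk the
 * range and drop any clean, unlocked, unmapped, non-writeback pages.
 * When @be_atomic is true the caller cannot sleep, so cond_resched()
 * is skipped.
 */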
unsigned long __invalidate_mapping_pages(struct address_space *mapping,
				pgoff_t start, pgoff_t end, bool be_atomic)
{
	struct pagevec pvec;
	pgoff_t next = start;
	unsigned long ret = 0;
	int i;

	pagevec_init(&pvec, 0);
	while (next <= end &&
			pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];
			pgoff_t index;
			int lock_failed;

			lock_failed = !trylock_page(page);

			/*
			 * We really shouldn't be looking at the ->index of an
			 * unlocked page.  But we're not allowed to lock these
			 * pages.  So we rely upon nobody altering the ->index
			 * of this (pinned-by-us) page.
			 */
			index = page->index;
			if (index > next)
				next = index;
			next++;
			if (lock_failed)
				continue;

			if (PageDirty(page) || PageWriteback(page))
				goto unlock;
			if (page_mapped(page))
				goto unlock;
			ret += invalidate_complete_page(mapping, page);
unlock:
			unlock_page(page);
			if (next > end)
				break;
		}
		pagevec_release(&pvec);
		if (likely(!be_atomic))
			cond_resched();
	}
	return ret;
}

/**
 * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode
 * @mapping: the address_space which holds the pages to invalidate
 * @start: the offset 'from' which to invalidate
 * @end: the offset 'to' which to invalidate (inclusive)
 *
 * This function only removes the unlocked pages; if you want to
 * remove all the pages of one inode, you must call truncate_inode_pages.
 *
 * invalidate_mapping_pages() will not block on IO activity. It will not
 * invalidate pages which are dirty, locked, under writeback or mapped into
 * pagetables.
 */
unsigned long invalidate_mapping_pages(struct address_space *mapping,
				pgoff_t start, pgoff_t end)
{
	return __invalidate_mapping_pages(mapping, start, end, false);
}
EXPORT_SYMBOL(invalidate_mapping_pages);
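
/*
 * Illustrative usage (a sketch, not from this file): this is the kind of
 * call a POSIX_FADV_DONTNEED fadvise handler makes to drop clean cached
 * pages over a page range, e.g.
 *
 *	invalidate_mapping_pages(mapping, start_index, end_index);
 *
 * Dirty, locked, writeback or mapped pages in the range simply survive.
 */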

/*
 * This is like invalidate_complete_page(), except it ignores the page's
 * refcount.  We do this because invalidate_inode_pages2() needs stronger
 * invalidation guarantees, and cannot afford to leave pages behind because
 * shrink_page_list() has a temp ref on them, or because they're transiently
 * sitting in the lru_cache_add() pagevecs.
 */
static int
invalidate_complete_page2(struct address_space *mapping, struct page *page)
{
	if (page->mapping != mapping)
		return 0;

	if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL))
		return 0;

	spin_lock_irq(&mapping->tree_lock);
	if (PageDirty(page))
		goto failed;

	clear_page_mlock(page);
	BUG_ON(PagePrivate(page));
	__remove_from_page_cache(page);
	spin_unlock_irq(&mapping->tree_lock);
	page_cache_release(page);	/* pagecache ref */
	return 1;
failed:
	spin_unlock_irq(&mapping->tree_lock);
	return 0;
}
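
/*
 * Give the filesystem a chance to write back a dirty page before
 * invalidate_inode_pages2_range() below tries to invalidate it.
 */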
static int do_launder_page(struct address_space *mapping, struct page *page)
{
	if (!PageDirty(page))
		return 0;
	if (page->mapping != mapping || mapping->a_ops->launder_page == NULL)
		return 0;
	return mapping->a_ops->launder_page(page);
}

/**
 * invalidate_inode_pages2_range - remove range of pages from an address_space
 * @mapping: the address_space
 * @start: the page offset 'from' which to invalidate
 * @end: the page offset 'to' which to invalidate (inclusive)
 *
 * Any pages which are found to be mapped into pagetables are unmapped prior to
 * invalidation.
 *
 * Returns -EBUSY if any pages could not be invalidated.
 */
int invalidate_inode_pages2_range(struct address_space *mapping,
				  pgoff_t start, pgoff_t end)
{
	struct pagevec pvec;
	pgoff_t next;
	int i;
	int ret = 0;
	int ret2 = 0;
	int did_range_unmap = 0;
	int wrapped = 0;

	pagevec_init(&pvec, 0);
	next = start;
	while (next <= end && !wrapped &&
		pagevec_lookup(&pvec, mapping, next,
			min(end - next, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];
			pgoff_t page_index;

			lock_page(page);
			if (page->mapping != mapping) {
				unlock_page(page);
				continue;
			}
			page_index = page->index;
			next = page_index + 1;
			if (next == 0)
				wrapped = 1;
			if (page_index > end) {
				unlock_page(page);
				break;
			}
			wait_on_page_writeback(page);
			if (page_mapped(page)) {
				if (!did_range_unmap) {
					/*
					 * Zap the rest of the file in one hit.
					 */
					unmap_mapping_range(mapping,
					   (loff_t)page_index<<PAGE_CACHE_SHIFT,
					   (loff_t)(end - page_index + 1)
							<< PAGE_CACHE_SHIFT,
					    0);
					did_range_unmap = 1;
				} else {
					/*
					 * Just zap this page
					 */
					unmap_mapping_range(mapping,
					  (loff_t)page_index<<PAGE_CACHE_SHIFT,
					  PAGE_CACHE_SIZE, 0);
				}
			}
			BUG_ON(page_mapped(page));
			ret2 = do_launder_page(mapping, page);
			if (ret2 == 0) {
				if (!invalidate_complete_page2(mapping, page))
					ret2 = -EBUSY;
			}
			if (ret2 < 0)
				ret = ret2;
			unlock_page(page);
		}
		pagevec_release(&pvec);
		cond_resched();
	}
	return ret;
}
EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range);

/**
 * invalidate_inode_pages2 - remove all pages from an address_space
 * @mapping: the address_space
 *
 * Any pages which are found to be mapped into pagetables are unmapped prior to
 * invalidation.
 *
 * Returns -EBUSY if any pages could not be invalidated.
 */
int invalidate_inode_pages2(struct address_space *mapping)
{
	return invalidate_inode_pages2_range(mapping, 0, -1);
}
EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
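
/*
 * Illustrative usage (a sketch, not from this file): direct-IO writes
 * must shoot down any pagecache covering the written range so that later
 * buffered reads do not see stale data, e.g.
 *
 *	err = invalidate_inode_pages2_range(mapping,
 *			pos >> PAGE_CACHE_SHIFT, end_index);
 *
 * where a -EBUSY return tells the caller that some page in the range
 * could not be invalidated.
 */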