Commit c475a8ab625d567eacf5e30ec35d6d8704558062
Committed by: Linus Torvalds
1 parent: d296e9cd02
Exists in: master and 20 other branches
[PATCH] can_share_swap_page: use page_mapcount
Remember that ironic get_user_pages race? When the raised page_count on a
page swapped out led do_wp_page to decide that it had to copy on write, it
substituted a different page into userspace. 2.6.7 onwards have Andrea's
solution, where try_to_unmap_one backs out if it finds page_count raised.

Which works, but is unsatisfying (rmap.c has no other page_count
heuristics), and was found a few months ago to hang an intensive page
migration test. A year ago I was hesitant to engage page_mapcount; now it
seems the right fix.

So remove the page_count hack from try_to_unmap_one; and use activate_page
in unuse_mm when dropping the page lock, to replace its secondary effect of
helping swapoff to make progress in that case.

Simplify can_share_swap_page (now called only on anonymous pages) to check
page_mapcount + page_swapcount == 1: it still needs the page lock to
stabilize their (pessimistic) sum, but does not need swapper_space.tree_lock
for that.

In do_swap_page, move swap_free and unlock_page below page_add_anon_rmap,
to keep the sum on the high side, and correct by the time
can_share_swap_page is called.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Showing 3 changed files with 21 additions and 65 deletions
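Before the diffs, it may help to see the patched test restated with
commentary. This is only a reading aid mirroring the mm/swapfile.c hunk
below, not additional code in the commit:

/*
 * Annotated restatement of the new exclusivity test (the actual patch
 * is in the mm/swapfile.c hunk below).  With the page locked, an
 * anonymous page's data is referenced by:
 *   page_mapcount(page)  - ptes currently mapping the page
 *   page_swapcount(page) - swap entries still holding the data,
 *                          excluding the swap cache's own reference
 * If their (pessimistic) sum is exactly 1, this mapping is the sole
 * user, and do_wp_page may safely write the page in place.
 */
int can_share_swap_page(struct page *page)
{
	int count;

	BUG_ON(!PageLocked(page));
	count = page_mapcount(page);
	/* Skip the swap_info lock if mapcount alone rules sharing out */
	if (count <= 1 && PageSwapCache(page))
		count += page_swapcount(page);
	return count == 1;
}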
mm/memory.c

@@ -1686,10 +1686,6 @@
 	}
 
 	/* The page isn't present yet, go ahead with the fault. */
-
-	swap_free(entry);
-	if (vm_swap_full())
-		remove_exclusive_swap_page(page);
 
 	inc_mm_counter(mm, rss);
 	pte = mk_pte(page, vma->vm_page_prot);
 
@@ -1697,11 +1693,15 @@
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
 		write_access = 0;
 	}
-	unlock_page(page);
 
 	flush_icache_page(vma, page);
 	set_pte_at(mm, address, page_table, pte);
 	page_add_anon_rmap(page, vma, address);
+
+	swap_free(entry);
+	if (vm_swap_full())
+		remove_exclusive_swap_page(page);
+	unlock_page(page);
 
 	if (write_access) {
 		if (do_wp_page(mm, vma, address,
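A note on why this ordering works, in terms of the sum that
can_share_swap_page now inspects. Below is a hypothetical trace with
assumed counts (one pte, one swap reference, nothing else sharing the
page); it annotates the reordered calls above rather than adding code:

page_add_anon_rmap(page, vma, address);	/* mapcount 0->1: sum reads 1 + 1 = 2 */
swap_free(entry);			/* swapcount 1->0: sum settles at 1 + 0 = 1 */
unlock_page(page);			/* can_share_swap_page needs the page
					 * lock, so it can only sample the sum
					 * from here on, when it is correct;
					 * transiently the sum was high, never
					 * low, keeping the test pessimistic */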
mm/rmap.c

@@ -539,27 +539,6 @@
 		goto out_unmap;
 	}
 
-	/*
-	 * Don't pull an anonymous page out from under get_user_pages.
-	 * GUP carefully breaks COW and raises page count (while holding
-	 * page_table_lock, as we have here) to make sure that the page
-	 * cannot be freed. If we unmap that page here, a user write
-	 * access to the virtual address will bring back the page, but
-	 * its raised count will (ironically) be taken to mean it's not
-	 * an exclusive swap page, do_wp_page will replace it by a copy
-	 * page, and the user never get to see the data GUP was holding
-	 * the original page for.
-	 *
-	 * This test is also useful for when swapoff (unuse_process) has
-	 * to drop page lock: its reference to the page stops existing
-	 * ptes from being unmapped, so swapoff can make progress.
-	 */
-	if (PageSwapCache(page) &&
-	    page_count(page) != page_mapcount(page) + 2) {
-		ret = SWAP_FAIL;
-		goto out_unmap;
-	}
-
 	/* Nuke the page table entry. */
 	flush_cache_page(vma, address, page_to_pfn(page));
 	pteval = ptep_clear_flush(vma, address, pte);
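Removing this block does not reopen the get_user_pages race it was
guarding against, because the replacement test never consults page_count.
A worked example with assumed counts:

/*
 * Worked example (assumed counts).  GUP holds the page with page_count
 * raised but page_mapcount untouched.  Suppose try_to_unmap_one now does
 * unmap the pte and the user then writes to the address:
 *
 *   do_swap_page faults the page back:  mapcount 1, swapcount 0
 *   can_share_swap_page:                1 + 0 == 1  ->  exclusive
 *
 * do_wp_page therefore reuses the original page in place: GUP's extra
 * page_count reference is invisible to the new test, so no copy page is
 * substituted, and the user still sees the data GUP was holding the
 * page for.
 */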
mm/swapfile.c

@@ -276,61 +276,37 @@
 }
 
 /*
- * Check if we're the only user of a swap page,
- * when the page is locked.
+ * How many references to page are currently swapped out?
  */
-static int exclusive_swap_page(struct page *page)
+static inline int page_swapcount(struct page *page)
 {
-	int retval = 0;
-	struct swap_info_struct * p;
+	int count = 0;
+	struct swap_info_struct *p;
 	swp_entry_t entry;
 
 	entry.val = page->private;
 	p = swap_info_get(entry);
 	if (p) {
-		/* Is the only swap cache user the cache itself? */
-		if (p->swap_map[swp_offset(entry)] == 1) {
-			/* Recheck the page count with the swapcache lock held.. */
-			write_lock_irq(&swapper_space.tree_lock);
-			if (page_count(page) == 2)
-				retval = 1;
-			write_unlock_irq(&swapper_space.tree_lock);
-		}
+		/* Subtract the 1 for the swap cache itself */
+		count = p->swap_map[swp_offset(entry)] - 1;
 		swap_info_put(p);
 	}
-	return retval;
+	return count;
 }
 
 /*
  * We can use this swap cache entry directly
  * if there are no other references to it.
- *
- * Here "exclusive_swap_page()" does the real
- * work, but we opportunistically check whether
- * we need to get all the locks first..
  */
 int can_share_swap_page(struct page *page)
 {
-	int retval = 0;
+	int count;
 
-	if (!PageLocked(page))
-		BUG();
-	switch (page_count(page)) {
-	case 3:
-		if (!PagePrivate(page))
-			break;
-		/* Fallthrough */
-	case 2:
-		if (!PageSwapCache(page))
-			break;
-		retval = exclusive_swap_page(page);
-		break;
-	case 1:
-		if (PageReserved(page))
-			break;
-		retval = 1;
-	}
-	return retval;
+	BUG_ON(!PageLocked(page));
+	count = page_mapcount(page);
+	if (count <= 1 && PageSwapCache(page))
+		count += page_swapcount(page);
+	return count == 1;
 }
 
 /*

@@ -529,9 +505,10 @@
 
 	if (!down_read_trylock(&mm->mmap_sem)) {
 		/*
-		 * Our reference to the page stops try_to_unmap_one from
-		 * unmapping its ptes, so swapoff can make progress.
+		 * Activate page so shrink_cache is unlikely to unmap its
+		 * ptes while lock is dropped, so swapoff can make progress.
 		 */
+		activate_page(page);
 		unlock_page(page);
 		down_read(&mm->mmap_sem);
 		lock_page(page);
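The last hunk covers the deleted heuristic's secondary role in swapoff. A
simplified sketch of unuse_mm's backoff as patched, with the reasoning
spelled out in comments (nothing here beyond the hunk above):

if (!down_read_trylock(&mm->mmap_sem)) {
	/*
	 * mmap_sem ranks before the page lock (the fault path takes it
	 * first), so on contention the page lock must be dropped and
	 * both retaken in order.  The removed page_count test in
	 * try_to_unmap_one used to keep the page's ptes mapped during
	 * this unlocked window; activate_page now fills that role by
	 * making shrink_cache unlikely to pick an active page, so
	 * swapoff can still make progress.
	 */
	activate_page(page);
	unlock_page(page);
	down_read(&mm->mmap_sem);
	lock_page(page);
}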