Commit 044d66c1d2b1c5aa50b4d6d68c21c6c93dd678da
Committed by
Linus Torvalds
1 parent
3062fc67da
Exists in
master
and in
20 other branches
memcgroup: reinstate swapoff mod
This patch reinstates the "swapoff: scan ptes preemptibly" mod we started with: in due course it should be rendered down into the earlier patches, leaving us with a more straightforward mem_cgroup_charge mod to unuse_pte, allocating with GFP_KERNEL while holding no spinlock and no atomic kmap. Signed-off-by: Hugh Dickins <hugh@veritas.com> Cc: Pavel Emelianov <xemul@openvz.org> Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com> Cc: Paul Menage <menage@google.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Nick Piggin <nickpiggin@yahoo.com.au> Cc: Kirill Korotaev <dev@sw.ru> Cc: Herbert Poetzl <herbert@13thfloor.at> Cc: David Rientjes <rientjes@google.com> Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 1 changed file with 34 additions and 8 deletions. Side-by-side diff:
mm/swapfile.c
... | ... | @@ -507,12 +507,24 @@ |
507 | 507 | * just let do_wp_page work it out if a write is requested later - to |
508 | 508 | * force COW, vm_page_prot omits write permission from any private vma. |
509 | 509 | */ |
510 | -static int unuse_pte(struct vm_area_struct *vma, pte_t *pte, | |
510 | +static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, | |
511 | 511 | unsigned long addr, swp_entry_t entry, struct page *page) |
512 | 512 | { |
513 | + spinlock_t *ptl; | |
514 | + pte_t *pte; | |
515 | + int ret = 1; | |
516 | + | |
513 | 517 | if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL)) |
514 | - return -ENOMEM; | |
518 | + ret = -ENOMEM; | |
515 | 519 | |
520 | + pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); | |
521 | + if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) { | |
522 | + if (ret > 0) | |
523 | + mem_cgroup_uncharge_page(page); | |
524 | + ret = 0; | |
525 | + goto out; | |
526 | + } | |
527 | + | |
516 | 528 | inc_mm_counter(vma->vm_mm, anon_rss); |
517 | 529 | get_page(page); |
518 | 530 | set_pte_at(vma->vm_mm, addr, pte, |
... | ... | @@ -524,7 +536,9 @@ |
524 | 536 | * immediately swapped out again after swapon. |
525 | 537 | */ |
526 | 538 | activate_page(page); |
527 | - return 1; | |
539 | +out: | |
540 | + pte_unmap_unlock(pte, ptl); | |
541 | + return ret; | |
528 | 542 | } |
529 | 543 | |
530 | 544 | static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, |
531 | 545 | |
532 | 546 | |
533 | 547 | |
... | ... | @@ -533,21 +547,33 @@ |
533 | 547 | { |
534 | 548 | pte_t swp_pte = swp_entry_to_pte(entry); |
535 | 549 | pte_t *pte; |
536 | - spinlock_t *ptl; | |
537 | 550 | int ret = 0; |
538 | 551 | |
539 | - pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); | |
552 | + /* | |
553 | + * We don't actually need pte lock while scanning for swp_pte: since | |
554 | + * we hold page lock and mmap_sem, swp_pte cannot be inserted into the | |
555 | + * page table while we're scanning; though it could get zapped, and on | |
556 | + * some architectures (e.g. x86_32 with PAE) we might catch a glimpse | |
557 | + * of unmatched parts which look like swp_pte, so unuse_pte must | |
558 | + * recheck under pte lock. Scanning without pte lock lets it be | |
559 | + * preemptible whenever CONFIG_PREEMPT but not CONFIG_HIGHPTE. | |
560 | + */ | |
561 | + pte = pte_offset_map(pmd, addr); | |
540 | 562 | do { |
541 | 563 | /* |
542 | 564 | * swapoff spends a _lot_ of time in this loop! |
543 | 565 | * Test inline before going to call unuse_pte. |
544 | 566 | */ |
545 | 567 | if (unlikely(pte_same(*pte, swp_pte))) { |
546 | - ret = unuse_pte(vma, pte++, addr, entry, page); | |
547 | - break; | |
568 | + pte_unmap(pte); | |
569 | + ret = unuse_pte(vma, pmd, addr, entry, page); | |
570 | + if (ret) | |
571 | + goto out; | |
572 | + pte = pte_offset_map(pmd, addr); | |
548 | 573 | } |
549 | 574 | } while (pte++, addr += PAGE_SIZE, addr != end); |
550 | - pte_unmap_unlock(pte - 1, ptl); | |
575 | + pte_unmap(pte - 1); | |
576 | +out: | |
551 | 577 | return ret; |
552 | 578 | } |
553 | 579 |