Commit 4969c1192d15afa3389e7ae3302096ff684ba655

Authored by Andrea Arcangeli
Committed by Linus Torvalds
1 parent 7c5367f205

mm: fix swapin race condition

The pte_same check is reliable only if the swap entry remains pinned (by
the page lock on the swapcache page).  We also have to ensure the swapcache
wasn't removed from under us before we took that lock, because
try_to_free_swap won't care about the page pin.
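
In the swap-in fault path (do_swap_page() in mm/memory.c, the third file in
the diff below) the resulting ordering looks roughly like this condensed
sketch; error paths, memcg charging and the actual pte/rmap setup are elided,
and the locals (page_table, ptl, entry, orig_pte) are the ones do_swap_page()
already has:

	lock_page(page);	/* the swapcache page lock pins the swap entry */

	/*
	 * try_to_free_swap() on another CPU may already have removed the
	 * page from the swapcache; the page reference alone does not
	 * prevent that, so bail out if it happened.
	 */
	if (unlikely(!PageSwapCache(page)))
		goto out_page;

	/* ... charge memcg, then take the page table lock ... */
	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
	if (unlikely(!pte_same(*page_table, orig_pte)))
		goto out_nomap;		/* reliable: the entry stayed pinned */

	/* ... install the pte and rmap the page ... */
	swap_free(entry);
	unlock_page(page);	/* only now may the swap entry be reused */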

One of the possible impacts of this race is that a KSM-shared page can end
up pointing to the anon_vma of another process, which could exit before the
page is freed.

This can leave a page with a pointer to a recycled anon_vma object, or
worse, a pointer to something that is no longer an anon_vma.
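
The KSM half of the fix (see the mm/memory.c hunks below) therefore copies a
shared KSM page into one that can be rmapped to this process's own anon_vma,
while keeping the original swapcache page locked until after swap_free() and
the pte_same() check; condensed, the caller-side pattern is:

	if (ksm_might_need_to_copy(page, vma, address)) {
		swapcache = page;	/* keep the KSM swapcache page pinned */
		page = ksm_does_need_to_copy(page, vma, address);
		if (unlikely(!page)) {
			ret = VM_FAULT_OOM;
			page = swapcache;
			swapcache = NULL;
			goto out_page;
		}
	}

	/* ... pte_same() check, pte setup, swap_free(), unlock_page(page) ... */

	if (swapcache) {
		/* safe to drop only after swap_free() and the pte_same() check */
		unlock_page(swapcache);
		page_cache_release(swapcache);
	}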

[riel@redhat.com: changelog help]
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Hugh Dickins <hughd@google.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 3 changed files with 43 additions and 19 deletions

... ... @@ -16,6 +16,9 @@
16 16 struct stable_node;
17 17 struct mem_cgroup;
18 18  
  19 +struct page *ksm_does_need_to_copy(struct page *page,
  20 + struct vm_area_struct *vma, unsigned long address);
  21 +
19 22 #ifdef CONFIG_KSM
20 23 int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
21 24 unsigned long end, int advice, unsigned long *vm_flags);
22 25  
... ... @@ -70,19 +73,14 @@
70 73 * We'd like to make this conditional on vma->vm_flags & VM_MERGEABLE,
71 74 * but what if the vma was unmerged while the page was swapped out?
72 75 */
73   -struct page *ksm_does_need_to_copy(struct page *page,
74   - struct vm_area_struct *vma, unsigned long address);
75   -static inline struct page *ksm_might_need_to_copy(struct page *page,
  76 +static inline int ksm_might_need_to_copy(struct page *page,
76 77 struct vm_area_struct *vma, unsigned long address)
77 78 {
78 79 struct anon_vma *anon_vma = page_anon_vma(page);
79 80  
80   - if (!anon_vma ||
81   - (anon_vma->root == vma->anon_vma->root &&
82   - page->index == linear_page_index(vma, address)))
83   - return page;
84   -
85   - return ksm_does_need_to_copy(page, vma, address);
  81 + return anon_vma &&
  82 + (anon_vma->root != vma->anon_vma->root ||
  83 + page->index != linear_page_index(vma, address));
86 84 }
87 85  
88 86 int page_referenced_ksm(struct page *page,
89 87  
... ... @@ -115,10 +113,10 @@
115 113 return 0;
116 114 }
117 115  
118   -static inline struct page *ksm_might_need_to_copy(struct page *page,
  116 +static inline int ksm_might_need_to_copy(struct page *page,
119 117 struct vm_area_struct *vma, unsigned long address)
120 118 {
121   - return page;
  119 + return 0;
122 120 }
123 121  
124 122 static inline int page_referenced_ksm(struct page *page,
... ... @@ -1504,8 +1504,6 @@
1504 1504 {
1505 1505 struct page *new_page;
1506 1506  
1507   - unlock_page(page); /* any racers will COW it, not modify it */
1508   -
1509 1507 new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
1510 1508 if (new_page) {
1511 1509 copy_user_highpage(new_page, page, address, vma);
... ... @@ -1521,7 +1519,6 @@
1521 1519 add_page_to_unevictable_list(new_page);
1522 1520 }
1523 1521  
1524   - page_cache_release(page);
1525 1522 return new_page;
1526 1523 }
1527 1524  
... ... @@ -2623,7 +2623,7 @@
2623 2623 unsigned int flags, pte_t orig_pte)
2624 2624 {
2625 2625 spinlock_t *ptl;
2626   - struct page *page;
  2626 + struct page *page, *swapcache = NULL;
2627 2627 swp_entry_t entry;
2628 2628 pte_t pte;
2629 2629 struct mem_cgroup *ptr = NULL;
... ... @@ -2679,10 +2679,23 @@
2679 2679 lock_page(page);
2680 2680 delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
2681 2681  
2682   - page = ksm_might_need_to_copy(page, vma, address);
2683   - if (!page) {
2684   - ret = VM_FAULT_OOM;
2685   - goto out;
  2682 + /*
  2683 + * Make sure try_to_free_swap didn't release the swapcache
  2684 + * from under us. The page pin isn't enough to prevent that.
  2685 + */
  2686 + if (unlikely(!PageSwapCache(page)))
  2687 + goto out_page;
  2688 +
  2689 + if (ksm_might_need_to_copy(page, vma, address)) {
  2690 + swapcache = page;
  2691 + page = ksm_does_need_to_copy(page, vma, address);
  2692 +
  2693 + if (unlikely(!page)) {
  2694 + ret = VM_FAULT_OOM;
  2695 + page = swapcache;
  2696 + swapcache = NULL;
  2697 + goto out_page;
  2698 + }
2686 2699 }
2687 2700  
2688 2701 if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) {
... ... @@ -2735,6 +2748,18 @@
2735 2748 if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
2736 2749 try_to_free_swap(page);
2737 2750 unlock_page(page);
  2751 + if (swapcache) {
  2752 + /*
  2753 + * Hold the lock to avoid the swap entry to be reused
  2754 + * until we take the PT lock for the pte_same() check
  2755 + * (to avoid false positives from pte_same). For
  2756 + * further safety release the lock after the swap_free
  2757 + * so that the swap count won't change under a
  2758 + * parallel locked swapcache.
  2759 + */
  2760 + unlock_page(swapcache);
  2761 + page_cache_release(swapcache);
  2762 + }
2738 2763  
2739 2764 if (flags & FAULT_FLAG_WRITE) {
2740 2765 ret |= do_wp_page(mm, vma, address, page_table, pmd, ptl, pte);
... ... @@ -2756,6 +2781,10 @@
2756 2781 unlock_page(page);
2757 2782 out_release:
2758 2783 page_cache_release(page);
  2784 + if (swapcache) {
  2785 + unlock_page(swapcache);
  2786 + page_cache_release(swapcache);
  2787 + }
2759 2788 return ret;
2760 2789 }
2761 2790