Commit 6d50e60cd2edb5a57154db5a6f64eef5aa59b751

Authored by David Rientjes
Committed by Linus Torvalds
1 parent 47f29df7db

mm, thp: fix collapsing of hugepages on madvise

If an anonymous mapping is not allowed to fault thp memory and then
madvise(MADV_HUGEPAGE) is used after fault, khugepaged will never
collapse this memory into thp memory.
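
For illustration, the failing sequence can be reproduced from userspace with something like the following sketch (a hypothetical test, not part of the patch; the 4MB length is only there so the mapping contains an aligned 2MB region):

	#define _DEFAULT_SOURCE
	#include <string.h>
	#include <unistd.h>
	#include <sys/mman.h>

	#define LEN (4UL << 20)

	int main(void)
	{
		char *p = mmap(NULL, LEN, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (p == MAP_FAILED)
			return 1;
		madvise(p, LEN, MADV_NOHUGEPAGE);	/* sets VM_NOHUGEPAGE */
		memset(p, 1, LEN);			/* fault in small pages */
		madvise(p, LEN, MADV_HUGEPAGE);		/* should register the mm */
		/* Before the fix, AnonHugePages in /proc/self/smaps never
		 * grows for this range: khugepaged was never told about
		 * the mm, so nothing gets collapsed. */
		pause();
		return 0;
	}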

This occurs because the madvise(2) handler for thp, hugepage_madvise(),
clears VM_NOHUGEPAGE in the caller's flags copy on the stack, and the
result isn't stored in vma->vm_flags until the final action of
madvise_behavior().  This causes khugepaged_enter_vma_merge() to be a
no-op in hugepage_madvise() when the vma previously had VM_NOHUGEPAGE
set.
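
In other words (a paraphrased sketch of the pre-patch flow, not the verbatim source), the stack copy and the vma disagree at the moment khugepaged is consulted:

	/* Sketch: pre-patch hugepage_madvise(), heavily elided. */
	int hugepage_madvise(struct vm_area_struct *vma,
			     unsigned long *vm_flags, int advice)
	{
		switch (advice) {
		case MADV_HUGEPAGE:
			*vm_flags &= ~VM_NOHUGEPAGE;	/* stack copy only */
			*vm_flags |= VM_HUGEPAGE;
			/*
			 * Reads vma->vm_flags, which still has
			 * VM_NOHUGEPAGE set, so this is a no-op and the
			 * mm is never registered with khugepaged.
			 */
			if (unlikely(khugepaged_enter_vma_merge(vma)))
				return -ENOMEM;
			break;
		}
		return 0;
	}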

Fix this by passing the correct vma flags to the khugepaged mm slot
handler.  There's no chance khugepaged can run on this vma until after
madvise_behavior() returns since we hold mm->mmap_sem.

It would be possible to clear VM_NOHUGEPAGE directly from vma->vm_flags
in hugepage_madvise(), but I didn't want to introduce special-case
behavior into madvise_behavior().  I think it's best to just let it
always set vma->vm_flags itself.
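
That rejected alternative would have looked roughly like this sketch inside the MADV_HUGEPAGE case, duplicating the store that madvise_behavior() already performs as its final action:

	case MADV_HUGEPAGE:
		/* Rejected alternative (sketch): write the vma directly
		 * so khugepaged_enter_vma_merge() sees fresh flags. */
		vma->vm_flags &= ~VM_NOHUGEPAGE;
		vma->vm_flags |= VM_HUGEPAGE;
		if (unlikely(khugepaged_enter_vma_merge(vma)))
			return -ENOMEM;
		break;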

Signed-off-by: David Rientjes <rientjes@google.com>
Reported-by: Suleiman Souhlal <suleiman@google.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 3 changed files with 20 additions and 16 deletions

include/linux/khugepaged.h
@@ -6,7 +6,8 @@
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 extern int __khugepaged_enter(struct mm_struct *mm);
 extern void __khugepaged_exit(struct mm_struct *mm);
-extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma);
+extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
+				      unsigned long vm_flags);
 
 #define khugepaged_enabled()					\
 	(transparent_hugepage_flags &				\
@@ -35,13 +36,13 @@
 	__khugepaged_exit(mm);
 }
 
-static inline int khugepaged_enter(struct vm_area_struct *vma)
+static inline int khugepaged_enter(struct vm_area_struct *vma,
+				   unsigned long vm_flags)
 {
 	if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags))
 		if ((khugepaged_always() ||
-		     (khugepaged_req_madv() &&
-		      vma->vm_flags & VM_HUGEPAGE)) &&
-		    !(vma->vm_flags & VM_NOHUGEPAGE))
+		     (khugepaged_req_madv() && (vm_flags & VM_HUGEPAGE))) &&
+		    !(vm_flags & VM_NOHUGEPAGE))
 			if (__khugepaged_enter(vma->vm_mm))
 				return -ENOMEM;
 	return 0;
@@ -54,11 +55,13 @@
 static inline void khugepaged_exit(struct mm_struct *mm)
 {
 }
-static inline int khugepaged_enter(struct vm_area_struct *vma)
+static inline int khugepaged_enter(struct vm_area_struct *vma,
+				   unsigned long vm_flags)
 {
 	return 0;
 }
-static inline int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
+static inline int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
+					     unsigned long vm_flags)
 {
 	return 0;
 }

mm/huge_memory.c
@@ -803,7 +803,7 @@
 		return VM_FAULT_FALLBACK;
 	if (unlikely(anon_vma_prepare(vma)))
 		return VM_FAULT_OOM;
-	if (unlikely(khugepaged_enter(vma)))
+	if (unlikely(khugepaged_enter(vma, vma->vm_flags)))
 		return VM_FAULT_OOM;
 	if (!(flags & FAULT_FLAG_WRITE) &&
 			transparent_hugepage_use_zero_page()) {
@@ -1970,7 +1970,7 @@
 		 * register it here without waiting a page fault that
 		 * may not happen any time soon.
 		 */
-		if (unlikely(khugepaged_enter_vma_merge(vma)))
+		if (unlikely(khugepaged_enter_vma_merge(vma, *vm_flags)))
 			return -ENOMEM;
 		break;
 	case MADV_NOHUGEPAGE:
@@ -2071,7 +2071,8 @@
 	return 0;
 }
 
-int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
+int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
+			       unsigned long vm_flags)
 {
 	unsigned long hstart, hend;
 	if (!vma->anon_vma)
@@ -2083,11 +2084,11 @@
 	if (vma->vm_ops)
 		/* khugepaged not yet working on file or special mappings */
 		return 0;
-	VM_BUG_ON_VMA(vma->vm_flags & VM_NO_THP, vma);
+	VM_BUG_ON_VMA(vm_flags & VM_NO_THP, vma);
 	hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
 	hend = vma->vm_end & HPAGE_PMD_MASK;
 	if (hstart < hend)
-		return khugepaged_enter(vma);
+		return khugepaged_enter(vma, vm_flags);
 	return 0;
 }

mm/mmap.c
@@ -1080,7 +1080,7 @@
 				end, prev->vm_pgoff, NULL);
 		if (err)
 			return NULL;
-		khugepaged_enter_vma_merge(prev);
+		khugepaged_enter_vma_merge(prev, vm_flags);
 		return prev;
 	}
@@ -1099,7 +1099,7 @@
 					next->vm_pgoff - pglen, NULL);
 		if (err)
 			return NULL;
-		khugepaged_enter_vma_merge(area);
+		khugepaged_enter_vma_merge(area, vm_flags);
 		return area;
 	}
@@ -2208,7 +2208,7 @@
 		}
 	}
 	vma_unlock_anon_vma(vma);
-	khugepaged_enter_vma_merge(vma);
+	khugepaged_enter_vma_merge(vma, vma->vm_flags);
 	validate_mm(vma->vm_mm);
 	return error;
 }
@@ -2277,7 +2277,7 @@
 		}
 	}
 	vma_unlock_anon_vma(vma);
-	khugepaged_enter_vma_merge(vma);
+	khugepaged_enter_vma_merge(vma, vma->vm_flags);
 	validate_mm(vma->vm_mm);
 	return error;
 }