Commit 60ab3244ec85c44276c585a2a20d3750402e1cf4

Authored by Andrea Arcangeli
Committed by Linus Torvalds
1 parent a664b2d855

thp: khugepaged: make khugepaged aware of madvise

MADV_HUGEPAGE and MADV_NOHUGEPAGE were fully effective only if run after
mmap and before touching the memory.  While this is enough for most
usages, it takes little effort to make madvise more dynamic at runtime on
an existing mapping by making khugepaged aware of madvise.

MADV_HUGEPAGE: register in khugepaged immediately without waiting for a
page fault (which may never happen if all pages are already mapped and the
"enabled" knob was set to madvise during the initial page faults).

MADV_NOHUGEPAGE: skip vmas marked VM_NOHUGEPAGE in khugepaged to stop
collapsing pages where not needed.

[akpm@linux-foundation.org: tweak comment]
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 3 changed files with 24 additions and 7 deletions Side-by-side Diff

include/linux/huge_mm.h
... ... @@ -105,7 +105,8 @@
105 105 #if HPAGE_PMD_ORDER > MAX_ORDER
106 106 #error "hugepages can't be allocated by the buddy allocator"
107 107 #endif
108   -extern int hugepage_madvise(unsigned long *vm_flags, int advice);
  108 +extern int hugepage_madvise(struct vm_area_struct *vma,
  109 + unsigned long *vm_flags, int advice);
109 110 extern void __vma_adjust_trans_huge(struct vm_area_struct *vma,
110 111 unsigned long start,
111 112 unsigned long end,
... ... @@ -143,7 +144,8 @@
143 144 do { } while (0)
144 145 #define wait_split_huge_page(__anon_vma, __pmd) \
145 146 do { } while (0)
146   -static inline int hugepage_madvise(unsigned long *vm_flags, int advice)
  147 +static inline int hugepage_madvise(struct vm_area_struct *vma,
  148 + unsigned long *vm_flags, int advice)
147 149 {
148 150 BUG();
149 151 return 0;
mm/huge_memory.c
... ... @@ -1389,7 +1389,8 @@
1389 1389 return ret;
1390 1390 }
1391 1391  
1392   -int hugepage_madvise(unsigned long *vm_flags, int advice)
  1392 +int hugepage_madvise(struct vm_area_struct *vma,
  1393 + unsigned long *vm_flags, int advice)
1393 1394 {
1394 1395 switch (advice) {
1395 1396 case MADV_HUGEPAGE:
... ... @@ -1404,6 +1405,13 @@
1404 1405 return -EINVAL;
1405 1406 *vm_flags &= ~VM_NOHUGEPAGE;
1406 1407 *vm_flags |= VM_HUGEPAGE;
  1408 + /*
  1409 + * If the vma become good for khugepaged to scan,
  1410 + * register it here without waiting a page fault that
  1411 + * may not happen any time soon.
  1412 + */
  1413 + if (unlikely(khugepaged_enter_vma_merge(vma)))
  1414 + return -ENOMEM;
1407 1415 break;
1408 1416 case MADV_NOHUGEPAGE:
1409 1417 /*
... ... @@ -1417,6 +1425,11 @@
1417 1425 return -EINVAL;
1418 1426 *vm_flags &= ~VM_HUGEPAGE;
1419 1427 *vm_flags |= VM_NOHUGEPAGE;
  1428 + /*
  1429 + * Setting VM_NOHUGEPAGE will prevent khugepaged from scanning
  1430 + * this vma even if we leave the mm registered in khugepaged if
  1431 + * it got registered before VM_NOHUGEPAGE was set.
  1432 + */
1420 1433 break;
1421 1434 }
1422 1435  
... ... @@ -1784,7 +1797,8 @@
1784 1797 if (address < hstart || address + HPAGE_PMD_SIZE > hend)
1785 1798 goto out;
1786 1799  
1787   - if (!(vma->vm_flags & VM_HUGEPAGE) && !khugepaged_always())
  1800 + if ((!(vma->vm_flags & VM_HUGEPAGE) && !khugepaged_always()) ||
  1801 + (vma->vm_flags & VM_NOHUGEPAGE))
1788 1802 goto out;
1789 1803  
1790 1804 /* VM_PFNMAP vmas may have vm_ops null but vm_file set */
... ... @@ -2007,8 +2021,9 @@
2007 2021 break;
2008 2022 }
2009 2023  
2010   - if (!(vma->vm_flags & VM_HUGEPAGE) &&
2011   - !khugepaged_always()) {
  2024 + if ((!(vma->vm_flags & VM_HUGEPAGE) &&
  2025 + !khugepaged_always()) ||
  2026 + (vma->vm_flags & VM_NOHUGEPAGE)) {
2012 2027 progress++;
2013 2028 continue;
2014 2029 }
mm/madvise.c
... ... @@ -73,7 +73,7 @@
73 73 break;
74 74 case MADV_HUGEPAGE:
75 75 case MADV_NOHUGEPAGE:
76   - error = hugepage_madvise(&new_flags, behavior);
  76 + error = hugepage_madvise(vma, &new_flags, behavior);
77 77 if (error)
78 78 goto out;
79 79 break;