Commit 78f11a255749d09025f54d4e2df4fbcb031530e2

Authored by Andrea Arcangeli
Committed by Linus Torvalds
1 parent 6d4831c283

mm: thp: fix /dev/zero MAP_PRIVATE and vm_flags cleanups

The huge_memory.c THP page fault was allowed to run if vm_ops was NULL
(which is the case for /dev/zero MAP_PRIVATE, as the f_op->mmap doesn't
set up a special vma->vm_ops and the mapping falls back to regular
anonymous memory), but the rest of the THP logic wasn't fully activated
for vmas with a non-NULL vm_file (/dev/zero has a non-NULL
vma->vm_file).
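
For context, the affected mapping is easy to reproduce from userspace
(a minimal sketch, not part of the commit; the 4MB size and the smaps
inspection are illustrative):

	#include <fcntl.h>
	#include <string.h>
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		size_t len = 4UL << 20;	/* room for at least one 2MB PMD */
		char *p;
		int fd = open("/dev/zero", O_RDWR);

		if (fd < 0)
			return 1;
		/*
		 * MAP_PRIVATE of /dev/zero: f_op->mmap leaves
		 * vma->vm_ops NULL, so the mapping is serviced as
		 * anonymous memory, but vma->vm_file is not NULL.
		 */
		p = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE, fd, 0);
		if (p == MAP_FAILED)
			return 1;
		memset(p, 0x55, len);	/* fault the range in */
		pause();	/* check AnonHugePages in /proc/$pid/smaps */
		return 0;
	}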

So this removes the vm_file checks so that /dev/zero can also safely
use THP (the other, albeit safer, approach to fixing this bug would
have been to prevent the initial THP page fault from running if vm_file
was set).
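
For illustration only, that safer alternative would have amounted to
something like this in the fault path (a hypothetical sketch
paraphrasing the 2.6.38-era handle_mm_fault() in mm/memory.c, not what
this commit does):

	if (pmd_none(*pmd) && transparent_hugepage_enabled(vma)) {
		/*
		 * Hypothetical: also require a NULL vm_file, so
		 * /dev/zero MAP_PRIVATE would keep faulting 4k pages
		 * instead of taking the huge anonymous fault.
		 */
		if (!vma->vm_ops && !vma->vm_file)
			return do_huge_pmd_anonymous_page(mm, vma,
					address, pmd, flags);
	}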

After removing the vm_file checks, this also makes huge_memory.c
stricter in khugepaged for the DEBUG_VM=y case.  It doesn't replace the
vm_file check with an is_pfn_mapping() check (but it keeps checking for
VM_PFNMAP under VM_BUG_ON) because for an is_cow_mapping() mapping,
VM_PFNMAP should only be allowed to exist before the first page fault,
i.e. while vma->anon_vma is still NULL (which prevents khugepaged
registration).  So I tend to think the previous comment, saying that if
vm_file was set VM_PFNMAP might have been set too and we could still be
registered in khugepaged (even though anon_vma must be non-NULL for
khugepaged registration), was too paranoid.  The is_linear_pfn_mapping()
check is, I think, also superfluous (as described by the comment), but
under DEBUG_VM it is safe to keep.
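
For reference, the two helpers being reasoned about look roughly like
this in the 2.6.38-era include/linux/mm.h (paraphrased here for
readability):

	static inline int is_pfn_mapping(struct vm_area_struct *vma)
	{
		return (vma->vm_flags & VM_PFNMAP);
	}

	/*
	 * VM_PFN_AT_MMAP is set by remap_pfn_range() only when the
	 * whole vma is remapped at mmap time, and always alongside
	 * VM_PFNMAP, so a linear PFN mapping is necessarily a PFN
	 * mapping too; that is why the is_linear_pfn_mapping() half of
	 * the VM_BUG_ON below is superfluous once VM_PFNMAP is covered
	 * via VM_NO_THP.
	 */
	static inline int is_linear_pfn_mapping(struct vm_area_struct *vma)
	{
		return (vma->vm_flags & VM_PFN_AT_MMAP);
	}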

Addresses https://bugzilla.kernel.org/show_bug.cgi?id=33682

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Reported-by: Caspar Zhang <bugs@casparzhang.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: <stable@kernel.org>		[2.6.38.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 3 changed files with 27 additions and 21 deletions

include/linux/huge_mm.h
... ... @@ -117,7 +117,7 @@
117 117 unsigned long end,
118 118 long adjust_next)
119 119 {
120   - if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
  120 + if (!vma->anon_vma || vma->vm_ops)
121 121 return;
122 122 __vma_adjust_trans_huge(vma, start, end, adjust_next);
123 123 }

include/linux/mm.h
... ... @@ -137,7 +137,8 @@
137 137 #define VM_RandomReadHint(v) ((v)->vm_flags & VM_RAND_READ)
138 138  
139 139 /*
140   - * special vmas that are non-mergable, non-mlock()able
  140 + * Special vmas that are non-mergeable, non-mlock()able.
  141 + * Note: mm/huge_memory.c VM_NO_THP depends on this definition.
141 142 */
142 143 #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
143 144  

mm/huge_memory.c
... ... @@ -1408,6 +1408,9 @@
1408 1408 return ret;
1409 1409 }
1410 1410  
  1411 +#define VM_NO_THP (VM_SPECIAL|VM_INSERTPAGE|VM_MIXEDMAP|VM_SAO| \
  1412 + VM_HUGETLB|VM_SHARED|VM_MAYSHARE)
  1413 +
1411 1414 int hugepage_madvise(struct vm_area_struct *vma,
1412 1415 unsigned long *vm_flags, int advice)
1413 1416 {
... ... @@ -1416,11 +1419,7 @@
1416 1419 /*
1417 1420 * Be somewhat over-protective like KSM for now!
1418 1421 */
1419   - if (*vm_flags & (VM_HUGEPAGE |
1420   - VM_SHARED | VM_MAYSHARE |
1421   - VM_PFNMAP | VM_IO | VM_DONTEXPAND |
1422   - VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
1423   - VM_MIXEDMAP | VM_SAO))
  1422 + if (*vm_flags & (VM_HUGEPAGE | VM_NO_THP))
1424 1423 return -EINVAL;
1425 1424 *vm_flags &= ~VM_NOHUGEPAGE;
1426 1425 *vm_flags |= VM_HUGEPAGE;
... ... @@ -1436,11 +1435,7 @@
1436 1435 /*
1437 1436 * Be somewhat over-protective like KSM for now!
1438 1437 */
1439   - if (*vm_flags & (VM_NOHUGEPAGE |
1440   - VM_SHARED | VM_MAYSHARE |
1441   - VM_PFNMAP | VM_IO | VM_DONTEXPAND |
1442   - VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
1443   - VM_MIXEDMAP | VM_SAO))
  1438 + if (*vm_flags & (VM_NOHUGEPAGE | VM_NO_THP))
1444 1439 return -EINVAL;
1445 1440 *vm_flags &= ~VM_HUGEPAGE;
1446 1441 *vm_flags |= VM_NOHUGEPAGE;
1447 1442  
... ... @@ -1574,10 +1569,14 @@
1574 1569 * page fault if needed.
1575 1570 */
1576 1571 return 0;
1577   - if (vma->vm_file || vma->vm_ops)
  1572 + if (vma->vm_ops)
1578 1573 /* khugepaged not yet working on file or special mappings */
1579 1574 return 0;
1580   - VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
  1575 + /*
  1576 + * If is_linear_pfn_mapping() is true, is_pfn_mapping() must be
  1577 + * true too, verify it here.
  1578 + */
  1579 + VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP);
1581 1580 hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
1582 1581 hend = vma->vm_end & HPAGE_PMD_MASK;
1583 1582 if (hstart < hend)
1584 1583  
... ... @@ -1828,12 +1827,15 @@
1828 1827 (vma->vm_flags & VM_NOHUGEPAGE))
1829 1828 goto out;
1830 1829  
1831   - /* VM_PFNMAP vmas may have vm_ops null but vm_file set */
1832   - if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
  1830 + if (!vma->anon_vma || vma->vm_ops)
1833 1831 goto out;
1834 1832 if (is_vma_temporary_stack(vma))
1835 1833 goto out;
1836   - VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
  1834 + /*
  1835 + * If is_linear_pfn_mapping() is true, is_pfn_mapping() must be
  1836 + * true too, verify it here.
  1837 + */
  1838 + VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP);
1837 1839  
1838 1840 pgd = pgd_offset(mm, address);
1839 1841 if (!pgd_present(*pgd))
1840 1842  
... ... @@ -2066,13 +2068,16 @@
2066 2068 progress++;
2067 2069 continue;
2068 2070 }
2069   - /* VM_PFNMAP vmas may have vm_ops null but vm_file set */
2070   - if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
  2071 + if (!vma->anon_vma || vma->vm_ops)
2071 2072 goto skip;
2072 2073 if (is_vma_temporary_stack(vma))
2073 2074 goto skip;
2074   -
2075   - VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
  2075 + /*
  2076 + * If is_linear_pfn_mapping() is true, is_pfn_mapping()
  2077 + * must be true too, verify it here.
  2078 + */
  2079 + VM_BUG_ON(is_linear_pfn_mapping(vma) ||
  2080 + vma->vm_flags & VM_NO_THP);
2076 2081  
2077 2082 hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
2078 2083 hend = vma->vm_end & HPAGE_PMD_MASK;