Commit 78f11a255749d09025f54d4e2df4fbcb031530e2
Committed by
Linus Torvalds
1 parent
6d4831c283
Exists in
master
and in
20 other branches
mm: thp: fix /dev/zero MAP_PRIVATE and vm_flags cleanups
The huge_memory.c THP page fault was allowed to run if vm_ops was null (which would succeed for /dev/zero MAP_PRIVATE, as the f_op->mmap wouldn't setup a special vma->vm_ops and it would fall back to regular anonymous memory) but other THP logic wasn't fully activated for vmas with vm_file not NULL (/dev/zero has a not NULL vma->vm_file). So this removes the vm_file checks so that /dev/zero also can safely use THP (the other albeit safer approach to fix this bug would have been to prevent the THP initial page fault to run if vm_file was set). After removing the vm_file checks, this also makes huge_memory.c stricter in khugepaged for the DEBUG_VM=y case. It doesn't replace the vm_file check with a is_pfn_mapping check (but it keeps checking for VM_PFNMAP under VM_BUG_ON) because for a is_cow_mapping() mapping VM_PFNMAP should only be allowed to exist before the first page fault, and in turn when vma->anon_vma is null (so preventing khugepaged registration). So I tend to think the previous comment saying if vm_file was set, VM_PFNMAP might have been set and we could still be registered in khugepaged (despite anon_vma was not NULL to be registered in khugepaged) was too paranoid. The is_linear_pfn_mapping check is also I think superfluous (as described by comment) but under DEBUG_VM it is safe to stay. Addresses https://bugzilla.kernel.org/show_bug.cgi?id=33682 Signed-off-by: Andrea Arcangeli <aarcange@redhat.com> Reported-by: Caspar Zhang <bugs@casparzhang.com> Acked-by: Mel Gorman <mel@csn.ul.ie> Acked-by: Rik van Riel <riel@redhat.com> Cc: <stable@kernel.org> [2.6.38.x] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 3 changed files with 27 additions and 21 deletions Side-by-side Diff
include/linux/huge_mm.h
include/linux/mm.h
... | ... | @@ -137,7 +137,8 @@ |
137 | 137 | #define VM_RandomReadHint(v) ((v)->vm_flags & VM_RAND_READ) |
138 | 138 | |
139 | 139 | /* |
140 | - * special vmas that are non-mergable, non-mlock()able | |
140 | + * Special vmas that are non-mergable, non-mlock()able. | |
141 | + * Note: mm/huge_memory.c VM_NO_THP depends on this definition. | |
141 | 142 | */ |
142 | 143 | #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP) |
143 | 144 |
mm/huge_memory.c
... | ... | @@ -1408,6 +1408,9 @@ |
1408 | 1408 | return ret; |
1409 | 1409 | } |
1410 | 1410 | |
1411 | +#define VM_NO_THP (VM_SPECIAL|VM_INSERTPAGE|VM_MIXEDMAP|VM_SAO| \ | |
1412 | + VM_HUGETLB|VM_SHARED|VM_MAYSHARE) | |
1413 | + | |
1411 | 1414 | int hugepage_madvise(struct vm_area_struct *vma, |
1412 | 1415 | unsigned long *vm_flags, int advice) |
1413 | 1416 | { |
... | ... | @@ -1416,11 +1419,7 @@ |
1416 | 1419 | /* |
1417 | 1420 | * Be somewhat over-protective like KSM for now! |
1418 | 1421 | */ |
1419 | - if (*vm_flags & (VM_HUGEPAGE | | |
1420 | - VM_SHARED | VM_MAYSHARE | | |
1421 | - VM_PFNMAP | VM_IO | VM_DONTEXPAND | | |
1422 | - VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE | | |
1423 | - VM_MIXEDMAP | VM_SAO)) | |
1422 | + if (*vm_flags & (VM_HUGEPAGE | VM_NO_THP)) | |
1424 | 1423 | return -EINVAL; |
1425 | 1424 | *vm_flags &= ~VM_NOHUGEPAGE; |
1426 | 1425 | *vm_flags |= VM_HUGEPAGE; |
... | ... | @@ -1436,11 +1435,7 @@ |
1436 | 1435 | /* |
1437 | 1436 | * Be somewhat over-protective like KSM for now! |
1438 | 1437 | */ |
1439 | - if (*vm_flags & (VM_NOHUGEPAGE | | |
1440 | - VM_SHARED | VM_MAYSHARE | | |
1441 | - VM_PFNMAP | VM_IO | VM_DONTEXPAND | | |
1442 | - VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE | | |
1443 | - VM_MIXEDMAP | VM_SAO)) | |
1438 | + if (*vm_flags & (VM_NOHUGEPAGE | VM_NO_THP)) | |
1444 | 1439 | return -EINVAL; |
1445 | 1440 | *vm_flags &= ~VM_HUGEPAGE; |
1446 | 1441 | *vm_flags |= VM_NOHUGEPAGE; |
1447 | 1442 | |
... | ... | @@ -1574,10 +1569,14 @@ |
1574 | 1569 | * page fault if needed. |
1575 | 1570 | */ |
1576 | 1571 | return 0; |
1577 | - if (vma->vm_file || vma->vm_ops) | |
1572 | + if (vma->vm_ops) | |
1578 | 1573 | /* khugepaged not yet working on file or special mappings */ |
1579 | 1574 | return 0; |
1580 | - VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma)); | |
1575 | + /* | |
1576 | + * If is_pfn_mapping() is true is_linear_pfn_mapping() must be | 
1577 | + * true too, verify it here. | |
1578 | + */ | |
1579 | + VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP); | |
1581 | 1580 | hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; |
1582 | 1581 | hend = vma->vm_end & HPAGE_PMD_MASK; |
1583 | 1582 | if (hstart < hend) |
1584 | 1583 | |
... | ... | @@ -1828,12 +1827,15 @@ |
1828 | 1827 | (vma->vm_flags & VM_NOHUGEPAGE)) |
1829 | 1828 | goto out; |
1830 | 1829 | |
1831 | - /* VM_PFNMAP vmas may have vm_ops null but vm_file set */ | |
1832 | - if (!vma->anon_vma || vma->vm_ops || vma->vm_file) | |
1830 | + if (!vma->anon_vma || vma->vm_ops) | |
1833 | 1831 | goto out; |
1834 | 1832 | if (is_vma_temporary_stack(vma)) |
1835 | 1833 | goto out; |
1836 | - VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma)); | |
1834 | + /* | |
1835 | + * If is_pfn_mapping() is true is_linear_pfn_mapping() must be | 
1836 | + * true too, verify it here. | |
1837 | + */ | |
1838 | + VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP); | |
1837 | 1839 | |
1838 | 1840 | pgd = pgd_offset(mm, address); |
1839 | 1841 | if (!pgd_present(*pgd)) |
1840 | 1842 | |
... | ... | @@ -2066,13 +2068,16 @@ |
2066 | 2068 | progress++; |
2067 | 2069 | continue; |
2068 | 2070 | } |
2069 | - /* VM_PFNMAP vmas may have vm_ops null but vm_file set */ | |
2070 | - if (!vma->anon_vma || vma->vm_ops || vma->vm_file) | |
2071 | + if (!vma->anon_vma || vma->vm_ops) | |
2071 | 2072 | goto skip; |
2072 | 2073 | if (is_vma_temporary_stack(vma)) |
2073 | 2074 | goto skip; |
2074 | - | |
2075 | - VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma)); | |
2075 | + /* | |
2076 | + * If is_pfn_mapping() is true is_linear_pfn_mapping() | 
2077 | + * must be true too, verify it here. | |
2078 | + */ | |
2079 | + VM_BUG_ON(is_linear_pfn_mapping(vma) || | |
2080 | + vma->vm_flags & VM_NO_THP); | |
2076 | 2081 | |
2077 | 2082 | hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; |
2078 | 2083 | hend = vma->vm_end & HPAGE_PMD_MASK; |