Commit 24669e58477e2752c1fbca9c1c988e9dd0d79d15
Committed by: Linus Torvalds
Parent: 972dc4de13
Exists in smarc-l5.0.0_1.0.0-ga and in 5 other branches
hugetlb: use mmu_gather instead of a temporary linked list for accumulating pages
Use a mmu_gather instead of a temporary linked list for accumulating pages
when we unmap a hugepage range.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Hillf Danton <dhillf@gmail.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
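For context, a minimal sketch of the caller-side mmu_gather pattern this commit moves to, mirroring the new unmap_hugepage_range() in the diff below. The wrapper name example_unmap_vma_range() is hypothetical; the tlb_* calls are the kernel interfaces used by the commit.

/*
 * Sketch of the mmu_gather batching pattern adopted here;
 * example_unmap_vma_range() is a hypothetical wrapper.
 */
#include <linux/mm.h>
#include <asm/tlb.h>

static void example_unmap_vma_range(struct vm_area_struct *vma,
				    unsigned long start, unsigned long end)
{
	struct mmu_gather tlb;

	/* Start a gather for this mm; 0 means a partial unmap, not a
	 * full-mm teardown. */
	tlb_gather_mmu(&tlb, vma->vm_mm, 0);

	/* The range walker clears PTEs with huge_ptep_get_and_clear(),
	 * records them via tlb_remove_tlb_entry(), and queues pages with
	 * __tlb_remove_page(); when the batch fills up it drops the PTE
	 * lock, calls tlb_flush_mmu(), and resumes at the "again:" label. */
	__unmap_hugepage_range(&tlb, vma, start, end, NULL);

	/* Flush the TLB for [start, end) and free the gathered pages. */
	tlb_finish_mmu(&tlb, start, end);
}

The point of the change is that pages are batched in the per-CPU mmu_gather instead of being chained through page->lru on a temporary list, so the TLB invalidate and page freeing happen in batches outside the page table lock.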
Showing 4 changed files with 59 additions and 33 deletions
fs/hugetlbfs/inode.c
include/linux/hugetlb.h
@@ -7,6 +7,7 @@
 
 struct ctl_table;
 struct user_struct;
+struct mmu_gather;
 
 #ifdef CONFIG_HUGETLB_PAGE
 
@@ -40,9 +41,10 @@
 			struct page **, struct vm_area_struct **,
 			unsigned long *, int *, int, unsigned int flags);
 void unmap_hugepage_range(struct vm_area_struct *,
-			unsigned long, unsigned long, struct page *);
-void __unmap_hugepage_range(struct vm_area_struct *,
-			unsigned long, unsigned long, struct page *);
+			  unsigned long, unsigned long, struct page *);
+void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
+			  unsigned long start, unsigned long end,
+			  struct page *ref_page);
 int hugetlb_prefault(struct address_space *, struct vm_area_struct *);
 void hugetlb_report_meminfo(struct seq_file *);
 int hugetlb_report_node_meminfo(int, char *);
@@ -98,7 +100,6 @@
 #define follow_huge_addr(mm, addr, write)	ERR_PTR(-EINVAL)
 #define copy_hugetlb_page_range(src, dst, vma)	({ BUG(); 0; })
 #define hugetlb_prefault(mapping, vma)	({ BUG(); 0; })
-#define unmap_hugepage_range(vma, start, end, page)	BUG()
 static inline void hugetlb_report_meminfo(struct seq_file *m)
 {
 }
@@ -112,12 +113,23 @@
 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling)	({BUG(); 0; })
 #define hugetlb_fault(mm, vma, addr, flags)	({ BUG(); 0; })
 #define huge_pte_offset(mm, address)	0
-#define dequeue_hwpoisoned_huge_page(page)	0
+static inline int dequeue_hwpoisoned_huge_page(struct page *page)
+{
+	return 0;
+}
+
 static inline void copy_huge_page(struct page *dst, struct page *src)
 {
 }
 
 #define hugetlb_change_protection(vma, address, end, newprot)
+
+static inline void __unmap_hugepage_range(struct mmu_gather *tlb,
+			struct vm_area_struct *vma, unsigned long start,
+			unsigned long end, struct page *ref_page)
+{
+	BUG();
+}
 
 #endif /* !CONFIG_HUGETLB_PAGE */
 
mm/hugetlb.c
@@ -24,8 +24,9 @@
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
-#include <linux/io.h>
+#include <asm/tlb.h>
 
+#include <linux/io.h>
 #include <linux/hugetlb.h>
 #include <linux/node.h>
 #include "internal.h"
@@ -2310,30 +2311,26 @@
 	return 0;
 }
 
-void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
-			    unsigned long end, struct page *ref_page)
+void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
+			    unsigned long start, unsigned long end,
+			    struct page *ref_page)
 {
+	int force_flush = 0;
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long address;
 	pte_t *ptep;
 	pte_t pte;
 	struct page *page;
-	struct page *tmp;
 	struct hstate *h = hstate_vma(vma);
 	unsigned long sz = huge_page_size(h);
 
-	/*
-	 * A page gathering list, protected by per file i_mmap_mutex. The
-	 * lock is used to avoid list corruption from multiple unmapping
-	 * of the same page since we are using page->lru.
-	 */
-	LIST_HEAD(page_list);
-
 	WARN_ON(!is_vm_hugetlb_page(vma));
 	BUG_ON(start & ~huge_page_mask(h));
 	BUG_ON(end & ~huge_page_mask(h));
 
+	tlb_start_vma(tlb, vma);
 	mmu_notifier_invalidate_range_start(mm, start, end);
+again:
 	spin_lock(&mm->page_table_lock);
 	for (address = start; address < end; address += sz) {
 		ptep = huge_pte_offset(mm, address);
@@ -2372,30 +2369,45 @@
 		}
 
 		pte = huge_ptep_get_and_clear(mm, address, ptep);
+		tlb_remove_tlb_entry(tlb, ptep, address);
 		if (pte_dirty(pte))
 			set_page_dirty(page);
-		list_add(&page->lru, &page_list);
 
+		page_remove_rmap(page);
+		force_flush = !__tlb_remove_page(tlb, page);
+		if (force_flush)
+			break;
 		/* Bail out after unmapping reference page if supplied */
 		if (ref_page)
 			break;
 	}
-	flush_tlb_range(vma, start, end);
 	spin_unlock(&mm->page_table_lock);
-	mmu_notifier_invalidate_range_end(mm, start, end);
-	list_for_each_entry_safe(page, tmp, &page_list, lru) {
-		page_remove_rmap(page);
-		list_del(&page->lru);
-		put_page(page);
+	/*
+	 * mmu_gather ran out of room to batch pages, we break out of
+	 * the PTE lock to avoid doing the potential expensive TLB invalidate
+	 * and page-free while holding it.
+	 */
+	if (force_flush) {
+		force_flush = 0;
+		tlb_flush_mmu(tlb);
+		if (address < end && !ref_page)
+			goto again;
 	}
+	mmu_notifier_invalidate_range_end(mm, start, end);
+	tlb_end_vma(tlb, vma);
 }
 
 void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 			  unsigned long end, struct page *ref_page)
 {
-	mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex);
-	__unmap_hugepage_range(vma, start, end, ref_page);
-	mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
+	struct mm_struct *mm;
+	struct mmu_gather tlb;
+
+	mm = vma->vm_mm;
+
+	tlb_gather_mmu(&tlb, mm, 0);
+	__unmap_hugepage_range(&tlb, vma, start, end, ref_page);
+	tlb_finish_mmu(&tlb, start, end);
 }
 
 /*
@@ -2440,9 +2452,8 @@
 		 * from the time of fork. This would look like data corruption
 		 */
 		if (!is_vma_resv_set(iter_vma, HPAGE_RESV_OWNER))
-			__unmap_hugepage_range(iter_vma,
-				address, address + huge_page_size(h),
-				page);
+			unmap_hugepage_range(iter_vma, address,
+				address + huge_page_size(h), page);
 	}
 	mutex_unlock(&mapping->i_mmap_mutex);
 
mm/memory.c
@@ -1343,8 +1343,11 @@
 		 * Since no pte has actually been setup, it is
 		 * safe to do nothing in this case.
 		 */
-		if (vma->vm_file)
-			unmap_hugepage_range(vma, start, end, NULL);
+		if (vma->vm_file) {
+			mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex);
+			__unmap_hugepage_range(tlb, vma, start, end, NULL);
+			mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
+		}
 	} else
 		unmap_page_range(tlb, vma, start, end, details);
 }