Commit 4daae3b4b9e49b7e0935499a352f1c59d90287d2
1 parent
149c33e1c9
Exists in master and in 20 other branches
mm: mempolicy: Use _PAGE_NUMA to migrate pages
Note: Based on "mm/mpol: Use special PROT_NONE to migrate pages" but sufficiently different that the signed-off-bys were dropped.

Combine our previous _PAGE_NUMA, mpol_misplaced and migrate_misplaced_page() pieces into an effective migrate on fault scheme.

Note that (on x86) we rely on PROT_NONE pages being !present and avoid the TLB flush from try_to_unmap(TTU_MIGRATION). This greatly improves the page-migration performance.

Based-on-work-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Showing 3 changed files with 60 additions and 12 deletions Side-by-side Diff
include/linux/huge_mm.h
... | ... | @@ -160,8 +160,8 @@ |
160 | 160 | return page; |
161 | 161 | } |
162 | 162 | |
163 | -extern int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr, | |
164 | - pmd_t pmd, pmd_t *pmdp); | |
163 | +extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |
164 | + unsigned long addr, pmd_t pmd, pmd_t *pmdp); | |
165 | 165 | |
166 | 166 | #else /* CONFIG_TRANSPARENT_HUGEPAGE */ |
167 | 167 | #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; }) |
168 | 168 | |
... | ... | @@ -200,9 +200,10 @@ |
200 | 200 | return 0; |
201 | 201 | } |
202 | 202 | |
203 | -static inline int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr, | |
204 | - pmd_t pmd, pmd_t *pmdp) | |
203 | +static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |
204 | + unsigned long addr, pmd_t pmd, pmd_t *pmdp) | |
205 | 205 | { |
206 | + return 0; | |
206 | 207 | } |
207 | 208 | |
208 | 209 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ |
mm/huge_memory.c
... | ... | @@ -18,6 +18,7 @@ |
18 | 18 | #include <linux/freezer.h> |
19 | 19 | #include <linux/mman.h> |
20 | 20 | #include <linux/pagemap.h> |
21 | +#include <linux/migrate.h> | |
21 | 22 | #include <asm/tlb.h> |
22 | 23 | #include <asm/pgalloc.h> |
23 | 24 | #include "internal.h" |
24 | 25 | |
25 | 26 | |
26 | 27 | |
... | ... | @@ -1019,17 +1020,39 @@ |
1019 | 1020 | } |
1020 | 1021 | |
1021 | 1022 | /* NUMA hinting page fault entry point for trans huge pmds */ |
1022 | -int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr, | |
1023 | - pmd_t pmd, pmd_t *pmdp) | |
1023 | +int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |
1024 | + unsigned long addr, pmd_t pmd, pmd_t *pmdp) | |
1024 | 1025 | { |
1025 | - struct page *page; | |
1026 | + struct page *page = NULL; | |
1026 | 1027 | unsigned long haddr = addr & HPAGE_PMD_MASK; |
1028 | + int target_nid; | |
1027 | 1029 | |
1028 | 1030 | spin_lock(&mm->page_table_lock); |
1029 | 1031 | if (unlikely(!pmd_same(pmd, *pmdp))) |
1030 | 1032 | goto out_unlock; |
1031 | 1033 | |
1032 | 1034 | page = pmd_page(pmd); |
1035 | + get_page(page); | |
1036 | + spin_unlock(&mm->page_table_lock); | |
1037 | + | |
1038 | + target_nid = mpol_misplaced(page, vma, haddr); | |
1039 | + if (target_nid == -1) | |
1040 | + goto clear_pmdnuma; | |
1041 | + | |
1042 | + /* | |
1043 | + * Due to lacking code to migrate thp pages, we'll split | |
1044 | + * (which preserves the special PROT_NONE) and re-take the | |
1045 | + * fault on the normal pages. | |
1046 | + */ | |
1047 | + split_huge_page(page); | |
1048 | + put_page(page); | |
1049 | + return 0; | |
1050 | + | |
1051 | +clear_pmdnuma: | |
1052 | + spin_lock(&mm->page_table_lock); | |
1053 | + if (unlikely(!pmd_same(pmd, *pmdp))) | |
1054 | + goto out_unlock; | |
1055 | + | |
1033 | 1056 | pmd = pmd_mknonnuma(pmd); |
1034 | 1057 | set_pmd_at(mm, haddr, pmdp, pmd); |
1035 | 1058 | VM_BUG_ON(pmd_numa(*pmdp)); |
... | ... | @@ -1037,6 +1060,8 @@ |
1037 | 1060 | |
1038 | 1061 | out_unlock: |
1039 | 1062 | spin_unlock(&mm->page_table_lock); |
1063 | + if (page) | |
1064 | + put_page(page); | |
1040 | 1065 | return 0; |
1041 | 1066 | } |
1042 | 1067 |
mm/memory.c
... | ... | @@ -57,6 +57,7 @@ |
57 | 57 | #include <linux/swapops.h> |
58 | 58 | #include <linux/elf.h> |
59 | 59 | #include <linux/gfp.h> |
60 | +#include <linux/migrate.h> | |
60 | 61 | |
61 | 62 | #include <asm/io.h> |
62 | 63 | #include <asm/pgalloc.h> |
63 | 64 | |
... | ... | @@ -3451,8 +3452,9 @@ |
3451 | 3452 | int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, |
3452 | 3453 | unsigned long addr, pte_t pte, pte_t *ptep, pmd_t *pmd) |
3453 | 3454 | { |
3454 | - struct page *page; | |
3455 | + struct page *page = NULL; | |
3455 | 3456 | spinlock_t *ptl; |
3457 | + int current_nid, target_nid; | |
3456 | 3458 | |
3457 | 3459 | /* |
3458 | 3460 | * The "pte" at this point cannot be used safely without |
... | ... | @@ -3465,8 +3467,11 @@ |
3465 | 3467 | */ |
3466 | 3468 | ptl = pte_lockptr(mm, pmd); |
3467 | 3469 | spin_lock(ptl); |
3468 | - if (unlikely(!pte_same(*ptep, pte))) | |
3469 | - goto out_unlock; | |
3470 | + if (unlikely(!pte_same(*ptep, pte))) { | |
3471 | + pte_unmap_unlock(ptep, ptl); | |
3472 | + goto out; | |
3473 | + } | |
3474 | + | |
3470 | 3475 | pte = pte_mknonnuma(pte); |
3471 | 3476 | set_pte_at(mm, addr, ptep, pte); |
3472 | 3477 | update_mmu_cache(vma, addr, ptep); |
3473 | 3478 | |
... | ... | @@ -3477,8 +3482,25 @@ |
3477 | 3482 | return 0; |
3478 | 3483 | } |
3479 | 3484 | |
3480 | -out_unlock: | |
3485 | + get_page(page); | |
3486 | + current_nid = page_to_nid(page); | |
3487 | + target_nid = mpol_misplaced(page, vma, addr); | |
3481 | 3488 | pte_unmap_unlock(ptep, ptl); |
3489 | + if (target_nid == -1) { | |
3490 | + /* | |
3491 | + * Account for the fault against the current node if it not | |
3492 | + * being replaced regardless of where the page is located. | |
3493 | + */ | |
3494 | + current_nid = numa_node_id(); | |
3495 | + put_page(page); | |
3496 | + goto out; | |
3497 | + } | |
3498 | + | |
3499 | + /* Migrate to the requested node */ | |
3500 | + if (migrate_misplaced_page(page, target_nid)) | |
3501 | + current_nid = target_nid; | |
3502 | + | |
3503 | +out: | |
3482 | 3504 | return 0; |
3483 | 3505 | } |
3484 | 3506 | |
... | ... | @@ -3655,7 +3677,7 @@ |
3655 | 3677 | barrier(); |
3656 | 3678 | if (pmd_trans_huge(orig_pmd)) { |
3657 | 3679 | if (pmd_numa(*pmd)) |
3658 | - return do_huge_pmd_numa_page(mm, address, | |
3680 | + return do_huge_pmd_numa_page(mm, vma, address, | |
3659 | 3681 | orig_pmd, pmd); |
3660 | 3682 | |
3661 | 3683 | if ((flags & FAULT_FLAG_WRITE) && !pmd_write(orig_pmd)) { |