Commit 4daae3b4b9e49b7e0935499a352f1c59d90287d2

Authored by Mel Gorman
1 parent 149c33e1c9

mm: mempolicy: Use _PAGE_NUMA to migrate pages

Note: Based on "mm/mpol: Use special PROT_NONE to migrate pages" but
	sufficiently different that the signed-off-bys were dropped

Combine our previous _PAGE_NUMA, mpol_misplaced() and migrate_misplaced_page()
pieces into an effective migrate-on-fault scheme.
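
For orientation, the base-page side of that scheme ends up looking roughly like
the sketch below. This is simplified pseudo-kernel C, not the literal patch:
locking, refcounting and the huge-pmd path are elided, and the helper name
numa_hinting_fault_sketch is made up for illustration.

	/* Simplified sketch of the migrate-on-fault flow (illustration only) */
	static int numa_hinting_fault_sketch(struct mm_struct *mm,
					     struct vm_area_struct *vma,
					     unsigned long addr, pte_t pte, pte_t *ptep)
	{
		struct page *page;
		int target_nid;

		pte = pte_mknonnuma(pte);	/* clear _PAGE_NUMA so the pte is usable again */
		set_pte_at(mm, addr, ptep, pte);

		page = vm_normal_page(vma, addr, pte);
		if (!page)
			return 0;		/* nothing to migrate */

		target_nid = mpol_misplaced(page, vma, addr);	/* ask the policy where the page belongs */
		if (target_nid != -1)
			migrate_misplaced_page(page, target_nid);	/* move it toward the faulting task */

		return 0;
	}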

Note that (on x86) we rely on PROT_NONE pages being !present and avoid
the TLB flush from try_to_unmap(TTU_MIGRATION). This greatly improves the
page-migration performance.
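
The reason no flush is needed: the hardware present bit is clear in a
_PAGE_NUMA pte, so no CPU can be holding a stale TLB entry for it when the
page is migrated. A hypothetical x86 helper illustrating that invariant
(illustration only, not part of this patch; the helper name is made up):

	#include <asm/pgtable.h>

	/*
	 * Hypothetical helper: a pte marked _PAGE_NUMA has the hardware
	 * _PAGE_PRESENT bit clear, so no TLB entry can exist for it and the
	 * migration path can skip a try_to_unmap(TTU_MIGRATION)-style flush.
	 */
	static inline bool pte_hw_present_sketch(pte_t pte)
	{
		return pte_flags(pte) & _PAGE_PRESENT;	/* false for _PAGE_NUMA ptes */
	}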

Based-on-work-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Mel Gorman <mgorman@suse.de>

Showing 3 changed files with 60 additions and 12 deletions

include/linux/huge_mm.h
@@ -160,8 +160,8 @@
 	return page;
 }
 
-extern int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr,
-				 pmd_t pmd, pmd_t *pmdp);
+extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+				 unsigned long addr, pmd_t pmd, pmd_t *pmdp);
 
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
 
@@ -200,9 +200,10 @@
 	return 0;
 }
 
-static inline int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr,
-					pmd_t pmd, pmd_t *pmdp)
+static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+					unsigned long addr, pmd_t pmd, pmd_t *pmdp)
 {
+	return 0;
 }
 
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
mm/huge_memory.c
@@ -18,6 +18,7 @@
 #include <linux/freezer.h>
 #include <linux/mman.h>
 #include <linux/pagemap.h>
+#include <linux/migrate.h>
 #include <asm/tlb.h>
 #include <asm/pgalloc.h>
 #include "internal.h"
@@ -1019,17 +1020,39 @@
 }
 
 /* NUMA hinting page fault entry point for trans huge pmds */
-int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr,
-			  pmd_t pmd, pmd_t *pmdp)
+int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+			  unsigned long addr, pmd_t pmd, pmd_t *pmdp)
 {
-	struct page *page;
+	struct page *page = NULL;
 	unsigned long haddr = addr & HPAGE_PMD_MASK;
+	int target_nid;
 
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(pmd, *pmdp)))
 		goto out_unlock;
 
 	page = pmd_page(pmd);
+	get_page(page);
+	spin_unlock(&mm->page_table_lock);
+
+	target_nid = mpol_misplaced(page, vma, haddr);
+	if (target_nid == -1)
+		goto clear_pmdnuma;
+
+	/*
+	 * Due to lacking code to migrate thp pages, we'll split
+	 * (which preserves the special PROT_NONE) and re-take the
+	 * fault on the normal pages.
+	 */
+	split_huge_page(page);
+	put_page(page);
+	return 0;
+
+clear_pmdnuma:
+	spin_lock(&mm->page_table_lock);
+	if (unlikely(!pmd_same(pmd, *pmdp)))
+		goto out_unlock;
+
 	pmd = pmd_mknonnuma(pmd);
 	set_pmd_at(mm, haddr, pmdp, pmd);
 	VM_BUG_ON(pmd_numa(*pmdp));
@@ -1037,6 +1060,8 @@
 
 out_unlock:
 	spin_unlock(&mm->page_table_lock);
+	if (page)
+		put_page(page);
 	return 0;
 }
 
mm/memory.c
@@ -57,6 +57,7 @@
 #include <linux/swapops.h>
 #include <linux/elf.h>
 #include <linux/gfp.h>
+#include <linux/migrate.h>
 
 #include <asm/io.h>
 #include <asm/pgalloc.h>
 
@@ -3451,8 +3452,9 @@
 int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		 unsigned long addr, pte_t pte, pte_t *ptep, pmd_t *pmd)
 {
-	struct page *page;
+	struct page *page = NULL;
 	spinlock_t *ptl;
+	int current_nid, target_nid;
 
 	/*
 	 * The "pte" at this point cannot be used safely without
@@ -3465,8 +3467,11 @@
 	 */
 	ptl = pte_lockptr(mm, pmd);
 	spin_lock(ptl);
-	if (unlikely(!pte_same(*ptep, pte)))
-		goto out_unlock;
+	if (unlikely(!pte_same(*ptep, pte))) {
+		pte_unmap_unlock(ptep, ptl);
+		goto out;
+	}
+
 	pte = pte_mknonnuma(pte);
 	set_pte_at(mm, addr, ptep, pte);
 	update_mmu_cache(vma, addr, ptep);
 
@@ -3477,8 +3482,25 @@
 		return 0;
 	}
 
-out_unlock:
+	get_page(page);
+	current_nid = page_to_nid(page);
+	target_nid = mpol_misplaced(page, vma, addr);
 	pte_unmap_unlock(ptep, ptl);
+	if (target_nid == -1) {
+		/*
+		 * Account for the fault against the current node if it not
+		 * being replaced regardless of where the page is located.
+		 */
+		current_nid = numa_node_id();
+		put_page(page);
+		goto out;
+	}
+
+	/* Migrate to the requested node */
+	if (migrate_misplaced_page(page, target_nid))
+		current_nid = target_nid;
+
+out:
 	return 0;
 }
 
@@ -3655,7 +3677,7 @@
 		barrier();
 		if (pmd_trans_huge(orig_pmd)) {
 			if (pmd_numa(*pmd))
-				return do_huge_pmd_numa_page(mm, address,
+				return do_huge_pmd_numa_page(mm, vma, address,
 							     orig_pmd, pmd);
 
 			if ((flags & FAULT_FLAG_WRITE) && !pmd_write(orig_pmd)) {