Commit cb900f41215447433cbc456d1c4294e858a84d7c
Committed by Linus Torvalds
1 parent c389a250ab
Exists in master and in 16 other branches
mm, hugetlb: convert hugetlbfs to use split pmd lock
Hugetlb supports multiple page sizes. We use split lock only for PMD level, but not for PUD.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: Alex Thorlton <athorlton@sgi.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Dave Jones <davej@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Robin Holt <robinmholt@gmail.com>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 7 changed files with 105 additions and 54 deletions
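For reference, the conversion pattern the diff applies throughout mm/hugetlb.c: instead of taking the mm-wide mm->page_table_lock around a hugetlb PTE, callers now ask the new huge_pte_lockptr()/huge_pte_lock() helpers (added in include/linux/hugetlb.h below) for the lock that actually covers that PTE -- the split pmd lock for PMD-sized huge pages, the mm-wide lock otherwise. The sketch below is illustration only and is not part of the commit; walk_one_huge_pte() is a hypothetical caller, but every helper it uses appears in this diff or in the hugetlb API of this kernel.

#include <linux/mm.h>
#include <linux/hugetlb.h>

/*
 * Hypothetical caller (illustration only): examine one hugetlb PTE
 * under the lock chosen by huge_pte_lockptr(), mirroring the
 * conversions in copy_hugetlb_page_range(), hugetlb_fault() and
 * hugetlb_change_protection() below.
 */
static void walk_one_huge_pte(struct vm_area_struct *vma, unsigned long addr)
{
	struct mm_struct *mm = vma->vm_mm;
	struct hstate *h = hstate_vma(vma);
	spinlock_t *ptl;
	pte_t *ptep;

	ptep = huge_pte_offset(mm, addr & huge_page_mask(h));
	if (!ptep)
		return;

	/* was: spin_lock(&mm->page_table_lock); */
	ptl = huge_pte_lock(h, mm, ptep);
	if (!huge_pte_none(huge_ptep_get(ptep))) {
		/* inspect or update the hugetlb PTE under the split lock */
	}
	/* was: spin_unlock(&mm->page_table_lock); */
	spin_unlock(ptl);
}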
fs/proc/meminfo.c
include/linux/hugetlb.h
... | ... | @@ -392,6 +392,15 @@ |
392 | 392 | return pmd_huge_support() && (huge_page_shift(h) == PMD_SHIFT); |
393 | 393 | } |
394 | 394 | |
395 | +static inline spinlock_t *huge_pte_lockptr(struct hstate *h, | |
396 | + struct mm_struct *mm, pte_t *pte) | |
397 | +{ | |
398 | + if (huge_page_size(h) == PMD_SIZE) | |
399 | + return pmd_lockptr(mm, (pmd_t *) pte); | |
400 | + VM_BUG_ON(huge_page_size(h) == PAGE_SIZE); | |
401 | + return &mm->page_table_lock; | |
402 | +} | |
403 | + | |
395 | 404 | #else /* CONFIG_HUGETLB_PAGE */ |
396 | 405 | struct hstate {}; |
397 | 406 | #define alloc_huge_page_node(h, nid) NULL |
... | ... | @@ -401,6 +410,7 @@ |
401 | 410 | #define hstate_sizelog(s) NULL |
402 | 411 | #define hstate_vma(v) NULL |
403 | 412 | #define hstate_inode(i) NULL |
413 | +#define page_hstate(page) NULL | |
404 | 414 | #define huge_page_size(h) PAGE_SIZE |
405 | 415 | #define huge_page_mask(h) PAGE_MASK |
406 | 416 | #define vma_kernel_pagesize(v) PAGE_SIZE |
... | ... | @@ -421,7 +431,23 @@ |
421 | 431 | #define dissolve_free_huge_pages(s, e) do {} while (0) |
422 | 432 | #define pmd_huge_support() 0 |
423 | 433 | #define hugepage_migration_support(h) 0 |
434 | + | |
435 | +static inline spinlock_t *huge_pte_lockptr(struct hstate *h, | |
436 | + struct mm_struct *mm, pte_t *pte) | |
437 | +{ | |
438 | + return &mm->page_table_lock; | |
439 | +} | |
424 | 440 | #endif /* CONFIG_HUGETLB_PAGE */ |
441 | + | |
442 | +static inline spinlock_t *huge_pte_lock(struct hstate *h, | |
443 | + struct mm_struct *mm, pte_t *pte) | |
444 | +{ | |
445 | + spinlock_t *ptl; | |
446 | + | |
447 | + ptl = huge_pte_lockptr(h, mm, pte); | |
448 | + spin_lock(ptl); | |
449 | + return ptl; | |
450 | +} | |
425 | 451 | |
426 | 452 | #endif /* _LINUX_HUGETLB_H */ |
include/linux/swapops.h
... | ... | @@ -139,7 +139,8 @@ |
139 | 139 | |
140 | 140 | extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, |
141 | 141 | unsigned long address); |
142 | -extern void migration_entry_wait_huge(struct mm_struct *mm, pte_t *pte); | |
142 | +extern void migration_entry_wait_huge(struct vm_area_struct *vma, | |
143 | + struct mm_struct *mm, pte_t *pte); | |
143 | 144 | #else |
144 | 145 | |
145 | 146 | #define make_migration_entry(page, write) swp_entry(0, 0) |
... | ... | @@ -151,8 +152,8 @@ |
151 | 152 | static inline void make_migration_entry_read(swp_entry_t *entryp) { } |
152 | 153 | static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, |
153 | 154 | unsigned long address) { } |
154 | -static inline void migration_entry_wait_huge(struct mm_struct *mm, | |
155 | - pte_t *pte) { } | |
155 | +static inline void migration_entry_wait_huge(struct vm_area_struct *vma, | |
156 | + struct mm_struct *mm, pte_t *pte) { } | |
156 | 157 | static inline int is_write_migration_entry(swp_entry_t entry) |
157 | 158 | { |
158 | 159 | return 0; |
mm/hugetlb.c
... | ... | @@ -2376,6 +2376,7 @@ |
2376 | 2376 | cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; |
2377 | 2377 | |
2378 | 2378 | for (addr = vma->vm_start; addr < vma->vm_end; addr += sz) { |
2379 | + spinlock_t *src_ptl, *dst_ptl; | |
2379 | 2380 | src_pte = huge_pte_offset(src, addr); |
2380 | 2381 | if (!src_pte) |
2381 | 2382 | continue; |
... | ... | @@ -2387,8 +2388,9 @@ |
2387 | 2388 | if (dst_pte == src_pte) |
2388 | 2389 | continue; |
2389 | 2390 | |
2390 | - spin_lock(&dst->page_table_lock); | |
2391 | - spin_lock_nested(&src->page_table_lock, SINGLE_DEPTH_NESTING); | |
2391 | + dst_ptl = huge_pte_lock(h, dst, dst_pte); | |
2392 | + src_ptl = huge_pte_lockptr(h, src, src_pte); | |
2393 | + spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); | |
2392 | 2394 | if (!huge_pte_none(huge_ptep_get(src_pte))) { |
2393 | 2395 | if (cow) |
2394 | 2396 | huge_ptep_set_wrprotect(src, addr, src_pte); |
... | ... | @@ -2398,8 +2400,8 @@ |
2398 | 2400 | page_dup_rmap(ptepage); |
2399 | 2401 | set_huge_pte_at(dst, addr, dst_pte, entry); |
2400 | 2402 | } |
2401 | - spin_unlock(&src->page_table_lock); | |
2402 | - spin_unlock(&dst->page_table_lock); | |
2403 | + spin_unlock(src_ptl); | |
2404 | + spin_unlock(dst_ptl); | |
2403 | 2405 | } |
2404 | 2406 | return 0; |
2405 | 2407 | |
... | ... | @@ -2442,6 +2444,7 @@ |
2442 | 2444 | unsigned long address; |
2443 | 2445 | pte_t *ptep; |
2444 | 2446 | pte_t pte; |
2447 | + spinlock_t *ptl; | |
2445 | 2448 | struct page *page; |
2446 | 2449 | struct hstate *h = hstate_vma(vma); |
2447 | 2450 | unsigned long sz = huge_page_size(h); |
... | ... | @@ -2455,25 +2458,25 @@ |
2455 | 2458 | tlb_start_vma(tlb, vma); |
2456 | 2459 | mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); |
2457 | 2460 | again: |
2458 | - spin_lock(&mm->page_table_lock); | |
2459 | 2461 | for (address = start; address < end; address += sz) { |
2460 | 2462 | ptep = huge_pte_offset(mm, address); |
2461 | 2463 | if (!ptep) |
2462 | 2464 | continue; |
2463 | 2465 | |
2466 | + ptl = huge_pte_lock(h, mm, ptep); | |
2464 | 2467 | if (huge_pmd_unshare(mm, &address, ptep)) |
2465 | - continue; | |
2468 | + goto unlock; | |
2466 | 2469 | |
2467 | 2470 | pte = huge_ptep_get(ptep); |
2468 | 2471 | if (huge_pte_none(pte)) |
2469 | - continue; | |
2472 | + goto unlock; | |
2470 | 2473 | |
2471 | 2474 | /* |
2472 | 2475 | * HWPoisoned hugepage is already unmapped and dropped reference |
2473 | 2476 | */ |
2474 | 2477 | if (unlikely(is_hugetlb_entry_hwpoisoned(pte))) { |
2475 | 2478 | huge_pte_clear(mm, address, ptep); |
2476 | - continue; | |
2479 | + goto unlock; | |
2477 | 2480 | } |
2478 | 2481 | |
2479 | 2482 | page = pte_page(pte); |
... | ... | @@ -2484,7 +2487,7 @@ |
2484 | 2487 | */ |
2485 | 2488 | if (ref_page) { |
2486 | 2489 | if (page != ref_page) |
2487 | - continue; | |
2490 | + goto unlock; | |
2488 | 2491 | |
2489 | 2492 | /* |
2490 | 2493 | * Mark the VMA as having unmapped its page so that |
... | ... | @@ -2501,13 +2504,18 @@ |
2501 | 2504 | |
2502 | 2505 | page_remove_rmap(page); |
2503 | 2506 | force_flush = !__tlb_remove_page(tlb, page); |
2504 | - if (force_flush) | |
2507 | + if (force_flush) { | |
2508 | + spin_unlock(ptl); | |
2505 | 2509 | break; |
2510 | + } | |
2506 | 2511 | /* Bail out after unmapping reference page if supplied */ |
2507 | - if (ref_page) | |
2512 | + if (ref_page) { | |
2513 | + spin_unlock(ptl); | |
2508 | 2514 | break; |
2515 | + } | |
2516 | +unlock: | |
2517 | + spin_unlock(ptl); | |
2509 | 2518 | } |
2510 | - spin_unlock(&mm->page_table_lock); | |
2511 | 2519 | /* |
2512 | 2520 | * mmu_gather ran out of room to batch pages, we break out of |
2513 | 2521 | * the PTE lock to avoid doing the potential expensive TLB invalidate |
... | ... | @@ -2613,7 +2621,7 @@ |
2613 | 2621 | */ |
2614 | 2622 | static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, |
2615 | 2623 | unsigned long address, pte_t *ptep, pte_t pte, |
2616 | - struct page *pagecache_page) | |
2624 | + struct page *pagecache_page, spinlock_t *ptl) | |
2617 | 2625 | { |
2618 | 2626 | struct hstate *h = hstate_vma(vma); |
2619 | 2627 | struct page *old_page, *new_page; |
... | ... | @@ -2647,8 +2655,8 @@ |
2647 | 2655 | |
2648 | 2656 | page_cache_get(old_page); |
2649 | 2657 | |
2650 | - /* Drop page_table_lock as buddy allocator may be called */ | |
2651 | - spin_unlock(&mm->page_table_lock); | |
2658 | + /* Drop page table lock as buddy allocator may be called */ | |
2659 | + spin_unlock(ptl); | |
2652 | 2660 | new_page = alloc_huge_page(vma, address, outside_reserve); |
2653 | 2661 | |
2654 | 2662 | if (IS_ERR(new_page)) { |
... | ... | @@ -2666,13 +2674,13 @@ |
2666 | 2674 | BUG_ON(huge_pte_none(pte)); |
2667 | 2675 | if (unmap_ref_private(mm, vma, old_page, address)) { |
2668 | 2676 | BUG_ON(huge_pte_none(pte)); |
2669 | - spin_lock(&mm->page_table_lock); | |
2677 | + spin_lock(ptl); | |
2670 | 2678 | ptep = huge_pte_offset(mm, address & huge_page_mask(h)); |
2671 | 2679 | if (likely(pte_same(huge_ptep_get(ptep), pte))) |
2672 | 2680 | goto retry_avoidcopy; |
2673 | 2681 | /* |
2674 | - * race occurs while re-acquiring page_table_lock, and | |
2675 | - * our job is done. | |
2682 | + * race occurs while re-acquiring page table | |
2683 | + * lock, and our job is done. | |
2676 | 2684 | */ |
2677 | 2685 | return 0; |
2678 | 2686 | } |
... | ... | @@ -2680,7 +2688,7 @@ |
2680 | 2688 | } |
2681 | 2689 | |
2682 | 2690 | /* Caller expects lock to be held */ |
2683 | - spin_lock(&mm->page_table_lock); | |
2691 | + spin_lock(ptl); | |
2684 | 2692 | if (err == -ENOMEM) |
2685 | 2693 | return VM_FAULT_OOM; |
2686 | 2694 | else |
... | ... | @@ -2695,7 +2703,7 @@ |
2695 | 2703 | page_cache_release(new_page); |
2696 | 2704 | page_cache_release(old_page); |
2697 | 2705 | /* Caller expects lock to be held */ |
2698 | - spin_lock(&mm->page_table_lock); | |
2706 | + spin_lock(ptl); | |
2699 | 2707 | return VM_FAULT_OOM; |
2700 | 2708 | } |
2701 | 2709 | |
... | ... | @@ -2707,10 +2715,10 @@ |
2707 | 2715 | mmun_end = mmun_start + huge_page_size(h); |
2708 | 2716 | mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); |
2709 | 2717 | /* |
2710 | - * Retake the page_table_lock to check for racing updates | |
2718 | + * Retake the page table lock to check for racing updates | |
2711 | 2719 | * before the page tables are altered |
2712 | 2720 | */ |
2713 | - spin_lock(&mm->page_table_lock); | |
2721 | + spin_lock(ptl); | |
2714 | 2722 | ptep = huge_pte_offset(mm, address & huge_page_mask(h)); |
2715 | 2723 | if (likely(pte_same(huge_ptep_get(ptep), pte))) { |
2716 | 2724 | ClearPagePrivate(new_page); |
2717 | 2725 | |
... | ... | @@ -2724,13 +2732,13 @@ |
2724 | 2732 | /* Make the old page be freed below */ |
2725 | 2733 | new_page = old_page; |
2726 | 2734 | } |
2727 | - spin_unlock(&mm->page_table_lock); | |
2735 | + spin_unlock(ptl); | |
2728 | 2736 | mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); |
2729 | 2737 | page_cache_release(new_page); |
2730 | 2738 | page_cache_release(old_page); |
2731 | 2739 | |
2732 | 2740 | /* Caller expects lock to be held */ |
2733 | - spin_lock(&mm->page_table_lock); | |
2741 | + spin_lock(ptl); | |
2734 | 2742 | return 0; |
2735 | 2743 | } |
2736 | 2744 | |
... | ... | @@ -2778,6 +2786,7 @@ |
2778 | 2786 | struct page *page; |
2779 | 2787 | struct address_space *mapping; |
2780 | 2788 | pte_t new_pte; |
2789 | + spinlock_t *ptl; | |
2781 | 2790 | |
2782 | 2791 | /* |
2783 | 2792 | * Currently, we are forced to kill the process in the event the |
... | ... | @@ -2864,7 +2873,8 @@ |
2864 | 2873 | goto backout_unlocked; |
2865 | 2874 | } |
2866 | 2875 | |
2867 | - spin_lock(&mm->page_table_lock); | |
2876 | + ptl = huge_pte_lockptr(h, mm, ptep); | |
2877 | + spin_lock(ptl); | |
2868 | 2878 | size = i_size_read(mapping->host) >> huge_page_shift(h); |
2869 | 2879 | if (idx >= size) |
2870 | 2880 | goto backout; |
... | ... | @@ -2885,16 +2895,16 @@ |
2885 | 2895 | |
2886 | 2896 | if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) { |
2887 | 2897 | /* Optimization, do the COW without a second fault */ |
2888 | - ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page); | |
2898 | + ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page, ptl); | |
2889 | 2899 | } |
2890 | 2900 | |
2891 | - spin_unlock(&mm->page_table_lock); | |
2901 | + spin_unlock(ptl); | |
2892 | 2902 | unlock_page(page); |
2893 | 2903 | out: |
2894 | 2904 | return ret; |
2895 | 2905 | |
2896 | 2906 | backout: |
2897 | - spin_unlock(&mm->page_table_lock); | |
2907 | + spin_unlock(ptl); | |
2898 | 2908 | backout_unlocked: |
2899 | 2909 | unlock_page(page); |
2900 | 2910 | put_page(page); |
... | ... | @@ -2906,6 +2916,7 @@ |
2906 | 2916 | { |
2907 | 2917 | pte_t *ptep; |
2908 | 2918 | pte_t entry; |
2919 | + spinlock_t *ptl; | |
2909 | 2920 | int ret; |
2910 | 2921 | struct page *page = NULL; |
2911 | 2922 | struct page *pagecache_page = NULL; |
... | ... | @@ -2918,7 +2929,7 @@ |
2918 | 2929 | if (ptep) { |
2919 | 2930 | entry = huge_ptep_get(ptep); |
2920 | 2931 | if (unlikely(is_hugetlb_entry_migration(entry))) { |
2921 | - migration_entry_wait_huge(mm, ptep); | |
2932 | + migration_entry_wait_huge(vma, mm, ptep); | |
2922 | 2933 | return 0; |
2923 | 2934 | } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) |
2924 | 2935 | return VM_FAULT_HWPOISON_LARGE | |
... | ... | @@ -2974,17 +2985,18 @@ |
2974 | 2985 | if (page != pagecache_page) |
2975 | 2986 | lock_page(page); |
2976 | 2987 | |
2977 | - spin_lock(&mm->page_table_lock); | |
2988 | + ptl = huge_pte_lockptr(h, mm, ptep); | |
2989 | + spin_lock(ptl); | |
2978 | 2990 | /* Check for a racing update before calling hugetlb_cow */ |
2979 | 2991 | if (unlikely(!pte_same(entry, huge_ptep_get(ptep)))) |
2980 | - goto out_page_table_lock; | |
2992 | + goto out_ptl; | |
2981 | 2993 | |
2982 | 2994 | |
2983 | 2995 | if (flags & FAULT_FLAG_WRITE) { |
2984 | 2996 | if (!huge_pte_write(entry)) { |
2985 | 2997 | ret = hugetlb_cow(mm, vma, address, ptep, entry, |
2986 | - pagecache_page); | |
2987 | - goto out_page_table_lock; | |
2998 | + pagecache_page, ptl); | |
2999 | + goto out_ptl; | |
2988 | 3000 | } |
2989 | 3001 | entry = huge_pte_mkdirty(entry); |
2990 | 3002 | } |
... | ... | @@ -2993,8 +3005,8 @@ |
2993 | 3005 | flags & FAULT_FLAG_WRITE)) |
2994 | 3006 | update_mmu_cache(vma, address, ptep); |
2995 | 3007 | |
2996 | -out_page_table_lock: | |
2997 | - spin_unlock(&mm->page_table_lock); | |
3008 | +out_ptl: | |
3009 | + spin_unlock(ptl); | |
2998 | 3010 | |
2999 | 3011 | if (pagecache_page) { |
3000 | 3012 | unlock_page(pagecache_page); |
... | ... | @@ -3020,9 +3032,9 @@ |
3020 | 3032 | unsigned long remainder = *nr_pages; |
3021 | 3033 | struct hstate *h = hstate_vma(vma); |
3022 | 3034 | |
3023 | - spin_lock(&mm->page_table_lock); | |
3024 | 3035 | while (vaddr < vma->vm_end && remainder) { |
3025 | 3036 | pte_t *pte; |
3037 | + spinlock_t *ptl = NULL; | |
3026 | 3038 | int absent; |
3027 | 3039 | struct page *page; |
3028 | 3040 | |
... | ... | @@ -3030,8 +3042,12 @@ |
3030 | 3042 | * Some archs (sparc64, sh*) have multiple pte_ts to |
3031 | 3043 | * each hugepage. We have to make sure we get the |
3032 | 3044 | * first, for the page indexing below to work. |
3045 | + * | |
3046 | + * Note that page table lock is not held when pte is null. | |
3033 | 3047 | */ |
3034 | 3048 | pte = huge_pte_offset(mm, vaddr & huge_page_mask(h)); |
3049 | + if (pte) | |
3050 | + ptl = huge_pte_lock(h, mm, pte); | |
3035 | 3051 | absent = !pte || huge_pte_none(huge_ptep_get(pte)); |
3036 | 3052 | |
3037 | 3053 | /* |
... | ... | @@ -3043,6 +3059,8 @@ |
3043 | 3059 | */ |
3044 | 3060 | if (absent && (flags & FOLL_DUMP) && |
3045 | 3061 | !hugetlbfs_pagecache_present(h, vma, vaddr)) { |
3062 | + if (pte) | |
3063 | + spin_unlock(ptl); | |
3046 | 3064 | remainder = 0; |
3047 | 3065 | break; |
3048 | 3066 | } |
... | ... | @@ -3062,10 +3080,10 @@ |
3062 | 3080 | !huge_pte_write(huge_ptep_get(pte)))) { |
3063 | 3081 | int ret; |
3064 | 3082 | |
3065 | - spin_unlock(&mm->page_table_lock); | |
3083 | + if (pte) | |
3084 | + spin_unlock(ptl); | |
3066 | 3085 | ret = hugetlb_fault(mm, vma, vaddr, |
3067 | 3086 | (flags & FOLL_WRITE) ? FAULT_FLAG_WRITE : 0); |
3068 | - spin_lock(&mm->page_table_lock); | |
3069 | 3087 | if (!(ret & VM_FAULT_ERROR)) |
3070 | 3088 | continue; |
3071 | 3089 | |
... | ... | @@ -3096,8 +3114,8 @@ |
3096 | 3114 | */ |
3097 | 3115 | goto same_page; |
3098 | 3116 | } |
3117 | + spin_unlock(ptl); | |
3099 | 3118 | } |
3100 | - spin_unlock(&mm->page_table_lock); | |
3101 | 3119 | *nr_pages = remainder; |
3102 | 3120 | *position = vaddr; |
3103 | 3121 | |
... | ... | @@ -3118,13 +3136,15 @@ |
3118 | 3136 | flush_cache_range(vma, address, end); |
3119 | 3137 | |
3120 | 3138 | mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex); |
3121 | - spin_lock(&mm->page_table_lock); | |
3122 | 3139 | for (; address < end; address += huge_page_size(h)) { |
3140 | + spinlock_t *ptl; | |
3123 | 3141 | ptep = huge_pte_offset(mm, address); |
3124 | 3142 | if (!ptep) |
3125 | 3143 | continue; |
3144 | + ptl = huge_pte_lock(h, mm, ptep); | |
3126 | 3145 | if (huge_pmd_unshare(mm, &address, ptep)) { |
3127 | 3146 | pages++; |
3147 | + spin_unlock(ptl); | |
3128 | 3148 | continue; |
3129 | 3149 | } |
3130 | 3150 | if (!huge_pte_none(huge_ptep_get(ptep))) { |
... | ... | @@ -3134,8 +3154,8 @@ |
3134 | 3154 | set_huge_pte_at(mm, address, ptep, pte); |
3135 | 3155 | pages++; |
3136 | 3156 | } |
3157 | + spin_unlock(ptl); | |
3137 | 3158 | } |
3138 | - spin_unlock(&mm->page_table_lock); | |
3139 | 3159 | /* |
3140 | 3160 | * Must flush TLB before releasing i_mmap_mutex: x86's huge_pmd_unshare |
3141 | 3161 | * may have cleared our pud entry and done put_page on the page table: |
... | ... | @@ -3298,6 +3318,7 @@ |
3298 | 3318 | unsigned long saddr; |
3299 | 3319 | pte_t *spte = NULL; |
3300 | 3320 | pte_t *pte; |
3321 | + spinlock_t *ptl; | |
3301 | 3322 | |
3302 | 3323 | if (!vma_shareable(vma, addr)) |
3303 | 3324 | return (pte_t *)pmd_alloc(mm, pud, addr); |
... | ... | @@ -3320,13 +3341,14 @@ |
3320 | 3341 | if (!spte) |
3321 | 3342 | goto out; |
3322 | 3343 | |
3323 | - spin_lock(&mm->page_table_lock); | |
3344 | + ptl = huge_pte_lockptr(hstate_vma(vma), mm, spte); | |
3345 | + spin_lock(ptl); | |
3324 | 3346 | if (pud_none(*pud)) |
3325 | 3347 | pud_populate(mm, pud, |
3326 | 3348 | (pmd_t *)((unsigned long)spte & PAGE_MASK)); |
3327 | 3349 | else |
3328 | 3350 | put_page(virt_to_page(spte)); |
3329 | - spin_unlock(&mm->page_table_lock); | |
3351 | + spin_unlock(ptl); | |
3330 | 3352 | out: |
3331 | 3353 | pte = (pte_t *)pmd_alloc(mm, pud, addr); |
3332 | 3354 | mutex_unlock(&mapping->i_mmap_mutex); |
... | ... | @@ -3340,7 +3362,7 @@ |
3340 | 3362 | * indicated by page_count > 1, unmap is achieved by clearing pud and |
3341 | 3363 | * decrementing the ref count. If count == 1, the pte page is not shared. |
3342 | 3364 | * |
3343 | - * called with vma->vm_mm->page_table_lock held. | |
3365 | + * called with page table lock held. | |
3344 | 3366 | * |
3345 | 3367 | * returns: 1 successfully unmapped a shared pte page |
3346 | 3368 | * 0 the underlying pte page is not shared, or it is the last user |
mm/mempolicy.c
... | ... | @@ -525,8 +525,9 @@ |
525 | 525 | #ifdef CONFIG_HUGETLB_PAGE |
526 | 526 | int nid; |
527 | 527 | struct page *page; |
528 | + spinlock_t *ptl; | |
528 | 529 | |
529 | - spin_lock(&vma->vm_mm->page_table_lock); | |
530 | + ptl = huge_pte_lock(hstate_vma(vma), vma->vm_mm, (pte_t *)pmd); | |
530 | 531 | page = pte_page(huge_ptep_get((pte_t *)pmd)); |
531 | 532 | nid = page_to_nid(page); |
532 | 533 | if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT)) |
... | ... | @@ -536,7 +537,7 @@ |
536 | 537 | (flags & MPOL_MF_MOVE && page_mapcount(page) == 1)) |
537 | 538 | isolate_huge_page(page, private); |
538 | 539 | unlock: |
539 | - spin_unlock(&vma->vm_mm->page_table_lock); | |
540 | + spin_unlock(ptl); | |
540 | 541 | #else |
541 | 542 | BUG(); |
542 | 543 | #endif |
mm/migrate.c
... | ... | @@ -130,7 +130,7 @@ |
130 | 130 | ptep = huge_pte_offset(mm, addr); |
131 | 131 | if (!ptep) |
132 | 132 | goto out; |
133 | - ptl = &mm->page_table_lock; | |
133 | + ptl = huge_pte_lockptr(hstate_vma(vma), mm, ptep); | |
134 | 134 | } else { |
135 | 135 | pmd = mm_find_pmd(mm, addr); |
136 | 136 | if (!pmd) |
... | ... | @@ -249,9 +249,10 @@ |
249 | 249 | __migration_entry_wait(mm, ptep, ptl); |
250 | 250 | } |
251 | 251 | |
252 | -void migration_entry_wait_huge(struct mm_struct *mm, pte_t *pte) | |
252 | +void migration_entry_wait_huge(struct vm_area_struct *vma, | |
253 | + struct mm_struct *mm, pte_t *pte) | |
253 | 254 | { |
254 | - spinlock_t *ptl = &(mm)->page_table_lock; | |
255 | + spinlock_t *ptl = huge_pte_lockptr(hstate_vma(vma), mm, pte); | |
255 | 256 | __migration_entry_wait(mm, pte, ptl); |
256 | 257 | } |
257 | 258 |