Commit 30dad30922ccc733cfdbfe232090cf674dc374dc
Committed by
Linus Torvalds
1 parent
27749f2ff0
Exists in
master
and in
20 other branches
mm: migration: add migration_entry_wait_huge() [title in the original commit reads "migrate_entry_wait_huge()", but the function added by the patch is migration_entry_wait_huge()]
When we have a page fault for an address that is backed by a hugepage under migration, the kernel can't wait correctly and instead busy-loops on the hugepage fault until the migration finishes. As a result, users who try to kick off hugepage migration (via soft offlining, for example) occasionally experience a long delay or soft lockup. This is because pte_offset_map_lock() can't get a correct migration entry or a correct page table lock for a hugepage. This patch introduces migration_entry_wait_huge() to solve this. Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> Reviewed-by: Rik van Riel <riel@redhat.com> Reviewed-by: Wanpeng Li <liwanp@linux.vnet.ibm.com> Reviewed-by: Michal Hocko <mhocko@suse.cz> Cc: Mel Gorman <mgorman@suse.de> Cc: Andi Kleen <andi@firstfloor.org> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: <stable@vger.kernel.org> [2.6.35+] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 3 changed files with 22 additions and 6 deletions Side-by-side Diff
include/linux/swapops.h
... | ... | @@ -137,6 +137,7 @@ |
137 | 137 | |
138 | 138 | extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, |
139 | 139 | unsigned long address); |
140 | +extern void migration_entry_wait_huge(struct mm_struct *mm, pte_t *pte); | |
140 | 141 | #else |
141 | 142 | |
142 | 143 | #define make_migration_entry(page, write) swp_entry(0, 0) |
... | ... | @@ -148,6 +149,8 @@ |
148 | 149 | static inline void make_migration_entry_read(swp_entry_t *entryp) { } |
149 | 150 | static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, |
150 | 151 | unsigned long address) { } |
152 | +static inline void migration_entry_wait_huge(struct mm_struct *mm, | |
153 | + pte_t *pte) { } | |
151 | 154 | static inline int is_write_migration_entry(swp_entry_t entry) |
152 | 155 | { |
153 | 156 | return 0; |
mm/hugetlb.c
... | ... | @@ -2839,7 +2839,7 @@ |
2839 | 2839 | if (ptep) { |
2840 | 2840 | entry = huge_ptep_get(ptep); |
2841 | 2841 | if (unlikely(is_hugetlb_entry_migration(entry))) { |
2842 | - migration_entry_wait(mm, (pmd_t *)ptep, address); | |
2842 | + migration_entry_wait_huge(mm, ptep); | |
2843 | 2843 | return 0; |
2844 | 2844 | } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) |
2845 | 2845 | return VM_FAULT_HWPOISON_LARGE | |
mm/migrate.c
... | ... | @@ -200,15 +200,14 @@ |
200 | 200 | * get to the page and wait until migration is finished. |
201 | 201 | * When we return from this function the fault will be retried. |
202 | 202 | */ |
203 | -void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, | |
204 | - unsigned long address) | |
203 | +static void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, | |
204 | + spinlock_t *ptl) | |
205 | 205 | { |
206 | - pte_t *ptep, pte; | |
207 | - spinlock_t *ptl; | |
206 | + pte_t pte; | |
208 | 207 | swp_entry_t entry; |
209 | 208 | struct page *page; |
210 | 209 | |
211 | - ptep = pte_offset_map_lock(mm, pmd, address, &ptl); | |
210 | + spin_lock(ptl); | |
212 | 211 | pte = *ptep; |
213 | 212 | if (!is_swap_pte(pte)) |
214 | 213 | goto out; |
... | ... | @@ -234,6 +233,20 @@ |
234 | 233 | return; |
235 | 234 | out: |
236 | 235 | pte_unmap_unlock(ptep, ptl); |
236 | +} | |
237 | + | |
238 | +void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, | |
239 | + unsigned long address) | |
240 | +{ | |
241 | + spinlock_t *ptl = pte_lockptr(mm, pmd); | |
242 | + pte_t *ptep = pte_offset_map(pmd, address); | |
243 | + __migration_entry_wait(mm, ptep, ptl); | |
244 | +} | |
245 | + | |
246 | +void migration_entry_wait_huge(struct mm_struct *mm, pte_t *pte) | |
247 | +{ | |
248 | + spinlock_t *ptl = &(mm)->page_table_lock; | |
249 | + __migration_entry_wait(mm, pte, ptl); | |
237 | 250 | } |
238 | 251 | |
239 | 252 | #ifdef CONFIG_BLOCK |