Commit a54a407fbf7735fd8f7841375574f5d9b0375f93
Committed by
Ingo Molnar
1 parent
8191acbd30
Exists in
master
and in
16 other branches
mm: Close races between THP migration and PMD numa clearing
THP migration uses the page lock to guard against parallel allocations
but there are cases like this still open

  Task A                                Task B
  ---------------------                 ---------------------
  do_huge_pmd_numa_page                 do_huge_pmd_numa_page
  lock_page
  mpol_misplaced == -1
  unlock_page
  goto clear_pmdnuma
                                        lock_page
                                        mpol_misplaced == 2
                                        migrate_misplaced_transhuge
  pmd = pmd_mknonnuma
  set_pmd_at

During hours of testing, one crashed with weird errors and while I have
no direct evidence, I suspect something like the race above happened.
This patch extends the page lock to being held until the pmd_numa is
cleared to prevent migration starting in parallel while the pmd_numa is
being cleared. It also flushes the old pmd entry and orders pagetable
insertion before rmap insertion.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-9-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Showing 2 changed files with 26 additions and 26 deletions Side-by-side Diff
mm/huge_memory.c
... | ... | @@ -1304,24 +1304,25 @@ |
1304 | 1304 | target_nid = mpol_misplaced(page, vma, haddr); |
1305 | 1305 | if (target_nid == -1) { |
1306 | 1306 | /* If the page was locked, there are no parallel migrations */ |
1307 | - if (page_locked) { | |
1308 | - unlock_page(page); | |
1307 | + if (page_locked) | |
1309 | 1308 | goto clear_pmdnuma; |
1310 | - } | |
1311 | 1309 | |
1312 | - /* Otherwise wait for potential migrations and retry fault */ | |
1310 | + /* | |
1311 | + * Otherwise wait for potential migrations and retry. We do | |
1312 | + * relock and check_same as the page may no longer be mapped. | |
1313 | + * As the fault is being retried, do not account for it. | |
1314 | + */ | |
1313 | 1315 | spin_unlock(&mm->page_table_lock); |
1314 | 1316 | wait_on_page_locked(page); |
1317 | + page_nid = -1; | |
1315 | 1318 | goto out; |
1316 | 1319 | } |
1317 | 1320 | |
1318 | 1321 | /* Page is misplaced, serialise migrations and parallel THP splits */ |
1319 | 1322 | get_page(page); |
1320 | 1323 | spin_unlock(&mm->page_table_lock); |
1321 | - if (!page_locked) { | |
1324 | + if (!page_locked) | |
1322 | 1325 | lock_page(page); |
1323 | - page_locked = true; | |
1324 | - } | |
1325 | 1326 | anon_vma = page_lock_anon_vma_read(page); |
1326 | 1327 | |
1327 | 1328 | /* Confirm the PMD did not change while page_table_lock was released */ |
1328 | 1329 | |
1329 | 1330 | |
1330 | 1331 | |
1331 | 1332 | |
1332 | 1333 | |
... | ... | @@ -1329,32 +1330,28 @@ |
1329 | 1330 | if (unlikely(!pmd_same(pmd, *pmdp))) { |
1330 | 1331 | unlock_page(page); |
1331 | 1332 | put_page(page); |
1333 | + page_nid = -1; | |
1332 | 1334 | goto out_unlock; |
1333 | 1335 | } |
1334 | 1336 | |
1335 | - /* Migrate the THP to the requested node */ | |
1337 | + /* | |
1338 | + * Migrate the THP to the requested node, returns with page unlocked | |
1339 | + * and pmd_numa cleared. | |
1340 | + */ | |
1336 | 1341 | spin_unlock(&mm->page_table_lock); |
1337 | 1342 | migrated = migrate_misplaced_transhuge_page(mm, vma, |
1338 | 1343 | pmdp, pmd, addr, page, target_nid); |
1339 | 1344 | if (migrated) |
1340 | 1345 | page_nid = target_nid; |
1341 | - else | |
1342 | - goto check_same; | |
1343 | 1346 | |
1344 | 1347 | goto out; |
1345 | - | |
1346 | -check_same: | |
1347 | - spin_lock(&mm->page_table_lock); | |
1348 | - if (unlikely(!pmd_same(pmd, *pmdp))) { | |
1349 | - /* Someone else took our fault */ | |
1350 | - page_nid = -1; | |
1351 | - goto out_unlock; | |
1352 | - } | |
1353 | 1348 | clear_pmdnuma: |
1349 | + BUG_ON(!PageLocked(page)); | |
1354 | 1350 | pmd = pmd_mknonnuma(pmd); |
1355 | 1351 | set_pmd_at(mm, haddr, pmdp, pmd); |
1356 | 1352 | VM_BUG_ON(pmd_numa(*pmdp)); |
1357 | 1353 | update_mmu_cache_pmd(vma, addr, pmdp); |
1354 | + unlock_page(page); | |
1358 | 1355 | out_unlock: |
1359 | 1356 | spin_unlock(&mm->page_table_lock); |
1360 | 1357 |
mm/migrate.c
... | ... | @@ -1713,12 +1713,12 @@ |
1713 | 1713 | unlock_page(new_page); |
1714 | 1714 | put_page(new_page); /* Free it */ |
1715 | 1715 | |
1716 | - unlock_page(page); | |
1716 | + /* Retake the callers reference and putback on LRU */ | |
1717 | + get_page(page); | |
1717 | 1718 | putback_lru_page(page); |
1718 | - | |
1719 | - count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR); | |
1720 | - isolated = 0; | |
1721 | - goto out; | |
1719 | + mod_zone_page_state(page_zone(page), | |
1720 | + NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR); | |
1721 | + goto out_fail; | |
1722 | 1722 | } |
1723 | 1723 | |
1724 | 1724 | /* |
1725 | 1725 | |
... | ... | @@ -1735,9 +1735,9 @@ |
1735 | 1735 | entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); |
1736 | 1736 | entry = pmd_mkhuge(entry); |
1737 | 1737 | |
1738 | - page_add_new_anon_rmap(new_page, vma, haddr); | |
1739 | - | |
1738 | + pmdp_clear_flush(vma, haddr, pmd); | |
1740 | 1739 | set_pmd_at(mm, haddr, pmd, entry); |
1740 | + page_add_new_anon_rmap(new_page, vma, haddr); | |
1741 | 1741 | update_mmu_cache_pmd(vma, address, &entry); |
1742 | 1742 | page_remove_rmap(page); |
1743 | 1743 | /* |
... | ... | @@ -1756,7 +1756,6 @@ |
1756 | 1756 | count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR); |
1757 | 1757 | count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR); |
1758 | 1758 | |
1759 | -out: | |
1760 | 1759 | mod_zone_page_state(page_zone(page), |
1761 | 1760 | NR_ISOLATED_ANON + page_lru, |
1762 | 1761 | -HPAGE_PMD_NR); |
... | ... | @@ -1765,6 +1764,10 @@ |
1765 | 1764 | out_fail: |
1766 | 1765 | count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR); |
1767 | 1766 | out_dropref: |
1767 | + entry = pmd_mknonnuma(entry); | |
1768 | + set_pmd_at(mm, haddr, pmd, entry); | |
1769 | + update_mmu_cache_pmd(vma, address, &entry); | |
1770 | + | |
1768 | 1771 | unlock_page(page); |
1769 | 1772 | put_page(page); |
1770 | 1773 | return 0; |