Commit 340ef3902cf20cec43cdcd1e72ae5cb518be7328
Committed by
Linus Torvalds
1 parent
75980e97da
Exists in
master
and in
20 other branches
mm: numa: cleanup flow of transhuge page migration
When correcting commit 04fa5d6a6547 ("mm: migrate: check page_count of THP before migrating") Hugh Dickins noted that the control flow for transhuge migration was difficult to follow. Unconditionally calling put_page() in numamigrate_isolate_page() made the failure paths of both migrate_misplaced_transhuge_page() and migrate_misplaced_page() more complex than they should be. Further, he was extremely wary that an unlock_page() should ever happen after a put_page() even if the put_page() should never be the final put_page. Hugh implemented the following cleanup to simplify the path by calling putback_lru_page() inside numamigrate_isolate_page() if it failed to isolate and always calling unlock_page() within migrate_misplaced_transhuge_page(). There is no functional change after this patch is applied but the code is easier to follow and unlock_page() always happens before put_page(). [mgorman@suse.de: changelog only] Signed-off-by: Mel Gorman <mgorman@suse.de> Signed-off-by: Hugh Dickins <hughd@google.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Simon Jeons <simon.jeons@gmail.com> Cc: Wanpeng Li <liwanp@linux.vnet.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 2 changed files with 52 additions and 71 deletions Side-by-side Diff
mm/huge_memory.c
... | ... | @@ -1296,7 +1296,6 @@ |
1296 | 1296 | int target_nid; |
1297 | 1297 | int current_nid = -1; |
1298 | 1298 | bool migrated; |
1299 | - bool page_locked = false; | |
1300 | 1299 | |
1301 | 1300 | spin_lock(&mm->page_table_lock); |
1302 | 1301 | if (unlikely(!pmd_same(pmd, *pmdp))) |
... | ... | @@ -1318,7 +1317,6 @@ |
1318 | 1317 | /* Acquire the page lock to serialise THP migrations */ |
1319 | 1318 | spin_unlock(&mm->page_table_lock); |
1320 | 1319 | lock_page(page); |
1321 | - page_locked = true; | |
1322 | 1320 | |
1323 | 1321 | /* Confirm the PTE did not while locked */ |
1324 | 1322 | spin_lock(&mm->page_table_lock); |
1325 | 1323 | |
1326 | 1324 | |
1327 | 1325 | |
1328 | 1326 | |
... | ... | @@ -1331,34 +1329,26 @@ |
1331 | 1329 | |
1332 | 1330 | /* Migrate the THP to the requested node */ |
1333 | 1331 | migrated = migrate_misplaced_transhuge_page(mm, vma, |
1334 | - pmdp, pmd, addr, | |
1335 | - page, target_nid); | |
1336 | - if (migrated) | |
1337 | - current_nid = target_nid; | |
1338 | - else { | |
1339 | - spin_lock(&mm->page_table_lock); | |
1340 | - if (unlikely(!pmd_same(pmd, *pmdp))) { | |
1341 | - unlock_page(page); | |
1342 | - goto out_unlock; | |
1343 | - } | |
1344 | - goto clear_pmdnuma; | |
1345 | - } | |
1332 | + pmdp, pmd, addr, page, target_nid); | |
1333 | + if (!migrated) | |
1334 | + goto check_same; | |
1346 | 1335 | |
1347 | - task_numa_fault(current_nid, HPAGE_PMD_NR, migrated); | |
1336 | + task_numa_fault(target_nid, HPAGE_PMD_NR, true); | |
1348 | 1337 | return 0; |
1349 | 1338 | |
1339 | +check_same: | |
1340 | + spin_lock(&mm->page_table_lock); | |
1341 | + if (unlikely(!pmd_same(pmd, *pmdp))) | |
1342 | + goto out_unlock; | |
1350 | 1343 | clear_pmdnuma: |
1351 | 1344 | pmd = pmd_mknonnuma(pmd); |
1352 | 1345 | set_pmd_at(mm, haddr, pmdp, pmd); |
1353 | 1346 | VM_BUG_ON(pmd_numa(*pmdp)); |
1354 | 1347 | update_mmu_cache_pmd(vma, addr, pmdp); |
1355 | - if (page_locked) | |
1356 | - unlock_page(page); | |
1357 | - | |
1358 | 1348 | out_unlock: |
1359 | 1349 | spin_unlock(&mm->page_table_lock); |
1360 | 1350 | if (current_nid != -1) |
1361 | - task_numa_fault(current_nid, HPAGE_PMD_NR, migrated); | |
1351 | + task_numa_fault(current_nid, HPAGE_PMD_NR, false); | |
1362 | 1352 | return 0; |
1363 | 1353 | } |
1364 | 1354 |
mm/migrate.c
... | ... | @@ -1557,41 +1557,40 @@ |
1557 | 1557 | |
1558 | 1558 | int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page) |
1559 | 1559 | { |
1560 | - int ret = 0; | |
1560 | + int page_lru; | |
1561 | 1561 | |
1562 | 1562 | VM_BUG_ON(compound_order(page) && !PageTransHuge(page)); |
1563 | 1563 | |
1564 | 1564 | /* Avoid migrating to a node that is nearly full */ |
1565 | - if (migrate_balanced_pgdat(pgdat, 1UL << compound_order(page))) { | |
1566 | - int page_lru; | |
1565 | + if (!migrate_balanced_pgdat(pgdat, 1UL << compound_order(page))) | |
1566 | + return 0; | |
1567 | 1567 | |
1568 | - if (isolate_lru_page(page)) { | |
1569 | - put_page(page); | |
1570 | - return 0; | |
1571 | - } | |
1568 | + if (isolate_lru_page(page)) | |
1569 | + return 0; | |
1572 | 1570 | |
1573 | - /* Page is isolated */ | |
1574 | - ret = 1; | |
1575 | - page_lru = page_is_file_cache(page); | |
1576 | - if (!PageTransHuge(page)) | |
1577 | - inc_zone_page_state(page, NR_ISOLATED_ANON + page_lru); | |
1578 | - else | |
1579 | - mod_zone_page_state(page_zone(page), | |
1580 | - NR_ISOLATED_ANON + page_lru, | |
1581 | - HPAGE_PMD_NR); | |
1571 | + /* | |
1572 | + * migrate_misplaced_transhuge_page() skips page migration's usual | |
1573 | + * check on page_count(), so we must do it here, now that the page | |
1574 | + * has been isolated: a GUP pin, or any other pin, prevents migration. | |
1575 | + * The expected page count is 3: 1 for page's mapcount and 1 for the | |
1576 | + * caller's pin and 1 for the reference taken by isolate_lru_page(). | |
1577 | + */ | |
1578 | + if (PageTransHuge(page) && page_count(page) != 3) { | |
1579 | + putback_lru_page(page); | |
1580 | + return 0; | |
1582 | 1581 | } |
1583 | 1582 | |
1583 | + page_lru = page_is_file_cache(page); | |
1584 | + mod_zone_page_state(page_zone(page), NR_ISOLATED_ANON + page_lru, | |
1585 | + hpage_nr_pages(page)); | |
1586 | + | |
1584 | 1587 | /* |
1585 | - * Page is either isolated or there is not enough space on the target | |
1586 | - * node. If isolated, then it has taken a reference count and the | |
1587 | - * callers reference can be safely dropped without the page | |
1588 | - * disappearing underneath us during migration. Otherwise the page is | |
1589 | - * not to be migrated but the callers reference should still be | |
1590 | - * dropped so it does not leak. | |
1588 | + * Isolating the page has taken another reference, so the | |
1589 | + * caller's reference can be safely dropped without the page | |
1590 | + * disappearing underneath us during migration. | |
1591 | 1591 | */ |
1592 | 1592 | put_page(page); |
1593 | - | |
1594 | - return ret; | |
1593 | + return 1; | |
1595 | 1594 | } |
1596 | 1595 | |
1597 | 1596 | /* |
... | ... | @@ -1602,7 +1601,7 @@ |
1602 | 1601 | int migrate_misplaced_page(struct page *page, int node) |
1603 | 1602 | { |
1604 | 1603 | pg_data_t *pgdat = NODE_DATA(node); |
1605 | - int isolated = 0; | |
1604 | + int isolated; | |
1606 | 1605 | int nr_remaining; |
1607 | 1606 | LIST_HEAD(migratepages); |
1608 | 1607 | |
1609 | 1608 | |
1610 | 1609 | |
1611 | 1610 | |
... | ... | @@ -1610,20 +1609,16 @@ |
1610 | 1609 | * Don't migrate pages that are mapped in multiple processes. |
1611 | 1610 | * TODO: Handle false sharing detection instead of this hammer |
1612 | 1611 | */ |
1613 | - if (page_mapcount(page) != 1) { | |
1614 | - put_page(page); | |
1612 | + if (page_mapcount(page) != 1) | |
1615 | 1613 | goto out; |
1616 | - } | |
1617 | 1614 | |
1618 | 1615 | /* |
1619 | 1616 | * Rate-limit the amount of data that is being migrated to a node. |
1620 | 1617 | * Optimal placement is no good if the memory bus is saturated and |
1621 | 1618 | * all the time is being spent migrating! |
1622 | 1619 | */ |
1623 | - if (numamigrate_update_ratelimit(pgdat, 1)) { | |
1624 | - put_page(page); | |
1620 | + if (numamigrate_update_ratelimit(pgdat, 1)) | |
1625 | 1621 | goto out; |
1626 | - } | |
1627 | 1622 | |
1628 | 1623 | isolated = numamigrate_isolate_page(pgdat, page); |
1629 | 1624 | if (!isolated) |
1630 | 1625 | |
1631 | 1626 | |
... | ... | @@ -1640,12 +1635,19 @@ |
1640 | 1635 | } else |
1641 | 1636 | count_vm_numa_event(NUMA_PAGE_MIGRATE); |
1642 | 1637 | BUG_ON(!list_empty(&migratepages)); |
1643 | -out: | |
1644 | 1638 | return isolated; |
1639 | + | |
1640 | +out: | |
1641 | + put_page(page); | |
1642 | + return 0; | |
1645 | 1643 | } |
1646 | 1644 | #endif /* CONFIG_NUMA_BALANCING */ |
1647 | 1645 | |
1648 | 1646 | #if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE) |
1647 | +/* | |
1648 | + * Migrates a THP to a given target node. page must be locked and is unlocked | |
1649 | + * before returning. | |
1650 | + */ | |
1649 | 1651 | int migrate_misplaced_transhuge_page(struct mm_struct *mm, |
1650 | 1652 | struct vm_area_struct *vma, |
1651 | 1653 | pmd_t *pmd, pmd_t entry, |
1652 | 1654 | |
1653 | 1655 | |
... | ... | @@ -1676,29 +1678,15 @@ |
1676 | 1678 | |
1677 | 1679 | new_page = alloc_pages_node(node, |
1678 | 1680 | (GFP_TRANSHUGE | GFP_THISNODE) & ~__GFP_WAIT, HPAGE_PMD_ORDER); |
1679 | - if (!new_page) { | |
1680 | - count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR); | |
1681 | - goto out_dropref; | |
1682 | - } | |
1681 | + if (!new_page) | |
1682 | + goto out_fail; | |
1683 | + | |
1683 | 1684 | page_xchg_last_nid(new_page, page_last_nid(page)); |
1684 | 1685 | |
1685 | 1686 | isolated = numamigrate_isolate_page(pgdat, page); |
1686 | - | |
1687 | - /* | |
1688 | - * Failing to isolate or a GUP pin prevents migration. The expected | |
1689 | - * page count is 2. 1 for anonymous pages without a mapping and 1 | |
1690 | - * for the callers pin. If the page was isolated, the page will | |
1691 | - * need to be put back on the LRU. | |
1692 | - */ | |
1693 | - if (!isolated || page_count(page) != 2) { | |
1694 | - count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR); | |
1687 | + if (!isolated) { | |
1695 | 1688 | put_page(new_page); |
1696 | - if (isolated) { | |
1697 | - putback_lru_page(page); | |
1698 | - isolated = 0; | |
1699 | - goto out; | |
1700 | - } | |
1701 | - goto out_keep_locked; | |
1689 | + goto out_fail; | |
1702 | 1690 | } |
1703 | 1691 | |
1704 | 1692 | /* Prepare a page as a migration target */ |
... | ... | @@ -1730,6 +1718,7 @@ |
1730 | 1718 | putback_lru_page(page); |
1731 | 1719 | |
1732 | 1720 | count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR); |
1721 | + isolated = 0; | |
1733 | 1722 | goto out; |
1734 | 1723 | } |
1735 | 1724 | |
1736 | 1725 | |
1737 | 1726 | |
... | ... | @@ -1774,9 +1763,11 @@ |
1774 | 1763 | -HPAGE_PMD_NR); |
1775 | 1764 | return isolated; |
1776 | 1765 | |
1766 | +out_fail: | |
1767 | + count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR); | |
1777 | 1768 | out_dropref: |
1769 | + unlock_page(page); | |
1778 | 1770 | put_page(page); |
1779 | -out_keep_locked: | |
1780 | 1771 | return 0; |
1781 | 1772 | } |
1782 | 1773 | #endif /* CONFIG_NUMA_BALANCING */ |