Commit 340ef3902cf20cec43cdcd1e72ae5cb518be7328

Authored by Hugh Dickins
Committed by Linus Torvalds
1 parent 75980e97da

mm: numa: cleanup flow of transhuge page migration

When correcting commit 04fa5d6a6547 ("mm: migrate: check page_count of
THP before migrating") Hugh Dickins noted that the control flow for
transhuge migration was difficult to follow.  Unconditionally calling
put_page() in numamigrate_isolate_page() made the failure paths of both
migrate_misplaced_transhuge_page() and migrate_misplaced_page() more
complex than they should be.  Further, he was extremely wary of an
unlock_page() ever happening after a put_page(), even if that
put_page() could never be the final put_page().
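
A minimal sketch of the ordering at issue, as an illustrative paraphrase
rather than verbatim kernel code:

	/*
	 * Shape Hugh objected to: the caller's pin is dropped first (inside
	 * numamigrate_isolate_page()), and the page is only unlocked later,
	 * relying on some other reference (e.g. the mapping) to keep the
	 * page alive in between.
	 */
	put_page(page);		/* drop the caller's reference */
	/* ... */
	unlock_page(page);	/* unlock a page we no longer pin */

	/* Shape after the cleanup: always unlock before the reference drop. */
	unlock_page(page);
	put_page(page);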

Hugh implemented the following cleanup to simplify the path:
putback_lru_page() is called inside numamigrate_isolate_page() if it
fails to isolate the page, and unlock_page() is always called within
migrate_misplaced_transhuge_page().

There is no functional change after this patch is applied, but the code
is easier to follow and unlock_page() always happens before put_page().
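
For reference, the resulting failure-path shape of migrate_misplaced_page()
looks roughly like the sketch below (paraphrased from the diff that follows,
not a verbatim copy of the kernel source):

	int migrate_misplaced_page(struct page *page, int node)
	{
		pg_data_t *pgdat = NODE_DATA(node);
		int isolated;

		/*
		 * Every early failure branches to a single exit that drops
		 * the caller's reference exactly once.
		 */
		if (page_mapcount(page) != 1)
			goto out;
		if (numamigrate_update_ratelimit(pgdat, 1))
			goto out;
		isolated = numamigrate_isolate_page(pgdat, page);
		if (!isolated)
			goto out;

		/* ... migrate the isolated page ... */
		return isolated;
	out:
		put_page(page);
		return 0;
	}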

[mgorman@suse.de: changelog only]
Signed-off-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Simon Jeons <simon.jeons@gmail.com>
Cc: Wanpeng Li <liwanp@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 2 changed files with 52 additions and 71 deletions

mm/huge_memory.c
... ... @@ -1296,7 +1296,6 @@
1296 1296 int target_nid;
1297 1297 int current_nid = -1;
1298 1298 bool migrated;
1299   - bool page_locked = false;
1300 1299  
1301 1300 spin_lock(&mm->page_table_lock);
1302 1301 if (unlikely(!pmd_same(pmd, *pmdp)))
... ... @@ -1318,7 +1317,6 @@
1318 1317 /* Acquire the page lock to serialise THP migrations */
1319 1318 spin_unlock(&mm->page_table_lock);
1320 1319 lock_page(page);
1321   - page_locked = true;
1322 1320  
1323 1321 /* Confirm the PTE did not change while locked */
1324 1322 spin_lock(&mm->page_table_lock);
1325 1323  
1326 1324  
1327 1325  
1328 1326  
... ... @@ -1331,34 +1329,26 @@
1331 1329  
1332 1330 /* Migrate the THP to the requested node */
1333 1331 migrated = migrate_misplaced_transhuge_page(mm, vma,
1334   - pmdp, pmd, addr,
1335   - page, target_nid);
1336   - if (migrated)
1337   - current_nid = target_nid;
1338   - else {
1339   - spin_lock(&mm->page_table_lock);
1340   - if (unlikely(!pmd_same(pmd, *pmdp))) {
1341   - unlock_page(page);
1342   - goto out_unlock;
1343   - }
1344   - goto clear_pmdnuma;
1345   - }
  1332 + pmdp, pmd, addr, page, target_nid);
  1333 + if (!migrated)
  1334 + goto check_same;
1346 1335  
1347   - task_numa_fault(current_nid, HPAGE_PMD_NR, migrated);
  1336 + task_numa_fault(target_nid, HPAGE_PMD_NR, true);
1348 1337 return 0;
1349 1338  
  1339 +check_same:
  1340 + spin_lock(&mm->page_table_lock);
  1341 + if (unlikely(!pmd_same(pmd, *pmdp)))
  1342 + goto out_unlock;
1350 1343 clear_pmdnuma:
1351 1344 pmd = pmd_mknonnuma(pmd);
1352 1345 set_pmd_at(mm, haddr, pmdp, pmd);
1353 1346 VM_BUG_ON(pmd_numa(*pmdp));
1354 1347 update_mmu_cache_pmd(vma, addr, pmdp);
1355   - if (page_locked)
1356   - unlock_page(page);
1357   -
1358 1348 out_unlock:
1359 1349 spin_unlock(&mm->page_table_lock);
1360 1350 if (current_nid != -1)
1361   - task_numa_fault(current_nid, HPAGE_PMD_NR, migrated);
  1351 + task_numa_fault(current_nid, HPAGE_PMD_NR, false);
1362 1352 return 0;
1363 1353 }
1364 1354  
mm/migrate.c
... ... @@ -1557,41 +1557,40 @@
1557 1557  
1558 1558 int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
1559 1559 {
1560   - int ret = 0;
  1560 + int page_lru;
1561 1561  
1562 1562 VM_BUG_ON(compound_order(page) && !PageTransHuge(page));
1563 1563  
1564 1564 /* Avoid migrating to a node that is nearly full */
1565   - if (migrate_balanced_pgdat(pgdat, 1UL << compound_order(page))) {
1566   - int page_lru;
  1565 + if (!migrate_balanced_pgdat(pgdat, 1UL << compound_order(page)))
  1566 + return 0;
1567 1567  
1568   - if (isolate_lru_page(page)) {
1569   - put_page(page);
1570   - return 0;
1571   - }
  1568 + if (isolate_lru_page(page))
  1569 + return 0;
1572 1570  
1573   - /* Page is isolated */
1574   - ret = 1;
1575   - page_lru = page_is_file_cache(page);
1576   - if (!PageTransHuge(page))
1577   - inc_zone_page_state(page, NR_ISOLATED_ANON + page_lru);
1578   - else
1579   - mod_zone_page_state(page_zone(page),
1580   - NR_ISOLATED_ANON + page_lru,
1581   - HPAGE_PMD_NR);
  1571 + /*
  1572 + * migrate_misplaced_transhuge_page() skips page migration's usual
  1573 + * check on page_count(), so we must do it here, now that the page
  1574 + * has been isolated: a GUP pin, or any other pin, prevents migration.
  1575 + * The expected page count is 3: 1 for page's mapcount and 1 for the
  1576 + * caller's pin and 1 for the reference taken by isolate_lru_page().
  1577 + */
  1578 + if (PageTransHuge(page) && page_count(page) != 3) {
  1579 + putback_lru_page(page);
  1580 + return 0;
1582 1581 }
1583 1582  
  1583 + page_lru = page_is_file_cache(page);
  1584 + mod_zone_page_state(page_zone(page), NR_ISOLATED_ANON + page_lru,
  1585 + hpage_nr_pages(page));
  1586 +
1584 1587 /*
1585   - * Page is either isolated or there is not enough space on the target
1586   - * node. If isolated, then it has taken a reference count and the
1587   - * callers reference can be safely dropped without the page
1588   - * disappearing underneath us during migration. Otherwise the page is
1589   - * not to be migrated but the callers reference should still be
1590   - * dropped so it does not leak.
  1588 + * Isolating the page has taken another reference, so the
  1589 + * caller's reference can be safely dropped without the page
  1590 + * disappearing underneath us during migration.
1591 1591 */
1592 1592 put_page(page);
1593   -
1594   - return ret;
  1593 + return 1;
1595 1594 }
1596 1595  
1597 1596 /*
... ... @@ -1602,7 +1601,7 @@
1602 1601 int migrate_misplaced_page(struct page *page, int node)
1603 1602 {
1604 1603 pg_data_t *pgdat = NODE_DATA(node);
1605   - int isolated = 0;
  1604 + int isolated;
1606 1605 int nr_remaining;
1607 1606 LIST_HEAD(migratepages);
1608 1607  
1609 1608  
1610 1609  
1611 1610  
... ... @@ -1610,20 +1609,16 @@
1610 1609 * Don't migrate pages that are mapped in multiple processes.
1611 1610 * TODO: Handle false sharing detection instead of this hammer
1612 1611 */
1613   - if (page_mapcount(page) != 1) {
1614   - put_page(page);
  1612 + if (page_mapcount(page) != 1)
1615 1613 goto out;
1616   - }
1617 1614  
1618 1615 /*
1619 1616 * Rate-limit the amount of data that is being migrated to a node.
1620 1617 * Optimal placement is no good if the memory bus is saturated and
1621 1618 * all the time is being spent migrating!
1622 1619 */
1623   - if (numamigrate_update_ratelimit(pgdat, 1)) {
1624   - put_page(page);
  1620 + if (numamigrate_update_ratelimit(pgdat, 1))
1625 1621 goto out;
1626   - }
1627 1622  
1628 1623 isolated = numamigrate_isolate_page(pgdat, page);
1629 1624 if (!isolated)
1630 1625  
1631 1626  
... ... @@ -1640,12 +1635,19 @@
1640 1635 } else
1641 1636 count_vm_numa_event(NUMA_PAGE_MIGRATE);
1642 1637 BUG_ON(!list_empty(&migratepages));
1643   -out:
1644 1638 return isolated;
  1639 +
  1640 +out:
  1641 + put_page(page);
  1642 + return 0;
1645 1643 }
1646 1644 #endif /* CONFIG_NUMA_BALANCING */
1647 1645  
1648 1646 #if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
  1647 +/*
  1648 + * Migrates a THP to a given target node. page must be locked and is unlocked
  1649 + * before returning.
  1650 + */
1649 1651 int migrate_misplaced_transhuge_page(struct mm_struct *mm,
1650 1652 struct vm_area_struct *vma,
1651 1653 pmd_t *pmd, pmd_t entry,
1652 1654  
1653 1655  
... ... @@ -1676,29 +1678,15 @@
1676 1678  
1677 1679 new_page = alloc_pages_node(node,
1678 1680 (GFP_TRANSHUGE | GFP_THISNODE) & ~__GFP_WAIT, HPAGE_PMD_ORDER);
1679   - if (!new_page) {
1680   - count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
1681   - goto out_dropref;
1682   - }
  1681 + if (!new_page)
  1682 + goto out_fail;
  1683 +
1683 1684 page_xchg_last_nid(new_page, page_last_nid(page));
1684 1685  
1685 1686 isolated = numamigrate_isolate_page(pgdat, page);
1686   -
1687   - /*
1688   - * Failing to isolate or a GUP pin prevents migration. The expected
1689   - * page count is 2. 1 for anonymous pages without a mapping and 1
1690   - * for the callers pin. If the page was isolated, the page will
1691   - * need to be put back on the LRU.
1692   - */
1693   - if (!isolated || page_count(page) != 2) {
1694   - count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
  1687 + if (!isolated) {
1695 1688 put_page(new_page);
1696   - if (isolated) {
1697   - putback_lru_page(page);
1698   - isolated = 0;
1699   - goto out;
1700   - }
1701   - goto out_keep_locked;
  1689 + goto out_fail;
1702 1690 }
1703 1691  
1704 1692 /* Prepare a page as a migration target */
... ... @@ -1730,6 +1718,7 @@
1730 1718 putback_lru_page(page);
1731 1719  
1732 1720 count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
  1721 + isolated = 0;
1733 1722 goto out;
1734 1723 }
1735 1724  
1736 1725  
1737 1726  
... ... @@ -1774,9 +1763,11 @@
1774 1763 -HPAGE_PMD_NR);
1775 1764 return isolated;
1776 1765  
  1766 +out_fail:
  1767 + count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
1777 1768 out_dropref:
  1769 + unlock_page(page);
1778 1770 put_page(page);
1779   -out_keep_locked:
1780 1771 return 0;
1781 1772 }
1782 1773 #endif /* CONFIG_NUMA_BALANCING */