Commit 12d27107867fc7216e8faaff0b894b0f162dcf75

Authored by Hugh Dickins
Committed by Linus Torvalds
1 parent 0cee34fd72

memcg: fix split_huge_page_refcounts()

This patch started off as a cleanup: __split_huge_page_refcount() has to
cope with two scenarios, when the hugepage being split is already on LRU,
and when it is not; but why does it have to split that accounting across
three different sites?  Consolidate it in lru_add_page_tail(), handling
evictable and unevictable alike, and use standard add_page_to_lru_list()
when accounting is needed (when the head is not yet on LRU).

But a recent regression in -next (I guess from the removal of the PageCgroupAcctLRU
test from mem_cgroup_split_huge_fixup()) makes this now a necessary fix:
under load, the MEM_CGROUP_ZSTAT count was wrapping to a huge number,
messing up reclaim calculations and causing a freeze at rmdir of cgroup.

Add a VM_BUG_ON to mem_cgroup_lru_del_list() when we're about to wrap that
count - this has not been the only such incident.  Document that
lru_add_page_tail() is for Transparent HugePages by #ifdef around it.

Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 3 changed files with 21 additions and 30 deletions Side-by-side Diff

... ... @@ -1229,7 +1229,6 @@
1229 1229 {
1230 1230 int i;
1231 1231 struct zone *zone = page_zone(page);
1232   - int zonestat;
1233 1232 int tail_count = 0;
1234 1233  
1235 1234 /* prevent PageLRU to go away from under us, and freeze lru stats */
... ... @@ -1316,15 +1315,6 @@
1316 1315  
1317 1316 __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
1318 1317 __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR);
1319   -
1320   - /*
1321   - * A hugepage counts for HPAGE_PMD_NR pages on the LRU statistics,
1322   - * so adjust those appropriately if this page is on the LRU.
1323   - */
1324   - if (PageLRU(page)) {
1325   - zonestat = NR_LRU_BASE + page_lru(page);
1326   - __mod_zone_page_state(zone, zonestat, -(HPAGE_PMD_NR-1));
1327   - }
1328 1318  
1329 1319 ClearPageCompound(page);
1330 1320 compound_unlock(page);
... ... @@ -1071,6 +1071,7 @@
1071 1071 VM_BUG_ON(!memcg);
1072 1072 mz = page_cgroup_zoneinfo(memcg, page);
1073 1073 /* huge page split is done under lru_lock. so, we have no races. */
  1074 + VM_BUG_ON(MEM_CGROUP_ZSTAT(mz, lru) < (1 << compound_order(page)));
1074 1075 MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page);
1075 1076 }
1076 1077  
1077 1078  
... ... @@ -2465,9 +2466,7 @@
2465 2466 void mem_cgroup_split_huge_fixup(struct page *head)
2466 2467 {
2467 2468 struct page_cgroup *head_pc = lookup_page_cgroup(head);
2468   - struct mem_cgroup_per_zone *mz;
2469 2469 struct page_cgroup *pc;
2470   - enum lru_list lru;
2471 2470 int i;
2472 2471  
2473 2472 if (mem_cgroup_disabled())
2474 2473  
... ... @@ -2478,15 +2477,8 @@
2478 2477 smp_wmb();/* see __commit_charge() */
2479 2478 pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
2480 2479 }
2481   - /*
2482   - * Tail pages will be added to LRU.
2483   - * We hold lru_lock,then,reduce counter directly.
2484   - */
2485   - lru = page_lru(head);
2486   - mz = page_cgroup_zoneinfo(head_pc->mem_cgroup, head);
2487   - MEM_CGROUP_ZSTAT(mz, lru) -= HPAGE_PMD_NR - 1;
2488 2480 }
2489   -#endif
  2481 +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
2490 2482  
2491 2483 /**
2492 2484 * mem_cgroup_move_account - move account of the page
... ... @@ -650,6 +650,7 @@
650 650  
651 651 EXPORT_SYMBOL(__pagevec_release);
652 652  
  653 +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
653 654 /* used by __split_huge_page_refcount() */
654 655 void lru_add_page_tail(struct zone* zone,
655 656 struct page *page, struct page *page_tail)
... ... @@ -666,8 +667,6 @@
666 667 SetPageLRU(page_tail);
667 668  
668 669 if (page_evictable(page_tail, NULL)) {
669   - struct lruvec *lruvec;
670   -
671 670 if (PageActive(page)) {
672 671 SetPageActive(page_tail);
673 672 active = 1;
674 673  
675 674  
676 675  
... ... @@ -677,18 +676,28 @@
677 676 lru = LRU_INACTIVE_ANON;
678 677 }
679 678 update_page_reclaim_stat(zone, page_tail, file, active);
680   - lruvec = mem_cgroup_lru_add_list(zone, page_tail, lru);
681   - if (likely(PageLRU(page)))
682   - list_add(&page_tail->lru, page->lru.prev);
683   - else
684   - list_add(&page_tail->lru, lruvec->lists[lru].prev);
685   - __mod_zone_page_state(zone, NR_LRU_BASE + lru,
686   - hpage_nr_pages(page_tail));
687 679 } else {
688 680 SetPageUnevictable(page_tail);
689   - add_page_to_lru_list(zone, page_tail, LRU_UNEVICTABLE);
  681 + lru = LRU_UNEVICTABLE;
690 682 }
  683 +
  684 + if (likely(PageLRU(page)))
  685 + list_add_tail(&page_tail->lru, &page->lru);
  686 + else {
  687 + struct list_head *list_head;
  688 + /*
  689 + * Head page has not yet been counted, as an hpage,
  690 + * so we must account for each subpage individually.
  691 + *
  692 + * Use the standard add function to put page_tail on the list,
  693 + * but then correct its position so they all end up in order.
  694 + */
  695 + add_page_to_lru_list(zone, page_tail, lru);
  696 + list_head = page_tail->lru.prev;
  697 + list_move_tail(&page_tail->lru, list_head);
  698 + }
691 699 }
  700 +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
692 701  
693 702 static void ____pagevec_lru_add_fn(struct page *page, void *arg)
694 703 {