Commit 12d27107867fc7216e8faaff0b894b0f162dcf75
Committed by
Linus Torvalds
1 parent
0cee34fd72
Exists in
master
and in
6 other branches
memcg: fix __split_huge_page_refcount()
This patch started off as a cleanup: __split_huge_page_refcount() has to cope with two scenarios, when the hugepage being split is already on the LRU, and when it is not; but why does it have to split that accounting across three different sites? Consolidate it in lru_add_page_tail(), handling evictable and unevictable alike, and use the standard add_page_to_lru_list() when accounting is needed (when the head is not yet on the LRU).

But a recent regression in -next (I guess the removal of the PageCgroupAcctLRU test from mem_cgroup_split_huge_fixup()) makes this now a necessary fix: under load, the MEM_CGROUP_ZSTAT count was wrapping to a huge number, messing up reclaim calculations and causing a freeze at rmdir of the cgroup.

Add a VM_BUG_ON to mem_cgroup_lru_del_list() when we're about to wrap that count - this has not been the only such incident.

Document that lru_add_page_tail() is for Transparent HugePages by putting an #ifdef around it.

Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 3 changed files with 21 additions and 30 deletions Side-by-side Diff
mm/huge_memory.c
... | ... | @@ -1229,7 +1229,6 @@ |
1229 | 1229 | { |
1230 | 1230 | int i; |
1231 | 1231 | struct zone *zone = page_zone(page); |
1232 | - int zonestat; | |
1233 | 1232 | int tail_count = 0; |
1234 | 1233 | |
1235 | 1234 | /* prevent PageLRU to go away from under us, and freeze lru stats */ |
... | ... | @@ -1316,15 +1315,6 @@ |
1316 | 1315 | |
1317 | 1316 | __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES); |
1318 | 1317 | __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR); |
1319 | - | |
1320 | - /* | |
1321 | - * A hugepage counts for HPAGE_PMD_NR pages on the LRU statistics, | |
1322 | - * so adjust those appropriately if this page is on the LRU. | |
1323 | - */ | |
1324 | - if (PageLRU(page)) { | |
1325 | - zonestat = NR_LRU_BASE + page_lru(page); | |
1326 | - __mod_zone_page_state(zone, zonestat, -(HPAGE_PMD_NR-1)); | |
1327 | - } | |
1328 | 1318 | |
1329 | 1319 | ClearPageCompound(page); |
1330 | 1320 | compound_unlock(page); |
mm/memcontrol.c
... | ... | @@ -1071,6 +1071,7 @@ |
1071 | 1071 | VM_BUG_ON(!memcg); |
1072 | 1072 | mz = page_cgroup_zoneinfo(memcg, page); |
1073 | 1073 | /* huge page split is done under lru_lock. so, we have no races. */ |
1074 | + VM_BUG_ON(MEM_CGROUP_ZSTAT(mz, lru) < (1 << compound_order(page))); | |
1074 | 1075 | MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page); |
1075 | 1076 | } |
1076 | 1077 | |
1077 | 1078 | |
... | ... | @@ -2465,9 +2466,7 @@ |
2465 | 2466 | void mem_cgroup_split_huge_fixup(struct page *head) |
2466 | 2467 | { |
2467 | 2468 | struct page_cgroup *head_pc = lookup_page_cgroup(head); |
2468 | - struct mem_cgroup_per_zone *mz; | |
2469 | 2469 | struct page_cgroup *pc; |
2470 | - enum lru_list lru; | |
2471 | 2470 | int i; |
2472 | 2471 | |
2473 | 2472 | if (mem_cgroup_disabled()) |
2474 | 2473 | |
... | ... | @@ -2478,15 +2477,8 @@ |
2478 | 2477 | smp_wmb();/* see __commit_charge() */ |
2479 | 2478 | pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT; |
2480 | 2479 | } |
2481 | - /* | |
2482 | - * Tail pages will be added to LRU. | |
2483 | - * We hold lru_lock,then,reduce counter directly. | |
2484 | - */ | |
2485 | - lru = page_lru(head); | |
2486 | - mz = page_cgroup_zoneinfo(head_pc->mem_cgroup, head); | |
2487 | - MEM_CGROUP_ZSTAT(mz, lru) -= HPAGE_PMD_NR - 1; | |
2488 | 2480 | } |
2489 | -#endif | |
2481 | +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | |
2490 | 2482 | |
2491 | 2483 | /** |
2492 | 2484 | * mem_cgroup_move_account - move account of the page |
mm/swap.c
... | ... | @@ -650,6 +650,7 @@ |
650 | 650 | |
651 | 651 | EXPORT_SYMBOL(__pagevec_release); |
652 | 652 | |
653 | +#ifdef CONFIG_TRANSPARENT_HUGEPAGE | |
653 | 654 | /* used by __split_huge_page_refcount() */ |
654 | 655 | void lru_add_page_tail(struct zone* zone, |
655 | 656 | struct page *page, struct page *page_tail) |
... | ... | @@ -666,8 +667,6 @@ |
666 | 667 | SetPageLRU(page_tail); |
667 | 668 | |
668 | 669 | if (page_evictable(page_tail, NULL)) { |
669 | - struct lruvec *lruvec; | |
670 | - | |
671 | 670 | if (PageActive(page)) { |
672 | 671 | SetPageActive(page_tail); |
673 | 672 | active = 1; |
674 | 673 | |
675 | 674 | |
676 | 675 | |
... | ... | @@ -677,18 +676,28 @@ |
677 | 676 | lru = LRU_INACTIVE_ANON; |
678 | 677 | } |
679 | 678 | update_page_reclaim_stat(zone, page_tail, file, active); |
680 | - lruvec = mem_cgroup_lru_add_list(zone, page_tail, lru); | |
681 | - if (likely(PageLRU(page))) | |
682 | - list_add(&page_tail->lru, page->lru.prev); | |
683 | - else | |
684 | - list_add(&page_tail->lru, lruvec->lists[lru].prev); | |
685 | - __mod_zone_page_state(zone, NR_LRU_BASE + lru, | |
686 | - hpage_nr_pages(page_tail)); | |
687 | 679 | } else { |
688 | 680 | SetPageUnevictable(page_tail); |
689 | - add_page_to_lru_list(zone, page_tail, LRU_UNEVICTABLE); | |
681 | + lru = LRU_UNEVICTABLE; | |
690 | 682 | } |
683 | + | |
684 | + if (likely(PageLRU(page))) | |
685 | + list_add_tail(&page_tail->lru, &page->lru); | |
686 | + else { | |
687 | + struct list_head *list_head; | |
688 | + /* | |
689 | + * Head page has not yet been counted, as an hpage, | |
690 | + * so we must account for each subpage individually. | |
691 | + * | |
692 | + * Use the standard add function to put page_tail on the list, | |
693 | + * but then correct its position so they all end up in order. | |
694 | + */ | |
695 | + add_page_to_lru_list(zone, page_tail, lru); | |
696 | + list_head = page_tail->lru.prev; | |
697 | + list_move_tail(&page_tail->lru, list_head); | |
698 | + } | |
691 | 699 | } |
700 | +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | |
692 | 701 | |
693 | 702 | static void ____pagevec_lru_add_fn(struct page *page, void *arg) |
694 | 703 | { |