Commit 36b62ad539498d00c2d280a151abad5f7630fa73

Authored by KAMEZAWA Hiroyuki
Committed by Linus Torvalds
1 parent dc67d50465

memcg: simplify corner case handling of LRU.

This patch simplifies LRU handling of the racy case (memcg+SwapCache).  At
charging, SwapCache tends to be on the LRU already.  So, before overwriting
pc->mem_cgroup, the page must be removed from the LRU and added back to the
LRU later.

This patch does
        spin_lock(zone->lru_lock);
        if (PageLRU(page))
                remove from LRU
        overwrite pc->mem_cgroup
        if (PageLRU(page))
                add to new LRU.
        spin_unlock(zone->lru_lock);

This guarantees that no page is on the LRU while pc->mem_cgroup is being
modified.  This patch also unifies the LRU handling of replace_page_cache()
and swapin.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Miklos Szeredi <mszeredi@suse.cz>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Ying Han <yinghan@google.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 1 changed file with 16 additions and 93 deletions Side-by-side Diff

... ... @@ -1137,86 +1137,6 @@
1137 1137 }
1138 1138  
1139 1139 /*
1140   - * At handling SwapCache and other FUSE stuff, pc->mem_cgroup may be changed
1141   - * while it's linked to lru because the page may be reused after it's fully
1142   - * uncharged. To handle that, unlink page_cgroup from LRU when charge it again.
1143   - * It's done under lock_page and expected that zone->lru_lock isnever held.
1144   - */
1145   -static void mem_cgroup_lru_del_before_commit(struct page *page)
1146   -{
1147   - enum lru_list lru;
1148   - unsigned long flags;
1149   - struct zone *zone = page_zone(page);
1150   - struct page_cgroup *pc = lookup_page_cgroup(page);
1151   -
1152   - /*
1153   - * Doing this check without taking ->lru_lock seems wrong but this
1154   - * is safe. Because if page_cgroup's USED bit is unset, the page
1155   - * will not be added to any memcg's LRU. If page_cgroup's USED bit is
1156   - * set, the commit after this will fail, anyway.
1157   - * This all charge/uncharge is done under some mutual execustion.
1158   - * So, we don't need to taking care of changes in USED bit.
1159   - */
1160   - if (likely(!PageLRU(page)))
1161   - return;
1162   -
1163   - spin_lock_irqsave(&zone->lru_lock, flags);
1164   - lru = page_lru(page);
1165   - /*
1166   - * The uncharged page could still be registered to the LRU of
1167   - * the stale pc->mem_cgroup.
1168   - *
1169   - * As pc->mem_cgroup is about to get overwritten, the old LRU
1170   - * accounting needs to be taken care of. Let root_mem_cgroup
1171   - * babysit the page until the new memcg is responsible for it.
1172   - *
1173   - * The PCG_USED bit is guarded by lock_page() as the page is
1174   - * swapcache/pagecache.
1175   - */
1176   - if (PageLRU(page) && PageCgroupAcctLRU(pc) && !PageCgroupUsed(pc)) {
1177   - del_page_from_lru_list(zone, page, lru);
1178   - add_page_to_lru_list(zone, page, lru);
1179   - }
1180   - spin_unlock_irqrestore(&zone->lru_lock, flags);
1181   -}
1182   -
1183   -static void mem_cgroup_lru_add_after_commit(struct page *page)
1184   -{
1185   - enum lru_list lru;
1186   - unsigned long flags;
1187   - struct zone *zone = page_zone(page);
1188   - struct page_cgroup *pc = lookup_page_cgroup(page);
1189   - /*
1190   - * putback: charge:
1191   - * SetPageLRU SetPageCgroupUsed
1192   - * smp_mb smp_mb
1193   - * PageCgroupUsed && add to memcg LRU PageLRU && add to memcg LRU
1194   - *
1195   - * Ensure that one of the two sides adds the page to the memcg
1196   - * LRU during a race.
1197   - */
1198   - smp_mb();
1199   - /* taking care of that the page is added to LRU while we commit it */
1200   - if (likely(!PageLRU(page)))
1201   - return;
1202   - spin_lock_irqsave(&zone->lru_lock, flags);
1203   - lru = page_lru(page);
1204   - /*
1205   - * If the page is not on the LRU, someone will soon put it
1206   - * there. If it is, and also already accounted for on the
1207   - * memcg-side, it must be on the right lruvec as setting
1208   - * pc->mem_cgroup and PageCgroupUsed is properly ordered.
1209   - * Otherwise, root_mem_cgroup has been babysitting the page
1210   - * during the charge. Move it to the new memcg now.
1211   - */
1212   - if (PageLRU(page) && !PageCgroupAcctLRU(pc)) {
1213   - del_page_from_lru_list(zone, page, lru);
1214   - add_page_to_lru_list(zone, page, lru);
1215   - }
1216   - spin_unlock_irqrestore(&zone->lru_lock, flags);
1217   -}
1218   -
1219   -/*
1220 1140 * Checks whether given mem is same or in the root_mem_cgroup's
1221 1141 * hierarchy subtree
1222 1142 */
1223 1143  
1224 1144  
... ... @@ -2775,14 +2695,27 @@
2775 2695 enum charge_type ctype)
2776 2696 {
2777 2697 struct page_cgroup *pc = lookup_page_cgroup(page);
  2698 + struct zone *zone = page_zone(page);
  2699 + unsigned long flags;
  2700 + bool removed = false;
  2701 +
2778 2702 /*
2779 2703 * In some case, SwapCache, FUSE(splice_buf->radixtree), the page
2780 2704 * is already on LRU. It means the page may on some other page_cgroup's
2781 2705 * LRU. Take care of it.
2782 2706 */
2783   - mem_cgroup_lru_del_before_commit(page);
  2707 + spin_lock_irqsave(&zone->lru_lock, flags);
  2708 + if (PageLRU(page)) {
  2709 + del_page_from_lru_list(zone, page, page_lru(page));
  2710 + ClearPageLRU(page);
  2711 + removed = true;
  2712 + }
2784 2713 __mem_cgroup_commit_charge(memcg, page, 1, pc, ctype);
2785   - mem_cgroup_lru_add_after_commit(page);
  2714 + if (removed) {
  2715 + add_page_to_lru_list(zone, page, page_lru(page));
  2716 + SetPageLRU(page);
  2717 + }
  2718 + spin_unlock_irqrestore(&zone->lru_lock, flags);
2786 2719 return;
2787 2720 }
2788 2721  
2789 2722  
... ... @@ -3383,9 +3316,7 @@
3383 3316 {
3384 3317 struct mem_cgroup *memcg;
3385 3318 struct page_cgroup *pc;
3386   - struct zone *zone;
3387 3319 enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
3388   - unsigned long flags;
3389 3320  
3390 3321 if (mem_cgroup_disabled())
3391 3322 return;
3392 3323  
... ... @@ -3401,20 +3332,12 @@
3401 3332 if (PageSwapBacked(oldpage))
3402 3333 type = MEM_CGROUP_CHARGE_TYPE_SHMEM;
3403 3334  
3404   - zone = page_zone(newpage);
3405   - pc = lookup_page_cgroup(newpage);
3406 3335 /*
3407 3336 * Even if newpage->mapping was NULL before starting replacement,
3408 3337 * the newpage may be on LRU(or pagevec for LRU) already. We lock
3409 3338 * LRU while we overwrite pc->mem_cgroup.
3410 3339 */
3411   - spin_lock_irqsave(&zone->lru_lock, flags);
3412   - if (PageLRU(newpage))
3413   - del_page_from_lru_list(zone, newpage, page_lru(newpage));
3414   - __mem_cgroup_commit_charge(memcg, newpage, 1, pc, type);
3415   - if (PageLRU(newpage))
3416   - add_page_to_lru_list(zone, newpage, page_lru(newpage));
3417   - spin_unlock_irqrestore(&zone->lru_lock, flags);
  3340 + __mem_cgroup_commit_charge_lrucare(newpage, memcg, type);
3418 3341 }
3419 3342  
3420 3343 #ifdef CONFIG_DEBUG_VM