Commit 36b62ad539498d00c2d280a151abad5f7630fa73
Committed by
Linus Torvalds
1 parent
dc67d50465
Exists in
master
and in
6 other branches
memcg: simplify corner case handling of LRU.
This patch simplifies LRU handling of racy case (memcg+SwapCache). At charging, SwapCache tend to be on LRU already. So, before overwriting pc->mem_cgroup, the page must be removed from LRU and added to LRU later. This patch does spin_lock(zone->lru_lock); if (PageLRU(page)) remove from LRU overwrite pc->mem_cgroup if (PageLRU(page)) add to new LRU. spin_unlock(zone->lru_lock); And guarantee all pages are not on LRU at modifying pc->mem_cgroup. This patch also unifies LRU handling of replace_page_cache() and swapin. Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Miklos Szeredi <mszeredi@suse.cz> Acked-by: Michal Hocko <mhocko@suse.cz> Acked-by: Johannes Weiner <hannes@cmpxchg.org> Cc: Ying Han <yinghan@google.com> Cc: Hugh Dickins <hughd@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 1 changed file with 16 additions and 93 deletions Side-by-side Diff
mm/memcontrol.c
... | ... | @@ -1137,86 +1137,6 @@ |
1137 | 1137 | } |
1138 | 1138 | |
1139 | 1139 | /* |
1140 | - * At handling SwapCache and other FUSE stuff, pc->mem_cgroup may be changed | |
1141 | - * while it's linked to lru because the page may be reused after it's fully | |
1142 | - * uncharged. To handle that, unlink page_cgroup from LRU when charge it again. | |
1143 | - * It's done under lock_page and expected that zone->lru_lock isnever held. | |
1144 | - */ | |
1145 | -static void mem_cgroup_lru_del_before_commit(struct page *page) | |
1146 | -{ | |
1147 | - enum lru_list lru; | |
1148 | - unsigned long flags; | |
1149 | - struct zone *zone = page_zone(page); | |
1150 | - struct page_cgroup *pc = lookup_page_cgroup(page); | |
1151 | - | |
1152 | - /* | |
1153 | - * Doing this check without taking ->lru_lock seems wrong but this | |
1154 | - * is safe. Because if page_cgroup's USED bit is unset, the page | |
1155 | - * will not be added to any memcg's LRU. If page_cgroup's USED bit is | |
1156 | - * set, the commit after this will fail, anyway. | |
1157 | - * This all charge/uncharge is done under some mutual execustion. | |
1158 | - * So, we don't need to taking care of changes in USED bit. | |
1159 | - */ | |
1160 | - if (likely(!PageLRU(page))) | |
1161 | - return; | |
1162 | - | |
1163 | - spin_lock_irqsave(&zone->lru_lock, flags); | |
1164 | - lru = page_lru(page); | |
1165 | - /* | |
1166 | - * The uncharged page could still be registered to the LRU of | |
1167 | - * the stale pc->mem_cgroup. | |
1168 | - * | |
1169 | - * As pc->mem_cgroup is about to get overwritten, the old LRU | |
1170 | - * accounting needs to be taken care of. Let root_mem_cgroup | |
1171 | - * babysit the page until the new memcg is responsible for it. | |
1172 | - * | |
1173 | - * The PCG_USED bit is guarded by lock_page() as the page is | |
1174 | - * swapcache/pagecache. | |
1175 | - */ | |
1176 | - if (PageLRU(page) && PageCgroupAcctLRU(pc) && !PageCgroupUsed(pc)) { | |
1177 | - del_page_from_lru_list(zone, page, lru); | |
1178 | - add_page_to_lru_list(zone, page, lru); | |
1179 | - } | |
1180 | - spin_unlock_irqrestore(&zone->lru_lock, flags); | |
1181 | -} | |
1182 | - | |
1183 | -static void mem_cgroup_lru_add_after_commit(struct page *page) | |
1184 | -{ | |
1185 | - enum lru_list lru; | |
1186 | - unsigned long flags; | |
1187 | - struct zone *zone = page_zone(page); | |
1188 | - struct page_cgroup *pc = lookup_page_cgroup(page); | |
1189 | - /* | |
1190 | - * putback: charge: | |
1191 | - * SetPageLRU SetPageCgroupUsed | |
1192 | - * smp_mb smp_mb | |
1193 | - * PageCgroupUsed && add to memcg LRU PageLRU && add to memcg LRU | |
1194 | - * | |
1195 | - * Ensure that one of the two sides adds the page to the memcg | |
1196 | - * LRU during a race. | |
1197 | - */ | |
1198 | - smp_mb(); | |
1199 | - /* taking care of that the page is added to LRU while we commit it */ | |
1200 | - if (likely(!PageLRU(page))) | |
1201 | - return; | |
1202 | - spin_lock_irqsave(&zone->lru_lock, flags); | |
1203 | - lru = page_lru(page); | |
1204 | - /* | |
1205 | - * If the page is not on the LRU, someone will soon put it | |
1206 | - * there. If it is, and also already accounted for on the | |
1207 | - * memcg-side, it must be on the right lruvec as setting | |
1208 | - * pc->mem_cgroup and PageCgroupUsed is properly ordered. | |
1209 | - * Otherwise, root_mem_cgroup has been babysitting the page | |
1210 | - * during the charge. Move it to the new memcg now. | |
1211 | - */ | |
1212 | - if (PageLRU(page) && !PageCgroupAcctLRU(pc)) { | |
1213 | - del_page_from_lru_list(zone, page, lru); | |
1214 | - add_page_to_lru_list(zone, page, lru); | |
1215 | - } | |
1216 | - spin_unlock_irqrestore(&zone->lru_lock, flags); | |
1217 | -} | |
1218 | - | |
1219 | -/* | |
1220 | 1140 | * Checks whether given mem is same or in the root_mem_cgroup's |
1221 | 1141 | * hierarchy subtree |
1222 | 1142 | */ |
1223 | 1143 | |
1224 | 1144 | |
... | ... | @@ -2775,14 +2695,27 @@ |
2775 | 2695 | enum charge_type ctype) |
2776 | 2696 | { |
2777 | 2697 | struct page_cgroup *pc = lookup_page_cgroup(page); |
2698 | + struct zone *zone = page_zone(page); | |
2699 | + unsigned long flags; | |
2700 | + bool removed = false; | |
2701 | + | |
2778 | 2702 | /* |
2779 | 2703 | * In some case, SwapCache, FUSE(splice_buf->radixtree), the page |
2780 | 2704 | * is already on LRU. It means the page may on some other page_cgroup's |
2781 | 2705 | * LRU. Take care of it. |
2782 | 2706 | */ |
2783 | - mem_cgroup_lru_del_before_commit(page); | |
2707 | + spin_lock_irqsave(&zone->lru_lock, flags); | |
2708 | + if (PageLRU(page)) { | |
2709 | + del_page_from_lru_list(zone, page, page_lru(page)); | |
2710 | + ClearPageLRU(page); | |
2711 | + removed = true; | |
2712 | + } | |
2784 | 2713 | __mem_cgroup_commit_charge(memcg, page, 1, pc, ctype); |
2785 | - mem_cgroup_lru_add_after_commit(page); | |
2714 | + if (removed) { | |
2715 | + add_page_to_lru_list(zone, page, page_lru(page)); | |
2716 | + SetPageLRU(page); | |
2717 | + } | |
2718 | + spin_unlock_irqrestore(&zone->lru_lock, flags); | |
2786 | 2719 | return; |
2787 | 2720 | } |
2788 | 2721 | |
2789 | 2722 | |
... | ... | @@ -3383,9 +3316,7 @@ |
3383 | 3316 | { |
3384 | 3317 | struct mem_cgroup *memcg; |
3385 | 3318 | struct page_cgroup *pc; |
3386 | - struct zone *zone; | |
3387 | 3319 | enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE; |
3388 | - unsigned long flags; | |
3389 | 3320 | |
3390 | 3321 | if (mem_cgroup_disabled()) |
3391 | 3322 | return; |
3392 | 3323 | |
... | ... | @@ -3401,20 +3332,12 @@ |
3401 | 3332 | if (PageSwapBacked(oldpage)) |
3402 | 3333 | type = MEM_CGROUP_CHARGE_TYPE_SHMEM; |
3403 | 3334 | |
3404 | - zone = page_zone(newpage); | |
3405 | - pc = lookup_page_cgroup(newpage); | |
3406 | 3335 | /* |
3407 | 3336 | * Even if newpage->mapping was NULL before starting replacement, |
3408 | 3337 | * the newpage may be on LRU(or pagevec for LRU) already. We lock |
3409 | 3338 | * LRU while we overwrite pc->mem_cgroup. |
3410 | 3339 | */ |
3411 | - spin_lock_irqsave(&zone->lru_lock, flags); | |
3412 | - if (PageLRU(newpage)) | |
3413 | - del_page_from_lru_list(zone, newpage, page_lru(newpage)); | |
3414 | - __mem_cgroup_commit_charge(memcg, newpage, 1, pc, type); | |
3415 | - if (PageLRU(newpage)) | |
3416 | - add_page_to_lru_list(zone, newpage, page_lru(newpage)); | |
3417 | - spin_unlock_irqrestore(&zone->lru_lock, flags); | |
3340 | + __mem_cgroup_commit_charge_lrucare(newpage, memcg, type); | |
3418 | 3341 | } |
3419 | 3342 | |
3420 | 3343 | #ifdef CONFIG_DEBUG_VM |