Commit 544122e5e0ee27d5aac4a441f7746712afbf248c
Committed by
Linus Torvalds
1 parent
54595fe265
Exists in
master
and in
4 other branches
memcg: fix LRU accounting for SwapCache
Now, a page can be deleted from SwapCache while do_swap_page(). memcg-fix-swap-accounting-leak-v3.patch handles that, but LRU handling is still broken. (The above behavior broke the assumption of the memcg-synchronized-lru patch.) This patch is a fix for LRU handling (especially for per-zone counters). At charging SwapCache, - Remove page_cgroup from LRU if it's not used. - Add page cgroup to LRU if it's not linked to. Reported-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Balbir Singh <balbir@in.ibm.com> Cc: Paul Menage <menage@google.com> Cc: Li Zefan <lizf@cn.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 1 changed file with 50 additions and 9 deletions Side-by-side Diff
mm/memcontrol.c
... | ... | @@ -331,8 +331,12 @@ |
331 | 331 | return; |
332 | 332 | pc = lookup_page_cgroup(page); |
333 | 333 | /* can happen while we handle swapcache. */ |
334 | - if (list_empty(&pc->lru)) | |
334 | + if (list_empty(&pc->lru) || !pc->mem_cgroup) | |
335 | 335 | return; |
336 | + /* | |
337 | + * We don't check PCG_USED bit. It's cleared when the "page" is finally | |
338 | + * removed from global LRU. | |
339 | + */ | |
336 | 340 | mz = page_cgroup_zoneinfo(pc); |
337 | 341 | mem = pc->mem_cgroup; |
338 | 342 | MEM_CGROUP_ZSTAT(mz, lru) -= 1; |
339 | 343 | |
340 | 344 | |
341 | 345 | |
342 | 346 | |
... | ... | @@ -379,16 +383,44 @@ |
379 | 383 | MEM_CGROUP_ZSTAT(mz, lru) += 1; |
380 | 384 | list_add(&pc->lru, &mz->lists[lru]); |
381 | 385 | } |
386 | + | |
382 | 387 | /* |
383 | - * To add swapcache into LRU. Be careful to all this function. | |
384 | - * zone->lru_lock shouldn't be held and irq must not be disabled. | |
388 | + * At handling SwapCache, pc->mem_cgroup may be changed while it's linked to | |
389 | + * lru because the page may be reused after it's fully uncharged (because of | 
390 | + * SwapCache behavior). To handle that, unlink page_cgroup from LRU when charging | 
391 | + * it again. This function is only used to charge SwapCache. It's done under | 
392 | + * lock_page and expected that zone->lru_lock is never held. | |
385 | 393 | */ |
386 | -static void mem_cgroup_lru_fixup(struct page *page) | |
394 | +static void mem_cgroup_lru_del_before_commit_swapcache(struct page *page) | |
387 | 395 | { |
388 | - if (!isolate_lru_page(page)) | |
389 | - putback_lru_page(page); | |
396 | + unsigned long flags; | |
397 | + struct zone *zone = page_zone(page); | |
398 | + struct page_cgroup *pc = lookup_page_cgroup(page); | |
399 | + | |
400 | + spin_lock_irqsave(&zone->lru_lock, flags); | |
401 | + /* | |
402 | + * Forget old LRU when this page_cgroup is *not* used. This Used bit | |
403 | + * is guarded by lock_page() because the page is SwapCache. | |
404 | + */ | |
405 | + if (!PageCgroupUsed(pc)) | |
406 | + mem_cgroup_del_lru_list(page, page_lru(page)); | |
407 | + spin_unlock_irqrestore(&zone->lru_lock, flags); | |
390 | 408 | } |
391 | 409 | |
410 | +static void mem_cgroup_lru_add_after_commit_swapcache(struct page *page) | |
411 | +{ | |
412 | + unsigned long flags; | |
413 | + struct zone *zone = page_zone(page); | |
414 | + struct page_cgroup *pc = lookup_page_cgroup(page); | |
415 | + | |
416 | + spin_lock_irqsave(&zone->lru_lock, flags); | |
417 | + /* link when the page is linked to LRU but page_cgroup isn't */ | |
418 | + if (PageLRU(page) && list_empty(&pc->lru)) | |
419 | + mem_cgroup_add_lru_list(page, page_lru(page)); | |
420 | + spin_unlock_irqrestore(&zone->lru_lock, flags); | |
421 | +} | |
422 | + | |
423 | + | |
392 | 424 | void mem_cgroup_move_lists(struct page *page, |
393 | 425 | enum lru_list from, enum lru_list to) |
394 | 426 | { |
395 | 427 | |
... | ... | @@ -1168,8 +1200,11 @@ |
1168 | 1200 | mem = NULL; /* charge to current */ |
1169 | 1201 | } |
1170 | 1202 | } |
1203 | + /* SwapCache may be still linked to LRU now. */ | |
1204 | + mem_cgroup_lru_del_before_commit_swapcache(page); | |
1171 | 1205 | ret = mem_cgroup_charge_common(page, mm, mask, |
1172 | 1206 | MEM_CGROUP_CHARGE_TYPE_SHMEM, mem); |
1207 | + mem_cgroup_lru_add_after_commit_swapcache(page); | |
1173 | 1208 | /* drop extra refcnt from tryget */ |
1174 | 1209 | if (mem) |
1175 | 1210 | css_put(&mem->css); |
... | ... | @@ -1185,8 +1220,6 @@ |
1185 | 1220 | } |
1186 | 1221 | if (!locked) |
1187 | 1222 | unlock_page(page); |
1188 | - /* add this page(page_cgroup) to the LRU we want. */ | |
1189 | - mem_cgroup_lru_fixup(page); | |
1190 | 1223 | |
1191 | 1224 | return ret; |
1192 | 1225 | } |
1193 | 1226 | |
... | ... | @@ -1201,7 +1234,9 @@ |
1201 | 1234 | if (!ptr) |
1202 | 1235 | return; |
1203 | 1236 | pc = lookup_page_cgroup(page); |
1237 | + mem_cgroup_lru_del_before_commit_swapcache(page); | |
1204 | 1238 | __mem_cgroup_commit_charge(ptr, pc, MEM_CGROUP_CHARGE_TYPE_MAPPED); |
1239 | + mem_cgroup_lru_add_after_commit_swapcache(page); | |
1205 | 1240 | /* |
1206 | 1241 | * Now swap is on-memory. This means this page may be |
1207 | 1242 | * counted both as mem and swap....double count. |
... | ... | @@ -1220,7 +1255,7 @@ |
1220 | 1255 | |
1221 | 1256 | } |
1222 | 1257 | /* add this page(page_cgroup) to the LRU we want. */ |
1223 | - mem_cgroup_lru_fixup(page); | |
1258 | + | |
1224 | 1259 | } |
1225 | 1260 | |
1226 | 1261 | void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem) |
... | ... | @@ -1288,6 +1323,12 @@ |
1288 | 1323 | |
1289 | 1324 | mem_cgroup_charge_statistics(mem, pc, false); |
1290 | 1325 | ClearPageCgroupUsed(pc); |
1326 | + /* | |
1327 | + * pc->mem_cgroup is not cleared here. It will be accessed when it's | |
1328 | + * freed from LRU. This is safe because uncharged page is expected not | |
1329 | + * to be reused (freed soon). Exception is SwapCache, it's handled by | |
1330 | + * special functions. | |
1331 | + */ | |
1291 | 1332 | |
1292 | 1333 | mz = page_cgroup_zoneinfo(pc); |
1293 | 1334 | unlock_page_cgroup(pc); |