Commit 544122e5e0ee27d5aac4a441f7746712afbf248c

Authored by KAMEZAWA Hiroyuki
Committed by Linus Torvalds
1 parent 54595fe265

memcg: fix LRU accounting for SwapCache

Now, a page can be deleted from SwapCache while do_swap_page() is running.
memcg-fix-swap-accounting-leak-v3.patch handles that, but LRU handling is
still broken (the behavior above breaks the assumption made by the
memcg-synchronized-lru patch).

This patch fixes LRU handling (especially the per-zone counters).
When charging SwapCache:
 - Remove the page_cgroup from the LRU if it is not in use.
 - Add the page_cgroup to the LRU if it is not linked yet.
A simplified model of this before/after ordering is sketched below.
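For illustration only (not part of the patch): a minimal user-space C model of
the charge-time ordering. The struct page/page_cgroup here are simplified
stand-ins that only model the relevant flags, and the helper names are
hypothetical; the real functions and locking appear in the diff below.

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-ins for the kernel structures (illustration only). */
struct page_cgroup {
	bool used;	/* models the PCG_USED bit */
	bool on_lru;	/* models !list_empty(&pc->lru) */
};

struct page {
	bool on_global_lru;	/* models PageLRU(page) */
	struct page_cgroup pc;
};

/* Before committing a SwapCache charge: forget a stale per-memcg LRU link
 * left over from a previous, already-uncharged use of the page. */
static void lru_del_before_commit_swapcache(struct page *page)
{
	if (!page->pc.used && page->pc.on_lru)
		page->pc.on_lru = false;
}

/* After committing the charge: if the page sits on the global LRU but its
 * page_cgroup is not linked, link it so the per-zone counters stay right. */
static void lru_add_after_commit_swapcache(struct page *page)
{
	if (page->on_global_lru && !page->pc.on_lru)
		page->pc.on_lru = true;
}

int main(void)
{
	/* A SwapCache page being reused after it was fully uncharged:
	 * still on the global LRU, stale memcg LRU link, Used bit clear. */
	struct page page = { .on_global_lru = true,
			     .pc = { .used = false, .on_lru = true } };

	lru_del_before_commit_swapcache(&page);
	page.pc.used = true;	/* the commit-charge step */
	lru_add_after_commit_swapcache(&page);

	printf("used=%d on_memcg_lru=%d\n", page.pc.used, page.pc.on_lru);
	return 0;
}

The point is the ordering: unlink stale LRU state before the commit, then
relink once the charge is in place, which is what the two new helpers in the
diff do around the commit-charge calls.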

Reported-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 1 changed file with 50 additions and 9 deletions

@@ -331,8 +331,12 @@
 		return;
 	pc = lookup_page_cgroup(page);
 	/* can happen while we handle swapcache. */
-	if (list_empty(&pc->lru))
+	if (list_empty(&pc->lru) || !pc->mem_cgroup)
 		return;
+	/*
+	 * We don't check PCG_USED bit. It's cleared when the "page" is finally
+	 * removed from global LRU.
+	 */
 	mz = page_cgroup_zoneinfo(pc);
 	mem = pc->mem_cgroup;
 	MEM_CGROUP_ZSTAT(mz, lru) -= 1;
@@ -379,16 +383,44 @@
 	MEM_CGROUP_ZSTAT(mz, lru) += 1;
 	list_add(&pc->lru, &mz->lists[lru]);
 }
+
 /*
- * To add swapcache into LRU. Be careful to all this function.
- * zone->lru_lock shouldn't be held and irq must not be disabled.
+ * At handling SwapCache, pc->mem_cgroup may be changed while it's linked to
+ * lru because the page may be reused after it's fully uncharged (because of
+ * SwapCache behavior). To handle that, unlink page_cgroup from LRU when charge
+ * it again. This function is only used to charge SwapCache. It's done under
+ * lock_page and expected that zone->lru_lock is never held.
  */
-static void mem_cgroup_lru_fixup(struct page *page)
+static void mem_cgroup_lru_del_before_commit_swapcache(struct page *page)
 {
-	if (!isolate_lru_page(page))
-		putback_lru_page(page);
+	unsigned long flags;
+	struct zone *zone = page_zone(page);
+	struct page_cgroup *pc = lookup_page_cgroup(page);
+
+	spin_lock_irqsave(&zone->lru_lock, flags);
+	/*
+	 * Forget old LRU when this page_cgroup is *not* used. This Used bit
+	 * is guarded by lock_page() because the page is SwapCache.
+	 */
+	if (!PageCgroupUsed(pc))
+		mem_cgroup_del_lru_list(page, page_lru(page));
+	spin_unlock_irqrestore(&zone->lru_lock, flags);
 }
 
+static void mem_cgroup_lru_add_after_commit_swapcache(struct page *page)
+{
+	unsigned long flags;
+	struct zone *zone = page_zone(page);
+	struct page_cgroup *pc = lookup_page_cgroup(page);
+
+	spin_lock_irqsave(&zone->lru_lock, flags);
+	/* link when the page is linked to LRU but page_cgroup isn't */
+	if (PageLRU(page) && list_empty(&pc->lru))
+		mem_cgroup_add_lru_list(page, page_lru(page));
+	spin_unlock_irqrestore(&zone->lru_lock, flags);
+}
+
+
 void mem_cgroup_move_lists(struct page *page,
 			   enum lru_list from, enum lru_list to)
 {
@@ -1168,8 +1200,11 @@
 			mem = NULL; /* charge to current */
 		}
 	}
+	/* SwapCache may be still linked to LRU now. */
+	mem_cgroup_lru_del_before_commit_swapcache(page);
 	ret = mem_cgroup_charge_common(page, mm, mask,
 				MEM_CGROUP_CHARGE_TYPE_SHMEM, mem);
+	mem_cgroup_lru_add_after_commit_swapcache(page);
 	/* drop extra refcnt from tryget */
 	if (mem)
 		css_put(&mem->css);
@@ -1185,8 +1220,6 @@
 	}
 	if (!locked)
 		unlock_page(page);
-	/* add this page(page_cgroup) to the LRU we want. */
-	mem_cgroup_lru_fixup(page);
 
 	return ret;
 }
@@ -1201,7 +1234,9 @@
 	if (!ptr)
 		return;
 	pc = lookup_page_cgroup(page);
+	mem_cgroup_lru_del_before_commit_swapcache(page);
 	__mem_cgroup_commit_charge(ptr, pc, MEM_CGROUP_CHARGE_TYPE_MAPPED);
+	mem_cgroup_lru_add_after_commit_swapcache(page);
 	/*
 	 * Now swap is on-memory. This means this page may be
 	 * counted both as mem and swap....double count.
@@ -1220,7 +1255,7 @@
 
 	}
 	/* add this page(page_cgroup) to the LRU we want. */
-	mem_cgroup_lru_fixup(page);
+
 }
 
 void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem)
@@ -1288,6 +1323,12 @@
 
 	mem_cgroup_charge_statistics(mem, pc, false);
 	ClearPageCgroupUsed(pc);
+	/*
+	 * pc->mem_cgroup is not cleared here. It will be accessed when it's
+	 * freed from LRU. This is safe because uncharged page is expected not
+	 * to be reused (freed soon). Exception is SwapCache, it's handled by
+	 * special functions.
+	 */
 
 	mz = page_cgroup_zoneinfo(pc);
 	unlock_page_cgroup(pc);