Commit 3ea67d06e4679a16f69f66f43a8d6ee4778985fc
Committed by
Linus Torvalds
1 parent
658b72c5a7
Exists in
master
and in
20 other branches
memcg: add per cgroup writeback pages accounting
Add memcg routines to count writeback pages; later, dirty pages will also be accounted.

After Kame's commit 89c06bd52fb9 ("memcg: use new logic for page stat accounting"), we can use the 'struct page' flag to test page state instead of the per page_cgroup flag. But memcg has a feature to move a page from one cgroup to another, and there may be a race between "move" and "page stat accounting". So in order to avoid the race we have designed a new lock:

    mem_cgroup_begin_update_page_stat()
    modify page information        -->(a)
    mem_cgroup_update_page_stat()  -->(b)
    mem_cgroup_end_update_page_stat()

It requires both (a) and (b) (writeback pages accounting) to be protected in mem_cgroup_{begin/end}_update_page_stat(). It's a full no-op for !CONFIG_MEMCG, almost a no-op if memcg is disabled (but compiled in), an rcu read lock in most cases (no task is moving), and spin_lock_irqsave on top in the slow path.

There are two writeback interfaces to modify: test_{clear/set}_page_writeback(). And the lock order is:

    --> memcg->move_lock
      --> mapping->tree_lock

Signed-off-by: Sha Zhengju <handai.szj@taobao.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: Greg Thelen <gthelen@google.com>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 3 changed files with 39 additions and 7 deletions Side-by-side Diff
include/linux/memcontrol.h
... | ... | @@ -42,6 +42,7 @@ |
42 | 42 | MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */ |
43 | 43 | MEM_CGROUP_STAT_RSS_HUGE, /* # of pages charged as anon huge */ |
44 | 44 | MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */ |
45 | + MEM_CGROUP_STAT_WRITEBACK, /* # of pages under writeback */ | |
45 | 46 | MEM_CGROUP_STAT_SWAP, /* # of pages, swapped out */ |
46 | 47 | MEM_CGROUP_STAT_NSTATS, |
47 | 48 | }; |
mm/memcontrol.c
... | ... | @@ -89,6 +89,7 @@ |
89 | 89 | "rss", |
90 | 90 | "rss_huge", |
91 | 91 | "mapped_file", |
92 | + "writeback", | |
92 | 93 | "swap", |
93 | 94 | }; |
94 | 95 | |
... | ... | @@ -3654,6 +3655,20 @@ |
3654 | 3655 | } |
3655 | 3656 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ |
3656 | 3657 | |
3658 | +static inline | |
3659 | +void mem_cgroup_move_account_page_stat(struct mem_cgroup *from, | |
3660 | + struct mem_cgroup *to, | |
3661 | + unsigned int nr_pages, | |
3662 | + enum mem_cgroup_stat_index idx) | |
3663 | +{ | |
3664 | + /* Update stat data for mem_cgroup */ | |
3665 | + preempt_disable(); | |
3666 | + WARN_ON_ONCE(from->stat->count[idx] < nr_pages); | |
3667 | + __this_cpu_add(from->stat->count[idx], -nr_pages); | |
3668 | + __this_cpu_add(to->stat->count[idx], nr_pages); | |
3669 | + preempt_enable(); | |
3670 | +} | |
3671 | + | |
3657 | 3672 | /** |
3658 | 3673 | * mem_cgroup_move_account - move account of the page |
3659 | 3674 | * @page: the page |
... | ... | @@ -3699,13 +3714,14 @@ |
3699 | 3714 | |
3700 | 3715 | move_lock_mem_cgroup(from, &flags); |
3701 | 3716 | |
3702 | - if (!anon && page_mapped(page)) { | |
3703 | - /* Update mapped_file data for mem_cgroup */ | |
3704 | - preempt_disable(); | |
3705 | - __this_cpu_dec(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]); | |
3706 | - __this_cpu_inc(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]); | |
3707 | - preempt_enable(); | |
3708 | - } | |
3717 | + if (!anon && page_mapped(page)) | |
3718 | + mem_cgroup_move_account_page_stat(from, to, nr_pages, | |
3719 | + MEM_CGROUP_STAT_FILE_MAPPED); | |
3720 | + | |
3721 | + if (PageWriteback(page)) | |
3722 | + mem_cgroup_move_account_page_stat(from, to, nr_pages, | |
3723 | + MEM_CGROUP_STAT_WRITEBACK); | |
3724 | + | |
3709 | 3725 | mem_cgroup_charge_statistics(from, page, anon, -nr_pages); |
3710 | 3726 | |
3711 | 3727 | /* caller should have done css_get */ |
mm/page-writeback.c
... | ... | @@ -2143,11 +2143,17 @@ |
2143 | 2143 | |
2144 | 2144 | /* |
2145 | 2145 | * Helper function for set_page_writeback family. |
2146 | + * | |
2147 | + * The caller must hold mem_cgroup_begin/end_update_page_stat() lock | |
2148 | + * while calling this function. | |
2149 | + * See test_set_page_writeback for example. | |
2150 | + * | |
2146 | 2151 | * NOTE: Unlike account_page_dirtied this does not rely on being atomic |
2147 | 2152 | * wrt interrupts. |
2148 | 2153 | */ |
2149 | 2154 | void account_page_writeback(struct page *page) |
2150 | 2155 | { |
2156 | + mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK); | |
2151 | 2157 | inc_zone_page_state(page, NR_WRITEBACK); |
2152 | 2158 | } |
2153 | 2159 | EXPORT_SYMBOL(account_page_writeback); |
2154 | 2160 | |
... | ... | @@ -2364,7 +2370,10 @@ |
2364 | 2370 | { |
2365 | 2371 | struct address_space *mapping = page_mapping(page); |
2366 | 2372 | int ret; |
2373 | + bool locked; | |
2374 | + unsigned long memcg_flags; | |
2367 | 2375 | |
2376 | + mem_cgroup_begin_update_page_stat(page, &locked, &memcg_flags); | |
2368 | 2377 | if (mapping) { |
2369 | 2378 | struct backing_dev_info *bdi = mapping->backing_dev_info; |
2370 | 2379 | unsigned long flags; |
2371 | 2380 | |
... | ... | @@ -2385,9 +2394,11 @@ |
2385 | 2394 | ret = TestClearPageWriteback(page); |
2386 | 2395 | } |
2387 | 2396 | if (ret) { |
2397 | + mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK); | |
2388 | 2398 | dec_zone_page_state(page, NR_WRITEBACK); |
2389 | 2399 | inc_zone_page_state(page, NR_WRITTEN); |
2390 | 2400 | } |
2401 | + mem_cgroup_end_update_page_stat(page, &locked, &memcg_flags); | |
2391 | 2402 | return ret; |
2392 | 2403 | } |
2393 | 2404 | |
2394 | 2405 | |
... | ... | @@ -2395,7 +2406,10 @@ |
2395 | 2406 | { |
2396 | 2407 | struct address_space *mapping = page_mapping(page); |
2397 | 2408 | int ret; |
2409 | + bool locked; | |
2410 | + unsigned long memcg_flags; | |
2398 | 2411 | |
2412 | + mem_cgroup_begin_update_page_stat(page, &locked, &memcg_flags); | |
2399 | 2413 | if (mapping) { |
2400 | 2414 | struct backing_dev_info *bdi = mapping->backing_dev_info; |
2401 | 2415 | unsigned long flags; |
... | ... | @@ -2422,6 +2436,7 @@ |
2422 | 2436 | } |
2423 | 2437 | if (!ret) |
2424 | 2438 | account_page_writeback(page); |
2439 | + mem_cgroup_end_update_page_stat(page, &locked, &memcg_flags); | |
2425 | 2440 | return ret; |
2426 | 2441 | |
2427 | 2442 | } |