Commit 82f9d486e59f588c7d100865c36510644abda356
Committed by: Linus Torvalds
1 parent: 108b6a7846
Exists in: master and in 38 other branches
memcg: add memory.vmscan_stat
The commit log of 0ae5e89c60c9 ("memcg: count the soft_limit reclaim in...") says it adds scanning stats to the memory.stat file. But it doesn't, because we considered we needed to reach a consensus for such new APIs.

This patch is a trial to add memory.scan_stat. This shows
 - the number of scanned pages (total, anon, file)
 - the number of rotated pages (total, anon, file)
 - the number of freed pages (total, anon, file)
 - the elapsed time (including sleep/pause time)
for both direct and soft reclaim.

The biggest difference from Ying's original version is that this file can be reset by a write, as

 # echo 0 > ...../memory.scan_stat

Here is an example of the output. This is a result after make -j 6 of a kernel under a 300M limit.

 [kamezawa@bluextal ~]$ cat /cgroup/memory/A/memory.vmscan_stat
 scanned_pages_by_limit 9471864
 scanned_anon_pages_by_limit 6640629
 scanned_file_pages_by_limit 2831235
 rotated_pages_by_limit 4243974
 rotated_anon_pages_by_limit 3971968
 rotated_file_pages_by_limit 272006
 freed_pages_by_limit 2318492
 freed_anon_pages_by_limit 962052
 freed_file_pages_by_limit 1356440
 elapsed_ns_by_limit 351386416101
 scanned_pages_by_system 0
 scanned_anon_pages_by_system 0
 scanned_file_pages_by_system 0
 rotated_pages_by_system 0
 rotated_anon_pages_by_system 0
 rotated_file_pages_by_system 0
 freed_pages_by_system 0
 freed_anon_pages_by_system 0
 freed_file_pages_by_system 0
 elapsed_ns_by_system 0
 scanned_pages_by_limit_under_hierarchy 9471864
 scanned_anon_pages_by_limit_under_hierarchy 6640629
 scanned_file_pages_by_limit_under_hierarchy 2831235
 rotated_pages_by_limit_under_hierarchy 4243974
 rotated_anon_pages_by_limit_under_hierarchy 3971968
 rotated_file_pages_by_limit_under_hierarchy 272006
 freed_pages_by_limit_under_hierarchy 2318492
 freed_anon_pages_by_limit_under_hierarchy 962052
 freed_file_pages_by_limit_under_hierarchy 1356440
 elapsed_ns_by_limit_under_hierarchy 351386416101
 scanned_pages_by_system_under_hierarchy 0
 scanned_anon_pages_by_system_under_hierarchy 0
 scanned_file_pages_by_system_under_hierarchy 0
 rotated_pages_by_system_under_hierarchy 0
 rotated_anon_pages_by_system_under_hierarchy 0
 rotated_file_pages_by_system_under_hierarchy 0
 freed_pages_by_system_under_hierarchy 0
 freed_anon_pages_by_system_under_hierarchy 0
 freed_file_pages_by_system_under_hierarchy 0
 elapsed_ns_by_system_under_hierarchy 0

total_xxxx is for hierarchy management.

This will be useful for further memcg development and needs to be developed before we do any complicated rework on LRU/softlimit management.

This patch adds a new struct memcg_scanrecord to the scan_control struct. sc->nr_scanned et al. are not designed for exporting information; for example, nr_scanned is reset frequently and incremented by +2 when scanning mapped pages. To avoid complexity, I added a new parameter to scan_control which is for exporting scanning information.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Ying Han <yinghan@google.com>
Cc: Andrew Bresticker <abrestic@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
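As a quick illustration of the interface described above, here is a minimal userspace sketch (not part of this patch) that resets the counters by writing 0 and then reads one of them back. The cgroup path follows the example in the log and will differ on other systems; the "key value" line format is taken from the sample output above.

/*
 * Sketch: reset memory.vmscan_stat and print scanned_pages_by_limit.
 * The path below matches the example in the commit log; adjust it to
 * your own memory cgroup hierarchy.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *path = "/cgroup/memory/A/memory.vmscan_stat";
	char name[128];
	unsigned long long val;
	FILE *f;

	/* Writing 0 clears all counters, like "echo 0 > memory.vmscan_stat". */
	f = fopen(path, "w");
	if (!f) {
		perror("open for reset");
		return 1;
	}
	fputs("0\n", f);
	fclose(f);

	/* ... run a workload here, e.g. make -j 6 under a memory limit ... */

	/* Each line of the file is "<name> <value>". */
	f = fopen(path, "r");
	if (!f) {
		perror("open for read");
		return 1;
	}
	while (fscanf(f, "%127s %llu", name, &val) == 2) {
		if (!strcmp(name, "scanned_pages_by_limit"))
			printf("scanned_pages_by_limit = %llu\n", val);
	}
	fclose(f);
	return 0;
}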
Showing 5 changed files with 303 additions and 18 deletions
Documentation/cgroups/memory.txt
... | ... | @@ -380,7 +380,7 @@ |
380 | 380 | |
381 | 381 | 5.2 stat file |
382 | 382 | |
383 | -memory.stat file includes following statistics | |
383 | +5.2.1 memory.stat file includes following statistics | |
384 | 384 | |
385 | 385 | # per-memory cgroup local status |
386 | 386 | cache - # of bytes of page cache memory. |
... | ... | @@ -437,6 +437,89 @@ |
437 | 437 | (Note: file and shmem may be shared among other cgroups. In that case, |
438 | 438 | file_mapped is accounted only when the memory cgroup is owner of page |
439 | 439 | cache.) |
440 | + | |
441 | +5.2.2 memory.vmscan_stat | |
442 | + | |
443 | +memory.vmscan_stat includes statistics for memory scanning, freeing, and | 
444 | +reclaiming. The statistics show memory scanning information since | 
445 | +memory cgroup creation and can be reset to 0 by writing 0 as | 
446 | + | |
447 | + #echo 0 > ../memory.vmscan_stat | |
448 | + | |
449 | +This file contains the following statistics. | 
450 | + | |
451 | +[param]_[file_or_anon]_pages_by_[reason]_[under_hierarchy] | 
452 | +[param]_elapsed_ns_by_[reason]_[under_hierarchy] | |
453 | + | |
454 | +For example, | |
455 | + | |
456 | + scanned_file_pages_by_limit indicates the number of scanned | |
457 | + file pages at vmscan. | |
458 | + | |
459 | +Now, 3 parameters are supported | |
460 | + | |
461 | + scanned - the number of pages scanned by vmscan | |
462 | + rotated - the number of pages activated at vmscan | |
463 | + freed - the number of pages freed by vmscan | |
464 | + | |
465 | +If "rotated" is high against scanned/freed, the memcg seems busy. | |
466 | + | |
467 | +Now, 2 reasons are supported | 
468 | + | |
469 | + limit - the memory cgroup's limit | |
470 | + system - global memory pressure + softlimit | |
471 | + (global memory pressure not under softlimit is not handled now) | |
472 | + | |
473 | +When under_hierarchy is appended at the tail, the number indicates the | 
474 | +total scan of the memcg and its children. | 
475 | + | |
476 | +elapsed_ns is the elapsed time in nanoseconds. This may include sleep time | 
477 | +and does not indicate CPU usage. So, please take this as just showing | 
478 | +latency. | 
479 | + | |
480 | +Here is an example. | |
481 | + | |
482 | +# cat /cgroup/memory/A/memory.vmscan_stat | |
483 | +scanned_pages_by_limit 9471864 | |
484 | +scanned_anon_pages_by_limit 6640629 | |
485 | +scanned_file_pages_by_limit 2831235 | |
486 | +rotated_pages_by_limit 4243974 | |
487 | +rotated_anon_pages_by_limit 3971968 | |
488 | +rotated_file_pages_by_limit 272006 | |
489 | +freed_pages_by_limit 2318492 | |
490 | +freed_anon_pages_by_limit 962052 | |
491 | +freed_file_pages_by_limit 1356440 | |
492 | +elapsed_ns_by_limit 351386416101 | |
493 | +scanned_pages_by_system 0 | |
494 | +scanned_anon_pages_by_system 0 | |
495 | +scanned_file_pages_by_system 0 | |
496 | +rotated_pages_by_system 0 | |
497 | +rotated_anon_pages_by_system 0 | |
498 | +rotated_file_pages_by_system 0 | |
499 | +freed_pages_by_system 0 | |
500 | +freed_anon_pages_by_system 0 | |
501 | +freed_file_pages_by_system 0 | |
502 | +elapsed_ns_by_system 0 | |
503 | +scanned_pages_by_limit_under_hierarchy 9471864 | |
504 | +scanned_anon_pages_by_limit_under_hierarchy 6640629 | |
505 | +scanned_file_pages_by_limit_under_hierarchy 2831235 | |
506 | +rotated_pages_by_limit_under_hierarchy 4243974 | |
507 | +rotated_anon_pages_by_limit_under_hierarchy 3971968 | |
508 | +rotated_file_pages_by_limit_under_hierarchy 272006 | |
509 | +freed_pages_by_limit_under_hierarchy 2318492 | |
510 | +freed_anon_pages_by_limit_under_hierarchy 962052 | |
511 | +freed_file_pages_by_limit_under_hierarchy 1356440 | |
512 | +elapsed_ns_by_limit_under_hierarchy 351386416101 | |
513 | +scanned_pages_by_system_under_hierarchy 0 | |
514 | +scanned_anon_pages_by_system_under_hierarchy 0 | |
515 | +scanned_file_pages_by_system_under_hierarchy 0 | |
516 | +rotated_pages_by_system_under_hierarchy 0 | |
517 | +rotated_anon_pages_by_system_under_hierarchy 0 | |
518 | +rotated_file_pages_by_system_under_hierarchy 0 | |
519 | +freed_pages_by_system_under_hierarchy 0 | |
520 | +freed_anon_pages_by_system_under_hierarchy 0 | |
521 | +freed_file_pages_by_system_under_hierarchy 0 | |
522 | +elapsed_ns_by_system_under_hierarchy 0 | |
440 | 523 | |
441 | 524 | 5.3 swappiness |
442 | 525 |
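The naming scheme described in the documentation above can be summarized with the following small sketch (not part of the patch). It prints the same 40 key names that the read handler added to mm/memcontrol.c below builds by concatenating scanstat_string[] with the _by_limit/_by_system and _under_hierarchy suffixes.

/*
 * Sketch: enumerate the key names produced by the
 * [param]_[anon|file]_pages_by_[reason][_under_hierarchy] scheme.
 */
#include <stdio.h>

int main(void)
{
	static const char *params[]  = { "scanned", "rotated", "freed" };
	static const char *types[]   = { "", "anon_", "file_" };
	static const char *reasons[] = { "_by_limit", "_by_system" };
	static const char *scopes[]  = { "", "_under_hierarchy" };
	int p, t, r, s;

	/* local counters first, then the *_under_hierarchy totals */
	for (s = 0; s < 2; s++)
		for (r = 0; r < 2; r++) {
			for (p = 0; p < 3; p++)
				for (t = 0; t < 3; t++)
					printf("%s_%spages%s%s\n",
					       params[p], types[t],
					       reasons[r], scopes[s]);
			/* elapsed time has no anon/file split */
			printf("elapsed_ns%s%s\n", reasons[r], scopes[s]);
		}
	return 0;
}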
include/linux/memcontrol.h
... | ... | @@ -39,6 +39,16 @@ |
39 | 39 | struct mem_cgroup *mem_cont, |
40 | 40 | int active, int file); |
41 | 41 | |
42 | +struct memcg_scanrecord { | |
43 | + struct mem_cgroup *mem; /* scanned memory cgroup */ | 
44 | + struct mem_cgroup *root; /* scan target hierarchy root */ | |
45 | + int context; /* scanning context (see memcontrol.c) */ | |
46 | + unsigned long nr_scanned[2]; /* the number of scanned pages */ | |
47 | + unsigned long nr_rotated[2]; /* the number of rotated pages */ | |
48 | + unsigned long nr_freed[2]; /* the number of freed pages */ | |
49 | + unsigned long elapsed; /* nsec of time elapsed while scanning */ | |
50 | +}; | |
51 | + | |
42 | 52 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR |
43 | 53 | /* |
44 | 54 | * All "charge" functions with gfp_mask should use GFP_KERNEL or |
... | ... | @@ -118,6 +128,15 @@ |
118 | 128 | mem_cgroup_get_reclaim_stat_from_page(struct page *page); |
119 | 129 | extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, |
120 | 130 | struct task_struct *p); |
131 | + | |
132 | +extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, | |
133 | + gfp_t gfp_mask, bool noswap, | |
134 | + struct memcg_scanrecord *rec); | |
135 | +extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, | |
136 | + gfp_t gfp_mask, bool noswap, | |
137 | + struct zone *zone, | |
138 | + struct memcg_scanrecord *rec, | |
139 | + unsigned long *nr_scanned); | |
121 | 140 | |
122 | 141 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP |
123 | 142 | extern int do_swap_account; |
include/linux/swap.h
... | ... | @@ -251,12 +251,6 @@ |
251 | 251 | /* linux/mm/vmscan.c */ |
252 | 252 | extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, |
253 | 253 | gfp_t gfp_mask, nodemask_t *mask); |
254 | -extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, | |
255 | - gfp_t gfp_mask, bool noswap); | |
256 | -extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, | |
257 | - gfp_t gfp_mask, bool noswap, | |
258 | - struct zone *zone, | |
259 | - unsigned long *nr_scanned); | |
260 | 254 | extern int __isolate_lru_page(struct page *page, int mode, int file); |
261 | 255 | extern unsigned long shrink_all_memory(unsigned long nr_pages); |
262 | 256 | extern int vm_swappiness; |
mm/memcontrol.c
... | ... | @@ -205,6 +205,50 @@ |
205 | 205 | static void mem_cgroup_threshold(struct mem_cgroup *mem); |
206 | 206 | static void mem_cgroup_oom_notify(struct mem_cgroup *mem); |
207 | 207 | |
208 | +enum { | |
209 | + SCAN_BY_LIMIT, | |
210 | + SCAN_BY_SYSTEM, | |
211 | + NR_SCAN_CONTEXT, | |
212 | + SCAN_BY_SHRINK, /* not recorded now */ | |
213 | +}; | |
214 | + | |
215 | +enum { | |
216 | + SCAN, | |
217 | + SCAN_ANON, | |
218 | + SCAN_FILE, | |
219 | + ROTATE, | |
220 | + ROTATE_ANON, | |
221 | + ROTATE_FILE, | |
222 | + FREED, | |
223 | + FREED_ANON, | |
224 | + FREED_FILE, | |
225 | + ELAPSED, | |
226 | + NR_SCANSTATS, | |
227 | +}; | |
228 | + | |
229 | +struct scanstat { | |
230 | + spinlock_t lock; | |
231 | + unsigned long stats[NR_SCAN_CONTEXT][NR_SCANSTATS]; | |
232 | + unsigned long rootstats[NR_SCAN_CONTEXT][NR_SCANSTATS]; | |
233 | +}; | |
234 | + | |
235 | +const char *scanstat_string[NR_SCANSTATS] = { | |
236 | + "scanned_pages", | |
237 | + "scanned_anon_pages", | |
238 | + "scanned_file_pages", | |
239 | + "rotated_pages", | |
240 | + "rotated_anon_pages", | |
241 | + "rotated_file_pages", | |
242 | + "freed_pages", | |
243 | + "freed_anon_pages", | |
244 | + "freed_file_pages", | |
245 | + "elapsed_ns", | |
246 | +}; | |
247 | +#define SCANSTAT_WORD_LIMIT "_by_limit" | |
248 | +#define SCANSTAT_WORD_SYSTEM "_by_system" | |
249 | +#define SCANSTAT_WORD_HIERARCHY "_under_hierarchy" | |
250 | + | |
251 | + | |
208 | 252 | /* |
209 | 253 | * The memory controller data structure. The memory controller controls both |
210 | 254 | * page cache and RSS per cgroup. We would eventually like to provide |
... | ... | @@ -270,7 +314,8 @@ |
270 | 314 | |
271 | 315 | /* For oom notifier event fd */ |
272 | 316 | struct list_head oom_notify; |
273 | - | |
317 | + /* For recording LRU-scan statistics */ | |
318 | + struct scanstat scanstat; | |
274 | 319 | /* |
275 | 320 | * Should we move charges of a task when a task is moved into this |
276 | 321 | * mem_cgroup ? And what type of charges should we move ? |
... | ... | @@ -1623,6 +1668,44 @@ |
1623 | 1668 | } |
1624 | 1669 | #endif |
1625 | 1670 | |
1671 | +static void __mem_cgroup_record_scanstat(unsigned long *stats, | |
1672 | + struct memcg_scanrecord *rec) | |
1673 | +{ | |
1674 | + | |
1675 | + stats[SCAN] += rec->nr_scanned[0] + rec->nr_scanned[1]; | |
1676 | + stats[SCAN_ANON] += rec->nr_scanned[0]; | |
1677 | + stats[SCAN_FILE] += rec->nr_scanned[1]; | |
1678 | + | |
1679 | + stats[ROTATE] += rec->nr_rotated[0] + rec->nr_rotated[1]; | |
1680 | + stats[ROTATE_ANON] += rec->nr_rotated[0]; | |
1681 | + stats[ROTATE_FILE] += rec->nr_rotated[1]; | |
1682 | + | |
1683 | + stats[FREED] += rec->nr_freed[0] + rec->nr_freed[1]; | |
1684 | + stats[FREED_ANON] += rec->nr_freed[0]; | |
1685 | + stats[FREED_FILE] += rec->nr_freed[1]; | |
1686 | + | |
1687 | + stats[ELAPSED] += rec->elapsed; | |
1688 | +} | |
1689 | + | |
1690 | +static void mem_cgroup_record_scanstat(struct memcg_scanrecord *rec) | |
1691 | +{ | |
1692 | + struct mem_cgroup *mem; | |
1693 | + int context = rec->context; | |
1694 | + | |
1695 | + if (context >= NR_SCAN_CONTEXT) | |
1696 | + return; | |
1697 | + | |
1698 | + mem = rec->mem; | |
1699 | + spin_lock(&mem->scanstat.lock); | |
1700 | + __mem_cgroup_record_scanstat(mem->scanstat.stats[context], rec); | |
1701 | + spin_unlock(&mem->scanstat.lock); | |
1702 | + | |
1703 | + mem = rec->root; | |
1704 | + spin_lock(&mem->scanstat.lock); | |
1705 | + __mem_cgroup_record_scanstat(mem->scanstat.rootstats[context], rec); | |
1706 | + spin_unlock(&mem->scanstat.lock); | |
1707 | +} | |
1708 | + | |
1626 | 1709 | /* |
1627 | 1710 | * Scan the hierarchy if needed to reclaim memory. We remember the last child |
1628 | 1711 | * we reclaimed from, so that we don't end up penalizing one child extensively |
1629 | 1712 | |
... | ... | @@ -1647,8 +1730,9 @@ |
1647 | 1730 | bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP; |
1648 | 1731 | bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK; |
1649 | 1732 | bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT; |
1733 | + struct memcg_scanrecord rec; | |
1650 | 1734 | unsigned long excess; |
1651 | - unsigned long nr_scanned; | |
1735 | + unsigned long scanned; | |
1652 | 1736 | |
1653 | 1737 | excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT; |
1654 | 1738 | |
... | ... | @@ -1656,6 +1740,15 @@ |
1656 | 1740 | if (!check_soft && !shrink && root_mem->memsw_is_minimum) |
1657 | 1741 | noswap = true; |
1658 | 1742 | |
1743 | + if (shrink) | |
1744 | + rec.context = SCAN_BY_SHRINK; | |
1745 | + else if (check_soft) | |
1746 | + rec.context = SCAN_BY_SYSTEM; | |
1747 | + else | |
1748 | + rec.context = SCAN_BY_LIMIT; | |
1749 | + | |
1750 | + rec.root = root_mem; | |
1751 | + | |
1659 | 1752 | while (1) { |
1660 | 1753 | victim = mem_cgroup_select_victim(root_mem); |
1661 | 1754 | if (victim == root_mem) { |
1662 | 1755 | |
1663 | 1756 | |
... | ... | @@ -1696,14 +1789,23 @@ |
1696 | 1789 | css_put(&victim->css); |
1697 | 1790 | continue; |
1698 | 1791 | } |
1792 | + rec.mem = victim; | |
1793 | + rec.nr_scanned[0] = 0; | |
1794 | + rec.nr_scanned[1] = 0; | |
1795 | + rec.nr_rotated[0] = 0; | |
1796 | + rec.nr_rotated[1] = 0; | |
1797 | + rec.nr_freed[0] = 0; | |
1798 | + rec.nr_freed[1] = 0; | |
1799 | + rec.elapsed = 0; | |
1699 | 1800 | /* we use swappiness of local cgroup */ |
1700 | 1801 | if (check_soft) { |
1701 | 1802 | ret = mem_cgroup_shrink_node_zone(victim, gfp_mask, |
1702 | - noswap, zone, &nr_scanned); | |
1703 | - *total_scanned += nr_scanned; | |
1803 | + noswap, zone, &rec, &scanned); | |
1804 | + *total_scanned += scanned; | |
1704 | 1805 | } else |
1705 | 1806 | ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, |
1706 | - noswap); | |
1807 | + noswap, &rec); | |
1808 | + mem_cgroup_record_scanstat(&rec); | |
1707 | 1809 | css_put(&victim->css); |
1708 | 1810 | /* |
1709 | 1811 | * At shrinking usage, we can't check we should stop here or |
1710 | 1812 | |
1711 | 1813 | |
... | ... | @@ -3792,14 +3894,18 @@ |
3792 | 3894 | /* try to free all pages in this cgroup */ |
3793 | 3895 | shrink = 1; |
3794 | 3896 | while (nr_retries && mem->res.usage > 0) { |
3897 | + struct memcg_scanrecord rec; | |
3795 | 3898 | int progress; |
3796 | 3899 | |
3797 | 3900 | if (signal_pending(current)) { |
3798 | 3901 | ret = -EINTR; |
3799 | 3902 | goto out; |
3800 | 3903 | } |
3904 | + rec.context = SCAN_BY_SHRINK; | |
3905 | + rec.mem = mem; | |
3906 | + rec.root = mem; | |
3801 | 3907 | progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL, |
3802 | - false); | |
3908 | + false, &rec); | |
3803 | 3909 | if (!progress) { |
3804 | 3910 | nr_retries--; |
3805 | 3911 | /* maybe some writeback is necessary */ |
... | ... | @@ -4643,6 +4749,54 @@ |
4643 | 4749 | } |
4644 | 4750 | #endif /* CONFIG_NUMA */ |
4645 | 4751 | |
4752 | +static int mem_cgroup_vmscan_stat_read(struct cgroup *cgrp, | |
4753 | + struct cftype *cft, | |
4754 | + struct cgroup_map_cb *cb) | |
4755 | +{ | |
4756 | + struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp); | |
4757 | + char string[64]; | |
4758 | + int i; | |
4759 | + | |
4760 | + for (i = 0; i < NR_SCANSTATS; i++) { | |
4761 | + strcpy(string, scanstat_string[i]); | |
4762 | + strcat(string, SCANSTAT_WORD_LIMIT); | |
4763 | + cb->fill(cb, string, mem->scanstat.stats[SCAN_BY_LIMIT][i]); | |
4764 | + } | |
4765 | + | |
4766 | + for (i = 0; i < NR_SCANSTATS; i++) { | |
4767 | + strcpy(string, scanstat_string[i]); | |
4768 | + strcat(string, SCANSTAT_WORD_SYSTEM); | |
4769 | + cb->fill(cb, string, mem->scanstat.stats[SCAN_BY_SYSTEM][i]); | |
4770 | + } | |
4771 | + | |
4772 | + for (i = 0; i < NR_SCANSTATS; i++) { | |
4773 | + strcpy(string, scanstat_string[i]); | |
4774 | + strcat(string, SCANSTAT_WORD_LIMIT); | |
4775 | + strcat(string, SCANSTAT_WORD_HIERARCHY); | |
4776 | + cb->fill(cb, string, mem->scanstat.rootstats[SCAN_BY_LIMIT][i]); | |
4777 | + } | |
4778 | + for (i = 0; i < NR_SCANSTATS; i++) { | |
4779 | + strcpy(string, scanstat_string[i]); | |
4780 | + strcat(string, SCANSTAT_WORD_SYSTEM); | |
4781 | + strcat(string, SCANSTAT_WORD_HIERARCHY); | |
4782 | + cb->fill(cb, string, mem->scanstat.rootstats[SCAN_BY_SYSTEM][i]); | |
4783 | + } | |
4784 | + return 0; | |
4785 | +} | |
4786 | + | |
4787 | +static int mem_cgroup_reset_vmscan_stat(struct cgroup *cgrp, | |
4788 | + unsigned int event) | |
4789 | +{ | |
4790 | + struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp); | |
4791 | + | |
4792 | + spin_lock(&mem->scanstat.lock); | |
4793 | + memset(&mem->scanstat.stats, 0, sizeof(mem->scanstat.stats)); | |
4794 | + memset(&mem->scanstat.rootstats, 0, sizeof(mem->scanstat.rootstats)); | |
4795 | + spin_unlock(&mem->scanstat.lock); | |
4796 | + return 0; | |
4797 | +} | |
4798 | + | |
4799 | + | |
4646 | 4800 | static struct cftype mem_cgroup_files[] = { |
4647 | 4801 | { |
4648 | 4802 | .name = "usage_in_bytes", |
... | ... | @@ -4713,6 +4867,11 @@ |
4713 | 4867 | .mode = S_IRUGO, |
4714 | 4868 | }, |
4715 | 4869 | #endif |
4870 | + { | |
4871 | + .name = "vmscan_stat", | |
4872 | + .read_map = mem_cgroup_vmscan_stat_read, | |
4873 | + .trigger = mem_cgroup_reset_vmscan_stat, | |
4874 | + }, | |
4716 | 4875 | }; |
4717 | 4876 | |
4718 | 4877 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP |
... | ... | @@ -4976,6 +5135,7 @@ |
4976 | 5135 | atomic_set(&mem->refcnt, 1); |
4977 | 5136 | mem->move_charge_at_immigrate = 0; |
4978 | 5137 | mutex_init(&mem->thresholds_lock); |
5138 | + spin_lock_init(&mem->scanstat.lock); | |
4979 | 5139 | return &mem->css; |
4980 | 5140 | free_out: |
4981 | 5141 | __mem_cgroup_free(mem); |
mm/vmscan.c
... | ... | @@ -105,6 +105,7 @@ |
105 | 105 | |
106 | 106 | /* Which cgroup do we reclaim from */ |
107 | 107 | struct mem_cgroup *mem_cgroup; |
108 | + struct memcg_scanrecord *memcg_record; | |
108 | 109 | |
109 | 110 | /* |
110 | 111 | * Nodemask of nodes allowed by the caller. If NULL, all nodes |
... | ... | @@ -1348,6 +1349,8 @@ |
1348 | 1349 | int file = is_file_lru(lru); |
1349 | 1350 | int numpages = hpage_nr_pages(page); |
1350 | 1351 | reclaim_stat->recent_rotated[file] += numpages; |
1352 | + if (!scanning_global_lru(sc)) | |
1353 | + sc->memcg_record->nr_rotated[file] += numpages; | |
1351 | 1354 | } |
1352 | 1355 | if (!pagevec_add(&pvec, page)) { |
1353 | 1356 | spin_unlock_irq(&zone->lru_lock); |
... | ... | @@ -1391,6 +1394,10 @@ |
1391 | 1394 | |
1392 | 1395 | reclaim_stat->recent_scanned[0] += *nr_anon; |
1393 | 1396 | reclaim_stat->recent_scanned[1] += *nr_file; |
1397 | + if (!scanning_global_lru(sc)) { | |
1398 | + sc->memcg_record->nr_scanned[0] += *nr_anon; | |
1399 | + sc->memcg_record->nr_scanned[1] += *nr_file; | |
1400 | + } | |
1394 | 1401 | } |
1395 | 1402 | |
1396 | 1403 | /* |
... | ... | @@ -1504,6 +1511,9 @@ |
1504 | 1511 | nr_reclaimed += shrink_page_list(&page_list, zone, sc); |
1505 | 1512 | } |
1506 | 1513 | |
1514 | + if (!scanning_global_lru(sc)) | |
1515 | + sc->memcg_record->nr_freed[file] += nr_reclaimed; | |
1516 | + | |
1507 | 1517 | local_irq_disable(); |
1508 | 1518 | if (current_is_kswapd()) |
1509 | 1519 | __count_vm_events(KSWAPD_STEAL, nr_reclaimed); |
... | ... | @@ -1603,6 +1613,8 @@ |
1603 | 1613 | } |
1604 | 1614 | |
1605 | 1615 | reclaim_stat->recent_scanned[file] += nr_taken; |
1616 | + if (!scanning_global_lru(sc)) | |
1617 | + sc->memcg_record->nr_scanned[file] += nr_taken; | |
1606 | 1618 | |
1607 | 1619 | __count_zone_vm_events(PGREFILL, zone, pgscanned); |
1608 | 1620 | if (file) |
... | ... | @@ -1654,6 +1666,8 @@ |
1654 | 1666 | * get_scan_ratio. |
1655 | 1667 | */ |
1656 | 1668 | reclaim_stat->recent_rotated[file] += nr_rotated; |
1669 | + if (!scanning_global_lru(sc)) | |
1670 | + sc->memcg_record->nr_rotated[file] += nr_rotated; | |
1657 | 1671 | |
1658 | 1672 | move_active_pages_to_lru(zone, &l_active, |
1659 | 1673 | LRU_ACTIVE + file * LRU_FILE); |
... | ... | @@ -2254,9 +2268,10 @@ |
2254 | 2268 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR |
2255 | 2269 | |
2256 | 2270 | unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, |
2257 | - gfp_t gfp_mask, bool noswap, | |
2258 | - struct zone *zone, | |
2259 | - unsigned long *nr_scanned) | |
2271 | + gfp_t gfp_mask, bool noswap, | |
2272 | + struct zone *zone, | |
2273 | + struct memcg_scanrecord *rec, | |
2274 | + unsigned long *scanned) | |
2260 | 2275 | { |
2261 | 2276 | struct scan_control sc = { |
2262 | 2277 | .nr_scanned = 0, |
2263 | 2278 | |
... | ... | @@ -2266,7 +2281,9 @@ |
2266 | 2281 | .may_swap = !noswap, |
2267 | 2282 | .order = 0, |
2268 | 2283 | .mem_cgroup = mem, |
2284 | + .memcg_record = rec, | |
2269 | 2285 | }; |
2286 | + unsigned long start, end; | |
2270 | 2287 | |
2271 | 2288 | sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | |
2272 | 2289 | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); |
... | ... | @@ -2275,6 +2292,7 @@ |
2275 | 2292 | sc.may_writepage, |
2276 | 2293 | sc.gfp_mask); |
2277 | 2294 | |
2295 | + start = sched_clock(); | |
2278 | 2296 | /* |
2279 | 2297 | * NOTE: Although we can get the priority field, using it |
2280 | 2298 | * here is not a good idea, since it limits the pages we can scan. |
2281 | 2299 | |
2282 | 2300 | |
2283 | 2301 | |
2284 | 2302 | |
... | ... | @@ -2283,19 +2301,25 @@ |
2283 | 2301 | * the priority and make it zero. |
2284 | 2302 | */ |
2285 | 2303 | shrink_zone(0, zone, &sc); |
2304 | + end = sched_clock(); | |
2286 | 2305 | |
2306 | + if (rec) | |
2307 | + rec->elapsed += end - start; | |
2308 | + *scanned = sc.nr_scanned; | |
2309 | + | |
2287 | 2310 | trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); |
2288 | 2311 | |
2289 | - *nr_scanned = sc.nr_scanned; | |
2290 | 2312 | return sc.nr_reclaimed; |
2291 | 2313 | } |
2292 | 2314 | |
2293 | 2315 | unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, |
2294 | 2316 | gfp_t gfp_mask, |
2295 | - bool noswap) | |
2317 | + bool noswap, | |
2318 | + struct memcg_scanrecord *rec) | |
2296 | 2319 | { |
2297 | 2320 | struct zonelist *zonelist; |
2298 | 2321 | unsigned long nr_reclaimed; |
2322 | + unsigned long start, end; | |
2299 | 2323 | int nid; |
2300 | 2324 | struct scan_control sc = { |
2301 | 2325 | .may_writepage = !laptop_mode, |
... | ... | @@ -2304,6 +2328,7 @@ |
2304 | 2328 | .nr_to_reclaim = SWAP_CLUSTER_MAX, |
2305 | 2329 | .order = 0, |
2306 | 2330 | .mem_cgroup = mem_cont, |
2331 | + .memcg_record = rec, | |
2307 | 2332 | .nodemask = NULL, /* we don't care the placement */ |
2308 | 2333 | .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | |
2309 | 2334 | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK), |
... | ... | @@ -2312,6 +2337,7 @@ |
2312 | 2337 | .gfp_mask = sc.gfp_mask, |
2313 | 2338 | }; |
2314 | 2339 | |
2340 | + start = sched_clock(); | |
2315 | 2341 | /* |
2316 | 2342 | * Unlike direct reclaim via alloc_pages(), memcg's reclaim doesn't |
2317 | 2343 | * take care of from where we get pages. So the node where we start the |
... | ... | @@ -2326,6 +2352,9 @@ |
2326 | 2352 | sc.gfp_mask); |
2327 | 2353 | |
2328 | 2354 | nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink); |
2355 | + end = sched_clock(); | |
2356 | + if (rec) | |
2357 | + rec->elapsed += end - start; | |
2329 | 2358 | |
2330 | 2359 | trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed); |
2331 | 2360 |