Commit 406eb0c9ba765eb066406fd5ce9d5e2b169a4d5a

Authored by Ying Han
Committed by Linus Torvalds
1 parent 1bac180bd2

memcg: add memory.numastat api for numa statistics

The new API exports numa_maps-style statistics on a per-memcg basis.  This is
useful information because it shows how a memcg's pages are distributed across
the physical NUMA nodes.

One of the use cases is evaluating application performance by combining
this information with the CPU allocation of the application.
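
For example, the per-node placement reported here can be read alongside the
job's cpuset assignment (a rough sketch; the cgroup name "myjob" and the mount
points below are illustrative, assuming memory and cpuset hierarchies mounted
under /dev/cgroup):

  $ cat /dev/cgroup/memory/myjob/memory.numa_stat   # per-node page placement
  $ cat /dev/cgroup/cpuset/myjob/cpuset.cpus        # cpus the job may run on
  $ cat /dev/cgroup/cpuset/myjob/cpuset.mems        # nodes the job may allocate from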

The output of memory.numa_stat follows a format similar to that of
numa_maps:

  total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
  file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ...
  anon=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
  unevictable=<total unevictable pages> N0=<node 0 pages> N1=<node 1 pages> ...

And we have per-node:

  total = file + anon + unevictable

  $ cat /dev/cgroup/memory/memory.numa_stat
  total=250020 N0=87620 N1=52367 N2=45298 N3=64735
  file=225232 N0=83402 N1=46160 N2=40522 N3=55148
  anon=21053 N0=3424 N1=6207 N2=4776 N3=6646
  unevictable=3735 N0=794 N1=0 N2=0 N3=2941
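
The per-node identity holds for each column above; e.g. for N0:
83402 (file) + 3424 (anon) + 794 (unevictable) = 87620 (total).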

Signed-off-by: Ying Han <yinghan@google.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 1 changed file with 155 additions and 0 deletions

... ... @@ -1089,6 +1089,93 @@
1089 1089 return MEM_CGROUP_ZSTAT(mz, lru);
1090 1090 }
1091 1091  
  1092 +#ifdef CONFIG_NUMA
  1093 +static unsigned long mem_cgroup_node_nr_file_lru_pages(struct mem_cgroup *memcg,
  1094 + int nid)
  1095 +{
  1096 + unsigned long ret;
  1097 +
  1098 + ret = mem_cgroup_get_zonestat_node(memcg, nid, LRU_INACTIVE_FILE) +
  1099 + mem_cgroup_get_zonestat_node(memcg, nid, LRU_ACTIVE_FILE);
  1100 +
  1101 + return ret;
  1102 +}
  1103 +
  1104 +static unsigned long mem_cgroup_nr_file_lru_pages(struct mem_cgroup *memcg)
  1105 +{
  1106 + u64 total = 0;
  1107 + int nid;
  1108 +
  1109 + for_each_node_state(nid, N_HIGH_MEMORY)
  1110 + total += mem_cgroup_node_nr_file_lru_pages(memcg, nid);
  1111 +
  1112 + return total;
  1113 +}
  1114 +
  1115 +static unsigned long mem_cgroup_node_nr_anon_lru_pages(struct mem_cgroup *memcg,
  1116 + int nid)
  1117 +{
  1118 + unsigned long ret;
  1119 +
  1120 + ret = mem_cgroup_get_zonestat_node(memcg, nid, LRU_INACTIVE_ANON) +
  1121 + mem_cgroup_get_zonestat_node(memcg, nid, LRU_ACTIVE_ANON);
  1122 +
  1123 + return ret;
  1124 +}
  1125 +
  1126 +static unsigned long mem_cgroup_nr_anon_lru_pages(struct mem_cgroup *memcg)
  1127 +{
  1128 + u64 total = 0;
  1129 + int nid;
  1130 +
  1131 + for_each_node_state(nid, N_HIGH_MEMORY)
  1132 + total += mem_cgroup_node_nr_anon_lru_pages(memcg, nid);
  1133 +
  1134 + return total;
  1135 +}
  1136 +
  1137 +static unsigned long
  1138 +mem_cgroup_node_nr_unevictable_lru_pages(struct mem_cgroup *memcg, int nid)
  1139 +{
  1140 + return mem_cgroup_get_zonestat_node(memcg, nid, LRU_UNEVICTABLE);
  1141 +}
  1142 +
  1143 +static unsigned long
  1144 +mem_cgroup_nr_unevictable_lru_pages(struct mem_cgroup *memcg)
  1145 +{
  1146 + u64 total = 0;
  1147 + int nid;
  1148 +
  1149 + for_each_node_state(nid, N_HIGH_MEMORY)
  1150 + total += mem_cgroup_node_nr_unevictable_lru_pages(memcg, nid);
  1151 +
  1152 + return total;
  1153 +}
  1154 +
  1155 +static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
  1156 + int nid)
  1157 +{
  1158 + enum lru_list l;
  1159 + u64 total = 0;
  1160 +
  1161 + for_each_lru(l)
  1162 + total += mem_cgroup_get_zonestat_node(memcg, nid, l);
  1163 +
  1164 + return total;
  1165 +}
  1166 +
  1167 +static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg)
  1168 +{
  1169 + u64 total = 0;
  1170 + int nid;
  1171 +
  1172 + for_each_node_state(nid, N_HIGH_MEMORY)
  1173 + total += mem_cgroup_node_nr_lru_pages(memcg, nid);
  1174 +
  1175 + return total;
  1176 +}
  1177 +#endif /* CONFIG_NUMA */
  1178 +
1092 1179 struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg,
1093 1180 struct zone *zone)
1094 1181 {
... ... @@ -3944,6 +4031,51 @@
3944 4031 mem_cgroup_get_local_stat(iter, s);
3945 4032 }
3946 4033  
  4034 +#ifdef CONFIG_NUMA
  4035 +static int mem_control_numa_stat_show(struct seq_file *m, void *arg)
  4036 +{
  4037 + int nid;
  4038 + unsigned long total_nr, file_nr, anon_nr, unevictable_nr;
  4039 + unsigned long node_nr;
  4040 + struct cgroup *cont = m->private;
  4041 + struct mem_cgroup *mem_cont = mem_cgroup_from_cont(cont);
  4042 +
  4043 + total_nr = mem_cgroup_nr_lru_pages(mem_cont);
  4044 + seq_printf(m, "total=%lu", total_nr);
  4045 + for_each_node_state(nid, N_HIGH_MEMORY) {
  4046 + node_nr = mem_cgroup_node_nr_lru_pages(mem_cont, nid);
  4047 + seq_printf(m, " N%d=%lu", nid, node_nr);
  4048 + }
  4049 + seq_putc(m, '\n');
  4050 +
  4051 + file_nr = mem_cgroup_nr_file_lru_pages(mem_cont);
  4052 + seq_printf(m, "file=%lu", file_nr);
  4053 + for_each_node_state(nid, N_HIGH_MEMORY) {
  4054 + node_nr = mem_cgroup_node_nr_file_lru_pages(mem_cont, nid);
  4055 + seq_printf(m, " N%d=%lu", nid, node_nr);
  4056 + }
  4057 + seq_putc(m, '\n');
  4058 +
  4059 + anon_nr = mem_cgroup_nr_anon_lru_pages(mem_cont);
  4060 + seq_printf(m, "anon=%lu", anon_nr);
  4061 + for_each_node_state(nid, N_HIGH_MEMORY) {
  4062 + node_nr = mem_cgroup_node_nr_anon_lru_pages(mem_cont, nid);
  4063 + seq_printf(m, " N%d=%lu", nid, node_nr);
  4064 + }
  4065 + seq_putc(m, '\n');
  4066 +
  4067 + unevictable_nr = mem_cgroup_nr_unevictable_lru_pages(mem_cont);
  4068 + seq_printf(m, "unevictable=%lu", unevictable_nr);
  4069 + for_each_node_state(nid, N_HIGH_MEMORY) {
  4070 + node_nr = mem_cgroup_node_nr_unevictable_lru_pages(mem_cont,
  4071 + nid);
  4072 + seq_printf(m, " N%d=%lu", nid, node_nr);
  4073 + }
  4074 + seq_putc(m, '\n');
  4075 + return 0;
  4076 +}
  4077 +#endif /* CONFIG_NUMA */
  4078 +
3947 4079 static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
3948 4080 struct cgroup_map_cb *cb)
3949 4081 {
... ... @@ -3954,6 +4086,7 @@
3954 4086 memset(&mystat, 0, sizeof(mystat));
3955 4087 mem_cgroup_get_local_stat(mem_cont, &mystat);
3956 4088  
  4089 +
3957 4090 for (i = 0; i < NR_MCS_STAT; i++) {
3958 4091 if (i == MCS_SWAP && !do_swap_account)
3959 4092 continue;
... ... @@ -4377,6 +4510,22 @@
4377 4510 return 0;
4378 4511 }
4379 4512  
  4513 +#ifdef CONFIG_NUMA
  4514 +static const struct file_operations mem_control_numa_stat_file_operations = {
  4515 + .read = seq_read,
  4516 + .llseek = seq_lseek,
  4517 + .release = single_release,
  4518 +};
  4519 +
  4520 +static int mem_control_numa_stat_open(struct inode *unused, struct file *file)
  4521 +{
  4522 + struct cgroup *cont = file->f_dentry->d_parent->d_fsdata;
  4523 +
  4524 + file->f_op = &mem_control_numa_stat_file_operations;
  4525 + return single_open(file, mem_control_numa_stat_show, cont);
  4526 +}
  4527 +#endif /* CONFIG_NUMA */
  4528 +
4380 4529 static struct cftype mem_cgroup_files[] = {
4381 4530 {
4382 4531 .name = "usage_in_bytes",
... ... @@ -4440,6 +4589,12 @@
4440 4589 .unregister_event = mem_cgroup_oom_unregister_event,
4441 4590 .private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL),
4442 4591 },
  4592 +#ifdef CONFIG_NUMA
  4593 + {
  4594 + .name = "numa_stat",
  4595 + .open = mem_control_numa_stat_open,
  4596 + },
  4597 +#endif
4443 4598 };
4444 4599  
4445 4600 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP