Commit 6b3ae58efca06623c197fd6d91ded4aa3a8fe039

Authored by Johannes Weiner
Committed by Linus Torvalds
1 parent 5564e88ba6

memcg: remove direct page_cgroup-to-page pointer

In struct page_cgroup, we have a full word for flags but only a few are
reserved.  Use the remaining upper bits to encode, depending on
configuration, the node or the section, to enable page_cgroup-to-page
lookups without a direct pointer.

This saves a full word for every page in a system with memory cgroups
enabled.
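
As a rough illustration of the encoding this patch introduces, below is a minimal, self-contained userspace sketch of packing an array ID into the upper bits of a flags word. The widths (3 flag bits, 10 ID bits) and the set_array_id()/get_array_id() helper names are made up for the example; in the patch the width is derived from SECTIONS_SHIFT or NODES_SHIFT and the low bits are the NR_PCG_FLAGS page_cgroup flags.

    #include <assert.h>
    #include <limits.h>
    #include <stdio.h>

    #define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

    /* Illustrative sizes only; the kernel derives these from config. */
    #define NR_FLAGS      3
    #define ARRAYID_WIDTH 10

    #define ARRAYID_MASK  ((1UL << ARRAYID_WIDTH) - 1)
    #define ARRAYID_SHIFT (BITS_PER_LONG - ARRAYID_WIDTH)

    /* flags word layout: | ARRAY-ID | ...unused... | FLAGS | */

    static void set_array_id(unsigned long *flags, unsigned long id)
    {
            *flags &= ~(ARRAYID_MASK << ARRAYID_SHIFT);
            *flags |= (id & ARRAYID_MASK) << ARRAYID_SHIFT;
    }

    static unsigned long get_array_id(unsigned long flags)
    {
            return (flags >> ARRAYID_SHIFT) & ARRAYID_MASK;
    }

    int main(void)
    {
            unsigned long flags = 0;

            flags |= 1UL << 1;        /* set a low flag bit */
            set_array_id(&flags, 42); /* encode a node/section number */

            assert(get_array_id(flags) == 42);
            assert(flags & (1UL << 1)); /* flag bits are untouched */
            printf("array id = %lu\n", get_array_id(flags));
            return 0;
    }

The ID and the flag bits coexist in the same word, which is how the patch recovers the owning node (flatmem) or section (sparsemem), and from it the page, without storing a struct page pointer.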

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 4 changed files with 117 additions and 55 deletions

include/linux/page_cgroup.h
1 1 #ifndef __LINUX_PAGE_CGROUP_H
2 2 #define __LINUX_PAGE_CGROUP_H
3 3  
  4 +enum {
  5 + /* flags for mem_cgroup */
  6 + PCG_LOCK, /* Lock for pc->mem_cgroup and following bits. */
  7 + PCG_CACHE, /* charged as cache */
  8 + PCG_USED, /* this object is in use. */
  9 + PCG_MIGRATION, /* under page migration */
  10 + /* flags for mem_cgroup and file and I/O status */
  11 + PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
  12 + PCG_FILE_MAPPED, /* page is accounted as "mapped" */
  13 + /* No lock in page_cgroup */
  14 + PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
  15 + __NR_PCG_FLAGS,
  16 +};
  17 +
  18 +#ifndef __GENERATING_BOUNDS_H
  19 +#include <generated/bounds.h>
  20 +
4 21 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
5 22 #include <linux/bit_spinlock.h>
  23 +
6 24 /*
7 25 * Page Cgroup can be considered as an extended mem_map.
8 26 * A page_cgroup page is associated with every page descriptor. The
... ... @@ -13,7 +31,6 @@
13 31 struct page_cgroup {
14 32 unsigned long flags;
15 33 struct mem_cgroup *mem_cgroup;
16   - struct page *page;
17 34 struct list_head lru; /* per cgroup LRU list */
18 35 };
19 36  
20 37  
... ... @@ -32,20 +49,8 @@
32 49 #endif
33 50  
34 51 struct page_cgroup *lookup_page_cgroup(struct page *page);
  52 +struct page *lookup_cgroup_page(struct page_cgroup *pc);
35 53  
36   -enum {
37   - /* flags for mem_cgroup */
38   - PCG_LOCK, /* Lock for pc->mem_cgroup and following bits. */
39   - PCG_CACHE, /* charged as cache */
40   - PCG_USED, /* this object is in use. */
41   - PCG_MIGRATION, /* under page migration */
42   - /* flags for mem_cgroup and file and I/O status */
43   - PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
44   - PCG_FILE_MAPPED, /* page is accounted as "mapped" */
45   - /* No lock in page_cgroup */
46   - PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
47   -};
48   -
49 54 #define TESTPCGFLAG(uname, lname) \
50 55 static inline int PageCgroup##uname(struct page_cgroup *pc) \
51 56 { return test_bit(PCG_##lname, &pc->flags); }
... ... @@ -117,6 +122,39 @@
117 122 local_irq_restore(*flags);
118 123 }
119 124  
  125 +#ifdef CONFIG_SPARSEMEM
  126 +#define PCG_ARRAYID_WIDTH SECTIONS_SHIFT
  127 +#else
  128 +#define PCG_ARRAYID_WIDTH NODES_SHIFT
  129 +#endif
  130 +
  131 +#if (PCG_ARRAYID_WIDTH > BITS_PER_LONG - NR_PCG_FLAGS)
  132 +#error Not enough space left in pc->flags to store page_cgroup array IDs
  133 +#endif
  134 +
  135 +/* pc->flags: ARRAY-ID | FLAGS */
  136 +
  137 +#define PCG_ARRAYID_MASK ((1UL << PCG_ARRAYID_WIDTH) - 1)
  138 +
  139 +#define PCG_ARRAYID_OFFSET (BITS_PER_LONG - PCG_ARRAYID_WIDTH)
  140 +/*
  141 + * Zero the shift count for non-existant fields, to prevent compiler
  142 + * warnings and ensure references are optimized away.
  143 + */
  144 +#define PCG_ARRAYID_SHIFT (PCG_ARRAYID_OFFSET * (PCG_ARRAYID_WIDTH != 0))
  145 +
  146 +static inline void set_page_cgroup_array_id(struct page_cgroup *pc,
  147 + unsigned long id)
  148 +{
  149 + pc->flags &= ~(PCG_ARRAYID_MASK << PCG_ARRAYID_SHIFT);
  150 + pc->flags |= (id & PCG_ARRAYID_MASK) << PCG_ARRAYID_SHIFT;
  151 +}
  152 +
  153 +static inline unsigned long page_cgroup_array_id(struct page_cgroup *pc)
  154 +{
  155 + return (pc->flags >> PCG_ARRAYID_SHIFT) & PCG_ARRAYID_MASK;
  156 +}
  157 +
120 158 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
121 159 struct page_cgroup;
122 160  
... ... @@ -137,7 +175,7 @@
137 175 {
138 176 }
139 177  
140   -#endif
  178 +#endif /* CONFIG_CGROUP_MEM_RES_CTLR */
141 179  
142 180 #include <linux/swap.h>
143 181  
... ... @@ -173,6 +211,9 @@
173 211 return;
174 212 }
175 213  
176   -#endif
177   -#endif
  214 +#endif /* CONFIG_CGROUP_MEM_RES_CTLR_SWAP */
  215 +
  216 +#endif /* !__GENERATING_BOUNDS_H */
  217 +
  218 +#endif /* __LINUX_PAGE_CGROUP_H */

kernel/bounds.c
... ... @@ -9,12 +9,14 @@
9 9 #include <linux/page-flags.h>
10 10 #include <linux/mmzone.h>
11 11 #include <linux/kbuild.h>
  12 +#include <linux/page_cgroup.h>
12 13  
13 14 void foo(void)
14 15 {
15 16 /* The enum constants to put into include/generated/bounds.h */
16 17 DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
17 18 DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
  19 + DEFINE(NR_PCG_FLAGS, __NR_PCG_FLAGS);
18 20 /* End of constants */
19 21 }

mm/memcontrol.c
... ... @@ -1080,7 +1080,7 @@
1080 1080 if (unlikely(!PageCgroupUsed(pc)))
1081 1081 continue;
1082 1082  
1083   - page = pc->page;
  1083 + page = lookup_cgroup_page(pc);
1084 1084  
1085 1085 if (unlikely(!PageLRU(page)))
1086 1086 continue;
... ... @@ -3344,7 +3344,7 @@
3344 3344 }
3345 3345 spin_unlock_irqrestore(&zone->lru_lock, flags);
3346 3346  
3347   - page = pc->page;
  3347 + page = lookup_cgroup_page(pc);
3348 3348  
3349 3349 ret = mem_cgroup_move_parent(page, pc, mem, GFP_KERNEL);
3350 3350 if (ret == -ENOMEM)

mm/page_cgroup.c
... ... @@ -11,12 +11,11 @@
11 11 #include <linux/swapops.h>
12 12 #include <linux/kmemleak.h>
13 13  
14   -static void __meminit
15   -__init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
  14 +static void __meminit init_page_cgroup(struct page_cgroup *pc, unsigned long id)
16 15 {
17 16 pc->flags = 0;
  17 + set_page_cgroup_array_id(pc, id);
18 18 pc->mem_cgroup = NULL;
19   - pc->page = pfn_to_page(pfn);
20 19 INIT_LIST_HEAD(&pc->lru);
21 20 }
22 21 static unsigned long total_usage;
... ... @@ -43,6 +42,19 @@
43 42 return base + offset;
44 43 }
45 44  
  45 +struct page *lookup_cgroup_page(struct page_cgroup *pc)
  46 +{
  47 + unsigned long pfn;
  48 + struct page *page;
  49 + pg_data_t *pgdat;
  50 +
  51 + pgdat = NODE_DATA(page_cgroup_array_id(pc));
  52 + pfn = pc - pgdat->node_page_cgroup + pgdat->node_start_pfn;
  53 + page = pfn_to_page(pfn);
  54 + VM_BUG_ON(pc != lookup_page_cgroup(page));
  55 + return page;
  56 +}
  57 +
46 58 static int __init alloc_node_page_cgroup(int nid)
47 59 {
48 60 struct page_cgroup *base, *pc;
... ... @@ -63,7 +75,7 @@
63 75 return -ENOMEM;
64 76 for (index = 0; index < nr_pages; index++) {
65 77 pc = base + index;
66   - __init_page_cgroup(pc, start_pfn + index);
  78 + init_page_cgroup(pc, nid);
67 79 }
68 80 NODE_DATA(nid)->node_page_cgroup = base;
69 81 total_usage += table_size;
... ... @@ -105,46 +117,53 @@
105 117 return section->page_cgroup + pfn;
106 118 }
107 119  
  120 +struct page *lookup_cgroup_page(struct page_cgroup *pc)
  121 +{
  122 + struct mem_section *section;
  123 + struct page *page;
  124 + unsigned long nr;
  125 +
  126 + nr = page_cgroup_array_id(pc);
  127 + section = __nr_to_section(nr);
  128 + page = pfn_to_page(pc - section->page_cgroup);
  129 + VM_BUG_ON(pc != lookup_page_cgroup(page));
  130 + return page;
  131 +}
  132 +
108 133 /* __alloc_bootmem...() is protected by !slab_available() */
109 134 static int __init_refok init_section_page_cgroup(unsigned long pfn)
110 135 {
111   - struct mem_section *section = __pfn_to_section(pfn);
112 136 struct page_cgroup *base, *pc;
  137 + struct mem_section *section;
113 138 unsigned long table_size;
  139 + unsigned long nr;
114 140 int nid, index;
115 141  
116   - if (!section->page_cgroup) {
117   - nid = page_to_nid(pfn_to_page(pfn));
118   - table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
119   - VM_BUG_ON(!slab_is_available());
120   - if (node_state(nid, N_HIGH_MEMORY)) {
121   - base = kmalloc_node(table_size,
122   - GFP_KERNEL | __GFP_NOWARN, nid);
123   - if (!base)
124   - base = vmalloc_node(table_size, nid);
125   - } else {
126   - base = kmalloc(table_size, GFP_KERNEL | __GFP_NOWARN);
127   - if (!base)
128   - base = vmalloc(table_size);
129   - }
130   - /*
131   - * The value stored in section->page_cgroup is (base - pfn)
132   - * and it does not point to the memory block allocated above,
133   - * causing kmemleak false positives.
134   - */
135   - kmemleak_not_leak(base);
  142 + nr = pfn_to_section_nr(pfn);
  143 + section = __nr_to_section(nr);
  144 +
  145 + if (section->page_cgroup)
  146 + return 0;
  147 +
  148 + nid = page_to_nid(pfn_to_page(pfn));
  149 + table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
  150 + VM_BUG_ON(!slab_is_available());
  151 + if (node_state(nid, N_HIGH_MEMORY)) {
  152 + base = kmalloc_node(table_size,
  153 + GFP_KERNEL | __GFP_NOWARN, nid);
  154 + if (!base)
  155 + base = vmalloc_node(table_size, nid);
136 156 } else {
137   - /*
138   - * We don't have to allocate page_cgroup again, but
139   - * address of memmap may be changed. So, we have to initialize
140   - * again.
141   - */
142   - base = section->page_cgroup + pfn;
143   - table_size = 0;
144   - /* check address of memmap is changed or not. */
145   - if (base->page == pfn_to_page(pfn))
146   - return 0;
  157 + base = kmalloc(table_size, GFP_KERNEL | __GFP_NOWARN);
  158 + if (!base)
  159 + base = vmalloc(table_size);
147 160 }
  161 + /*
  162 + * The value stored in section->page_cgroup is (base - pfn)
  163 + * and it does not point to the memory block allocated above,
  164 + * causing kmemleak false positives.
  165 + */
  166 + kmemleak_not_leak(base);
148 167  
149 168 if (!base) {
150 169 printk(KERN_ERR "page cgroup allocation failure\n");
... ... @@ -153,7 +172,7 @@
153 172  
154 173 for (index = 0; index < PAGES_PER_SECTION; index++) {
155 174 pc = base + index;
156   - __init_page_cgroup(pc, pfn + index);
  175 + init_page_cgroup(pc, nr);
157 176 }
158 177  
159 178 section->page_cgroup = base - pfn;