Commit 7952f98818d561ed0e11434a7a16acd9a7bae859
Author: Catalin Marinas
Parent: 815c4163b6

kmemleak: Annotate false positive in init_section_page_cgroup()

The pointer to the page_cgroup table allocated in
init_section_page_cgroup() is stored in section->page_cgroup as (base -
pfn). Since this value points neither to the start of the allocated
memory block nor anywhere inside it, kmemleak considers the block
unreferenced and reports a false positive.

This was reported in bugzilla.kernel.org as #16297.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Adrien Dessemond <adrien.dessemond@gmail.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Andrew Morton <akpm@linux-foundation.org>
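
For context: kmemleak only treats an allocation as referenced while some
scanned word points at the block itself or somewhere inside it. Storing
(base - pfn) biases the pointer back by the section's starting pfn, so the
stored value lands well before the block and the table looks unreferenced.
The sketch below is a minimal standalone userspace illustration of the same
pointer arithmetic, not kernel code: the names (make_table, lookup) and the
constants are hypothetical stand-ins, and the kmemleak API appears only in
comments.

/*
 * Standalone sketch of the biased-base-pointer pattern used by
 * init_section_page_cgroup(). Builds with any C99 compiler.
 */
#include <stdio.h>
#include <stdlib.h>

struct page_cgroup { unsigned long pfn; };

#define SECTION_START	4096UL	/* stand-in for the section's first pfn */
#define SECTION_PAGES	8UL	/* stand-in for PAGES_PER_SECTION */

/*
 * Mirrors section->page_cgroup: biased so that lookup is simply
 * "stored + pfn". Note that it does NOT point into the allocation.
 */
static struct page_cgroup *stored;

static int make_table(void)
{
	struct page_cgroup *base = calloc(SECTION_PAGES, sizeof(*base));
	unsigned long i;

	if (!base)
		return -1;
	for (i = 0; i < SECTION_PAGES; i++)
		base[i].pfn = SECTION_START + i;
	/*
	 * Out-of-range pointer arithmetic, exactly as in the kernel code.
	 * Once the only live copy of the address is this biased value,
	 * kmemleak would report the block as an unreferenced object; the
	 * commit silences that with kmemleak_not_leak(base).
	 */
	stored = base - SECTION_START;
	return 0;
}

static struct page_cgroup *lookup(unsigned long pfn)
{
	return stored + pfn;	/* mirrors lookup_page_cgroup() */
}

int main(void)
{
	if (make_table())
		return 1;
	printf("pfn %lu -> recorded pfn %lu\n",
	       SECTION_START + 3, lookup(SECTION_START + 3)->pfn);
	return 0;
}

kmemleak_not_leak() is the appropriate annotation here (rather than
kmemleak_ignore()) because the table itself still holds real pointers
(pc->page, the lru list heads) that kmemleak should keep scanning; the
object is merely excluded from leak reports.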

Showing 1 changed file, mm/page_cgroup.c, with 7 additions and 0 deletions. Added lines are marked with a leading '+'.

#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/bootmem.h>
#include <linux/bit_spinlock.h>
#include <linux/page_cgroup.h>
#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/memory.h>
#include <linux/vmalloc.h>
#include <linux/cgroup.h>
#include <linux/swapops.h>
+#include <linux/kmemleak.h>

static void __meminit
__init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
{
	pc->flags = 0;
	pc->mem_cgroup = NULL;
	pc->page = pfn_to_page(pfn);
	INIT_LIST_HEAD(&pc->lru);
}
static unsigned long total_usage;

#if !defined(CONFIG_SPARSEMEM)


void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
{
	pgdat->node_page_cgroup = NULL;
}

struct page_cgroup *lookup_page_cgroup(struct page *page)
{
	unsigned long pfn = page_to_pfn(page);
	unsigned long offset;
	struct page_cgroup *base;

	base = NODE_DATA(page_to_nid(page))->node_page_cgroup;
	if (unlikely(!base))
		return NULL;

	offset = pfn - NODE_DATA(page_to_nid(page))->node_start_pfn;
	return base + offset;
}

static int __init alloc_node_page_cgroup(int nid)
{
	struct page_cgroup *base, *pc;
	unsigned long table_size;
	unsigned long start_pfn, nr_pages, index;

	start_pfn = NODE_DATA(nid)->node_start_pfn;
	nr_pages = NODE_DATA(nid)->node_spanned_pages;

	if (!nr_pages)
		return 0;

	table_size = sizeof(struct page_cgroup) * nr_pages;

	base = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
			table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
	if (!base)
		return -ENOMEM;
	for (index = 0; index < nr_pages; index++) {
		pc = base + index;
		__init_page_cgroup(pc, start_pfn + index);
	}
	NODE_DATA(nid)->node_page_cgroup = base;
	total_usage += table_size;
	return 0;
}

void __init page_cgroup_init_flatmem(void)
{

	int nid, fail;

	if (mem_cgroup_disabled())
		return;

	for_each_online_node(nid) {
		fail = alloc_node_page_cgroup(nid);
		if (fail)
			goto fail;
	}
	printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
	printk(KERN_INFO "please try 'cgroup_disable=memory' option if you"
		" don't want memory cgroups\n");
	return;
fail:
	printk(KERN_CRIT "allocation of page_cgroup failed.\n");
	printk(KERN_CRIT "please try 'cgroup_disable=memory' boot option\n");
	panic("Out of memory");
}

#else /* CONFIG_FLAT_NODE_MEM_MAP */

struct page_cgroup *lookup_page_cgroup(struct page *page)
{
	unsigned long pfn = page_to_pfn(page);
	struct mem_section *section = __pfn_to_section(pfn);

	if (!section->page_cgroup)
		return NULL;
	return section->page_cgroup + pfn;
}

/* __alloc_bootmem...() is protected by !slab_available() */
static int __init_refok init_section_page_cgroup(unsigned long pfn)
{
	struct mem_section *section = __pfn_to_section(pfn);
	struct page_cgroup *base, *pc;
	unsigned long table_size;
	int nid, index;

	if (!section->page_cgroup) {
		nid = page_to_nid(pfn_to_page(pfn));
		table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
		VM_BUG_ON(!slab_is_available());
		if (node_state(nid, N_HIGH_MEMORY)) {
			base = kmalloc_node(table_size,
					GFP_KERNEL | __GFP_NOWARN, nid);
			if (!base)
				base = vmalloc_node(table_size, nid);
		} else {
			base = kmalloc(table_size, GFP_KERNEL | __GFP_NOWARN);
			if (!base)
				base = vmalloc(table_size);
		}
+		/*
+		 * The value stored in section->page_cgroup is (base - pfn)
+		 * and it does not point to the memory block allocated above,
+		 * causing kmemleak false positives.
+		 */
+		kmemleak_not_leak(base);
	} else {
		/*
		 * We don't have to allocate page_cgroup again, but
		 * address of memmap may be changed. So, we have to initialize
		 * again.
		 */
		base = section->page_cgroup + pfn;
		table_size = 0;
		/* check address of memmap is changed or not. */
		if (base->page == pfn_to_page(pfn))
			return 0;
	}

	if (!base) {
		printk(KERN_ERR "page cgroup allocation failure\n");
		return -ENOMEM;
	}

	for (index = 0; index < PAGES_PER_SECTION; index++) {
		pc = base + index;
		__init_page_cgroup(pc, pfn + index);
	}

	section->page_cgroup = base - pfn;
	total_usage += table_size;
	return 0;
}
#ifdef CONFIG_MEMORY_HOTPLUG
void __free_page_cgroup(unsigned long pfn)
{
	struct mem_section *ms;
	struct page_cgroup *base;

	ms = __pfn_to_section(pfn);
	if (!ms || !ms->page_cgroup)
		return;
	base = ms->page_cgroup + pfn;
	if (is_vmalloc_addr(base)) {
		vfree(base);
		ms->page_cgroup = NULL;
	} else {
		struct page *page = virt_to_page(base);
		if (!PageReserved(page)) { /* Is bootmem ? */
			kfree(base);
			ms->page_cgroup = NULL;
		}
	}
}

int __meminit online_page_cgroup(unsigned long start_pfn,
			unsigned long nr_pages,
			int nid)
{
	unsigned long start, end, pfn;
	int fail = 0;

	start = start_pfn & ~(PAGES_PER_SECTION - 1);
	end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION);

	for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) {
		if (!pfn_present(pfn))
			continue;
		fail = init_section_page_cgroup(pfn);
	}
	if (!fail)
		return 0;

	/* rollback */
	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
		__free_page_cgroup(pfn);

	return -ENOMEM;
}

int __meminit offline_page_cgroup(unsigned long start_pfn,
		unsigned long nr_pages, int nid)
{
	unsigned long start, end, pfn;

	start = start_pfn & ~(PAGES_PER_SECTION - 1);
	end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION);

	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
		__free_page_cgroup(pfn);
	return 0;

}

static int __meminit page_cgroup_callback(struct notifier_block *self,
			unsigned long action, void *arg)
{
	struct memory_notify *mn = arg;
	int ret = 0;
	switch (action) {
	case MEM_GOING_ONLINE:
		ret = online_page_cgroup(mn->start_pfn,
				mn->nr_pages, mn->status_change_nid);
		break;
	case MEM_OFFLINE:
		offline_page_cgroup(mn->start_pfn,
				mn->nr_pages, mn->status_change_nid);
		break;
	case MEM_CANCEL_ONLINE:
	case MEM_GOING_OFFLINE:
		break;
	case MEM_ONLINE:
	case MEM_CANCEL_OFFLINE:
		break;
	}

	if (ret)
		ret = notifier_from_errno(ret);
	else
		ret = NOTIFY_OK;

	return ret;
}

#endif

void __init page_cgroup_init(void)
{
	unsigned long pfn;
	int fail = 0;

	if (mem_cgroup_disabled())
		return;

	for (pfn = 0; !fail && pfn < max_pfn; pfn += PAGES_PER_SECTION) {
		if (!pfn_present(pfn))
			continue;
		fail = init_section_page_cgroup(pfn);
	}
	if (fail) {
		printk(KERN_CRIT "try 'cgroup_disable=memory' boot option\n");
		panic("Out of memory");
	} else {
		hotplug_memory_notifier(page_cgroup_callback, 0);
	}
	printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
	printk(KERN_INFO "please try 'cgroup_disable=memory' option if you don't"
		" want memory cgroups\n");
}

void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
{
	return;
}

#endif


#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP

static DEFINE_MUTEX(swap_cgroup_mutex);
struct swap_cgroup_ctrl {
	struct page **map;
	unsigned long length;
	spinlock_t lock;
};

struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];

struct swap_cgroup {
	unsigned short id;
};
#define SC_PER_PAGE	(PAGE_SIZE/sizeof(struct swap_cgroup))
#define SC_POS_MASK	(SC_PER_PAGE - 1)

/*
 * SwapCgroup implements "lookup" and "exchange" operations.
 * In typical usage, this swap_cgroup is accessed via memcg's charge/uncharge
 * against SwapCache. At swap_free(), this is accessed directly from swap.
 *
 * This means,
 *  - we have no race in "exchange" when we're accessed via SwapCache because
 *    SwapCache(and its swp_entry) is under lock.
 *  - When called via swap_free(), there is no user of this entry and no race.
 * Then, we don't need lock around "exchange".
 *
 * TODO: we can push these buffers out to HIGHMEM.
 */

/*
 * allocate buffer for swap_cgroup.
 */
static int swap_cgroup_prepare(int type)
{
	struct page *page;
	struct swap_cgroup_ctrl *ctrl;
	unsigned long idx, max;

	ctrl = &swap_cgroup_ctrl[type];

	for (idx = 0; idx < ctrl->length; idx++) {
		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
		if (!page)
			goto not_enough_page;
		ctrl->map[idx] = page;
	}
	return 0;
not_enough_page:
	max = idx;
	for (idx = 0; idx < max; idx++)
		__free_page(ctrl->map[idx]);

	return -ENOMEM;
}

/**
 * swap_cgroup_cmpxchg - cmpxchg mem_cgroup's id for this swp_entry.
 * @ent: swap entry to be cmpxchged
 * @old: old id
 * @new: new id
 *
 * Returns old id at success, 0 at failure.
 * (There is no mem_cgroup using 0 as its id)
 */
unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
					unsigned short old, unsigned short new)
{
	int type = swp_type(ent);
	unsigned long offset = swp_offset(ent);
	unsigned long idx = offset / SC_PER_PAGE;
	unsigned long pos = offset & SC_POS_MASK;
	struct swap_cgroup_ctrl *ctrl;
	struct page *mappage;
	struct swap_cgroup *sc;
	unsigned long flags;
	unsigned short retval;

	ctrl = &swap_cgroup_ctrl[type];

	mappage = ctrl->map[idx];
	sc = page_address(mappage);
	sc += pos;
	spin_lock_irqsave(&ctrl->lock, flags);
	retval = sc->id;
	if (retval == old)
		sc->id = new;
	else
		retval = 0;
	spin_unlock_irqrestore(&ctrl->lock, flags);
	return retval;
}

/**
 * swap_cgroup_record - record mem_cgroup for this swp_entry.
 * @ent: swap entry to be recorded into
 * @id: mem_cgroup's id to be recorded
 *
 * Returns old value at success, 0 at failure.
 * (Of course, old value can be 0.)
 */
unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
{
	int type = swp_type(ent);
	unsigned long offset = swp_offset(ent);
	unsigned long idx = offset / SC_PER_PAGE;
	unsigned long pos = offset & SC_POS_MASK;
	struct swap_cgroup_ctrl *ctrl;
	struct page *mappage;
	struct swap_cgroup *sc;
	unsigned short old;
	unsigned long flags;

	ctrl = &swap_cgroup_ctrl[type];

	mappage = ctrl->map[idx];
	sc = page_address(mappage);
	sc += pos;
	spin_lock_irqsave(&ctrl->lock, flags);
	old = sc->id;
	sc->id = id;
	spin_unlock_irqrestore(&ctrl->lock, flags);

	return old;
}

/**
 * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry
 * @ent: swap entry to be looked up.
 *
 * Returns CSS ID of mem_cgroup at success. 0 at failure. (0 is invalid ID)
 */
unsigned short lookup_swap_cgroup(swp_entry_t ent)
{
	int type = swp_type(ent);
	unsigned long offset = swp_offset(ent);
	unsigned long idx = offset / SC_PER_PAGE;
	unsigned long pos = offset & SC_POS_MASK;
	struct swap_cgroup_ctrl *ctrl;
	struct page *mappage;
	struct swap_cgroup *sc;
	unsigned short ret;

	ctrl = &swap_cgroup_ctrl[type];
	mappage = ctrl->map[idx];
	sc = page_address(mappage);
	sc += pos;
	ret = sc->id;
	return ret;
}

int swap_cgroup_swapon(int type, unsigned long max_pages)
{
	void *array;
	unsigned long array_size;
	unsigned long length;
	struct swap_cgroup_ctrl *ctrl;

	if (!do_swap_account)
		return 0;

	length = ((max_pages/SC_PER_PAGE) + 1);
	array_size = length * sizeof(void *);

	array = vmalloc(array_size);
	if (!array)
		goto nomem;

	memset(array, 0, array_size);
	ctrl = &swap_cgroup_ctrl[type];
	mutex_lock(&swap_cgroup_mutex);
	ctrl->length = length;
	ctrl->map = array;
	spin_lock_init(&ctrl->lock);
	if (swap_cgroup_prepare(type)) {
		/* memory shortage */
		ctrl->map = NULL;
		ctrl->length = 0;
		vfree(array);
		mutex_unlock(&swap_cgroup_mutex);
		goto nomem;
	}
	mutex_unlock(&swap_cgroup_mutex);

	return 0;
nomem:
	printk(KERN_INFO "couldn't allocate enough memory for swap_cgroup.\n");
	printk(KERN_INFO
		"swap_cgroup can be disabled by noswapaccount boot option\n");
	return -ENOMEM;
}

void swap_cgroup_swapoff(int type)
{
	int i;
	struct swap_cgroup_ctrl *ctrl;

	if (!do_swap_account)
		return;

	mutex_lock(&swap_cgroup_mutex);
	ctrl = &swap_cgroup_ctrl[type];
	if (ctrl->map) {
		for (i = 0; i < ctrl->length; i++) {
			struct page *page = ctrl->map[i];
			if (page)
				__free_page(page);
		}
		vfree(ctrl->map);
		ctrl->map = NULL;
		ctrl->length = 0;
	}
	mutex_unlock(&swap_cgroup_mutex);
}

#endif