Blame view
mm/page_cgroup.c
11.4 KB
52d4b9ac0 memcg: allocate a... |
1 2 3 4 5 6 |
#include <linux/mm.h> #include <linux/mmzone.h> #include <linux/bootmem.h> #include <linux/bit_spinlock.h> #include <linux/page_cgroup.h> #include <linux/hash.h> |
94b6da5ab memcg: fix page_c... |
7 |
#include <linux/slab.h> |
52d4b9ac0 memcg: allocate a... |
8 |
#include <linux/memory.h> |
4c8210427 mm: page_cgroup n... |
9 |
#include <linux/vmalloc.h> |
94b6da5ab memcg: fix page_c... |
10 |
#include <linux/cgroup.h> |
27a7faa07 memcg: swap cgrou... |
11 |
#include <linux/swapops.h> |
7952f9881 kmemleak: Annotat... |
12 |
#include <linux/kmemleak.h> |
52d4b9ac0 memcg: allocate a... |
13 14 15 16 17 18 19 |
static void __meminit __init_page_cgroup(struct page_cgroup *pc, unsigned long pfn) { pc->flags = 0; pc->mem_cgroup = NULL; pc->page = pfn_to_page(pfn); |
08e552c69 memcg: synchroniz... |
20 |
INIT_LIST_HEAD(&pc->lru); |
52d4b9ac0 memcg: allocate a... |
21 22 23 24 |
} static unsigned long total_usage; #if !defined(CONFIG_SPARSEMEM) |
31168481c meminit section w... |
25 |
void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) |
52d4b9ac0 memcg: allocate a... |
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
{ pgdat->node_page_cgroup = NULL; } struct page_cgroup *lookup_page_cgroup(struct page *page) { unsigned long pfn = page_to_pfn(page); unsigned long offset; struct page_cgroup *base; base = NODE_DATA(page_to_nid(page))->node_page_cgroup; if (unlikely(!base)) return NULL; offset = pfn - NODE_DATA(page_to_nid(page))->node_start_pfn; return base + offset; } static int __init alloc_node_page_cgroup(int nid) { struct page_cgroup *base, *pc; unsigned long table_size; unsigned long start_pfn, nr_pages, index; start_pfn = NODE_DATA(nid)->node_start_pfn; nr_pages = NODE_DATA(nid)->node_spanned_pages; |
653d22c0f page_cgroup shoul... |
52 53 |
if (!nr_pages) return 0; |
52d4b9ac0 memcg: allocate a... |
54 |
table_size = sizeof(struct page_cgroup) * nr_pages; |
ca371c0d7 memcg: fix page_c... |
55 56 57 58 |
base = __alloc_bootmem_node_nopanic(NODE_DATA(nid), table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); if (!base) |
52d4b9ac0 memcg: allocate a... |
59 60 61 62 63 64 65 66 67 |
return -ENOMEM; for (index = 0; index < nr_pages; index++) { pc = base + index; __init_page_cgroup(pc, start_pfn + index); } NODE_DATA(nid)->node_page_cgroup = base; total_usage += table_size; return 0; } |
ca371c0d7 memcg: fix page_c... |
68 |
void __init page_cgroup_init_flatmem(void) |
52d4b9ac0 memcg: allocate a... |
69 70 71 |
{ int nid, fail; |
f8d665422 memcg: add mem_cg... |
72 |
if (mem_cgroup_disabled()) |
94b6da5ab memcg: fix page_c... |
73 |
return; |
52d4b9ac0 memcg: allocate a... |
74 75 76 77 78 79 80 |
for_each_online_node(nid) { fail = alloc_node_page_cgroup(nid); if (fail) goto fail; } printk(KERN_INFO "allocated %ld bytes of page_cgroup ", total_usage); |
8ca739e36 cgroups: make mes... |
81 82 83 |
printk(KERN_INFO "please try 'cgroup_disable=memory' option if you" " don't want memory cgroups "); |
52d4b9ac0 memcg: allocate a... |
84 85 |
return; fail: |
8ca739e36 cgroups: make mes... |
86 87 88 89 |
printk(KERN_CRIT "allocation of page_cgroup failed. "); printk(KERN_CRIT "please try 'cgroup_disable=memory' boot option "); |
52d4b9ac0 memcg: allocate a... |
90 91 92 93 94 95 96 97 98 |
panic("Out of memory"); } #else /* CONFIG_FLAT_NODE_MEM_MAP */ struct page_cgroup *lookup_page_cgroup(struct page *page) { unsigned long pfn = page_to_pfn(page); struct mem_section *section = __pfn_to_section(pfn); |
d69b042f3 memcg: add file-b... |
99 100 |
if (!section->page_cgroup) return NULL; |
52d4b9ac0 memcg: allocate a... |
101 102 |
return section->page_cgroup + pfn; } |
31168481c meminit section w... |
103 |
/* __alloc_bootmem...() is protected by !slab_available() */ |
feb166948 mm: make init_sec... |
104 |
static int __init_refok init_section_page_cgroup(unsigned long pfn) |
52d4b9ac0 memcg: allocate a... |
105 |
{ |
0753b0ef3 memcg: do not rec... |
106 |
struct mem_section *section = __pfn_to_section(pfn); |
52d4b9ac0 memcg: allocate a... |
107 108 109 |
struct page_cgroup *base, *pc; unsigned long table_size; int nid, index; |
dc19f9db3 memcg: memory hot... |
110 111 112 |
if (!section->page_cgroup) { nid = page_to_nid(pfn_to_page(pfn)); table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; |
ca371c0d7 memcg: fix page_c... |
113 |
VM_BUG_ON(!slab_is_available()); |
f52407ce2 memory hotplug: a... |
114 115 |
if (node_state(nid, N_HIGH_MEMORY)) { base = kmalloc_node(table_size, |
ca371c0d7 memcg: fix page_c... |
116 |
GFP_KERNEL | __GFP_NOWARN, nid); |
f52407ce2 memory hotplug: a... |
117 118 119 120 121 122 123 |
if (!base) base = vmalloc_node(table_size, nid); } else { base = kmalloc(table_size, GFP_KERNEL | __GFP_NOWARN); if (!base) base = vmalloc(table_size); } |
7952f9881 kmemleak: Annotat... |
124 125 126 127 128 129 |
/* * The value stored in section->page_cgroup is (base - pfn) * and it does not point to the memory block allocated above, * causing kmemleak false positives. */ kmemleak_not_leak(base); |
dc19f9db3 memcg: memory hot... |
130 131 132 133 134 135 136 137 138 139 140 |
} else { /* * We don't have to allocate page_cgroup again, but * address of memmap may be changed. So, we have to initialize * again. */ base = section->page_cgroup + pfn; table_size = 0; /* check address of memmap is changed or not. */ if (base->page == pfn_to_page(pfn)) return 0; |
94b6da5ab memcg: fix page_c... |
141 |
} |
52d4b9ac0 memcg: allocate a... |
142 143 144 145 146 147 148 149 150 151 152 |
if (!base) { printk(KERN_ERR "page cgroup allocation failure "); return -ENOMEM; } for (index = 0; index < PAGES_PER_SECTION; index++) { pc = base + index; __init_page_cgroup(pc, pfn + index); } |
52d4b9ac0 memcg: allocate a... |
153 154 155 156 157 158 159 160 161 162 163 164 165 166 |
section->page_cgroup = base - pfn; total_usage += table_size; return 0; } #ifdef CONFIG_MEMORY_HOTPLUG void __free_page_cgroup(unsigned long pfn) { struct mem_section *ms; struct page_cgroup *base; ms = __pfn_to_section(pfn); if (!ms || !ms->page_cgroup) return; base = ms->page_cgroup + pfn; |
94b6da5ab memcg: fix page_c... |
167 |
if (is_vmalloc_addr(base)) { |
52d4b9ac0 memcg: allocate a... |
168 |
vfree(base); |
94b6da5ab memcg: fix page_c... |
169 170 171 172 173 174 175 176 |
ms->page_cgroup = NULL; } else { struct page *page = virt_to_page(base); if (!PageReserved(page)) { /* Is bootmem ? */ kfree(base); ms->page_cgroup = NULL; } } |
52d4b9ac0 memcg: allocate a... |
177 |
} |
31168481c meminit section w... |
178 |
/*
 * Prepare page_cgroup tables for every present memory section overlapping
 * [@start_pfn, @start_pfn + @nr_pages).  The range is widened to whole
 * sections.  If any section fails, the tables of all sections in the
 * widened range are released again.
 *
 * Returns 0 on success, -ENOMEM on failure.  @nid is not used.
 */
int __meminit online_page_cgroup(unsigned long start_pfn,
			unsigned long nr_pages, int nid)
{
	unsigned long first = start_pfn & ~(PAGES_PER_SECTION - 1);
	unsigned long limit = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION);
	unsigned long sec;

	for (sec = first; sec < limit; sec += PAGES_PER_SECTION) {
		if (pfn_present(sec) && init_section_page_cgroup(sec))
			goto rollback;
	}
	return 0;

rollback:
	for (sec = first; sec < limit; sec += PAGES_PER_SECTION)
		__free_page_cgroup(sec);

	return -ENOMEM;
}
31168481c meminit section w... |
201 |
/*
 * Release the page_cgroup tables of every memory section overlapping
 * [@start_pfn, @start_pfn + @nr_pages), widened to whole sections.
 * Always returns 0.  @nid is not used.
 */
int __meminit offline_page_cgroup(unsigned long start_pfn,
		unsigned long nr_pages, int nid)
{
	unsigned long sec = start_pfn & ~(PAGES_PER_SECTION - 1);
	unsigned long limit = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION);

	while (sec < limit) {
		__free_page_cgroup(sec);
		sec += PAGES_PER_SECTION;
	}

	return 0;
}
31168481c meminit section w... |
213 |
static int __meminit page_cgroup_callback(struct notifier_block *self, |
52d4b9ac0 memcg: allocate a... |
214 215 216 217 218 219 220 221 222 |
unsigned long action, void *arg) { struct memory_notify *mn = arg; int ret = 0; switch (action) { case MEM_GOING_ONLINE: ret = online_page_cgroup(mn->start_pfn, mn->nr_pages, mn->status_change_nid); break; |
52d4b9ac0 memcg: allocate a... |
223 224 225 226 |
case MEM_OFFLINE: offline_page_cgroup(mn->start_pfn, mn->nr_pages, mn->status_change_nid); break; |
dc19f9db3 memcg: memory hot... |
227 |
case MEM_CANCEL_ONLINE: |
52d4b9ac0 memcg: allocate a... |
228 229 230 231 232 233 |
case MEM_GOING_OFFLINE: break; case MEM_ONLINE: case MEM_CANCEL_OFFLINE: break; } |
dc19f9db3 memcg: memory hot... |
234 235 236 237 238 |
if (ret) ret = notifier_from_errno(ret); else ret = NOTIFY_OK; |
52d4b9ac0 memcg: allocate a... |
239 240 241 242 243 244 245 246 247 |
return ret; } #endif void __init page_cgroup_init(void) { unsigned long pfn; int fail = 0; |
f8d665422 memcg: add mem_cg... |
248 |
if (mem_cgroup_disabled()) |
94b6da5ab memcg: fix page_c... |
249 |
return; |
52d4b9ac0 memcg: allocate a... |
250 251 252 253 254 255 |
for (pfn = 0; !fail && pfn < max_pfn; pfn += PAGES_PER_SECTION) { if (!pfn_present(pfn)) continue; fail = init_section_page_cgroup(pfn); } if (fail) { |
8ca739e36 cgroups: make mes... |
256 257 |
printk(KERN_CRIT "try 'cgroup_disable=memory' boot option "); |
52d4b9ac0 memcg: allocate a... |
258 259 260 261 262 263 |
panic("Out of memory"); } else { hotplug_memory_notifier(page_cgroup_callback, 0); } printk(KERN_INFO "allocated %ld bytes of page_cgroup ", total_usage); |
8ca739e36 cgroups: make mes... |
264 265 266 |
printk(KERN_INFO "please try 'cgroup_disable=memory' option if you don't" " want memory cgroups "); |
52d4b9ac0 memcg: allocate a... |
267 |
} |
31168481c meminit section w... |
268 |
void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) |
52d4b9ac0 memcg: allocate a... |
269 270 271 272 273 |
{ return; } #endif |
27a7faa07 memcg: swap cgrou... |
274 275 276 277 278 279 280 281 |
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP

/* Held while a swap_cgroup_ctrl entry is set up or torn down. */
static DEFINE_MUTEX(swap_cgroup_mutex);

/* Bookkeeping for the swap_cgroup records of one swap type. */
struct swap_cgroup_ctrl {
	struct page **map;	/* pages that hold the swap_cgroup records */
	unsigned long length;	/* number of entries in map */
	spinlock_t	lock;	/* taken while a record is being updated */
};

/* One control entry per swap type, indexed by swp_type(). */
struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
27a7faa07 memcg: swap cgrou... |
286 |
struct swap_cgroup { |
a3b2d6926 cgroups: use css ... |
287 |
unsigned short id; |
27a7faa07 memcg: swap cgrou... |
288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 |
}; #define SC_PER_PAGE (PAGE_SIZE/sizeof(struct swap_cgroup)) #define SC_POS_MASK (SC_PER_PAGE - 1) /* * SwapCgroup implements "lookup" and "exchange" operations. * In typical usage, this swap_cgroup is accessed via memcg's charge/uncharge * against SwapCache. At swap_free(), this is accessed directly from swap. * * This means, * - we have no race in "exchange" when we're accessed via SwapCache because * SwapCache(and its swp_entry) is under lock. * - When called via swap_free(), there is no user of this entry and no race. * Then, we don't need lock around "exchange". * * TODO: we can push these buffers out to HIGHMEM. */ /* * allocate buffer for swap_cgroup. */ static int swap_cgroup_prepare(int type) { struct page *page; struct swap_cgroup_ctrl *ctrl; unsigned long idx, max; |
27a7faa07 memcg: swap cgrou... |
314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 |
ctrl = &swap_cgroup_ctrl[type]; for (idx = 0; idx < ctrl->length; idx++) { page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!page) goto not_enough_page; ctrl->map[idx] = page; } return 0; not_enough_page: max = idx; for (idx = 0; idx < max; idx++) __free_page(ctrl->map[idx]); return -ENOMEM; } /** |
024914477 memcg: move charg... |
332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 |
* swap_cgroup_cmpxchg - cmpxchg mem_cgroup's id for this swp_entry. * @end: swap entry to be cmpxchged * @old: old id * @new: new id * * Returns old id at success, 0 at failure. * (There is no mem_cgroup useing 0 as its id) */ unsigned short swap_cgroup_cmpxchg(swp_entry_t ent, unsigned short old, unsigned short new) { int type = swp_type(ent); unsigned long offset = swp_offset(ent); unsigned long idx = offset / SC_PER_PAGE; unsigned long pos = offset & SC_POS_MASK; struct swap_cgroup_ctrl *ctrl; struct page *mappage; struct swap_cgroup *sc; |
e9e58a4ec memcg: avoid use ... |
350 351 |
unsigned long flags; unsigned short retval; |
024914477 memcg: move charg... |
352 353 354 355 356 357 |
ctrl = &swap_cgroup_ctrl[type]; mappage = ctrl->map[idx]; sc = page_address(mappage); sc += pos; |
e9e58a4ec memcg: avoid use ... |
358 359 360 361 |
spin_lock_irqsave(&ctrl->lock, flags); retval = sc->id; if (retval == old) sc->id = new; |
024914477 memcg: move charg... |
362 |
else |
e9e58a4ec memcg: avoid use ... |
363 364 365 |
retval = 0; spin_unlock_irqrestore(&ctrl->lock, flags); return retval; |
024914477 memcg: move charg... |
366 367 368 |
} /** |
27a7faa07 memcg: swap cgrou... |
369 370 371 372 |
* swap_cgroup_record - record mem_cgroup for this swp_entry. * @ent: swap entry to be recorded into * @mem: mem_cgroup to be recorded * |
a3b2d6926 cgroups: use css ... |
373 374 |
* Returns old value at success, 0 at failure. * (Of course, old value can be 0.) |
27a7faa07 memcg: swap cgrou... |
375 |
*/ |
a3b2d6926 cgroups: use css ... |
376 |
unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id) |
27a7faa07 memcg: swap cgrou... |
377 378 379 380 381 382 383 384 |
{ int type = swp_type(ent); unsigned long offset = swp_offset(ent); unsigned long idx = offset / SC_PER_PAGE; unsigned long pos = offset & SC_POS_MASK; struct swap_cgroup_ctrl *ctrl; struct page *mappage; struct swap_cgroup *sc; |
a3b2d6926 cgroups: use css ... |
385 |
unsigned short old; |
e9e58a4ec memcg: avoid use ... |
386 |
unsigned long flags; |
27a7faa07 memcg: swap cgrou... |
387 |
|
27a7faa07 memcg: swap cgrou... |
388 389 390 391 392 |
ctrl = &swap_cgroup_ctrl[type]; mappage = ctrl->map[idx]; sc = page_address(mappage); sc += pos; |
e9e58a4ec memcg: avoid use ... |
393 394 395 396 |
spin_lock_irqsave(&ctrl->lock, flags); old = sc->id; sc->id = id; spin_unlock_irqrestore(&ctrl->lock, flags); |
27a7faa07 memcg: swap cgrou... |
397 398 399 400 401 402 403 404 |
return old; } /** * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry * @ent: swap entry to be looked up. * |
a3b2d6926 cgroups: use css ... |
405 |
* Returns CSS ID of mem_cgroup at success. 0 at failure. (0 is invalid ID) |
27a7faa07 memcg: swap cgrou... |
406 |
*/ |
a3b2d6926 cgroups: use css ... |
407 |
unsigned short lookup_swap_cgroup(swp_entry_t ent) |
27a7faa07 memcg: swap cgrou... |
408 409 410 411 412 413 414 415 |
{ int type = swp_type(ent); unsigned long offset = swp_offset(ent); unsigned long idx = offset / SC_PER_PAGE; unsigned long pos = offset & SC_POS_MASK; struct swap_cgroup_ctrl *ctrl; struct page *mappage; struct swap_cgroup *sc; |
a3b2d6926 cgroups: use css ... |
416 |
unsigned short ret; |
27a7faa07 memcg: swap cgrou... |
417 |
|
27a7faa07 memcg: swap cgrou... |
418 419 420 421 |
ctrl = &swap_cgroup_ctrl[type]; mappage = ctrl->map[idx]; sc = page_address(mappage); sc += pos; |
a3b2d6926 cgroups: use css ... |
422 |
ret = sc->id; |
27a7faa07 memcg: swap cgrou... |
423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 |
return ret; } int swap_cgroup_swapon(int type, unsigned long max_pages) { void *array; unsigned long array_size; unsigned long length; struct swap_cgroup_ctrl *ctrl; if (!do_swap_account) return 0; length = ((max_pages/SC_PER_PAGE) + 1); array_size = length * sizeof(void *); array = vmalloc(array_size); if (!array) goto nomem; memset(array, 0, array_size); ctrl = &swap_cgroup_ctrl[type]; mutex_lock(&swap_cgroup_mutex); ctrl->length = length; ctrl->map = array; |
e9e58a4ec memcg: avoid use ... |
448 |
spin_lock_init(&ctrl->lock); |
27a7faa07 memcg: swap cgrou... |
449 450 451 452 453 454 455 456 457 |
if (swap_cgroup_prepare(type)) { /* memory shortage */ ctrl->map = NULL; ctrl->length = 0; vfree(array); mutex_unlock(&swap_cgroup_mutex); goto nomem; } mutex_unlock(&swap_cgroup_mutex); |
27a7faa07 memcg: swap cgrou... |
458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 |
return 0; nomem: printk(KERN_INFO "couldn't allocate enough memory for swap_cgroup. "); printk(KERN_INFO "swap_cgroup can be disabled by noswapaccount boot option "); return -ENOMEM; } void swap_cgroup_swapoff(int type) { int i; struct swap_cgroup_ctrl *ctrl; if (!do_swap_account) return; mutex_lock(&swap_cgroup_mutex); ctrl = &swap_cgroup_ctrl[type]; if (ctrl->map) { for (i = 0; i < ctrl->length; i++) { struct page *page = ctrl->map[i]; if (page) __free_page(page); } vfree(ctrl->map); ctrl->map = NULL; ctrl->length = 0; } mutex_unlock(&swap_cgroup_mutex); } #endif |