Blame view
mm/page_cgroup.c
11.2 KB
52d4b9ac0 memcg: allocate a... |
1 2 3 4 5 6 |
#include <linux/mm.h> #include <linux/mmzone.h> #include <linux/bootmem.h> #include <linux/bit_spinlock.h> #include <linux/page_cgroup.h> #include <linux/hash.h> |
94b6da5ab memcg: fix page_c... |
7 |
#include <linux/slab.h> |
52d4b9ac0 memcg: allocate a... |
8 |
#include <linux/memory.h> |
4c8210427 mm: page_cgroup n... |
9 |
#include <linux/vmalloc.h> |
94b6da5ab memcg: fix page_c... |
10 |
#include <linux/cgroup.h> |
27a7faa07 memcg: swap cgrou... |
11 |
#include <linux/swapops.h> |
52d4b9ac0 memcg: allocate a... |
12 13 14 15 16 17 18 |
static void __meminit __init_page_cgroup(struct page_cgroup *pc, unsigned long pfn) { pc->flags = 0; pc->mem_cgroup = NULL; pc->page = pfn_to_page(pfn); |
08e552c69 memcg: synchroniz... |
19 |
INIT_LIST_HEAD(&pc->lru); |
52d4b9ac0 memcg: allocate a... |
20 21 22 23 |
} static unsigned long total_usage; #if !defined(CONFIG_SPARSEMEM) |
31168481c meminit section w... |
24 |
void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) |
52d4b9ac0 memcg: allocate a... |
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
{ pgdat->node_page_cgroup = NULL; } struct page_cgroup *lookup_page_cgroup(struct page *page) { unsigned long pfn = page_to_pfn(page); unsigned long offset; struct page_cgroup *base; base = NODE_DATA(page_to_nid(page))->node_page_cgroup; if (unlikely(!base)) return NULL; offset = pfn - NODE_DATA(page_to_nid(page))->node_start_pfn; return base + offset; } static int __init alloc_node_page_cgroup(int nid) { struct page_cgroup *base, *pc; unsigned long table_size; unsigned long start_pfn, nr_pages, index; start_pfn = NODE_DATA(nid)->node_start_pfn; nr_pages = NODE_DATA(nid)->node_spanned_pages; |
653d22c0f page_cgroup shoul... |
51 52 |
if (!nr_pages) return 0; |
52d4b9ac0 memcg: allocate a... |
53 |
table_size = sizeof(struct page_cgroup) * nr_pages; |
ca371c0d7 memcg: fix page_c... |
54 55 56 57 |
base = __alloc_bootmem_node_nopanic(NODE_DATA(nid), table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); if (!base) |
52d4b9ac0 memcg: allocate a... |
58 59 60 61 62 63 64 65 66 |
return -ENOMEM; for (index = 0; index < nr_pages; index++) { pc = base + index; __init_page_cgroup(pc, start_pfn + index); } NODE_DATA(nid)->node_page_cgroup = base; total_usage += table_size; return 0; } |
ca371c0d7 memcg: fix page_c... |
67 |
void __init page_cgroup_init_flatmem(void) |
52d4b9ac0 memcg: allocate a... |
68 69 70 |
{ int nid, fail; |
f8d665422 memcg: add mem_cg... |
71 |
if (mem_cgroup_disabled()) |
94b6da5ab memcg: fix page_c... |
72 |
return; |
52d4b9ac0 memcg: allocate a... |
73 74 75 76 77 78 79 |
for_each_online_node(nid) { fail = alloc_node_page_cgroup(nid); if (fail) goto fail; } printk(KERN_INFO "allocated %ld bytes of page_cgroup ", total_usage); |
8ca739e36 cgroups: make mes... |
80 81 82 |
printk(KERN_INFO "please try 'cgroup_disable=memory' option if you" " don't want memory cgroups "); |
52d4b9ac0 memcg: allocate a... |
83 84 |
return; fail: |
8ca739e36 cgroups: make mes... |
85 86 87 88 |
printk(KERN_CRIT "allocation of page_cgroup failed. "); printk(KERN_CRIT "please try 'cgroup_disable=memory' boot option "); |
52d4b9ac0 memcg: allocate a... |
89 90 91 92 93 94 95 96 97 |
panic("Out of memory"); } #else /* CONFIG_FLAT_NODE_MEM_MAP */ struct page_cgroup *lookup_page_cgroup(struct page *page) { unsigned long pfn = page_to_pfn(page); struct mem_section *section = __pfn_to_section(pfn); |
d69b042f3 memcg: add file-b... |
98 99 |
if (!section->page_cgroup) return NULL; |
52d4b9ac0 memcg: allocate a... |
100 101 |
return section->page_cgroup + pfn; } |
31168481c meminit section w... |
102 |
/* __alloc_bootmem...() is protected by !slab_available() */ |
feb166948 mm: make init_sec... |
103 |
static int __init_refok init_section_page_cgroup(unsigned long pfn) |
52d4b9ac0 memcg: allocate a... |
104 |
{ |
0753b0ef3 memcg: do not rec... |
105 |
struct mem_section *section = __pfn_to_section(pfn); |
52d4b9ac0 memcg: allocate a... |
106 107 108 |
struct page_cgroup *base, *pc; unsigned long table_size; int nid, index; |
dc19f9db3 memcg: memory hot... |
109 110 111 |
if (!section->page_cgroup) { nid = page_to_nid(pfn_to_page(pfn)); table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; |
ca371c0d7 memcg: fix page_c... |
112 |
VM_BUG_ON(!slab_is_available()); |
f52407ce2 memory hotplug: a... |
113 114 |
if (node_state(nid, N_HIGH_MEMORY)) { base = kmalloc_node(table_size, |
ca371c0d7 memcg: fix page_c... |
115 |
GFP_KERNEL | __GFP_NOWARN, nid); |
f52407ce2 memory hotplug: a... |
116 117 118 119 120 121 122 |
if (!base) base = vmalloc_node(table_size, nid); } else { base = kmalloc(table_size, GFP_KERNEL | __GFP_NOWARN); if (!base) base = vmalloc(table_size); } |
dc19f9db3 memcg: memory hot... |
123 124 125 126 127 128 129 130 131 132 133 |
} else { /* * We don't have to allocate page_cgroup again, but * address of memmap may be changed. So, we have to initialize * again. */ base = section->page_cgroup + pfn; table_size = 0; /* check address of memmap is changed or not. */ if (base->page == pfn_to_page(pfn)) return 0; |
94b6da5ab memcg: fix page_c... |
134 |
} |
52d4b9ac0 memcg: allocate a... |
135 136 137 138 139 140 141 142 143 144 145 |
if (!base) { printk(KERN_ERR "page cgroup allocation failure "); return -ENOMEM; } for (index = 0; index < PAGES_PER_SECTION; index++) { pc = base + index; __init_page_cgroup(pc, pfn + index); } |
52d4b9ac0 memcg: allocate a... |
146 147 148 149 150 151 152 153 154 155 156 157 158 159 |
section->page_cgroup = base - pfn; total_usage += table_size; return 0; } #ifdef CONFIG_MEMORY_HOTPLUG void __free_page_cgroup(unsigned long pfn) { struct mem_section *ms; struct page_cgroup *base; ms = __pfn_to_section(pfn); if (!ms || !ms->page_cgroup) return; base = ms->page_cgroup + pfn; |
94b6da5ab memcg: fix page_c... |
160 |
if (is_vmalloc_addr(base)) { |
52d4b9ac0 memcg: allocate a... |
161 |
vfree(base); |
94b6da5ab memcg: fix page_c... |
162 163 164 165 166 167 168 169 |
ms->page_cgroup = NULL; } else { struct page *page = virt_to_page(base); if (!PageReserved(page)) { /* Is bootmem ? */ kfree(base); ms->page_cgroup = NULL; } } |
52d4b9ac0 memcg: allocate a... |
170 |
} |
31168481c meminit section w... |
171 |
/*
 * Prepare page_cgroup tables for every present section overlapping the pfn
 * range [@start_pfn, @start_pfn + @nr_pages).  The range is widened to
 * whole sections first.
 *
 * If any section fails, the tables of all sections in the widened range
 * are released again and -ENOMEM is returned.  Returns 0 on success.
 * @nid is not used here.
 */
int __meminit online_page_cgroup(unsigned long start_pfn,
			unsigned long nr_pages,
			int nid)
{
	const unsigned long first = start_pfn & ~(PAGES_PER_SECTION - 1);
	const unsigned long limit = ALIGN(start_pfn + nr_pages,
					  PAGES_PER_SECTION);
	unsigned long cur;

	for (cur = first; cur < limit; cur += PAGES_PER_SECTION) {
		if (!pfn_present(cur))
			continue;
		if (init_section_page_cgroup(cur))
			goto rollback;
	}
	return 0;

rollback:
	for (cur = first; cur < limit; cur += PAGES_PER_SECTION)
		__free_page_cgroup(cur);

	return -ENOMEM;
}
31168481c meminit section w... |
194 |
/*
 * Release the page_cgroup tables of every section overlapping the pfn
 * range [@start_pfn, @start_pfn + @nr_pages), after widening the range to
 * whole sections.  Always returns 0.  @nid is not used here.
 */
int __meminit offline_page_cgroup(unsigned long start_pfn,
		unsigned long nr_pages, int nid)
{
	unsigned long cur = start_pfn & ~(PAGES_PER_SECTION - 1);
	const unsigned long limit = ALIGN(start_pfn + nr_pages,
					  PAGES_PER_SECTION);

	while (cur < limit) {
		__free_page_cgroup(cur);
		cur += PAGES_PER_SECTION;
	}

	return 0;
}
31168481c meminit section w... |
206 |
static int __meminit page_cgroup_callback(struct notifier_block *self, |
52d4b9ac0 memcg: allocate a... |
207 208 209 210 211 212 213 214 215 |
unsigned long action, void *arg) { struct memory_notify *mn = arg; int ret = 0; switch (action) { case MEM_GOING_ONLINE: ret = online_page_cgroup(mn->start_pfn, mn->nr_pages, mn->status_change_nid); break; |
52d4b9ac0 memcg: allocate a... |
216 217 218 219 |
case MEM_OFFLINE: offline_page_cgroup(mn->start_pfn, mn->nr_pages, mn->status_change_nid); break; |
dc19f9db3 memcg: memory hot... |
220 |
case MEM_CANCEL_ONLINE: |
52d4b9ac0 memcg: allocate a... |
221 222 223 224 225 226 |
case MEM_GOING_OFFLINE: break; case MEM_ONLINE: case MEM_CANCEL_OFFLINE: break; } |
dc19f9db3 memcg: memory hot... |
227 228 229 230 231 |
if (ret) ret = notifier_from_errno(ret); else ret = NOTIFY_OK; |
52d4b9ac0 memcg: allocate a... |
232 233 234 235 236 237 238 239 240 |
return ret; } #endif void __init page_cgroup_init(void) { unsigned long pfn; int fail = 0; |
f8d665422 memcg: add mem_cg... |
241 |
if (mem_cgroup_disabled()) |
94b6da5ab memcg: fix page_c... |
242 |
return; |
52d4b9ac0 memcg: allocate a... |
243 244 245 246 247 248 |
for (pfn = 0; !fail && pfn < max_pfn; pfn += PAGES_PER_SECTION) { if (!pfn_present(pfn)) continue; fail = init_section_page_cgroup(pfn); } if (fail) { |
8ca739e36 cgroups: make mes... |
249 250 |
printk(KERN_CRIT "try 'cgroup_disable=memory' boot option "); |
52d4b9ac0 memcg: allocate a... |
251 252 253 254 255 256 |
panic("Out of memory"); } else { hotplug_memory_notifier(page_cgroup_callback, 0); } printk(KERN_INFO "allocated %ld bytes of page_cgroup ", total_usage); |
8ca739e36 cgroups: make mes... |
257 258 259 |
printk(KERN_INFO "please try 'cgroup_disable=memory' option if you don't" " want memory cgroups "); |
52d4b9ac0 memcg: allocate a... |
260 |
} |
31168481c meminit section w... |
261 |
void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) |
52d4b9ac0 memcg: allocate a... |
262 263 264 265 266 |
{ return; } #endif |
27a7faa07 memcg: swap cgrou... |
267 268 269 270 271 272 273 274 |
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP

/* Serializes swapon/swapoff set-up and tear-down of swap_cgroup_ctrl[]. */
static DEFINE_MUTEX(swap_cgroup_mutex);

/*
 * Bookkeeping for one swap type.
 * @map:    array of @length page pointers; each page stores SC_PER_PAGE
 *          swap_cgroup records
 * @length: number of entries in @map
 * @lock:   taken around every update of a record reached through @map
 */
struct swap_cgroup_ctrl {
	struct page **map;
	unsigned long length;
	spinlock_t	lock;
};

struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];

/* One record per swap slot: the id recorded for that slot (0 = none). */
struct swap_cgroup {
	unsigned short		id;
};
#define SC_PER_PAGE	(PAGE_SIZE/sizeof(struct swap_cgroup))
#define SC_POS_MASK	(SC_PER_PAGE - 1)

/*
 * SwapCgroup implements "lookup" and "exchange" operations.
 * In typical usage, this swap_cgroup is accessed via memcg's charge/uncharge
 * against SwapCache. At swap_free(), this is accessed directly from swap.
 *
 * This means,
 *  - we have no race in "exchange" when we're accessed via SwapCache because
 *    SwapCache(and its swp_entry) is under lock.
 *  - When called via swap_free(), there is no user of this entry and no race.
 *
 * Updates in this file are nevertheless performed under ctrl->lock.
 *
 * TODO: we can push these buffers out to HIGHMEM.
 */

/*
 * Allocate one zeroed page for every slot of the map belonging to swap
 * type @type.
 *
 * Returns 0 on success.  On failure every page allocated so far is freed
 * and -ENOMEM is returned; the map array itself is left to the caller.
 */
static int swap_cgroup_prepare(int type)
{
	struct page *page;
	struct swap_cgroup_ctrl *ctrl;
	unsigned long idx, max;

	ctrl = &swap_cgroup_ctrl[type];

	for (idx = 0; idx < ctrl->length; idx++) {
		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
		if (!page)
			goto not_enough_page;
		ctrl->map[idx] = page;
	}
	return 0;
not_enough_page:
	max = idx;
	for (idx = 0; idx < max; idx++)
		__free_page(ctrl->map[idx]);

	return -ENOMEM;
}

/*
 * Find the swap_cgroup record that belongs to @ent.
 *
 * The swap type selects the controller, offset / SC_PER_PAGE selects the
 * page in its map, and the low bits of the offset select the record inside
 * that page.  When @ctrlp is not NULL the controller is stored there so
 * the caller can take its lock.
 */
static struct swap_cgroup *swap_cgroup_locate(swp_entry_t ent,
					struct swap_cgroup_ctrl **ctrlp)
{
	unsigned long offset = swp_offset(ent);
	struct swap_cgroup_ctrl *ctrl = &swap_cgroup_ctrl[swp_type(ent)];
	struct swap_cgroup *sc;

	sc = page_address(ctrl->map[offset / SC_PER_PAGE]);
	if (ctrlp)
		*ctrlp = ctrl;
	return sc + (offset & SC_POS_MASK);
}

/**
 * swap_cgroup_cmpxchg - cmpxchg mem_cgroup's id for this swp_entry.
 * @ent: swap entry to be cmpxchged
 * @old: old id
 * @new: new id
 *
 * Returns old id at success, 0 at failure.
 * (There is no mem_cgroup using 0 as its id)
 */
unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
					unsigned short old, unsigned short new)
{
	struct swap_cgroup_ctrl *ctrl;
	struct swap_cgroup *sc;
	unsigned long flags;
	unsigned short retval;

	sc = swap_cgroup_locate(ent, &ctrl);

	spin_lock_irqsave(&ctrl->lock, flags);
	retval = sc->id;
	if (retval == old)
		sc->id = new;
	else
		retval = 0;
	spin_unlock_irqrestore(&ctrl->lock, flags);
	return retval;
}

/**
 * swap_cgroup_record - record mem_cgroup for this swp_entry.
 * @ent: swap entry to be recorded into
 * @id: id of the mem_cgroup to be recorded
 *
 * Returns old value at success, 0 at failure.
 * (Of course, old value can be 0.)
 */
unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
{
	struct swap_cgroup_ctrl *ctrl;
	struct swap_cgroup *sc;
	unsigned short old;
	unsigned long flags;

	sc = swap_cgroup_locate(ent, &ctrl);

	spin_lock_irqsave(&ctrl->lock, flags);
	old = sc->id;
	sc->id = id;
	spin_unlock_irqrestore(&ctrl->lock, flags);

	return old;
}

/**
 * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry
 * @ent: swap entry to be looked up.
 *
 * Returns CSS ID of mem_cgroup at success. 0 at failure. (0 is invalid ID)
 */
unsigned short lookup_swap_cgroup(swp_entry_t ent)
{
	/* Plain read of the id; ctrl->lock is not taken here. */
	return swap_cgroup_locate(ent, NULL)->id;
}

/*
 * Set up the swap_cgroup map for swap type @type, sized for @max_pages
 * swap slots.
 *
 * Returns 0 on success or when swap accounting is off, -ENOMEM when either
 * the map array or one of its pages cannot be allocated.
 */
int swap_cgroup_swapon(int type, unsigned long max_pages)
{
	void *array;
	unsigned long array_size;
	unsigned long length;
	struct swap_cgroup_ctrl *ctrl;

	if (!do_swap_account)
		return 0;

	length = ((max_pages/SC_PER_PAGE) + 1);
	array_size = length * sizeof(void *);

	array = vmalloc(array_size);
	if (!array)
		goto nomem;

	memset(array, 0, array_size);
	ctrl = &swap_cgroup_ctrl[type];
	mutex_lock(&swap_cgroup_mutex);
	ctrl->length = length;
	ctrl->map = array;
	spin_lock_init(&ctrl->lock);
	if (swap_cgroup_prepare(type)) {
		/* memory shortage */
		ctrl->map = NULL;
		ctrl->length = 0;
		vfree(array);
		mutex_unlock(&swap_cgroup_mutex);
		goto nomem;
	}
	mutex_unlock(&swap_cgroup_mutex);

	return 0;
nomem:
	printk(KERN_INFO "couldn't allocate enough memory for swap_cgroup.\n");
	printk(KERN_INFO
		"swap_cgroup can be disabled by noswapaccount boot option\n");
	return -ENOMEM;
}

/*
 * Tear down the swap_cgroup map of swap type @type: free every page in the
 * map, free the map array and reset the controller.  Does nothing when
 * swap accounting is off or the type has no map.
 */
void swap_cgroup_swapoff(int type)
{
	unsigned long i;	/* same type as ctrl->length */
	struct swap_cgroup_ctrl *ctrl;

	if (!do_swap_account)
		return;

	mutex_lock(&swap_cgroup_mutex);
	ctrl = &swap_cgroup_ctrl[type];
	if (ctrl->map) {
		for (i = 0; i < ctrl->length; i++) {
			struct page *page = ctrl->map[i];
			if (page)
				__free_page(page);
		}
		vfree(ctrl->map);
		ctrl->map = NULL;
		ctrl->length = 0;
	}
	mutex_unlock(&swap_cgroup_mutex);
}

#endif