Blame view
mm/memcontrol.c
27.5 KB
8cdea7c05 Memory controller... |
1 2 3 4 5 |
/* memcontrol.c - Memory Controller * * Copyright IBM Corporation, 2007 * Author Balbir Singh <balbir@linux.vnet.ibm.com> * |
78fb74669 Memory controller... |
6 7 8 |
* Copyright 2007 OpenVZ SWsoft Inc * Author: Pavel Emelianov <xemul@openvz.org> * |
8cdea7c05 Memory controller... |
9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
* This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. */ #include <linux/res_counter.h> #include <linux/memcontrol.h> #include <linux/cgroup.h> |
78fb74669 Memory controller... |
23 |
#include <linux/mm.h> |
d52aa412d memory cgroup enh... |
24 |
#include <linux/smp.h> |
8a9f3ccd2 Memory controller... |
25 |
#include <linux/page-flags.h> |
66e1707bc Memory controller... |
26 |
#include <linux/backing-dev.h> |
8a9f3ccd2 Memory controller... |
27 28 |
#include <linux/bit_spinlock.h> #include <linux/rcupdate.h> |
b6ac57d50 memcgroup: move m... |
29 |
#include <linux/slab.h> |
66e1707bc Memory controller... |
30 31 32 |
#include <linux/swap.h> #include <linux/spinlock.h> #include <linux/fs.h> |
d2ceb9b7d memory cgroup enh... |
33 |
#include <linux/seq_file.h> |
333279487 memcgroup: use vm... |
34 |
#include <linux/vmalloc.h> |
8cdea7c05 Memory controller... |
35 |
|
8697d3319 Memory controller... |
36 |
#include <asm/uaccess.h> |
8cdea7c05 Memory controller... |
37 |
struct cgroup_subsys mem_cgroup_subsys; |
66e1707bc Memory controller... |
38 |
static const int MEM_CGROUP_RECLAIM_RETRIES = 5; |
b6ac57d50 memcgroup: move m... |
39 |
static struct kmem_cache *page_cgroup_cache; |
8cdea7c05 Memory controller... |
40 41 |
/*
 * Statistics for memory cgroup.
 */
enum mem_cgroup_stat_index {
	/*
	 * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss.
	 */
	MEM_CGROUP_STAT_CACHE,	   /* # of pages charged as cache */
	MEM_CGROUP_STAT_RSS,	   /* # of pages charged as rss */
	MEM_CGROUP_STAT_PGPGIN_COUNT,	/* # of pages paged in */
	MEM_CGROUP_STAT_PGPGOUT_COUNT,	/* # of pages paged out */

	MEM_CGROUP_STAT_NSTATS,
};

/* Per-CPU slot of counters; cacheline aligned to avoid false sharing. */
struct mem_cgroup_stat_cpu {
	s64 count[MEM_CGROUP_STAT_NSTATS];
} ____cacheline_aligned_in_smp;

/* One counter slot per possible CPU; readers sum across all of them. */
struct mem_cgroup_stat {
	struct mem_cgroup_stat_cpu cpustat[NR_CPUS];
};

/*
 * For accounting under irq disable, no need for increment preempt count.
 * Adds @val to counter @idx on the current CPU's slot only; callers must
 * already have preemption excluded (e.g. irqs disabled).
 */
static void __mem_cgroup_stat_add_safe(struct mem_cgroup_stat *stat,
		enum mem_cgroup_stat_index idx, int val)
{
	int cpu = smp_processor_id();
	stat->cpustat[cpu].count[idx] += val;
}

/*
 * Fold counter @idx over every possible CPU.  The sum is approximate
 * while writers are running, since no lock is taken against updates.
 */
static s64 mem_cgroup_read_stat(struct mem_cgroup_stat *stat,
		enum mem_cgroup_stat_index idx)
{
	int cpu;
	s64 ret = 0;
	for_each_possible_cpu(cpu)
		ret += stat->cpustat[cpu].count[idx];
	return ret;
}
/*
 * per-zone information in memory controller.
 */
enum mem_cgroup_zstat_index {
	MEM_CGROUP_ZSTAT_ACTIVE,	/* # of pages on the active list */
	MEM_CGROUP_ZSTAT_INACTIVE,	/* # of pages on the inactive list */

	NR_MEM_CGROUP_ZSTAT,
};

struct mem_cgroup_per_zone {
	/*
	 * spin_lock to protect the per cgroup LRU
	 */
	spinlock_t		lru_lock;
	struct list_head	active_list;	/* active page_cgroups */
	struct list_head	inactive_list;	/* inactive page_cgroups */
	unsigned long count[NR_MEM_CGROUP_ZSTAT];
};
/* Macro for accessing counter */
#define MEM_CGROUP_ZSTAT(mz, idx)	((mz)->count[(idx)])

/* One per-zone structure for every zone of a node. */
struct mem_cgroup_per_node {
	struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES];
};

/* Per-node pointers; entries presumably allocated lazily elsewhere. */
struct mem_cgroup_lru_info {
	struct mem_cgroup_per_node *nodeinfo[MAX_NUMNODES];
};
/*
 * The memory controller data structure. The memory controller controls both
 * page cache and RSS per cgroup. We would eventually like to provide
 * statistics based on the statistics developed by Rik Van Riel for clock-pro,
 * to help the administrator determine what knobs to tune.
 *
 * TODO: Add a water mark for the memory controller. Reclaim will begin when
 * we hit the water mark. May be even add a low water mark, such that
 * no reclaim occurs from a cgroup at it's low water mark, this is
 * a feature that will be implemented much later in the future.
 */
struct mem_cgroup {
	struct cgroup_subsys_state css;	/* embedded cgroup state; must allow
					   container_of() back to mem_cgroup */
	/*
	 * the counter to account for memory usage
	 */
	struct res_counter res;
	/*
	 * Per cgroup active and inactive list, similar to the
	 * per zone LRU lists.
	 */
	struct mem_cgroup_lru_info info;

	int	prev_priority;	/* for recording reclaim priority */
	/*
	 * statistics.
	 */
	struct mem_cgroup_stat stat;
};

/* The root group, used before any other group is created. */
static struct mem_cgroup init_mem_cgroup;
8cdea7c05 Memory controller... |
145 146 |
/*
 * We use the lower bit of the page->page_cgroup pointer as a bit spin
 * lock. We need to ensure that page->page_cgroup is at least two
 * byte aligned (based on comments from Nick Piggin). But since
 * bit_spin_lock doesn't actually set that lock bit in a non-debug
 * uniprocessor kernel, we should avoid setting it here too.
 */
#define PAGE_CGROUP_LOCK_BIT	0x0
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
/* Mask used to strip/assert the lock bit stored inside the pointer word. */
#define PAGE_CGROUP_LOCK	(1 << PAGE_CGROUP_LOCK_BIT)
#else
/* UP without spinlock debugging: the lock bit is never actually set. */
#define PAGE_CGROUP_LOCK	0x0
#endif
8a9f3ccd2 Memory controller... |
159 160 |
/*
 * A page_cgroup page is associated with every page descriptor. The
 * page_cgroup helps us identify information about the cgroup
 */
struct page_cgroup {
	struct list_head lru;		/* per cgroup LRU list */
	struct page *page;		/* the page this accounting tracks */
	struct mem_cgroup *mem_cgroup;	/* owning cgroup */
	int ref_cnt;			/* cached, mapped, migrating */
	int flags;			/* PAGE_CGROUP_FLAG_* bits below */
};
#define PAGE_CGROUP_FLAG_CACHE	(0x1)	/* charged as cache */
#define PAGE_CGROUP_FLAG_ACTIVE (0x2)	/* page is active in this cgroup */
8cdea7c05 Memory controller... |
173 |
|
d5b69e38f memcg: memcontrol... |
174 |
static int page_cgroup_nid(struct page_cgroup *pc) |
c0149530d per-zone and recl... |
175 176 177 |
{ return page_to_nid(pc->page); } |
d5b69e38f memcg: memcontrol... |
178 |
static enum zone_type page_cgroup_zid(struct page_cgroup *pc) |
c0149530d per-zone and recl... |
179 180 181 |
{ return page_zonenum(pc->page); } |
217bc3194 memory cgroup enh... |
182 183 184 185 |
/* How a page is being charged: as page cache or as a mapped (rss) page. */
enum charge_type {
	MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
	MEM_CGROUP_CHARGE_TYPE_MAPPED,
};
d52aa412d memory cgroup enh... |
186 187 188 189 190 191 192 193 |
/*
 * Always modified under lru lock. Then, not necessary to preempt_disable()
 *
 * Account one page's worth of charge (+1) or uncharge (-1) against the
 * cache or rss counter of @mem depending on @flags, and bump the
 * pgpgin/pgpgout event counters.  The VM_BUG_ON enforces that irqs are
 * off, which is what makes the raw per-cpu update safe.
 */
static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, int flags,
					bool charge)
{
	int val = (charge)? 1 : -1;
	struct mem_cgroup_stat *stat = &mem->stat;

	VM_BUG_ON(!irqs_disabled());
	if (flags & PAGE_CGROUP_FLAG_CACHE)
		__mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_CACHE, val);
	else
		__mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_RSS, val);

	if (charge)
		__mem_cgroup_stat_add_safe(stat,
				MEM_CGROUP_STAT_PGPGIN_COUNT, 1);
	else
		__mem_cgroup_stat_add_safe(stat,
				MEM_CGROUP_STAT_PGPGOUT_COUNT, 1);
}
d5b69e38f memcg: memcontrol... |
208 |
/*
 * Look up the per-(node, zone) LRU bookkeeping of @mem.
 * NOTE(review): assumes info.nodeinfo[nid] is already allocated for
 * every node this is called with -- confirm against the init path.
 */
static struct mem_cgroup_per_zone *
mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid)
{
	return &mem->info.nodeinfo[nid]->zoneinfo[zid];
}
d5b69e38f memcg: memcontrol... |
213 |
/* Per-zone info of the cgroup for the page that @pc tracks. */
static struct mem_cgroup_per_zone *
page_cgroup_zoneinfo(struct page_cgroup *pc)
{
	struct mem_cgroup *mem = pc->mem_cgroup;
	int nid = page_cgroup_nid(pc);
	int zid = page_cgroup_zid(pc);

	return mem_cgroup_zoneinfo(mem, nid, zid);
}

/*
 * Sum zone statistic @idx over every zone of every online node.
 * NOTE(review): only online nodes are visited; pages accounted on a
 * node that has since gone offline would be missed -- verify intent.
 */
static unsigned long mem_cgroup_get_all_zonestat(struct mem_cgroup *mem,
					enum mem_cgroup_zstat_index idx)
{
	int nid, zid;
	struct mem_cgroup_per_zone *mz;
	u64 total = 0;

	for_each_online_node(nid)
		for (zid = 0; zid < MAX_NR_ZONES; zid++) {
			mz = mem_cgroup_zoneinfo(mem, nid, zid);
			total += MEM_CGROUP_ZSTAT(mz, idx);
		}
	return total;
}
d5b69e38f memcg: memcontrol... |
237 |
/* Map a cgroup core object to its embedding mem_cgroup. */
static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
{
	return container_of(cgroup_subsys_state(cont, mem_cgroup_subsys_id),
				struct mem_cgroup, css);
}
cf475ad28 cgroups: add an o... |
243 |
/* Map a task to the mem_cgroup it is attached to. */
struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
{
	return container_of(task_subsys_state(p, mem_cgroup_subsys_id),
				struct mem_cgroup, css);
}
8a9f3ccd2 Memory controller... |
248 249 |
/* Non-zero when the page->page_cgroup bit-spinlock is currently held. */
static inline int page_cgroup_locked(struct page *page)
{
	return bit_spin_is_locked(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
}
9442ec9df memcg: bad page i... |
252 |
/*
 * Install @pc as the page_cgroup of @page.  Caller must hold the
 * page_cgroup bit-spinlock; the lock bit is OR-ed back into the stored
 * word because the plain assignment would otherwise clobber it.
 */
static void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc)
{
	VM_BUG_ON(!page_cgroup_locked(page));
	page->page_cgroup = ((unsigned long)pc | PAGE_CGROUP_LOCK);
}

/* Fetch the page_cgroup pointer, masking off the embedded lock bit. */
struct page_cgroup *page_get_page_cgroup(struct page *page)
{
	return (struct page_cgroup *) (page->page_cgroup & ~PAGE_CGROUP_LOCK);
}
d5b69e38f memcg: memcontrol... |
262 |
/* Take the per-page bit-spinlock guarding page->page_cgroup. */
static void lock_page_cgroup(struct page *page)
{
	bit_spin_lock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
}
2680eed72 memcg: fix mem_cg... |
266 267 268 269 |
/* Try-lock variant; returns non-zero on success.  Used where the caller
 * already holds a zone lru_lock and must not spin (see
 * mem_cgroup_move_lists). */
static int try_lock_page_cgroup(struct page *page)
{
	return bit_spin_trylock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
}
d5b69e38f memcg: memcontrol... |
270 |
/* Release the per-page bit-spinlock guarding page->page_cgroup. */
static void unlock_page_cgroup(struct page *page)
{
	bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
}
3eae90c3c memcg: remove red... |
274 275 |
/*
 * Unlink @pc from its LRU list and decrement the matching zone counter
 * and cgroup statistics.  Caller holds mz->lru_lock with irqs disabled
 * (mem_cgroup_charge_statistics asserts irqs are off).
 */
static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
			struct page_cgroup *pc)
{
	int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;

	if (from)
		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) -= 1;
	else
		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1;

	mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, false);
	list_del_init(&pc->lru);
}
3eae90c3c memcg: remove red... |
287 288 |
/*
 * Link @pc onto the active or inactive list of @mz according to its
 * PAGE_CGROUP_FLAG_ACTIVE bit, and bump the matching zone counter and
 * cgroup statistics.  Caller holds mz->lru_lock with irqs disabled.
 */
static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
				struct page_cgroup *pc)
{
	int to = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;

	if (!to) {
		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) += 1;
		list_add(&pc->lru, &mz->inactive_list);
	} else {
		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) += 1;
		list_add(&pc->lru, &mz->active_list);
	}
	mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, true);
}
8697d3319 Memory controller... |
301 |
/*
 * Move @pc between the active and inactive lists of its zone, keeping
 * the PAGE_CGROUP_FLAG_ACTIVE bit and the two zone counters in sync.
 * Caller holds mz->lru_lock with irqs disabled.
 */
static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
{
	int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
	struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc);

	if (from)
		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) -= 1;
	else
		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1;

	if (active) {
		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) += 1;
		pc->flags |= PAGE_CGROUP_FLAG_ACTIVE;
		list_move(&pc->lru, &mz->active_list);
	} else {
		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) += 1;
		pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE;
		list_move(&pc->lru, &mz->inactive_list);
	}
}
4c4a22148 memcontrol: move ... |
320 321 322 323 324 |
/*
 * Non-zero when @task's mm belongs to @mem.  task_lock() pins task->mm
 * for the duration of the check; a task without an mm (kernel thread,
 * or exiting) is never considered a member.
 */
int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
{
	int ret;

	task_lock(task);
	ret = task->mm && mm_match_cgroup(task->mm, mem);
	task_unlock(task);
	return ret;
}
66e1707bc Memory controller... |
329 330 331 |
/*
 * This routine assumes that the appropriate zone's lru lock is already held
 *
 * Move @page between the cgroup's active/inactive lists.  Best effort:
 * if the page_cgroup lock cannot be taken the move is simply skipped.
 */
void mem_cgroup_move_lists(struct page *page, bool active)
{
	struct page_cgroup *pc;
	struct mem_cgroup_per_zone *mz;
	unsigned long flags;

	/*
	 * We cannot lock_page_cgroup while holding zone's lru_lock,
	 * because other holders of lock_page_cgroup can be interrupted
	 * with an attempt to rotate_reclaimable_page.  But we cannot
	 * safely get to page_cgroup without it, so just try_lock it:
	 * mem_cgroup_isolate_pages allows for page left on wrong list.
	 */
	if (!try_lock_page_cgroup(page))
		return;

	pc = page_get_page_cgroup(page);
	if (pc) {
		mz = page_cgroup_zoneinfo(pc);
		spin_lock_irqsave(&mz->lru_lock, flags);
		__mem_cgroup_move_lists(pc, active);
		spin_unlock_irqrestore(&mz->lru_lock, flags);
	}
	unlock_page_cgroup(page);
}
58ae83db2 per-zone and recl... |
355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 |
/*
 * Calculate mapped_ratio under memory controller. This will be used in
 * vmscan.c for determining we have to reclaim mapped pages.
 *
 * Returns rss as a percentage of total usage, in the range 0..100
 * (the "+ 1L" keeps the divisor non-zero for an empty group).
 */
int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem)
{
	long total, rss;

	/*
	 * usage is recorded in bytes. But, here, we assume the number of
	 * physical pages can be represented by "long" on any arch.
	 */
	total = (long) (mem->res.usage >> PAGE_SHIFT) + 1L;
	rss = (long)mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS);
	return (int)((rss * 100L) / total);
}
8869b8f6e memcg: memcontrol... |
371 |
|
5932f3671 per-zone and recl... |
372 373 374 375 376 377 378 379 380 381 382 383 384 385 |
/*
 * This function is called from vmscan.c. In page reclaiming loop. balance
 * between active and inactive list is calculated. For memory controller
 * page reclaiming, we should use the mem_cgroup's imbalance rather than
 * the zone's global lru imbalance.
 *
 * Returns active/(inactive+1); the +1 avoids division by zero.
 */
long mem_cgroup_reclaim_imbalance(struct mem_cgroup *mem)
{
	unsigned long active, inactive;
	/* active and inactive are the number of pages. 'long' is ok.*/
	active = mem_cgroup_get_all_zonestat(mem, MEM_CGROUP_ZSTAT_ACTIVE);
	inactive = mem_cgroup_get_all_zonestat(mem, MEM_CGROUP_ZSTAT_INACTIVE);
	return (long) (active / (inactive + 1));
}
58ae83db2 per-zone and recl... |
386 |
|
6c48a1d04 per-zone and recl... |
387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 |
/*
 * prev_priority control...this will be used in memory reclaim path.
 *
 * NOTE(review): prev_priority is read and written without locking here;
 * presumably racy updates are tolerated as in the zone equivalent --
 * confirm with the vmscan.c callers.
 */
int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem)
{
	return mem->prev_priority;
}

/* Lower the recorded priority, never raise it. */
void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem, int priority)
{
	if (priority < mem->prev_priority)
		mem->prev_priority = priority;
}

/* Unconditionally record the priority reclaim finished with. */
void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, int priority)
{
	mem->prev_priority = priority;
}
cc38108e1 per-zone and recl... |
405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 |
/*
 * Calculate # of pages to be scanned in this priority/zone.
 * See also vmscan.c
 *
 * priority starts from "DEF_PRIORITY" and decremented in each loop.
 * (see include/linux/mmzone.h)
 */
long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem,
				struct zone *zone, int priority)
{
	long nr_active;
	int nid = zone->zone_pgdat->node_id;
	int zid = zone_idx(zone);
	struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(mem, nid, zid);

	/* scan a 1/2^priority fraction of the active pages in this zone */
	nr_active = MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE);
	return (nr_active >> priority);
}

long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem,
					struct zone *zone, int priority)
{
	long nr_inactive;
	int nid = zone->zone_pgdat->node_id;
	int zid = zone_idx(zone);
	struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(mem, nid, zid);

	/* same fraction, taken from the inactive side */
	nr_inactive = MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE);
	return (nr_inactive >> priority);
}
66e1707bc Memory controller... |
436 437 438 439 440 441 442 443 444 445 446 447 |
/*
 * Scan up to @nr_to_scan page_cgroups from the tail of this cgroup's
 * active or inactive list for zone @z, isolating the underlying pages
 * onto @dst with __isolate_lru_page().  Pages found on the wrong list
 * (see mem_cgroup_move_lists) are rotated to the right one and skipped.
 * Scanned entries are rotated back to the head of the source list so
 * the next scan resumes with colder entries.
 *
 * Returns the number of pages isolated; *scanned is set to the number
 * of entries examined.
 */
unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
					struct list_head *dst,
					unsigned long *scanned, int order,
					int mode, struct zone *z,
					struct mem_cgroup *mem_cont,
					int active)
{
	unsigned long nr_taken = 0;
	struct page *page;
	unsigned long scan;
	LIST_HEAD(pc_list);
	struct list_head *src;
	struct page_cgroup *pc, *tmp;
	int nid = z->zone_pgdat->node_id;
	int zid = zone_idx(z);
	struct mem_cgroup_per_zone *mz;

	BUG_ON(!mem_cont);
	mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
	if (active)
		src = &mz->active_list;
	else
		src = &mz->inactive_list;

	spin_lock(&mz->lru_lock);
	scan = 0;
	/* walk from the tail: oldest entries first */
	list_for_each_entry_safe_reverse(pc, tmp, src, lru) {
		if (scan >= nr_to_scan)
			break;
		page = pc->page;

		/* page already isolated by someone else; leave it alone */
		if (unlikely(!PageLRU(page)))
			continue;

		/* fix up pages that sit on the wrong cgroup list */
		if (PageActive(page) && !active) {
			__mem_cgroup_move_lists(pc, true);
			continue;
		}
		if (!PageActive(page) && active) {
			__mem_cgroup_move_lists(pc, false);
			continue;
		}

		scan++;
		list_move(&pc->lru, &pc_list);

		if (__isolate_lru_page(page, mode) == 0) {
			list_move(&page->lru, dst);
			nr_taken++;
		}
	}

	/* put the scanned entries back at the head of the source list */
	list_splice(&pc_list, src);
	spin_unlock(&mz->lru_lock);

	*scanned = scan;
	return nr_taken;
}
8a9f3ccd2 Memory controller... |
493 494 495 496 497 498 |
/*
 * Charge the memory controller for page usage.
 * Return
 * 0 if the charge was successful
 * < 0 if the cgroup is over its limit
 *
 * Flow: if @page already has a page_cgroup, just take another reference.
 * Otherwise allocate one, charge the owning cgroup's res_counter
 * (reclaiming and retrying up to MEM_CGROUP_RECLAIM_RETRIES times when
 * over limit), then install it under lock_page_cgroup -- racing with a
 * concurrent charge is resolved by backing everything out and retrying.
 */
static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
				gfp_t gfp_mask, enum charge_type ctype)
{
	struct mem_cgroup *mem;
	struct page_cgroup *pc;
	unsigned long flags;
	unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
	struct mem_cgroup_per_zone *mz;

	if (mem_cgroup_subsys.disabled)
		return 0;

	/*
	 * Should page_cgroup's go to their own slab?
	 * One could optimize the performance of the charging routine
	 * by saving a bit in the page_flags and using it as a lock
	 * to see if the cgroup page already has a page_cgroup associated
	 * with it
	 */
retry:
	lock_page_cgroup(page);
	pc = page_get_page_cgroup(page);
	/*
	 * The page_cgroup exists and
	 * the page has already been accounted.
	 */
	if (pc) {
		VM_BUG_ON(pc->page != page);
		VM_BUG_ON(pc->ref_cnt <= 0);

		pc->ref_cnt++;
		unlock_page_cgroup(page);
		goto done;
	}
	unlock_page_cgroup(page);

	pc = kmem_cache_zalloc(page_cgroup_cache, gfp_mask);
	if (pc == NULL)
		goto err;

	/*
	 * We always charge the cgroup the mm_struct belongs to.
	 * The mm_struct's mem_cgroup changes on task migration if the
	 * thread group leader migrates. It's possible that mm is not
	 * set, if so charge the init_mm (happens for pagecache usage).
	 */
	if (!mm)
		mm = &init_mm;

	rcu_read_lock();
	mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
	/*
	 * For every charge from the cgroup, increment reference count
	 */
	css_get(&mem->css);
	rcu_read_unlock();

	while (res_counter_charge(&mem->res, PAGE_SIZE)) {
		/* atomic context: cannot reclaim, give up immediately */
		if (!(gfp_mask & __GFP_WAIT))
			goto out;

		if (try_to_free_mem_cgroup_pages(mem, gfp_mask))
			continue;

		/*
		 * try_to_free_mem_cgroup_pages() might not give us a full
		 * picture of reclaim. Some pages are reclaimed and might be
		 * moved to swap cache or just unmapped from the cgroup.
		 * Check the limit again to see if the reclaim reduced the
		 * current usage of the cgroup before giving up
		 */
		if (res_counter_check_under_limit(&mem->res))
			continue;

		if (!nr_retries--) {
			mem_cgroup_out_of_memory(mem, gfp_mask);
			goto out;
		}
	}

	pc->ref_cnt = 1;
	pc->mem_cgroup = mem;
	pc->page = page;
	pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
	/* NOTE(review): plain assignment, so cache charges start without
	 * the ACTIVE bit -- looks intentional but worth confirming. */
	if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
		pc->flags = PAGE_CGROUP_FLAG_CACHE;

	lock_page_cgroup(page);
	if (page_get_page_cgroup(page)) {
		unlock_page_cgroup(page);
		/*
		 * Another charge has been added to this page already.
		 * We take lock_page_cgroup(page) again and read
		 * page->cgroup, increment refcnt.... just retry is OK.
		 */
		res_counter_uncharge(&mem->res, PAGE_SIZE);
		css_put(&mem->css);
		kmem_cache_free(page_cgroup_cache, pc);
		goto retry;
	}
	page_assign_page_cgroup(page, pc);

	mz = page_cgroup_zoneinfo(pc);
	spin_lock_irqsave(&mz->lru_lock, flags);
	__mem_cgroup_add_list(mz, pc);
	spin_unlock_irqrestore(&mz->lru_lock, flags);

	unlock_page_cgroup(page);
done:
	return 0;
out:
	/* undo the css reference and free the unused page_cgroup */
	css_put(&mem->css);
	kmem_cache_free(page_cgroup_cache, pc);
err:
	return -ENOMEM;
}
8869b8f6e memcg: memcontrol... |
610 |
/* Charge @page to @mm's cgroup as a mapped (rss) page. */
int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
{
	return mem_cgroup_charge_common(page, mm, gfp_mask,
				MEM_CGROUP_CHARGE_TYPE_MAPPED);
}
e1a1cd590 Memory controller... |
615 616 |
/*
 * Charge @page as page cache.  A NULL @mm (e.g. charges issued from
 * kernel context) is accounted to init_mm's cgroup.
 */
int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
				gfp_t gfp_mask)
{
	if (!mm)
		mm = &init_mm;
	return mem_cgroup_charge_common(page, mm, gfp_mask,
				MEM_CGROUP_CHARGE_TYPE_CACHE);
}
/*
 * Uncharging is always a welcome operation, we never complain, simply
 * uncharge.
 *
 * Drops one reference on the page's page_cgroup; when the count hits
 * zero the entry is removed from its LRU list, detached from the page,
 * the res_counter and css references are released, and the page_cgroup
 * is freed.
 */
void mem_cgroup_uncharge_page(struct page *page)
{
	struct page_cgroup *pc;
	struct mem_cgroup *mem;
	struct mem_cgroup_per_zone *mz;
	unsigned long flags;

	if (mem_cgroup_subsys.disabled)
		return;

	/*
	 * Check if our page_cgroup is valid
	 */
	lock_page_cgroup(page);
	pc = page_get_page_cgroup(page);
	if (!pc)
		goto unlock;

	VM_BUG_ON(pc->page != page);
	VM_BUG_ON(pc->ref_cnt <= 0);

	if (--(pc->ref_cnt) == 0) {
		mz = page_cgroup_zoneinfo(pc);
		spin_lock_irqsave(&mz->lru_lock, flags);
		__mem_cgroup_remove_list(mz, pc);
		spin_unlock_irqrestore(&mz->lru_lock, flags);

		/* detach before dropping the page_cgroup lock so no new
		 * user can find pc through the page */
		page_assign_page_cgroup(page, NULL);
		unlock_page_cgroup(page);

		mem = pc->mem_cgroup;
		res_counter_uncharge(&mem->res, PAGE_SIZE);
		css_put(&mem->css);

		kmem_cache_free(page_cgroup_cache, pc);
		return;
	}

unlock:
	unlock_page_cgroup(page);
}
ae41be374 bugfix for memory... |
665 666 667 668 |
/*
 * Returns non-zero if a page (under migration) has valid page_cgroup member.
 * Refcnt of page_cgroup is incremented.
 *
 * The extra reference keeps the page_cgroup alive across migration;
 * mem_cgroup_end_migration() drops it again.
 */
int mem_cgroup_prepare_migration(struct page *page)
{
	struct page_cgroup *pc;

	if (mem_cgroup_subsys.disabled)
		return 0;

	lock_page_cgroup(page);
	pc = page_get_page_cgroup(page);
	if (pc)
		pc->ref_cnt++;
	unlock_page_cgroup(page);
	return pc != NULL;
}

/* Drop the reference taken by mem_cgroup_prepare_migration(). */
void mem_cgroup_end_migration(struct page *page)
{
	mem_cgroup_uncharge_page(page);
}
8869b8f6e memcg: memcontrol... |
687 |
|
ae41be374 bugfix for memory... |
688 |
/*
 * We know both *page* and *newpage* are now not-on-LRU and PG_locked.
 * And no race with uncharge() routines because page_cgroup for *page*
 * has extra one reference by mem_cgroup_prepare_migration.
 *
 * Transfers the page_cgroup from @page to @newpage: the entry is pulled
 * off the old page's zone LRU, re-pointed at @newpage, and added to the
 * new page's zone LRU (which may be a different node/zone).
 */
void mem_cgroup_page_migration(struct page *page, struct page *newpage)
{
	struct page_cgroup *pc;
	struct mem_cgroup_per_zone *mz;
	unsigned long flags;

	lock_page_cgroup(page);
	pc = page_get_page_cgroup(page);
	if (!pc) {
		unlock_page_cgroup(page);
		return;
	}

	mz = page_cgroup_zoneinfo(pc);
	spin_lock_irqsave(&mz->lru_lock, flags);
	__mem_cgroup_remove_list(mz, pc);
	spin_unlock_irqrestore(&mz->lru_lock, flags);

	page_assign_page_cgroup(page, NULL);
	unlock_page_cgroup(page);

	pc->page = newpage;
	lock_page_cgroup(newpage);
	page_assign_page_cgroup(newpage, pc);

	/* recompute: newpage may live in a different zone than page */
	mz = page_cgroup_zoneinfo(pc);
	spin_lock_irqsave(&mz->lru_lock, flags);
	__mem_cgroup_add_list(mz, pc);
	spin_unlock_irqrestore(&mz->lru_lock, flags);

	unlock_page_cgroup(newpage);
}
78fb74669 Memory controller... |
723 |
|
cc8475822 memory cgroup enh... |
724 725 726 727 728 729 |
/*
 * This routine traverse page_cgroup in given list and drop them all.
 * This routine ignores page_cgroup->ref_cnt.
 * *And* this routine doesn't reclaim page itself, just removes page_cgroup.
 */
#define FORCE_UNCHARGE_BATCH	(128)
static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
			    struct mem_cgroup_per_zone *mz,
			    int active)
{
	struct page_cgroup *pc;
	struct page *page;
	int count = FORCE_UNCHARGE_BATCH;
	unsigned long flags;
	struct list_head *list;

	/* @active selects which of the two per-zone LRU lists to drain */
	if (active)
		list = &mz->active_list;
	else
		list = &mz->inactive_list;

	spin_lock_irqsave(&mz->lru_lock, flags);
	while (!list_empty(list)) {
		/* always take from the tail; uncharge removes it from the list */
		pc = list_entry(list->prev, struct page_cgroup, lru);
		page = pc->page;
		get_page(page);
		/*
		 * Must drop lru_lock: mem_cgroup_uncharge_page() takes the
		 * page_cgroup lock and this same lru_lock itself.
		 */
		spin_unlock_irqrestore(&mz->lru_lock, flags);
		mem_cgroup_uncharge_page(page);
		put_page(page);
		/* yield the CPU every FORCE_UNCHARGE_BATCH pages */
		if (--count <= 0) {
			count = FORCE_UNCHARGE_BATCH;
			cond_resched();
		}
		spin_lock_irqsave(&mz->lru_lock, flags);
	}
	spin_unlock_irqrestore(&mz->lru_lock, flags);
}

/*
 * make mem_cgroup's charge to be 0 if there is no task.
 * This enables deleting this mem_cgroup.
 */
d5b69e38f memcg: memcontrol... |
766 |
/*
 * Drop every charge held by @mem so the cgroup can be removed.
 * Returns 0 on success, -EBUSY if tasks are still attached.
 */
static int mem_cgroup_force_empty(struct mem_cgroup *mem)
{
	int ret = -EBUSY;
	int node, zid;

	if (mem_cgroup_subsys.disabled)
		return 0;

	/* hold a css ref so the mem_cgroup cannot vanish under us */
	css_get(&mem->css);
	/*
	 * page reclaim code (kswapd etc..) will move pages between
	 * active_list <-> inactive_list while we don't take a lock.
	 * So, we have to do loop here until all lists are empty.
	 */
	while (mem->res.usage > 0) {
		/* bail out if tasks (re)attached while we were draining */
		if (atomic_read(&mem->css.cgroup->count) > 0)
			goto out;
		for_each_node_state(node, N_POSSIBLE)
			for (zid = 0; zid < MAX_NR_ZONES; zid++) {
				struct mem_cgroup_per_zone *mz;
				mz = mem_cgroup_zoneinfo(mem, node, zid);
				/* drop all page_cgroup in active_list */
				mem_cgroup_force_empty_list(mem, mz, 1);
				/* drop all page_cgroup in inactive_list */
				mem_cgroup_force_empty_list(mem, mz, 0);
			}
	}
	ret = 0;
out:
	css_put(&mem->css);
	return ret;
}
d5b69e38f memcg: memcontrol... |
797 |
static int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp) |
0eea10301 Memory controller... |
798 799 800 801 802 803 804 805 806 807 808 |
{ *tmp = memparse(buf, &buf); if (*buf != '\0') return -EINVAL; /* * Round up the value to the closest page size */ *tmp = ((*tmp + PAGE_SIZE - 1) >> PAGE_SHIFT) << PAGE_SHIFT; return 0; } |
2c3daa722 CGroup API files:... |
809 |
/*
 * cftype read handler: report the res_counter field selected by
 * cft->private (RES_USAGE, RES_LIMIT, ...) for this cgroup.
 */
static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
{
	return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res,
				    cft->private);
}

/*
 * cftype write handler: update the res_counter field selected by
 * cft->private, parsing the user buffer through
 * mem_cgroup_write_strategy() (memparse + round-up-to-page).
 */
static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
				struct file *file, const char __user *userbuf,
				size_t nbytes, loff_t *ppos)
{
	return res_counter_write(&mem_cgroup_from_cont(cont)->res,
				cft->private, userbuf, nbytes, ppos,
				mem_cgroup_write_strategy);
}
29f2a4dac memcgroup: implem... |
823 |
static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) |
c84872e16 memcgroup: add th... |
824 825 826 827 |
{ struct mem_cgroup *mem; mem = mem_cgroup_from_cont(cont); |
29f2a4dac memcgroup: implem... |
828 829 830 831 832 833 834 835 |
switch (event) { case RES_MAX_USAGE: res_counter_reset_max(&mem->res); break; case RES_FAILCNT: res_counter_reset_failcnt(&mem->res); break; } |
85cc59db1 memcgroup: use tr... |
836 |
return 0; |
c84872e16 memcgroup: add th... |
837 |
} |
85cc59db1 memcgroup: use tr... |
838 |
/*
 * Trigger handler for the "force_empty" control file: drain all
 * charges from this cgroup (see mem_cgroup_force_empty()).
 */
static int mem_force_empty_write(struct cgroup *cont, unsigned int event)
{
	return mem_cgroup_force_empty(mem_cgroup_from_cont(cont));
}
d2ceb9b7d memory cgroup enh... |
842 843 844 845 846 847 |
/*
 * Display table for the "stat" file: maps each MEM_CGROUP_STAT_* index
 * to its user-visible name and the unit its raw count is scaled by
 * (page counts are reported in bytes, event counts as-is).
 */
static const struct mem_cgroup_stat_desc {
	const char *msg;
	u64 unit;
} mem_cgroup_stat_desc[] = {
	[MEM_CGROUP_STAT_CACHE] = { "cache", PAGE_SIZE, },
	[MEM_CGROUP_STAT_RSS] = { "rss", PAGE_SIZE, },
	[MEM_CGROUP_STAT_PGPGIN_COUNT] = {"pgpgin", 1, },
	[MEM_CGROUP_STAT_PGPGOUT_COUNT] = {"pgpgout", 1, },
};
c64745cf0 CGroup API files:... |
851 852 |
/*
 * read_map handler for the "stat" file: emit each per-cpu-summed
 * statistic (scaled per mem_cgroup_stat_desc) followed by the
 * active/inactive page totals, in bytes.
 */
static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
				 struct cgroup_map_cb *cb)
{
	struct mem_cgroup *mem_cont = mem_cgroup_from_cont(cont);
	struct mem_cgroup_stat *stat = &mem_cont->stat;
	int i;

	for (i = 0; i < ARRAY_SIZE(stat->cpustat[0].count); i++) {
		s64 val;

		val = mem_cgroup_read_stat(stat, i);
		/* scale raw counter into its display unit (bytes or events) */
		val *= mem_cgroup_stat_desc[i].unit;
		cb->fill(cb, mem_cgroup_stat_desc[i].msg, val);
	}
	/* showing # of active pages */
	{
		unsigned long active, inactive;

		inactive = mem_cgroup_get_all_zonestat(mem_cont,
						MEM_CGROUP_ZSTAT_INACTIVE);
		active = mem_cgroup_get_all_zonestat(mem_cont,
						MEM_CGROUP_ZSTAT_ACTIVE);
		cb->fill(cb, "active", (active) * PAGE_SIZE);
		cb->fill(cb, "inactive", (inactive) * PAGE_SIZE);
	}
	return 0;
}
8cdea7c05 Memory controller... |
878 879 |
/*
 * Control files exported in every memory cgroup directory.
 * RES_* values in .private select the res_counter field that
 * mem_cgroup_read/mem_cgroup_write/mem_cgroup_reset operate on.
 */
static struct cftype mem_cgroup_files[] = {
	{
		/* current charge, read-only */
		.name = "usage_in_bytes",
		.private = RES_USAGE,
		.read_u64 = mem_cgroup_read,
	},
	{
		/* high-water mark; writing triggers a reset */
		.name = "max_usage_in_bytes",
		.private = RES_MAX_USAGE,
		.trigger = mem_cgroup_reset,
		.read_u64 = mem_cgroup_read,
	},
	{
		/* hard limit, read/write */
		.name = "limit_in_bytes",
		.private = RES_LIMIT,
		.write = mem_cgroup_write,
		.read_u64 = mem_cgroup_read,
	},
	{
		/* number of times the limit was hit; writing resets it */
		.name = "failcnt",
		.private = RES_FAILCNT,
		.trigger = mem_cgroup_reset,
		.read_u64 = mem_cgroup_read,
	},
	{
		/* write-only knob: drop all charges from this group */
		.name = "force_empty",
		.trigger = mem_force_empty_write,
	},
	{
		/* key/value statistics dump */
		.name = "stat",
		.read_map = mem_control_stat_show,
	},
};
6d12e2d8d per-zone and recl... |
911 912 913 |
static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) { struct mem_cgroup_per_node *pn; |
1ecaab2bd per-zone and recl... |
914 |
struct mem_cgroup_per_zone *mz; |
41e3355de memcg: fix node_s... |
915 |
int zone, tmp = node; |
1ecaab2bd per-zone and recl... |
916 917 918 919 920 921 922 923 |
/* * This routine is called against possible nodes. * But it's BUG to call kmalloc() against offline node. * * TODO: this routine can waste much memory for nodes which will * never be onlined. It's better to use memory hotplug callback * function. */ |
41e3355de memcg: fix node_s... |
924 925 926 |
if (!node_state(node, N_NORMAL_MEMORY)) tmp = -1; pn = kmalloc_node(sizeof(*pn), GFP_KERNEL, tmp); |
6d12e2d8d per-zone and recl... |
927 928 |
if (!pn) return 1; |
1ecaab2bd per-zone and recl... |
929 |
|
6d12e2d8d per-zone and recl... |
930 931 |
mem->info.nodeinfo[node] = pn; memset(pn, 0, sizeof(*pn)); |
1ecaab2bd per-zone and recl... |
932 933 934 935 936 |
for (zone = 0; zone < MAX_NR_ZONES; zone++) { mz = &pn->zoneinfo[zone]; INIT_LIST_HEAD(&mz->active_list); INIT_LIST_HEAD(&mz->inactive_list); |
072c56c13 per-zone and recl... |
937 |
spin_lock_init(&mz->lru_lock); |
1ecaab2bd per-zone and recl... |
938 |
} |
6d12e2d8d per-zone and recl... |
939 940 |
return 0; } |
1ecaab2bd per-zone and recl... |
941 942 943 944 |
/*
 * Release the per-node bookkeeping allocated by
 * alloc_mem_cgroup_per_zone_info().  kfree(NULL) is a no-op, so this
 * is safe for nodes whose allocation never happened.
 */
static void free_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
{
	kfree(mem->info.nodeinfo[node]);
}
333279487 memcgroup: use vm... |
945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 |
static struct mem_cgroup *mem_cgroup_alloc(void) { struct mem_cgroup *mem; if (sizeof(*mem) < PAGE_SIZE) mem = kmalloc(sizeof(*mem), GFP_KERNEL); else mem = vmalloc(sizeof(*mem)); if (mem) memset(mem, 0, sizeof(*mem)); return mem; } static void mem_cgroup_free(struct mem_cgroup *mem) { if (sizeof(*mem) < PAGE_SIZE) kfree(mem); else vfree(mem); } |
8cdea7c05 Memory controller... |
966 967 968 969 |
/*
 * cgroup_subsys create callback: set up a new mem_cgroup.
 * The root group (cont->parent == NULL) uses the statically allocated
 * init_mem_cgroup and also creates the page_cgroup slab cache; child
 * groups are allocated dynamically.
 * Returns the embedded css on success, ERR_PTR(-ENOMEM) on failure.
 */
static struct cgroup_subsys_state *
mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
{
	struct mem_cgroup *mem;
	int node;

	if (unlikely((cont->parent) == NULL)) {
		/* root group: static struct, one-time slab cache creation */
		mem = &init_mem_cgroup;
		page_cgroup_cache = KMEM_CACHE(page_cgroup, SLAB_PANIC);
	} else {
		mem = mem_cgroup_alloc();
		if (!mem)
			return ERR_PTR(-ENOMEM);
	}

	res_counter_init(&mem->res);

	for_each_node_state(node, N_POSSIBLE)
		if (alloc_mem_cgroup_per_zone_info(mem, node))
			goto free_out;
	return &mem->css;
free_out:
	/* unwind all per-node allocations (kfree(NULL) is safe) */
	for_each_node_state(node, N_POSSIBLE)
		free_mem_cgroup_per_zone_info(mem, node);
	/* never free the static init_mem_cgroup */
	if (cont->parent != NULL)
		mem_cgroup_free(mem);
	return ERR_PTR(-ENOMEM);
}
df878fb04 memory cgroup enh... |
994 995 996 997 998 999 |
/*
 * cgroup_subsys pre_destroy callback: drain all remaining charges so
 * the group can actually be removed (return value is ignored here).
 */
static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss,
					struct cgroup *cont)
{
	struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
	mem_cgroup_force_empty(mem);
}
8cdea7c05 Memory controller... |
1000 1001 1002 |
/*
 * cgroup_subsys destroy callback: free the per-node zone info and then
 * the mem_cgroup itself (mem_cgroup_free is a no-op guard is not
 * needed here: destroy is never called for the static root in a way
 * that frees it — NOTE(review): the root/init_mem_cgroup case is not
 * visibly special-cased here, unlike in mem_cgroup_create; verify
 * against cgroup core behavior).
 */
static void mem_cgroup_destroy(struct cgroup_subsys *ss,
				struct cgroup *cont)
{
	int node;
	struct mem_cgroup *mem = mem_cgroup_from_cont(cont);

	for_each_node_state(node, N_POSSIBLE)
		free_mem_cgroup_per_zone_info(mem, node);

	mem_cgroup_free(mem_cgroup_from_cont(cont));
}

/*
 * cgroup_subsys populate callback: register the memory controller's
 * control files unless the subsystem was disabled on the command line.
 */
static int mem_cgroup_populate(struct cgroup_subsys *ss,
				struct cgroup *cont)
{
	if (mem_cgroup_subsys.disabled)
		return 0;
	return cgroup_add_files(cont, ss, mem_cgroup_files,
					ARRAY_SIZE(mem_cgroup_files));
}
67e465a77 Memory controller... |
1020 1021 1022 1023 1024 1025 1026 |
/*
 * cgroup_subsys attach callback, invoked when task @p moves from
 * @old_cont to @cont.
 *
 * NOTE(review): as visible here, every path falls through to "out" and
 * only mmput() is performed — the mem/old_mem and
 * thread_group_leader() checks gate nothing.  This looks like a
 * vestige of (or placeholder for) charge-moving logic; confirm against
 * the file's history before "simplifying" it away.
 */
static void mem_cgroup_move_task(struct cgroup_subsys *ss,
				struct cgroup *cont,
				struct cgroup *old_cont,
				struct task_struct *p)
{
	struct mm_struct *mm;
	struct mem_cgroup *mem, *old_mem;

	if (mem_cgroup_subsys.disabled)
		return;

	mm = get_task_mm(p);
	if (mm == NULL)
		return;

	mem = mem_cgroup_from_cont(cont);
	old_mem = mem_cgroup_from_cont(old_cont);

	/* moving within the same memory cgroup: nothing to do */
	if (mem == old_mem)
		goto out;

	/*
	 * Only thread group leaders are allowed to migrate, the mm_struct is
	 * in effect owned by the leader
	 */
	if (!thread_group_leader(p))
		goto out;

out:
	mmput(mm);
}
8cdea7c05 Memory controller... |
1048 1049 1050 1051 |
/*
 * Registration record for the memory controller with the cgroup core.
 * early_init = 0: the root group is set up during normal (not early)
 * cgroup initialisation.
 */
struct cgroup_subsys mem_cgroup_subsys = {
	.name = "memory",
	.subsys_id = mem_cgroup_subsys_id,
	.create = mem_cgroup_create,
	.pre_destroy = mem_cgroup_pre_destroy,
	.destroy = mem_cgroup_destroy,
	.populate = mem_cgroup_populate,
	.attach = mem_cgroup_move_task,
	.early_init = 0,
};