Blame view
mm/memcontrol.c
61.1 KB
8cdea7c05 Memory controller... |
1 2 3 4 5 |
/* memcontrol.c - Memory Controller * * Copyright IBM Corporation, 2007 * Author Balbir Singh <balbir@linux.vnet.ibm.com> * |
78fb74669 Memory controller... |
6 7 8 |
* Copyright 2007 OpenVZ SWsoft Inc * Author: Pavel Emelianov <xemul@openvz.org> * |
8cdea7c05 Memory controller... |
9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
* This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. */ #include <linux/res_counter.h> #include <linux/memcontrol.h> #include <linux/cgroup.h> |
78fb74669 Memory controller... |
23 |
#include <linux/mm.h> |
d13d14430 memcg: handle swa... |
24 |
#include <linux/pagemap.h> |
d52aa412d memory cgroup enh... |
25 |
#include <linux/smp.h> |
8a9f3ccd2 Memory controller... |
26 |
#include <linux/page-flags.h> |
66e1707bc Memory controller... |
27 |
#include <linux/backing-dev.h> |
8a9f3ccd2 Memory controller... |
28 29 |
#include <linux/bit_spinlock.h> #include <linux/rcupdate.h> |
e222432bf memcg: show memcg... |
30 |
#include <linux/limits.h> |
8c7c6e34a memcg: mem+swap c... |
31 |
#include <linux/mutex.h> |
b6ac57d50 memcgroup: move m... |
32 |
#include <linux/slab.h> |
66e1707bc Memory controller... |
33 34 35 |
#include <linux/swap.h> #include <linux/spinlock.h> #include <linux/fs.h> |
d2ceb9b7d memory cgroup enh... |
36 |
#include <linux/seq_file.h> |
333279487 memcgroup: use vm... |
37 |
#include <linux/vmalloc.h> |
b69408e88 vmscan: Use an in... |
38 |
#include <linux/mm_inline.h> |
52d4b9ac0 memcg: allocate a... |
39 |
#include <linux/page_cgroup.h> |
08e552c69 memcg: synchroniz... |
40 |
#include "internal.h" |
8cdea7c05 Memory controller... |
41 |
|
8697d3319 Memory controller... |
42 |
#include <asm/uaccess.h> |
a181b0e88 memcg: make globa... |
43 |
struct cgroup_subsys mem_cgroup_subsys __read_mostly; |
a181b0e88 memcg: make globa... |
44 |
#define MEM_CGROUP_RECLAIM_RETRIES 5 |
8cdea7c05 Memory controller... |
45 |
|
c077719be memcg: mem+swap c... |
46 47 48 49 50 51 52 |
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP /* Turned on only when memory cgroup is enabled && really_do_swap_account = 0 */ int do_swap_account __read_mostly; static int really_do_swap_account __initdata = 1; /* for remember boot option*/ #else #define do_swap_account (0) #endif |
7f4d454de memcg: avoid dead... |
53 |
static DEFINE_MUTEX(memcg_tasklist); /* can be hold under cgroup_mutex */ |
c077719be memcg: mem+swap c... |
54 |
|
8cdea7c05 Memory controller... |
55 |
/* |
d52aa412d memory cgroup enh... |
56 57 58 59 60 61 62 63 |
* Statistics for memory cgroup. */ enum mem_cgroup_stat_index { /* * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss. */ MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */ MEM_CGROUP_STAT_RSS, /* # of pages charged as rss */ |
55e462b05 memcg: simple sta... |
64 65 |
MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */ MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */ |
d52aa412d memory cgroup enh... |
66 67 68 69 70 71 72 73 74 |
MEM_CGROUP_STAT_NSTATS, }; struct mem_cgroup_stat_cpu { s64 count[MEM_CGROUP_STAT_NSTATS]; } ____cacheline_aligned_in_smp; struct mem_cgroup_stat { |
c8dad2bb6 memcg: reduce siz... |
75 |
struct mem_cgroup_stat_cpu cpustat[0]; |
d52aa412d memory cgroup enh... |
76 77 78 79 80 |
}; /* * For accounting under irq disable, no need for increment preempt count. */ |
addb9efeb memcg: optimize p... |
81 |
static inline void __mem_cgroup_stat_add_safe(struct mem_cgroup_stat_cpu *stat, |
d52aa412d memory cgroup enh... |
82 83 |
enum mem_cgroup_stat_index idx, int val) { |
addb9efeb memcg: optimize p... |
84 |
stat->count[idx] += val; |
d52aa412d memory cgroup enh... |
85 86 87 88 89 90 91 92 93 94 95 |
} static s64 mem_cgroup_read_stat(struct mem_cgroup_stat *stat, enum mem_cgroup_stat_index idx) { int cpu; s64 ret = 0; for_each_possible_cpu(cpu) ret += stat->cpustat[cpu].count[idx]; return ret; } |
04046e1a0 memcg: use CSS ID |
96 97 98 99 100 101 102 103 |
static s64 mem_cgroup_local_usage(struct mem_cgroup_stat *stat) { s64 ret; ret = mem_cgroup_read_stat(stat, MEM_CGROUP_STAT_CACHE); ret += mem_cgroup_read_stat(stat, MEM_CGROUP_STAT_RSS); return ret; } |
d52aa412d memory cgroup enh... |
104 |
/* |
6d12e2d8d per-zone and recl... |
105 106 |
* per-zone information in memory controller. */ |
6d12e2d8d per-zone and recl... |
107 |
struct mem_cgroup_per_zone { |
072c56c13 per-zone and recl... |
108 109 110 |
/* * spin_lock to protect the per cgroup LRU */ |
b69408e88 vmscan: Use an in... |
111 112 |
struct list_head lists[NR_LRU_LISTS]; unsigned long count[NR_LRU_LISTS]; |
3e2f41f1f memcg: add zone_r... |
113 114 |
struct zone_reclaim_stat reclaim_stat; |
6d12e2d8d per-zone and recl... |
115 116 117 118 119 120 121 122 123 124 125 126 127 |
}; /* Macro for accessing counter */ #define MEM_CGROUP_ZSTAT(mz, idx) ((mz)->count[(idx)]) struct mem_cgroup_per_node { struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES]; }; struct mem_cgroup_lru_info { struct mem_cgroup_per_node *nodeinfo[MAX_NUMNODES]; }; /* |
8cdea7c05 Memory controller... |
128 129 130 131 132 133 |
* The memory controller data structure. The memory controller controls both * page cache and RSS per cgroup. We would eventually like to provide * statistics based on the statistics developed by Rik Van Riel for clock-pro, * to help the administrator determine what knobs to tune. * * TODO: Add a water mark for the memory controller. Reclaim will begin when |
8a9f3ccd2 Memory controller... |
134 135 136 |
* we hit the water mark. May be even add a low water mark, such that * no reclaim occurs from a cgroup at it's low water mark, this is * a feature that will be implemented much later in the future. |
8cdea7c05 Memory controller... |
137 138 139 140 141 142 143 |
*/ struct mem_cgroup { struct cgroup_subsys_state css; /* * the counter to account for memory usage */ struct res_counter res; |
78fb74669 Memory controller... |
144 |
/* |
8c7c6e34a memcg: mem+swap c... |
145 146 147 148 |
* the counter to account for mem+swap usage. */ struct res_counter memsw; /* |
78fb74669 Memory controller... |
149 150 |
* Per cgroup active and inactive list, similar to the * per zone LRU lists. |
78fb74669 Memory controller... |
151 |
*/ |
6d12e2d8d per-zone and recl... |
152 |
struct mem_cgroup_lru_info info; |
072c56c13 per-zone and recl... |
153 |
|
2733c06ac memcg: protect pr... |
154 155 156 157 |
/* protect against reclaim related member. */ spinlock_t reclaim_param_lock; |
6c48a1d04 per-zone and recl... |
158 |
int prev_priority; /* for recording reclaim priority */ |
6d61ef409 memcg: memory cgr... |
159 160 161 |
/* * While reclaiming in a hiearchy, we cache the last child we |
04046e1a0 memcg: use CSS ID |
162 |
* reclaimed from. |
6d61ef409 memcg: memory cgr... |
163 |
*/ |
04046e1a0 memcg: use CSS ID |
164 |
int last_scanned_child; |
18f59ea7d memcg: memory cgr... |
165 166 167 168 |
/* * Should the accounting and control be hierarchical, per subtree? */ bool use_hierarchy; |
a636b327f memcg: avoid unne... |
169 |
unsigned long last_oom_jiffies; |
8c7c6e34a memcg: mem+swap c... |
170 |
atomic_t refcnt; |
14797e236 memcg: add inacti... |
171 |
|
a7885eb8a memcg: swappiness |
172 |
unsigned int swappiness; |
d52aa412d memory cgroup enh... |
173 |
/* |
c8dad2bb6 memcg: reduce siz... |
174 |
* statistics. This must be placed at the end of memcg. |
d52aa412d memory cgroup enh... |
175 176 |
*/ struct mem_cgroup_stat stat; |
8cdea7c05 Memory controller... |
177 |
}; |
217bc3194 memory cgroup enh... |
178 179 180 |
enum charge_type { MEM_CGROUP_CHARGE_TYPE_CACHE = 0, MEM_CGROUP_CHARGE_TYPE_MAPPED, |
4f98a2fee vmscan: split LRU... |
181 |
MEM_CGROUP_CHARGE_TYPE_SHMEM, /* used by page migration of shmem */ |
c05555b57 memcg: atomic ops... |
182 |
MEM_CGROUP_CHARGE_TYPE_FORCE, /* used by force_empty */ |
d13d14430 memcg: handle swa... |
183 |
MEM_CGROUP_CHARGE_TYPE_SWAPOUT, /* for accounting swapcache */ |
c05555b57 memcg: atomic ops... |
184 185 |
NR_CHARGE_TYPE, }; |
52d4b9ac0 memcg: allocate a... |
186 187 188 |
/* only for here (for easy reading.) */ #define PCGF_CACHE (1UL << PCG_CACHE) #define PCGF_USED (1UL << PCG_USED) |
52d4b9ac0 memcg: allocate a... |
189 |
#define PCGF_LOCK (1UL << PCG_LOCK) |
c05555b57 memcg: atomic ops... |
190 191 |
static const unsigned long pcg_default_flags[NR_CHARGE_TYPE] = { |
08e552c69 memcg: synchroniz... |
192 193 194 |
PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* File Cache */ PCGF_USED | PCGF_LOCK, /* Anon */ PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* Shmem */ |
52d4b9ac0 memcg: allocate a... |
195 |
0, /* FORCE */ |
217bc3194 memory cgroup enh... |
196 |
}; |
8c7c6e34a memcg: mem+swap c... |
197 198 199 200 201 202 203 204 205 |
/* for encoding cft->private value on file */ #define _MEM (0) #define _MEMSWAP (1) #define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val)) #define MEMFILE_TYPE(val) (((val) >> 16) & 0xffff) #define MEMFILE_ATTR(val) ((val) & 0xffff) static void mem_cgroup_get(struct mem_cgroup *mem); static void mem_cgroup_put(struct mem_cgroup *mem); |
7bcc1bb12 memcg: get/put pa... |
206 |
static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem); |
8c7c6e34a memcg: mem+swap c... |
207 |
|
c05555b57 memcg: atomic ops... |
208 209 210 |
static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, struct page_cgroup *pc, bool charge) |
d52aa412d memory cgroup enh... |
211 212 213 |
{ int val = (charge)? 1 : -1; struct mem_cgroup_stat *stat = &mem->stat; |
addb9efeb memcg: optimize p... |
214 |
struct mem_cgroup_stat_cpu *cpustat; |
08e552c69 memcg: synchroniz... |
215 |
int cpu = get_cpu(); |
d52aa412d memory cgroup enh... |
216 |
|
08e552c69 memcg: synchroniz... |
217 |
cpustat = &stat->cpustat[cpu]; |
c05555b57 memcg: atomic ops... |
218 |
if (PageCgroupCache(pc)) |
addb9efeb memcg: optimize p... |
219 |
__mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_CACHE, val); |
d52aa412d memory cgroup enh... |
220 |
else |
addb9efeb memcg: optimize p... |
221 |
__mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_RSS, val); |
55e462b05 memcg: simple sta... |
222 223 |
if (charge) |
addb9efeb memcg: optimize p... |
224 |
__mem_cgroup_stat_add_safe(cpustat, |
55e462b05 memcg: simple sta... |
225 226 |
MEM_CGROUP_STAT_PGPGIN_COUNT, 1); else |
addb9efeb memcg: optimize p... |
227 |
__mem_cgroup_stat_add_safe(cpustat, |
55e462b05 memcg: simple sta... |
228 |
MEM_CGROUP_STAT_PGPGOUT_COUNT, 1); |
08e552c69 memcg: synchroniz... |
229 |
put_cpu(); |
6d12e2d8d per-zone and recl... |
230 |
} |
d5b69e38f memcg: memcontrol... |
231 |
static struct mem_cgroup_per_zone * |
6d12e2d8d per-zone and recl... |
232 233 |
mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid) { |
6d12e2d8d per-zone and recl... |
234 235 |
return &mem->info.nodeinfo[nid]->zoneinfo[zid]; } |
d5b69e38f memcg: memcontrol... |
236 |
static struct mem_cgroup_per_zone * |
6d12e2d8d per-zone and recl... |
237 238 239 240 241 |
page_cgroup_zoneinfo(struct page_cgroup *pc) { struct mem_cgroup *mem = pc->mem_cgroup; int nid = page_cgroup_nid(pc); int zid = page_cgroup_zid(pc); |
d52aa412d memory cgroup enh... |
242 |
|
549927620 memcg: add null c... |
243 244 |
if (!mem) return NULL; |
6d12e2d8d per-zone and recl... |
245 246 |
return mem_cgroup_zoneinfo(mem, nid, zid); } |
14067bb3e memcg: hierarchic... |
247 |
static unsigned long mem_cgroup_get_local_zonestat(struct mem_cgroup *mem, |
b69408e88 vmscan: Use an in... |
248 |
enum lru_list idx) |
6d12e2d8d per-zone and recl... |
249 250 251 252 253 254 255 256 257 258 259 |
{ int nid, zid; struct mem_cgroup_per_zone *mz; u64 total = 0; for_each_online_node(nid) for (zid = 0; zid < MAX_NR_ZONES; zid++) { mz = mem_cgroup_zoneinfo(mem, nid, zid); total += MEM_CGROUP_ZSTAT(mz, idx); } return total; |
d52aa412d memory cgroup enh... |
260 |
} |
d5b69e38f memcg: memcontrol... |
261 |
static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont) |
8cdea7c05 Memory controller... |
262 263 264 265 266 |
{ return container_of(cgroup_subsys_state(cont, mem_cgroup_subsys_id), struct mem_cgroup, css); } |
cf475ad28 cgroups: add an o... |
267 |
struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) |
78fb74669 Memory controller... |
268 |
{ |
31a78f23b mm owner: fix rac... |
269 270 271 272 273 274 275 |
/* * mm_update_next_owner() may clear mm->owner to NULL * if it races with swapoff, page migration, etc. * So this can be called with p == NULL. */ if (unlikely(!p)) return NULL; |
78fb74669 Memory controller... |
276 277 278 |
return container_of(task_subsys_state(p, mem_cgroup_subsys_id), struct mem_cgroup, css); } |
54595fe26 memcg: use css_tr... |
279 280 281 |
static struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) { struct mem_cgroup *mem = NULL; |
0b7f569e4 memcg: fix OOM ki... |
282 283 284 |
if (!mm) return NULL; |
54595fe26 memcg: use css_tr... |
285 286 287 288 289 290 291 292 293 294 295 296 297 298 |
/* * Because we have no locks, mm->owner's may be being moved to other * cgroup. We use css_tryget() here even if this looks * pessimistic (rather than adding locks here). */ rcu_read_lock(); do { mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); if (unlikely(!mem)) break; } while (!css_tryget(&mem->css)); rcu_read_unlock(); return mem; } |
14067bb3e memcg: hierarchic... |
299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 |
/* * Call callback function against all cgroup under hierarchy tree. */ static int mem_cgroup_walk_tree(struct mem_cgroup *root, void *data, int (*func)(struct mem_cgroup *, void *)) { int found, ret, nextid; struct cgroup_subsys_state *css; struct mem_cgroup *mem; if (!root->use_hierarchy) return (*func)(root, data); nextid = 1; do { ret = 0; mem = NULL; rcu_read_lock(); css = css_get_next(&mem_cgroup_subsys, nextid, &root->css, &found); if (css && css_tryget(css)) mem = container_of(css, struct mem_cgroup, css); rcu_read_unlock(); if (mem) { ret = (*func)(mem, data); css_put(&mem->css); } nextid = found + 1; } while (!ret && css); return ret; } |
08e552c69 memcg: synchroniz... |
333 334 335 336 337 338 339 340 341 342 343 344 345 |
/* * Following LRU functions are allowed to be used without PCG_LOCK. * Operations are called by routine of global LRU independently from memcg. * What we have to take care of here is validness of pc->mem_cgroup. * * Changes to pc->mem_cgroup happens when * 1. charge * 2. moving account * In typical case, "charge" is done before add-to-lru. Exception is SwapCache. * It is added to LRU before charge. * If PCG_USED bit is not set, page_cgroup is not added to this private LRU. * When moving account, the page is not on LRU. It's isolated. */ |
4f98a2fee vmscan: split LRU... |
346 |
|
08e552c69 memcg: synchroniz... |
347 348 349 350 351 |
void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru) { struct page_cgroup *pc; struct mem_cgroup *mem; struct mem_cgroup_per_zone *mz; |
6d12e2d8d per-zone and recl... |
352 |
|
f8d665422 memcg: add mem_cg... |
353 |
if (mem_cgroup_disabled()) |
08e552c69 memcg: synchroniz... |
354 355 356 |
return; pc = lookup_page_cgroup(page); /* can happen while we handle swapcache. */ |
544122e5e memcg: fix LRU ac... |
357 |
if (list_empty(&pc->lru) || !pc->mem_cgroup) |
08e552c69 memcg: synchroniz... |
358 |
return; |
544122e5e memcg: fix LRU ac... |
359 360 361 362 |
/* * We don't check PCG_USED bit. It's cleared when the "page" is finally * removed from global LRU. */ |
08e552c69 memcg: synchroniz... |
363 364 |
mz = page_cgroup_zoneinfo(pc); mem = pc->mem_cgroup; |
b69408e88 vmscan: Use an in... |
365 |
MEM_CGROUP_ZSTAT(mz, lru) -= 1; |
08e552c69 memcg: synchroniz... |
366 367 |
list_del_init(&pc->lru); return; |
6d12e2d8d per-zone and recl... |
368 |
} |
/* Convenience wrapper: remove @page from the LRU list it is on now. */
void mem_cgroup_del_lru(struct page *page)
{
	mem_cgroup_del_lru_list(page, page_lru(page));
}
b69408e88 vmscan: Use an in... |
373 |
|
08e552c69 memcg: synchroniz... |
374 375 376 377 |
void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru) { struct mem_cgroup_per_zone *mz; struct page_cgroup *pc; |
b69408e88 vmscan: Use an in... |
378 |
|
f8d665422 memcg: add mem_cg... |
379 |
if (mem_cgroup_disabled()) |
08e552c69 memcg: synchroniz... |
380 |
return; |
6d12e2d8d per-zone and recl... |
381 |
|
08e552c69 memcg: synchroniz... |
382 |
pc = lookup_page_cgroup(page); |
bd112db87 memcg: fix mem_cg... |
383 384 385 386 |
/* * Used bit is set without atomic ops but after smp_wmb(). * For making pc->mem_cgroup visible, insert smp_rmb() here. */ |
08e552c69 memcg: synchroniz... |
387 388 389 390 391 392 |
smp_rmb(); /* unused page is not rotated. */ if (!PageCgroupUsed(pc)) return; mz = page_cgroup_zoneinfo(pc); list_move(&pc->lru, &mz->lists[lru]); |
6d12e2d8d per-zone and recl... |
393 |
} |
08e552c69 memcg: synchroniz... |
394 |
void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru) |
66e1707bc Memory controller... |
395 |
{ |
08e552c69 memcg: synchroniz... |
396 397 |
struct page_cgroup *pc; struct mem_cgroup_per_zone *mz; |
6d12e2d8d per-zone and recl... |
398 |
|
f8d665422 memcg: add mem_cg... |
399 |
if (mem_cgroup_disabled()) |
08e552c69 memcg: synchroniz... |
400 401 |
return; pc = lookup_page_cgroup(page); |
bd112db87 memcg: fix mem_cg... |
402 403 404 405 |
/* * Used bit is set without atomic ops but after smp_wmb(). * For making pc->mem_cgroup visible, insert smp_rmb() here. */ |
08e552c69 memcg: synchroniz... |
406 407 |
smp_rmb(); if (!PageCgroupUsed(pc)) |
894bc3104 Unevictable LRU I... |
408 |
return; |
b69408e88 vmscan: Use an in... |
409 |
|
08e552c69 memcg: synchroniz... |
410 |
mz = page_cgroup_zoneinfo(pc); |
b69408e88 vmscan: Use an in... |
411 |
MEM_CGROUP_ZSTAT(mz, lru) += 1; |
08e552c69 memcg: synchroniz... |
412 413 |
list_add(&pc->lru, &mz->lists[lru]); } |
544122e5e memcg: fix LRU ac... |
414 |
|
08e552c69 memcg: synchroniz... |
415 |
/* |
544122e5e memcg: fix LRU ac... |
416 417 418 419 420 |
* At handling SwapCache, pc->mem_cgroup may be changed while it's linked to * lru because the page may.be reused after it's fully uncharged (because of * SwapCache behavior).To handle that, unlink page_cgroup from LRU when charge * it again. This function is only used to charge SwapCache. It's done under * lock_page and expected that zone->lru_lock is never held. |
08e552c69 memcg: synchroniz... |
421 |
*/ |
544122e5e memcg: fix LRU ac... |
422 |
static void mem_cgroup_lru_del_before_commit_swapcache(struct page *page) |
08e552c69 memcg: synchroniz... |
423 |
{ |
544122e5e memcg: fix LRU ac... |
424 425 426 427 428 429 430 431 432 433 434 435 |
unsigned long flags; struct zone *zone = page_zone(page); struct page_cgroup *pc = lookup_page_cgroup(page); spin_lock_irqsave(&zone->lru_lock, flags); /* * Forget old LRU when this page_cgroup is *not* used. This Used bit * is guarded by lock_page() because the page is SwapCache. */ if (!PageCgroupUsed(pc)) mem_cgroup_del_lru_list(page, page_lru(page)); spin_unlock_irqrestore(&zone->lru_lock, flags); |
08e552c69 memcg: synchroniz... |
436 |
} |
544122e5e memcg: fix LRU ac... |
437 438 439 440 441 442 443 444 445 446 447 448 |
static void mem_cgroup_lru_add_after_commit_swapcache(struct page *page) { unsigned long flags; struct zone *zone = page_zone(page); struct page_cgroup *pc = lookup_page_cgroup(page); spin_lock_irqsave(&zone->lru_lock, flags); /* link when the page is linked to LRU but page_cgroup isn't */ if (PageLRU(page) && list_empty(&pc->lru)) mem_cgroup_add_lru_list(page, page_lru(page)); spin_unlock_irqrestore(&zone->lru_lock, flags); } |
08e552c69 memcg: synchroniz... |
449 450 451 |
void mem_cgroup_move_lists(struct page *page, enum lru_list from, enum lru_list to) { |
f8d665422 memcg: add mem_cg... |
452 |
if (mem_cgroup_disabled()) |
08e552c69 memcg: synchroniz... |
453 454 455 |
return; mem_cgroup_del_lru_list(page, from); mem_cgroup_add_lru_list(page, to); |
66e1707bc Memory controller... |
456 |
} |
4c4a22148 memcontrol: move ... |
457 458 459 |
int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem) { int ret; |
0b7f569e4 memcg: fix OOM ki... |
460 |
struct mem_cgroup *curr = NULL; |
4c4a22148 memcontrol: move ... |
461 462 |
task_lock(task); |
0b7f569e4 memcg: fix OOM ki... |
463 464 465 |
rcu_read_lock(); curr = try_get_mem_cgroup_from_mm(task->mm); rcu_read_unlock(); |
4c4a22148 memcontrol: move ... |
466 |
task_unlock(task); |
0b7f569e4 memcg: fix OOM ki... |
467 468 469 470 471 472 473 |
if (!curr) return 0; if (curr->use_hierarchy) ret = css_is_ancestor(&curr->css, &mem->css); else ret = (curr == mem); css_put(&curr->css); |
4c4a22148 memcontrol: move ... |
474 475 |
return ret; } |
66e1707bc Memory controller... |
476 |
/* |
6c48a1d04 per-zone and recl... |
477 478 479 480 |
* prev_priority control...this will be used in memory reclaim path. */ int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem) { |
2733c06ac memcg: protect pr... |
481 482 483 484 485 486 487 |
int prev_priority; spin_lock(&mem->reclaim_param_lock); prev_priority = mem->prev_priority; spin_unlock(&mem->reclaim_param_lock); return prev_priority; |
6c48a1d04 per-zone and recl... |
488 489 490 491 |
} void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem, int priority) { |
2733c06ac memcg: protect pr... |
492 |
spin_lock(&mem->reclaim_param_lock); |
6c48a1d04 per-zone and recl... |
493 494 |
if (priority < mem->prev_priority) mem->prev_priority = priority; |
2733c06ac memcg: protect pr... |
495 |
spin_unlock(&mem->reclaim_param_lock); |
6c48a1d04 per-zone and recl... |
496 497 498 499 |
} void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, int priority) { |
2733c06ac memcg: protect pr... |
500 |
spin_lock(&mem->reclaim_param_lock); |
6c48a1d04 per-zone and recl... |
501 |
mem->prev_priority = priority; |
2733c06ac memcg: protect pr... |
502 |
spin_unlock(&mem->reclaim_param_lock); |
6c48a1d04 per-zone and recl... |
503 |
} |
c772be939 memcg: fix calcul... |
504 |
static int calc_inactive_ratio(struct mem_cgroup *memcg, unsigned long *present_pages) |
14797e236 memcg: add inacti... |
505 506 507 |
{ unsigned long active; unsigned long inactive; |
c772be939 memcg: fix calcul... |
508 509 |
unsigned long gb; unsigned long inactive_ratio; |
14797e236 memcg: add inacti... |
510 |
|
14067bb3e memcg: hierarchic... |
511 512 |
inactive = mem_cgroup_get_local_zonestat(memcg, LRU_INACTIVE_ANON); active = mem_cgroup_get_local_zonestat(memcg, LRU_ACTIVE_ANON); |
14797e236 memcg: add inacti... |
513 |
|
c772be939 memcg: fix calcul... |
514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 |
gb = (inactive + active) >> (30 - PAGE_SHIFT); if (gb) inactive_ratio = int_sqrt(10 * gb); else inactive_ratio = 1; if (present_pages) { present_pages[0] = inactive; present_pages[1] = active; } return inactive_ratio; } int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg) { unsigned long active; unsigned long inactive; unsigned long present_pages[2]; unsigned long inactive_ratio; inactive_ratio = calc_inactive_ratio(memcg, present_pages); inactive = present_pages[0]; active = present_pages[1]; if (inactive * inactive_ratio < active) |
14797e236 memcg: add inacti... |
541 542 543 544 |
return 1; return 0; } |
a3d8e0549 memcg: add mem_cg... |
545 546 547 548 549 550 551 552 553 554 |
unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, struct zone *zone, enum lru_list lru) { int nid = zone->zone_pgdat->node_id; int zid = zone_idx(zone); struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(memcg, nid, zid); return MEM_CGROUP_ZSTAT(mz, lru); } |
3e2f41f1f memcg: add zone_r... |
555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 |
struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg, struct zone *zone) { int nid = zone->zone_pgdat->node_id; int zid = zone_idx(zone); struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(memcg, nid, zid); return &mz->reclaim_stat; } struct zone_reclaim_stat * mem_cgroup_get_reclaim_stat_from_page(struct page *page) { struct page_cgroup *pc; struct mem_cgroup_per_zone *mz; if (mem_cgroup_disabled()) return NULL; pc = lookup_page_cgroup(page); |
bd112db87 memcg: fix mem_cg... |
575 576 577 578 579 580 581 |
/* * Used bit is set without atomic ops but after smp_wmb(). * For making pc->mem_cgroup visible, insert smp_rmb() here. */ smp_rmb(); if (!PageCgroupUsed(pc)) return NULL; |
3e2f41f1f memcg: add zone_r... |
582 583 584 585 586 587 |
mz = page_cgroup_zoneinfo(pc); if (!mz) return NULL; return &mz->reclaim_stat; } |
66e1707bc Memory controller... |
588 589 590 591 592 |
unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, struct list_head *dst, unsigned long *scanned, int order, int mode, struct zone *z, struct mem_cgroup *mem_cont, |
4f98a2fee vmscan: split LRU... |
593 |
int active, int file) |
66e1707bc Memory controller... |
594 595 596 597 598 599 |
{ unsigned long nr_taken = 0; struct page *page; unsigned long scan; LIST_HEAD(pc_list); struct list_head *src; |
ff7283fa3 bugfix for memory... |
600 |
struct page_cgroup *pc, *tmp; |
1ecaab2bd per-zone and recl... |
601 602 603 |
int nid = z->zone_pgdat->node_id; int zid = zone_idx(z); struct mem_cgroup_per_zone *mz; |
4f98a2fee vmscan: split LRU... |
604 |
int lru = LRU_FILE * !!file + !!active; |
66e1707bc Memory controller... |
605 |
|
cf475ad28 cgroups: add an o... |
606 |
BUG_ON(!mem_cont); |
1ecaab2bd per-zone and recl... |
607 |
mz = mem_cgroup_zoneinfo(mem_cont, nid, zid); |
b69408e88 vmscan: Use an in... |
608 |
src = &mz->lists[lru]; |
66e1707bc Memory controller... |
609 |
|
ff7283fa3 bugfix for memory... |
610 611 |
scan = 0; list_for_each_entry_safe_reverse(pc, tmp, src, lru) { |
436c6541b memcgroup: fix zo... |
612 |
if (scan >= nr_to_scan) |
ff7283fa3 bugfix for memory... |
613 |
break; |
08e552c69 memcg: synchroniz... |
614 615 |
page = pc->page; |
52d4b9ac0 memcg: allocate a... |
616 617 |
if (unlikely(!PageCgroupUsed(pc))) continue; |
436c6541b memcgroup: fix zo... |
618 |
if (unlikely(!PageLRU(page))) |
ff7283fa3 bugfix for memory... |
619 |
continue; |
ff7283fa3 bugfix for memory... |
620 |
|
436c6541b memcgroup: fix zo... |
621 |
scan++; |
4f98a2fee vmscan: split LRU... |
622 |
if (__isolate_lru_page(page, mode, file) == 0) { |
66e1707bc Memory controller... |
623 624 625 626 |
list_move(&page->lru, dst); nr_taken++; } } |
66e1707bc Memory controller... |
627 628 629 |
*scanned = scan; return nr_taken; } |
6d61ef409 memcg: memory cgr... |
630 631 |
#define mem_cgroup_from_res_counter(counter, member) \ container_of(counter, struct mem_cgroup, member) |
b85a96c0b memcg: memory swa... |
632 633 634 635 636 637 638 639 640 641 642 |
static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem) { if (do_swap_account) { if (res_counter_check_under_limit(&mem->res) && res_counter_check_under_limit(&mem->memsw)) return true; } else if (res_counter_check_under_limit(&mem->res)) return true; return false; } |
a7885eb8a memcg: swappiness |
643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 |
static unsigned int get_swappiness(struct mem_cgroup *memcg) { struct cgroup *cgrp = memcg->css.cgroup; unsigned int swappiness; /* root ? */ if (cgrp->parent == NULL) return vm_swappiness; spin_lock(&memcg->reclaim_param_lock); swappiness = memcg->swappiness; spin_unlock(&memcg->reclaim_param_lock); return swappiness; } |
/* mem_cgroup_walk_tree() callback: count visited groups into *data. */
static int mem_cgroup_count_children_cb(struct mem_cgroup *mem, void *data)
{
	int *count = data;

	(*count)++;
	return 0;
}
e222432bf memcg: show memcg... |
664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 |
/** * mem_cgroup_print_mem_info: Called from OOM with tasklist_lock held in read mode. * @memcg: The memory cgroup that went over limit * @p: Task that is going to be killed * * NOTE: @memcg and @p's mem_cgroup can be different when hierarchy is * enabled */ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) { struct cgroup *task_cgrp; struct cgroup *mem_cgrp; /* * Need a buffer in BSS, can't rely on allocations. The code relies * on the assumption that OOM is serialized for memory controller. * If this assumption is broken, revisit this code. */ static char memcg_name[PATH_MAX]; int ret; if (!memcg) return; rcu_read_lock(); mem_cgrp = memcg->css.cgroup; task_cgrp = task_cgroup(p, mem_cgroup_subsys_id); ret = cgroup_path(task_cgrp, memcg_name, PATH_MAX); if (ret < 0) { /* * Unfortunately, we are unable to convert to a useful name * But we'll still print out the usage information */ rcu_read_unlock(); goto done; } rcu_read_unlock(); printk(KERN_INFO "Task in %s killed", memcg_name); rcu_read_lock(); ret = cgroup_path(mem_cgrp, memcg_name, PATH_MAX); if (ret < 0) { rcu_read_unlock(); goto done; } rcu_read_unlock(); /* * Continues from above, so we don't need an KERN_ level */ printk(KERN_CONT " as a result of limit of %s ", memcg_name); done: printk(KERN_INFO "memory: usage %llukB, limit %llukB, failcnt %llu ", res_counter_read_u64(&memcg->res, RES_USAGE) >> 10, res_counter_read_u64(&memcg->res, RES_LIMIT) >> 10, res_counter_read_u64(&memcg->res, RES_FAILCNT)); printk(KERN_INFO "memory+swap: usage %llukB, limit %llukB, " "failcnt %llu ", res_counter_read_u64(&memcg->memsw, RES_USAGE) >> 10, res_counter_read_u64(&memcg->memsw, RES_LIMIT) >> 10, res_counter_read_u64(&memcg->memsw, RES_FAILCNT)); } |
/*
 * This function returns the number of memcg under hierarchy tree. Returns
 * 1(self count) if no children.
 */
static int mem_cgroup_count_children(struct mem_cgroup *mem)
{
	int num = 0;

	/* the callback increments num once for every memcg visited */
	mem_cgroup_walk_tree(mem, &num, mem_cgroup_count_children_cb);
	return num;
}
/*
 * Visit the first child (need not be the first child as per the ordering
 * of the cgroup list, since we track last_scanned_child) of @mem and use
 * that to reclaim free pages from.
 *
 * Returns a memcg with an elevated css refcount; the caller must css_put()
 * it. When hierarchy is disabled, @root_mem itself is returned (with an
 * extra reference).
 */
static struct mem_cgroup *
mem_cgroup_select_victim(struct mem_cgroup *root_mem)
{
	struct mem_cgroup *ret = NULL;
	struct cgroup_subsys_state *css;
	int nextid, found;

	if (!root_mem->use_hierarchy) {
		css_get(&root_mem->css);
		ret = root_mem;
	}

	/* loop until css_tryget() succeeds on some group under root_mem */
	while (!ret) {
		rcu_read_lock();
		nextid = root_mem->last_scanned_child + 1;
		css = css_get_next(&mem_cgroup_subsys, nextid, &root_mem->css,
				   &found);
		if (css && css_tryget(css))
			ret = container_of(css, struct mem_cgroup, css);

		rcu_read_unlock();
		/* Updates scanning parameter */
		spin_lock(&root_mem->reclaim_param_lock);
		if (!css) {
			/* this means start scan from ID:1 */
			root_mem->last_scanned_child = 0;
		} else
			root_mem->last_scanned_child = found;
		spin_unlock(&root_mem->reclaim_param_lock);
	}

	return ret;
}

/*
 * Scan the hierarchy if needed to reclaim memory. We remember the last child
 * we reclaimed from, so that we don't end up penalizing one child extensively
 * based on its position in the children list.
 *
 * root_mem is the original ancestor that we've been reclaiming from.
 *
 * We give up and return to the caller when we visit root_mem twice.
 * (other groups can be removed while we're walking....)
 *
 * If shrink==true, for avoiding to free too much, this returns immediately.
 */
static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
				   gfp_t gfp_mask, bool noswap, bool shrink)
{
	struct mem_cgroup *victim;
	int ret, total = 0;
	int loop = 0;

	while (loop < 2) {
		victim = mem_cgroup_select_victim(root_mem);
		if (victim == root_mem)
			loop++;
		if (!mem_cgroup_local_usage(&victim->stat)) {
			/* this cgroup's local usage == 0 */
			css_put(&victim->css);
			continue;
		}
		/* we use swappiness of local cgroup */
		ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, noswap,
						   get_swappiness(victim));
		css_put(&victim->css);
		/*
		 * At shrinking usage, we can't check we should stop here or
		 * reclaim more. It depends on callers. last_scanned_child
		 * will work enough for keeping fairness under tree.
		 */
		if (shrink)
			return ret;
		total += ret;
		if (mem_cgroup_check_under_limit(root_mem))
			return 1 + total;
	}
	return total;
}
/*
 * Return true when @task's memcg triggered an OOM kill within the last
 * HZ/10 jiffies (see record_last_oom()). Used to avoid piling additional
 * OOM kills onto a group that was just handled.
 */
bool mem_cgroup_oom_called(struct task_struct *task)
{
	bool ret = false;
	struct mem_cgroup *mem;
	struct mm_struct *mm;

	rcu_read_lock();
	mm = task->mm;
	/* kernel threads have no mm; account them against init_mm's owner */
	if (!mm)
		mm = &init_mm;
	mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
	if (mem && time_before(jiffies, mem->last_oom_jiffies + HZ/10))
		ret = true;
	rcu_read_unlock();
	return ret;
}
/* Walk-tree callback: stamp the current jiffies as the last OOM time. */
static int record_last_oom_cb(struct mem_cgroup *mem, void *data)
{
	mem->last_oom_jiffies = jiffies;
	return 0;
}

/*
 * Record "now" as the last OOM time on @mem and every memcg below it,
 * so mem_cgroup_oom_called() can suppress back-to-back OOM kills.
 */
static void record_last_oom(struct mem_cgroup *mem)
{
	mem_cgroup_walk_tree(mem, NULL, record_last_oom_cb);
}
/*
 * Unlike exported interface, "oom" parameter is added. if oom==true,
 * oom-killer can be invoked.
 *
 * Charges PAGE_SIZE against *memcg (or mm's memcg when *memcg is NULL,
 * in which case the resolved group is stored back into *memcg). On
 * success the memcg holds one extra css reference that the caller must
 * release via commit/cancel. Returns 0 on success (including the
 * "nothing to account" cases where *memcg is set to NULL), -ENOMEM on
 * failure.
 */
static int __mem_cgroup_try_charge(struct mm_struct *mm,
			gfp_t gfp_mask, struct mem_cgroup **memcg,
			bool oom)
{
	struct mem_cgroup *mem, *mem_over_limit;
	int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
	struct res_counter *fail_res;

	if (unlikely(test_thread_flag(TIF_MEMDIE))) {
		/* Don't account this! */
		*memcg = NULL;
		return 0;
	}

	/*
	 * We always charge the cgroup the mm_struct belongs to.
	 * The mm_struct's mem_cgroup changes on task migration if the
	 * thread group leader migrates. It's possible that mm is not
	 * set, if so charge the init_mm (happens for pagecache usage).
	 */
	mem = *memcg;
	if (likely(!mem)) {
		mem = try_get_mem_cgroup_from_mm(mm);
		*memcg = mem;
	} else {
		css_get(&mem->css);
	}
	if (unlikely(!mem))
		return 0;

	VM_BUG_ON(css_is_removed(&mem->css));

	while (1) {
		int ret;
		bool noswap = false;

		ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res);
		if (likely(!ret)) {
			if (!do_swap_account)
				break;
			ret = res_counter_charge(&mem->memsw, PAGE_SIZE,
							&fail_res);
			if (likely(!ret))
				break;
			/* mem+swap counter fails */
			res_counter_uncharge(&mem->res, PAGE_SIZE);
			/* swapping out won't help; reclaim without swap */
			noswap = true;
			mem_over_limit = mem_cgroup_from_res_counter(fail_res,
									memsw);
		} else
			/* mem counter fails */
			mem_over_limit = mem_cgroup_from_res_counter(fail_res,
									res);

		/* cannot sleep, so no reclaim is possible: fail fast */
		if (!(gfp_mask & __GFP_WAIT))
			goto nomem;

		ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask,
							noswap, false);
		if (ret)
			continue;

		/*
		 * try_to_free_mem_cgroup_pages() might not give us a full
		 * picture of reclaim. Some pages are reclaimed and might be
		 * moved to swap cache or just unmapped from the cgroup.
		 * Check the limit again to see if the reclaim reduced the
		 * current usage of the cgroup before giving up
		 *
		 */
		if (mem_cgroup_check_under_limit(mem_over_limit))
			continue;

		if (!nr_retries--) {
			if (oom) {
				/* memcg_tasklist serializes with task move */
				mutex_lock(&memcg_tasklist);
				mem_cgroup_out_of_memory(mem_over_limit, gfp_mask);
				mutex_unlock(&memcg_tasklist);
				record_last_oom(mem_over_limit);
			}
			goto nomem;
		}
	}
	return 0;
nomem:
	css_put(&mem->css);
	return -ENOMEM;
}
8a9f3ccd2 Memory controller... |
944 |
|
/*
 * A helper function to get mem_cgroup from ID. must be called under
 * rcu_read_lock(). The caller must check css_is_removed() or some if
 * it's concern. (dropping refcnt from swap can be called against removed
 * memcg.)
 */
static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
{
	struct cgroup_subsys_state *css;

	/* ID 0 is unused ID */
	if (!id)
		return NULL;
	css = css_lookup(&mem_cgroup_subsys, id);
	if (!css)
		return NULL;
	return container_of(css, struct mem_cgroup, css);
}
/*
 * Find the memcg associated with a swap-cache page and take a reference
 * on it (css_tryget). If the page_cgroup is in use, the owner is read
 * from it directly; otherwise the swap_cgroup record for the page's swap
 * entry is consulted. Returns NULL when the page isn't swap cache or no
 * live memcg could be pinned. The page must be locked.
 */
static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page)
{
	struct mem_cgroup *mem;
	struct page_cgroup *pc;
	unsigned short id;
	swp_entry_t ent;

	VM_BUG_ON(!PageLocked(page));

	if (!PageSwapCache(page))
		return NULL;

	pc = lookup_page_cgroup(page);
	lock_page_cgroup(pc);
	if (PageCgroupUsed(pc)) {
		mem = pc->mem_cgroup;
		if (mem && !css_tryget(&mem->css))
			mem = NULL;
	} else {
		/* not charged: fall back to the swap_cgroup record */
		ent.val = page_private(page);
		id = lookup_swap_cgroup(ent);
		rcu_read_lock();
		mem = mem_cgroup_lookup(id);
		if (mem && !css_tryget(&mem->css))
			mem = NULL;
		rcu_read_unlock();
	}
	unlock_page_cgroup(pc);
	return mem;
}
/*
 * commit a charge got by __mem_cgroup_try_charge() and makes page_cgroup to be
 * USED state. If already USED, uncharge and return.
 *
 * Consumes the css reference taken by try_charge when the pc was already
 * in use; otherwise the reference is kept by the now-charged page.
 */
static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
				     struct page_cgroup *pc,
				     enum charge_type ctype)
{
	/* try_charge() can return NULL to *memcg, taking care of it. */
	if (!mem)
		return;

	lock_page_cgroup(pc);
	if (unlikely(PageCgroupUsed(pc))) {
		/* someone else charged this page first: roll back ours */
		unlock_page_cgroup(pc);
		res_counter_uncharge(&mem->res, PAGE_SIZE);
		if (do_swap_account)
			res_counter_uncharge(&mem->memsw, PAGE_SIZE);
		css_put(&mem->css);
		return;
	}
	pc->mem_cgroup = mem;
	/* ensure mem_cgroup is visible before flags mark the pc as used */
	smp_wmb();
	pc->flags = pcg_default_flags[ctype];

	mem_cgroup_charge_statistics(mem, pc, true);

	unlock_page_cgroup(pc);
}
66e1707bc Memory controller... |
1021 |
|
/**
 * mem_cgroup_move_account - move account of the page
 * @pc: page_cgroup of the page.
 * @from: mem_cgroup which the page is moved from.
 * @to: mem_cgroup which the page is moved to. @from != @to.
 *
 * The caller must confirm following.
 * - page is not on LRU (isolate_page() is useful.)
 *
 * returns 0 at success,
 * returns -EBUSY when lock is busy or "pc" is unstable.
 *
 * This function does "uncharge" from old cgroup but doesn't do "charge" to
 * new cgroup. It should be done by a caller.
 */
static int mem_cgroup_move_account(struct page_cgroup *pc,
	struct mem_cgroup *from, struct mem_cgroup *to)
{
	struct mem_cgroup_per_zone *from_mz, *to_mz;
	int nid, zid;
	int ret = -EBUSY;

	VM_BUG_ON(from == to);
	VM_BUG_ON(PageLRU(pc->page));

	nid = page_cgroup_nid(pc);
	zid = page_cgroup_zid(pc);
	from_mz = mem_cgroup_zoneinfo(from, nid, zid);
	to_mz = mem_cgroup_zoneinfo(to, nid, zid);

	if (!trylock_page_cgroup(pc))
		return ret;

	if (!PageCgroupUsed(pc))
		goto out;

	/* pc may have been reassigned while we waited for the lock */
	if (pc->mem_cgroup != from)
		goto out;

	res_counter_uncharge(&from->res, PAGE_SIZE);
	mem_cgroup_charge_statistics(from, pc, false);
	if (do_swap_account)
		res_counter_uncharge(&from->memsw, PAGE_SIZE);
	css_put(&from->css);

	css_get(&to->css);
	pc->mem_cgroup = to;
	mem_cgroup_charge_statistics(to, pc, true);
	ret = 0;
out:
	unlock_page_cgroup(pc);
	return ret;
}

/*
 * move charges to its parent.
 *
 * Charges the parent first, then isolates the page from the LRU and moves
 * the accounting; every failure path undoes the pre-charge.
 */
static int mem_cgroup_move_parent(struct page_cgroup *pc,
				  struct mem_cgroup *child,
				  gfp_t gfp_mask)
{
	struct page *page = pc->page;
	struct cgroup *cg = child->css.cgroup;
	struct cgroup *pcg = cg->parent;
	struct mem_cgroup *parent;
	int ret;

	/* Is ROOT ? */
	if (!pcg)
		return -EINVAL;

	parent = mem_cgroup_from_cont(pcg);

	ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false);
	if (ret || !parent)
		return ret;

	if (!get_page_unless_zero(page)) {
		ret = -EBUSY;
		goto uncharge;
	}

	ret = isolate_lru_page(page);

	if (ret)
		goto cancel;

	ret = mem_cgroup_move_account(pc, child, parent);

	putback_lru_page(page);
	if (!ret) {
		put_page(page);
		/* drop extra refcnt by try_charge() */
		css_put(&parent->css);
		return 0;
	}

cancel:
	put_page(page);
uncharge:
	/* drop extra refcnt by try_charge() */
	css_put(&parent->css);
	/* uncharge if move fails */
	res_counter_uncharge(&parent->res, PAGE_SIZE);
	if (do_swap_account)
		res_counter_uncharge(&parent->memsw, PAGE_SIZE);
	return ret;
}
/*
 * Charge the memory controller for page usage.
 * Return
 * 0 if the charge was successful
 * < 0 if the cgroup is over its limit
 */
static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
				gfp_t gfp_mask, enum charge_type ctype,
				struct mem_cgroup *memcg)
{
	struct mem_cgroup *mem;
	struct page_cgroup *pc;
	int ret;

	pc = lookup_page_cgroup(page);
	/* can happen at boot */
	if (unlikely(!pc))
		return 0;
	prefetchw(pc);

	/* NULL memcg means: resolve from mm in try_charge */
	mem = memcg;
	ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true);
	if (ret || !mem)
		return ret;

	__mem_cgroup_commit_charge(mem, pc, ctype);
	return 0;
}
/*
 * Charge a newly mapped anonymous page against mm's memcg.
 * Returns 0 on success or when no accounting is needed.
 */
int mem_cgroup_newpage_charge(struct page *page,
			      struct mm_struct *mm, gfp_t gfp_mask)
{
	if (mem_cgroup_disabled())
		return 0;
	if (PageCompound(page))
		return 0;
	/*
	 * If already mapped, we don't have to account.
	 * If page cache, page->mapping has address_space.
	 * But page->mapping may have out-of-use anon_vma pointer,
	 * detect it by PageAnon() check. newly-mapped-anon's page->mapping
	 * is NULL.
	 */
	if (page_mapped(page) || (page->mapping && !PageAnon(page)))
		return 0;
	if (unlikely(!mm))
		mm = &init_mm;
	return mem_cgroup_charge_common(page, mm, gfp_mask,
				MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
}
83aae4c73 memcg: cleanup ca... |
1177 1178 1179 |
static void __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr, enum charge_type ctype); |
/*
 * Charge a page-cache page. File pages are charged as CACHE; shmem pages
 * are charged as SHMEM, going through the swap-in commit path when the
 * page is also in swap cache. Returns 0 on success or when nothing needs
 * accounting.
 */
int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
				gfp_t gfp_mask)
{
	struct mem_cgroup *mem = NULL;
	int ret;

	if (mem_cgroup_disabled())
		return 0;
	if (PageCompound(page))
		return 0;
	/*
	 * Corner case handling. This is called from add_to_page_cache()
	 * in usual. But some FS (shmem) precharges this page before calling it
	 * and call add_to_page_cache() with GFP_NOWAIT.
	 *
	 * For GFP_NOWAIT case, the page may be pre-charged before calling
	 * add_to_page_cache(). (See shmem.c) check it here and avoid to call
	 * charge twice. (It works but has to pay a bit larger cost.)
	 * And when the page is SwapCache, it should take swap information
	 * into account. This is under lock_page() now.
	 */
	if (!(gfp_mask & __GFP_WAIT)) {
		struct page_cgroup *pc;

		pc = lookup_page_cgroup(page);
		if (!pc)
			return 0;
		lock_page_cgroup(pc);
		if (PageCgroupUsed(pc)) {
			unlock_page_cgroup(pc);
			return 0;
		}
		unlock_page_cgroup(pc);
	}

	if (unlikely(!mm && !mem))
		mm = &init_mm;

	if (page_is_file_cache(page))
		return mem_cgroup_charge_common(page, mm, gfp_mask,
				MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);

	/* shmem */
	if (PageSwapCache(page)) {
		ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem);
		if (!ret)
			__mem_cgroup_commit_charge_swapin(page, mem,
					MEM_CGROUP_CHARGE_TYPE_SHMEM);
	} else
		ret = mem_cgroup_charge_common(page, mm, gfp_mask,
					MEM_CGROUP_CHARGE_TYPE_SHMEM, mem);

	return ret;
}
/*
 * While swap-in, try_charge -> commit or cancel, the page is locked.
 * And when try_charge() successfully returns, one refcnt to memcg without
 * struct page_cgroup is acquired. This refcnt will be consumed by
 * "commit()" or removed by "cancel()"
 */
int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
				 struct page *page,
				 gfp_t mask, struct mem_cgroup **ptr)
{
	struct mem_cgroup *mem;
	int ret;

	if (mem_cgroup_disabled())
		return 0;

	if (!do_swap_account)
		goto charge_cur_mm;
	/*
	 * A racing thread's fault, or swapoff, may have already updated
	 * the pte, and even removed page from swap cache: return success
	 * to go on to do_swap_page()'s pte_same() test, which should fail.
	 */
	if (!PageSwapCache(page))
		return 0;
	mem = try_get_mem_cgroup_from_swapcache(page);
	if (!mem)
		goto charge_cur_mm;
	*ptr = mem;
	ret = __mem_cgroup_try_charge(NULL, mask, ptr, true);
	/* drop extra refcnt from tryget */
	css_put(&mem->css);
	return ret;
charge_cur_mm:
	if (unlikely(!mm))
		mm = &init_mm;
	return __mem_cgroup_try_charge(mm, mask, ptr, true);
}
/*
 * Commit a charge (taken by mem_cgroup_try_charge_swapin()) to a page
 * that was brought in from swap, fixing the page's LRU placement around
 * the commit and undoing the memsw double count if the entry was still
 * recorded in swap_cgroup.
 */
static void
__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
					enum charge_type ctype)
{
	struct page_cgroup *pc;

	if (mem_cgroup_disabled())
		return;
	if (!ptr)
		return;
	pc = lookup_page_cgroup(page);
	mem_cgroup_lru_del_before_commit_swapcache(page);
	__mem_cgroup_commit_charge(ptr, pc, ctype);
	mem_cgroup_lru_add_after_commit_swapcache(page);
	/*
	 * Now swap is on-memory. This means this page may be
	 * counted both as mem and swap....double count.
	 * Fix it by uncharging from memsw. Basically, this SwapCache is stable
	 * under lock_page(). But in do_swap_page()::memory.c, reuse_swap_page()
	 * may call delete_from_swap_cache() before reach here.
	 */
	if (do_swap_account && PageSwapCache(page)) {
		swp_entry_t ent = {.val = page_private(page)};
		unsigned short id;
		struct mem_cgroup *memcg;

		/* clear the swap_cgroup record and fetch the old owner */
		id = swap_cgroup_record(ent, 0);
		rcu_read_lock();
		memcg = mem_cgroup_lookup(id);
		if (memcg) {
			/*
			 * This recorded memcg can be obsolete one. So, avoid
			 * calling css_tryget
			 */
			res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
			mem_cgroup_put(memcg);
		}
		rcu_read_unlock();
	}
	/* add this page(page_cgroup) to the LRU we want. */

}
/* Public wrapper: commit a swap-in charge as a mapped (anon) page. */
void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
{
	__mem_cgroup_commit_charge_swapin(page, ptr,
					MEM_CGROUP_CHARGE_TYPE_MAPPED);
}
7a81b88cb memcg: introduce ... |
1316 1317 |
void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem) { |
f8d665422 memcg: add mem_cg... |
1318 |
if (mem_cgroup_disabled()) |
7a81b88cb memcg: introduce ... |
1319 1320 1321 1322 |
return; if (!mem) return; res_counter_uncharge(&mem->res, PAGE_SIZE); |
8c7c6e34a memcg: mem+swap c... |
1323 1324 |
if (do_swap_account) res_counter_uncharge(&mem->memsw, PAGE_SIZE); |
7a81b88cb memcg: introduce ... |
1325 1326 |
css_put(&mem->css); } |
/*
 * uncharge if !page_mapped(page)
 *
 * Returns the memcg the page was charged to (so swap-out callers can
 * record it), or NULL when nothing was uncharged. For SWAPOUT the css
 * reference is intentionally kept; the caller drops it after recording
 * swap information.
 */
static struct mem_cgroup *
__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
{
	struct page_cgroup *pc;
	struct mem_cgroup *mem = NULL;
	struct mem_cgroup_per_zone *mz;

	if (mem_cgroup_disabled())
		return NULL;

	/* swap-cache pages are handled by the swapcache/swap paths */
	if (PageSwapCache(page))
		return NULL;

	/*
	 * Check if our page_cgroup is valid
	 */
	pc = lookup_page_cgroup(page);
	if (unlikely(!pc || !PageCgroupUsed(pc)))
		return NULL;

	lock_page_cgroup(pc);

	mem = pc->mem_cgroup;

	/* re-check under lock: may have been uncharged meanwhile */
	if (!PageCgroupUsed(pc))
		goto unlock_out;

	switch (ctype) {
	case MEM_CGROUP_CHARGE_TYPE_MAPPED:
		if (page_mapped(page))
			goto unlock_out;
		break;
	case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
		if (!PageAnon(page)) {	/* Shared memory */
			if (page->mapping && !page_is_file_cache(page))
				goto unlock_out;
		} else if (page_mapped(page)) /* Anon */
				goto unlock_out;
		break;
	default:
		break;
	}

	res_counter_uncharge(&mem->res, PAGE_SIZE);
	/* at swapout the memsw charge stays: the swap entry still holds it */
	if (do_swap_account && (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
		res_counter_uncharge(&mem->memsw, PAGE_SIZE);
	mem_cgroup_charge_statistics(mem, pc, false);

	ClearPageCgroupUsed(pc);
	/*
	 * pc->mem_cgroup is not cleared here. It will be accessed when it's
	 * freed from LRU. This is safe because uncharged page is expected not
	 * to be reused (freed soon). Exception is SwapCache, it's handled by
	 * special functions.
	 */

	/* NOTE(review): mz appears unused after this point — likely vestigial */
	mz = page_cgroup_zoneinfo(pc);
	unlock_page_cgroup(pc);

	/* at swapout, this memcg will be accessed to record to swap */
	if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
		css_put(&mem->css);

	return mem;

unlock_out:
	unlock_page_cgroup(pc);
	return NULL;
}
/* Uncharge an anonymous page once it is no longer mapped anywhere. */
void mem_cgroup_uncharge_page(struct page *page)
{
	/* early check. */
	if (page_mapped(page))
		return;
	if (page->mapping && !PageAnon(page))
		return;
	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED);
}

/* Uncharge a page-cache page being removed from its mapping. */
void mem_cgroup_uncharge_cache_page(struct page *page)
{
	VM_BUG_ON(page_mapped(page));
	VM_BUG_ON(page->mapping);
	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
}
#ifdef CONFIG_SWAP
/*
 * called after __delete_from_swap_cache() and drop "page" account.
 * memcg information is recorded to swap_cgroup of "ent"
 */
void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
{
	struct mem_cgroup *memcg;

	memcg = __mem_cgroup_uncharge_common(page,
					MEM_CGROUP_CHARGE_TYPE_SWAPOUT);

	/* record memcg information */
	if (do_swap_account && memcg) {
		swap_cgroup_record(ent, css_id(&memcg->css));
		/* hold the memcg until mem_cgroup_uncharge_swap() */
		mem_cgroup_get(memcg);
	}
	/* drop the css ref that uncharge_common kept for SWAPOUT */
	if (memcg)
		css_put(&memcg->css);
}
#endif
8c7c6e34a memcg: mem+swap c... |
1434 1435 1436 1437 1438 1439 1440 |
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP /* * called from swap_entry_free(). remove record in swap_cgroup and * uncharge "memsw" account. */ void mem_cgroup_uncharge_swap(swp_entry_t ent) |
d13d14430 memcg: handle swa... |
1441 |
{ |
8c7c6e34a memcg: mem+swap c... |
1442 |
struct mem_cgroup *memcg; |
a3b2d6926 cgroups: use css ... |
1443 |
unsigned short id; |
8c7c6e34a memcg: mem+swap c... |
1444 1445 1446 |
if (!do_swap_account) return; |
a3b2d6926 cgroups: use css ... |
1447 1448 1449 |
id = swap_cgroup_record(ent, 0); rcu_read_lock(); memcg = mem_cgroup_lookup(id); |
8c7c6e34a memcg: mem+swap c... |
1450 |
if (memcg) { |
a3b2d6926 cgroups: use css ... |
1451 1452 1453 1454 |
/* * We uncharge this because swap is freed. * This memcg can be obsolete one. We avoid calling css_tryget */ |
8c7c6e34a memcg: mem+swap c... |
1455 1456 1457 |
res_counter_uncharge(&memcg->memsw, PAGE_SIZE); mem_cgroup_put(memcg); } |
a3b2d6926 cgroups: use css ... |
1458 |
rcu_read_unlock(); |
d13d14430 memcg: handle swa... |
1459 |
} |
8c7c6e34a memcg: mem+swap c... |
1460 |
#endif |
d13d14430 memcg: handle swa... |
1461 |
|
ae41be374 bugfix for memory... |
1462 |
/* |
01b1ae63c memcg: simple mig... |
1463 1464 |
* Before starting migration, account PAGE_SIZE to mem_cgroup that the old * page belongs to. |
ae41be374 bugfix for memory... |
1465 |
*/ |
01b1ae63c memcg: simple mig... |
1466 |
int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr) |
ae41be374 bugfix for memory... |
1467 1468 |
{ struct page_cgroup *pc; |
e8589cc18 memcg: better mig... |
1469 |
struct mem_cgroup *mem = NULL; |
e8589cc18 memcg: better mig... |
1470 |
int ret = 0; |
8869b8f6e memcg: memcontrol... |
1471 |
|
f8d665422 memcg: add mem_cg... |
1472 |
if (mem_cgroup_disabled()) |
4077960e2 memory controller... |
1473 |
return 0; |
52d4b9ac0 memcg: allocate a... |
1474 1475 1476 |
pc = lookup_page_cgroup(page); lock_page_cgroup(pc); if (PageCgroupUsed(pc)) { |
e8589cc18 memcg: better mig... |
1477 1478 |
mem = pc->mem_cgroup; css_get(&mem->css); |
e8589cc18 memcg: better mig... |
1479 |
} |
52d4b9ac0 memcg: allocate a... |
1480 |
unlock_page_cgroup(pc); |
01b1ae63c memcg: simple mig... |
1481 |
|
e8589cc18 memcg: better mig... |
1482 |
if (mem) { |
3bb4edf24 memcg: don't trig... |
1483 |
ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false); |
e8589cc18 memcg: better mig... |
1484 1485 |
css_put(&mem->css); } |
01b1ae63c memcg: simple mig... |
1486 |
*ptr = mem; |
e8589cc18 memcg: better mig... |
1487 |
return ret; |
ae41be374 bugfix for memory... |
1488 |
} |
8869b8f6e memcg: memcontrol... |
1489 |
|
69029cd55 memcg: remove ref... |
1490 |
/* remove redundant charge if migration failed*/ |
01b1ae63c memcg: simple mig... |
1491 1492 |
void mem_cgroup_end_migration(struct mem_cgroup *mem, struct page *oldpage, struct page *newpage) |
ae41be374 bugfix for memory... |
1493 |
{ |
01b1ae63c memcg: simple mig... |
1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 |
struct page *target, *unused; struct page_cgroup *pc; enum charge_type ctype; if (!mem) return; /* at migration success, oldpage->mapping is NULL. */ if (oldpage->mapping) { target = oldpage; unused = NULL; } else { target = newpage; unused = oldpage; } if (PageAnon(target)) ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED; else if (page_is_file_cache(target)) ctype = MEM_CGROUP_CHARGE_TYPE_CACHE; else ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM; /* unused page is not on radix-tree now. */ |
d13d14430 memcg: handle swa... |
1518 |
if (unused) |
01b1ae63c memcg: simple mig... |
1519 1520 1521 |
__mem_cgroup_uncharge_common(unused, ctype); pc = lookup_page_cgroup(target); |
69029cd55 memcg: remove ref... |
1522 |
/* |
01b1ae63c memcg: simple mig... |
1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 |
* __mem_cgroup_commit_charge() check PCG_USED bit of page_cgroup. * So, double-counting is effectively avoided. */ __mem_cgroup_commit_charge(mem, pc, ctype); /* * Both of oldpage and newpage are still under lock_page(). * Then, we don't have to care about race in radix-tree. * But we have to be careful that this page is unmapped or not. * * There is a case for !page_mapped(). At the start of * migration, oldpage was mapped. But now, it's zapped. * But we know *target* page is not freed/reused under us. * mem_cgroup_uncharge_page() does all necessary checks. |
69029cd55 memcg: remove ref... |
1537 |
*/ |
01b1ae63c memcg: simple mig... |
1538 1539 |
if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED) mem_cgroup_uncharge_page(target); |
ae41be374 bugfix for memory... |
1540 |
} |
78fb74669 Memory controller... |
1541 |
|
cc8475822 memory cgroup enh... |
1542 |
/* |
ae3abae64 memcg: fix mem_cg... |
1543 1544 1545 1546 1547 1548 |
* A call to try to shrink memory usage on charge failure at shmem's swapin. * Calling hierarchical_reclaim is not enough because we should update * last_oom_jiffies to prevent pagefault_out_of_memory from invoking global OOM. * Moreover considering hierarchy, we should reclaim from the mem_over_limit, * not from the memcg which this page would be charged to. * try_charge_swapin does all of these works properly. |
c9b0ed514 memcg: helper fun... |
1549 |
*/ |
ae3abae64 memcg: fix mem_cg... |
1550 |
int mem_cgroup_shmem_charge_fallback(struct page *page, |
b5a84319a memcg: fix shmem'... |
1551 1552 |
struct mm_struct *mm, gfp_t gfp_mask) |
c9b0ed514 memcg: helper fun... |
1553 |
{ |
b5a84319a memcg: fix shmem'... |
1554 |
struct mem_cgroup *mem = NULL; |
ae3abae64 memcg: fix mem_cg... |
1555 |
int ret; |
c9b0ed514 memcg: helper fun... |
1556 |
|
f8d665422 memcg: add mem_cg... |
1557 |
if (mem_cgroup_disabled()) |
cede86acd memcg: clean up c... |
1558 |
return 0; |
c9b0ed514 memcg: helper fun... |
1559 |
|
ae3abae64 memcg: fix mem_cg... |
1560 1561 1562 |
ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem); if (!ret) mem_cgroup_cancel_charge_swapin(mem); /* it does !mem check */ |
c9b0ed514 memcg: helper fun... |
1563 |
|
ae3abae64 memcg: fix mem_cg... |
1564 |
return ret; |
c9b0ed514 memcg: helper fun... |
1565 |
} |
8c7c6e34a memcg: mem+swap c... |
1566 |
static DEFINE_MUTEX(set_limit_mutex); |
d38d2a758 mm: make mem_cgro... |
1567 |
static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, |
8c7c6e34a memcg: mem+swap c... |
1568 |
unsigned long long val) |
628f42355 memcg: limit chan... |
1569 |
{ |
81d39c20f memcg: fix shrink... |
1570 |
int retry_count; |
628f42355 memcg: limit chan... |
1571 |
int progress; |
8c7c6e34a memcg: mem+swap c... |
1572 |
u64 memswlimit; |
628f42355 memcg: limit chan... |
1573 |
int ret = 0; |
81d39c20f memcg: fix shrink... |
1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 |
int children = mem_cgroup_count_children(memcg); u64 curusage, oldusage; /* * For keeping hierarchical_reclaim simple, how long we should retry * is depends on callers. We set our retry-count to be function * of # of children which we should visit in this loop. */ retry_count = MEM_CGROUP_RECLAIM_RETRIES * children; oldusage = res_counter_read_u64(&memcg->res, RES_USAGE); |
628f42355 memcg: limit chan... |
1585 |
|
8c7c6e34a memcg: mem+swap c... |
1586 |
while (retry_count) { |
628f42355 memcg: limit chan... |
1587 1588 1589 1590 |
if (signal_pending(current)) { ret = -EINTR; break; } |
8c7c6e34a memcg: mem+swap c... |
1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 |
/* * Rather than hide all in some function, I do this in * open coded manner. You see what this really does. * We have to guarantee mem->res.limit < mem->memsw.limit. */ mutex_lock(&set_limit_mutex); memswlimit = res_counter_read_u64(&memcg->memsw, RES_LIMIT); if (memswlimit < val) { ret = -EINVAL; mutex_unlock(&set_limit_mutex); |
628f42355 memcg: limit chan... |
1601 1602 |
break; } |
8c7c6e34a memcg: mem+swap c... |
1603 1604 1605 1606 1607 |
ret = res_counter_set_limit(&memcg->res, val); mutex_unlock(&set_limit_mutex); if (!ret) break; |
42e9abb62 memcg: change try... |
1608 |
progress = mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL, |
81d39c20f memcg: fix shrink... |
1609 1610 1611 1612 1613 1614 1615 |
false, true); curusage = res_counter_read_u64(&memcg->res, RES_USAGE); /* Usage is reduced ? */ if (curusage >= oldusage) retry_count--; else oldusage = curusage; |
8c7c6e34a memcg: mem+swap c... |
1616 |
} |
14797e236 memcg: add inacti... |
1617 |
|
8c7c6e34a memcg: mem+swap c... |
1618 1619 1620 1621 1622 1623 |
return ret; } int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg, unsigned long long val) { |
81d39c20f memcg: fix shrink... |
1624 |
int retry_count; |
8c7c6e34a memcg: mem+swap c... |
1625 |
u64 memlimit, oldusage, curusage; |
81d39c20f memcg: fix shrink... |
1626 1627 |
int children = mem_cgroup_count_children(memcg); int ret = -EBUSY; |
8c7c6e34a memcg: mem+swap c... |
1628 1629 1630 |
if (!do_swap_account) return -EINVAL; |
81d39c20f memcg: fix shrink... |
1631 1632 1633 |
/* see mem_cgroup_resize_res_limit */ retry_count = children * MEM_CGROUP_RECLAIM_RETRIES; oldusage = res_counter_read_u64(&memcg->memsw, RES_USAGE); |
8c7c6e34a memcg: mem+swap c... |
1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 |
while (retry_count) { if (signal_pending(current)) { ret = -EINTR; break; } /* * Rather than hide all in some function, I do this in * open coded manner. You see what this really does. * We have to guarantee mem->res.limit < mem->memsw.limit. */ mutex_lock(&set_limit_mutex); memlimit = res_counter_read_u64(&memcg->res, RES_LIMIT); if (memlimit > val) { ret = -EINVAL; mutex_unlock(&set_limit_mutex); break; } ret = res_counter_set_limit(&memcg->memsw, val); mutex_unlock(&set_limit_mutex); if (!ret) break; |
81d39c20f memcg: fix shrink... |
1656 |
mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL, true, true); |
8c7c6e34a memcg: mem+swap c... |
1657 |
curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE); |
81d39c20f memcg: fix shrink... |
1658 |
/* Usage is reduced ? */ |
8c7c6e34a memcg: mem+swap c... |
1659 |
if (curusage >= oldusage) |
628f42355 memcg: limit chan... |
1660 |
retry_count--; |
81d39c20f memcg: fix shrink... |
1661 1662 |
else oldusage = curusage; |
628f42355 memcg: limit chan... |
1663 1664 1665 |
} return ret; } |
c9b0ed514 memcg: helper fun... |
1666 |
/* |
cc8475822 memory cgroup enh... |
1667 |
* This routine traverse page_cgroup in given list and drop them all. |
cc8475822 memory cgroup enh... |
1668 1669 |
* *And* this routine doesn't reclaim page itself, just removes page_cgroup. */ |
f817ed485 memcg: move all a... |
1670 |
static int mem_cgroup_force_empty_list(struct mem_cgroup *mem, |
08e552c69 memcg: synchroniz... |
1671 |
int node, int zid, enum lru_list lru) |
cc8475822 memory cgroup enh... |
1672 |
{ |
08e552c69 memcg: synchroniz... |
1673 1674 |
struct zone *zone; struct mem_cgroup_per_zone *mz; |
f817ed485 memcg: move all a... |
1675 |
struct page_cgroup *pc, *busy; |
08e552c69 memcg: synchroniz... |
1676 |
unsigned long flags, loop; |
072c56c13 per-zone and recl... |
1677 |
struct list_head *list; |
f817ed485 memcg: move all a... |
1678 |
int ret = 0; |
072c56c13 per-zone and recl... |
1679 |
|
08e552c69 memcg: synchroniz... |
1680 1681 |
zone = &NODE_DATA(node)->node_zones[zid]; mz = mem_cgroup_zoneinfo(mem, node, zid); |
b69408e88 vmscan: Use an in... |
1682 |
list = &mz->lists[lru]; |
cc8475822 memory cgroup enh... |
1683 |
|
f817ed485 memcg: move all a... |
1684 1685 1686 1687 1688 1689 |
loop = MEM_CGROUP_ZSTAT(mz, lru); /* give some margin against EBUSY etc...*/ loop += 256; busy = NULL; while (loop--) { ret = 0; |
08e552c69 memcg: synchroniz... |
1690 |
spin_lock_irqsave(&zone->lru_lock, flags); |
f817ed485 memcg: move all a... |
1691 |
if (list_empty(list)) { |
08e552c69 memcg: synchroniz... |
1692 |
spin_unlock_irqrestore(&zone->lru_lock, flags); |
52d4b9ac0 memcg: allocate a... |
1693 |
break; |
f817ed485 memcg: move all a... |
1694 1695 1696 1697 1698 |
} pc = list_entry(list->prev, struct page_cgroup, lru); if (busy == pc) { list_move(&pc->lru, list); busy = 0; |
08e552c69 memcg: synchroniz... |
1699 |
spin_unlock_irqrestore(&zone->lru_lock, flags); |
f817ed485 memcg: move all a... |
1700 1701 |
continue; } |
08e552c69 memcg: synchroniz... |
1702 |
spin_unlock_irqrestore(&zone->lru_lock, flags); |
f817ed485 memcg: move all a... |
1703 |
|
2c26fdd70 memcg: revert gfp... |
1704 |
ret = mem_cgroup_move_parent(pc, mem, GFP_KERNEL); |
f817ed485 memcg: move all a... |
1705 |
if (ret == -ENOMEM) |
52d4b9ac0 memcg: allocate a... |
1706 |
break; |
f817ed485 memcg: move all a... |
1707 1708 1709 1710 1711 1712 1713 |
if (ret == -EBUSY || ret == -EINVAL) { /* found lock contention or "pc" is obsolete. */ busy = pc; cond_resched(); } else busy = NULL; |
cc8475822 memory cgroup enh... |
1714 |
} |
08e552c69 memcg: synchroniz... |
1715 |
|
f817ed485 memcg: move all a... |
1716 1717 1718 |
if (!ret && !list_empty(list)) return -EBUSY; return ret; |
cc8475822 memory cgroup enh... |
1719 1720 1721 1722 1723 1724 |
} /* * make mem_cgroup's charge to be 0 if there is no task. * This enables deleting this mem_cgroup. */ |
c1e862c1f memcg: new force_... |
1725 |
static int mem_cgroup_force_empty(struct mem_cgroup *mem, bool free_all) |
cc8475822 memory cgroup enh... |
1726 |
{ |
f817ed485 memcg: move all a... |
1727 1728 1729 |
int ret; int node, zid, shrink; int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; |
c1e862c1f memcg: new force_... |
1730 |
struct cgroup *cgrp = mem->css.cgroup; |
8869b8f6e memcg: memcontrol... |
1731 |
|
cc8475822 memory cgroup enh... |
1732 |
css_get(&mem->css); |
f817ed485 memcg: move all a... |
1733 1734 |
shrink = 0; |
c1e862c1f memcg: new force_... |
1735 1736 1737 |
/* should free all ? */ if (free_all) goto try_to_free; |
f817ed485 memcg: move all a... |
1738 |
move_account: |
1ecaab2bd per-zone and recl... |
1739 |
while (mem->res.usage > 0) { |
f817ed485 memcg: move all a... |
1740 |
ret = -EBUSY; |
c1e862c1f memcg: new force_... |
1741 1742 1743 1744 |
if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children)) goto out; ret = -EINTR; if (signal_pending(current)) |
cc8475822 memory cgroup enh... |
1745 |
goto out; |
52d4b9ac0 memcg: allocate a... |
1746 1747 |
/* This is for making all *used* pages to be on LRU. */ lru_add_drain_all(); |
f817ed485 memcg: move all a... |
1748 |
ret = 0; |
299b4eaa3 memcg: NULL point... |
1749 |
for_each_node_state(node, N_HIGH_MEMORY) { |
f817ed485 memcg: move all a... |
1750 |
for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) { |
b69408e88 vmscan: Use an in... |
1751 |
enum lru_list l; |
f817ed485 memcg: move all a... |
1752 1753 |
for_each_lru(l) { ret = mem_cgroup_force_empty_list(mem, |
08e552c69 memcg: synchroniz... |
1754 |
node, zid, l); |
f817ed485 memcg: move all a... |
1755 1756 1757 |
if (ret) break; } |
1ecaab2bd per-zone and recl... |
1758 |
} |
f817ed485 memcg: move all a... |
1759 1760 1761 1762 1763 1764 |
if (ret) break; } /* it seems parent cgroup doesn't have enough mem */ if (ret == -ENOMEM) goto try_to_free; |
52d4b9ac0 memcg: allocate a... |
1765 |
cond_resched(); |
cc8475822 memory cgroup enh... |
1766 1767 1768 1769 1770 |
} ret = 0; out: css_put(&mem->css); return ret; |
f817ed485 memcg: move all a... |
1771 1772 |
try_to_free: |
c1e862c1f memcg: new force_... |
1773 1774 |
/* returns EBUSY if there is a task or if we come here twice. */ if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children) || shrink) { |
f817ed485 memcg: move all a... |
1775 1776 1777 |
ret = -EBUSY; goto out; } |
c1e862c1f memcg: new force_... |
1778 1779 |
/* we call try-to-free pages for make this cgroup empty */ lru_add_drain_all(); |
f817ed485 memcg: move all a... |
1780 1781 1782 1783 |
/* try to free all pages in this cgroup */ shrink = 1; while (nr_retries && mem->res.usage > 0) { int progress; |
c1e862c1f memcg: new force_... |
1784 1785 1786 1787 1788 |
if (signal_pending(current)) { ret = -EINTR; goto out; } |
a7885eb8a memcg: swappiness |
1789 1790 |
progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL, false, get_swappiness(mem)); |
c1e862c1f memcg: new force_... |
1791 |
if (!progress) { |
f817ed485 memcg: move all a... |
1792 |
nr_retries--; |
c1e862c1f memcg: new force_... |
1793 1794 1795 |
/* maybe some writeback is necessary */ congestion_wait(WRITE, HZ/10); } |
f817ed485 memcg: move all a... |
1796 1797 |
} |
08e552c69 memcg: synchroniz... |
1798 |
lru_add_drain(); |
f817ed485 memcg: move all a... |
1799 1800 1801 1802 1803 |
/* try move_account...there may be some *locked* pages. */ if (mem->res.usage) goto move_account; ret = 0; goto out; |
cc8475822 memory cgroup enh... |
1804 |
} |
c1e862c1f memcg: new force_... |
1805 1806 1807 1808 |
int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event) { return mem_cgroup_force_empty(mem_cgroup_from_cont(cont), true); } |
18f59ea7d memcg: memory cgr... |
1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 |
static u64 mem_cgroup_hierarchy_read(struct cgroup *cont, struct cftype *cft) { return mem_cgroup_from_cont(cont)->use_hierarchy; } static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft, u64 val) { int retval = 0; struct mem_cgroup *mem = mem_cgroup_from_cont(cont); struct cgroup *parent = cont->parent; struct mem_cgroup *parent_mem = NULL; if (parent) parent_mem = mem_cgroup_from_cont(parent); cgroup_lock(); /* * If parent's use_hiearchy is set, we can't make any modifications * in the child subtrees. If it is unset, then the change can * occur, provided the current cgroup has no children. * * For the root cgroup, parent_mem is NULL, we allow value to be * set if there are no children. */ if ((!parent_mem || !parent_mem->use_hierarchy) && (val == 1 || val == 0)) { if (list_empty(&cont->children)) mem->use_hierarchy = val; else retval = -EBUSY; } else retval = -EINVAL; cgroup_unlock(); return retval; } |
2c3daa722 CGroup API files:... |
1846 |
static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) |
8cdea7c05 Memory controller... |
1847 |
{ |
8c7c6e34a memcg: mem+swap c... |
1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 |
struct mem_cgroup *mem = mem_cgroup_from_cont(cont); u64 val = 0; int type, name; type = MEMFILE_TYPE(cft->private); name = MEMFILE_ATTR(cft->private); switch (type) { case _MEM: val = res_counter_read_u64(&mem->res, name); break; case _MEMSWAP: if (do_swap_account) val = res_counter_read_u64(&mem->memsw, name); break; default: BUG(); break; } return val; |
8cdea7c05 Memory controller... |
1867 |
} |
628f42355 memcg: limit chan... |
1868 1869 1870 1871 |
/* * The user of this function is... * RES_LIMIT. */ |
856c13aa1 cgroup files: con... |
1872 1873 |
static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft, const char *buffer) |
8cdea7c05 Memory controller... |
1874 |
{ |
628f42355 memcg: limit chan... |
1875 |
struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); |
8c7c6e34a memcg: mem+swap c... |
1876 |
int type, name; |
628f42355 memcg: limit chan... |
1877 1878 |
unsigned long long val; int ret; |
8c7c6e34a memcg: mem+swap c... |
1879 1880 1881 |
type = MEMFILE_TYPE(cft->private); name = MEMFILE_ATTR(cft->private); switch (name) { |
628f42355 memcg: limit chan... |
1882 1883 1884 |
case RES_LIMIT: /* This function does all necessary parse...reuse it */ ret = res_counter_memparse_write_strategy(buffer, &val); |
8c7c6e34a memcg: mem+swap c... |
1885 1886 1887 |
if (ret) break; if (type == _MEM) |
628f42355 memcg: limit chan... |
1888 |
ret = mem_cgroup_resize_limit(memcg, val); |
8c7c6e34a memcg: mem+swap c... |
1889 1890 |
else ret = mem_cgroup_resize_memsw_limit(memcg, val); |
628f42355 memcg: limit chan... |
1891 1892 1893 1894 1895 1896 |
break; default: ret = -EINVAL; /* should be BUG() ? */ break; } return ret; |
8cdea7c05 Memory controller... |
1897 |
} |
fee7b548e memcg: show real ... |
1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 |
static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg, unsigned long long *mem_limit, unsigned long long *memsw_limit) { struct cgroup *cgroup; unsigned long long min_limit, min_memsw_limit, tmp; min_limit = res_counter_read_u64(&memcg->res, RES_LIMIT); min_memsw_limit = res_counter_read_u64(&memcg->memsw, RES_LIMIT); cgroup = memcg->css.cgroup; if (!memcg->use_hierarchy) goto out; while (cgroup->parent) { cgroup = cgroup->parent; memcg = mem_cgroup_from_cont(cgroup); if (!memcg->use_hierarchy) break; tmp = res_counter_read_u64(&memcg->res, RES_LIMIT); min_limit = min(min_limit, tmp); tmp = res_counter_read_u64(&memcg->memsw, RES_LIMIT); min_memsw_limit = min(min_memsw_limit, tmp); } out: *mem_limit = min_limit; *memsw_limit = min_memsw_limit; return; } |
29f2a4dac memcgroup: implem... |
1925 |
static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) |
c84872e16 memcgroup: add th... |
1926 1927 |
{ struct mem_cgroup *mem; |
8c7c6e34a memcg: mem+swap c... |
1928 |
int type, name; |
c84872e16 memcgroup: add th... |
1929 1930 |
mem = mem_cgroup_from_cont(cont); |
8c7c6e34a memcg: mem+swap c... |
1931 1932 1933 |
type = MEMFILE_TYPE(event); name = MEMFILE_ATTR(event); switch (name) { |
29f2a4dac memcgroup: implem... |
1934 |
case RES_MAX_USAGE: |
8c7c6e34a memcg: mem+swap c... |
1935 1936 1937 1938 |
if (type == _MEM) res_counter_reset_max(&mem->res); else res_counter_reset_max(&mem->memsw); |
29f2a4dac memcgroup: implem... |
1939 1940 |
break; case RES_FAILCNT: |
8c7c6e34a memcg: mem+swap c... |
1941 1942 1943 1944 |
if (type == _MEM) res_counter_reset_failcnt(&mem->res); else res_counter_reset_failcnt(&mem->memsw); |
29f2a4dac memcgroup: implem... |
1945 1946 |
break; } |
85cc59db1 memcgroup: use tr... |
1947 |
return 0; |
c84872e16 memcgroup: add th... |
1948 |
} |
14067bb3e memcg: hierarchic... |
1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 |
/* For read statistics */ enum { MCS_CACHE, MCS_RSS, MCS_PGPGIN, MCS_PGPGOUT, MCS_INACTIVE_ANON, MCS_ACTIVE_ANON, MCS_INACTIVE_FILE, MCS_ACTIVE_FILE, MCS_UNEVICTABLE, NR_MCS_STAT, }; struct mcs_total_stat { s64 stat[NR_MCS_STAT]; |
d2ceb9b7d memory cgroup enh... |
1966 |
}; |
14067bb3e memcg: hierarchic... |
1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 |
struct { char *local_name; char *total_name; } memcg_stat_strings[NR_MCS_STAT] = { {"cache", "total_cache"}, {"rss", "total_rss"}, {"pgpgin", "total_pgpgin"}, {"pgpgout", "total_pgpgout"}, {"inactive_anon", "total_inactive_anon"}, {"active_anon", "total_active_anon"}, {"inactive_file", "total_inactive_file"}, {"active_file", "total_active_file"}, {"unevictable", "total_unevictable"} }; static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data) { struct mcs_total_stat *s = data; s64 val; /* per cpu stat */ val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_CACHE); s->stat[MCS_CACHE] += val * PAGE_SIZE; val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS); s->stat[MCS_RSS] += val * PAGE_SIZE; val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGIN_COUNT); s->stat[MCS_PGPGIN] += val; val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGOUT_COUNT); s->stat[MCS_PGPGOUT] += val; /* per zone stat */ val = mem_cgroup_get_local_zonestat(mem, LRU_INACTIVE_ANON); s->stat[MCS_INACTIVE_ANON] += val * PAGE_SIZE; val = mem_cgroup_get_local_zonestat(mem, LRU_ACTIVE_ANON); s->stat[MCS_ACTIVE_ANON] += val * PAGE_SIZE; val = mem_cgroup_get_local_zonestat(mem, LRU_INACTIVE_FILE); s->stat[MCS_INACTIVE_FILE] += val * PAGE_SIZE; val = mem_cgroup_get_local_zonestat(mem, LRU_ACTIVE_FILE); s->stat[MCS_ACTIVE_FILE] += val * PAGE_SIZE; val = mem_cgroup_get_local_zonestat(mem, LRU_UNEVICTABLE); s->stat[MCS_UNEVICTABLE] += val * PAGE_SIZE; return 0; } static void mem_cgroup_get_total_stat(struct mem_cgroup *mem, struct mcs_total_stat *s) { mem_cgroup_walk_tree(mem, s, mem_cgroup_get_local_stat); } |
c64745cf0 CGroup API files:... |
2017 2018 |
static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, struct cgroup_map_cb *cb) |
d2ceb9b7d memory cgroup enh... |
2019 |
{ |
d2ceb9b7d memory cgroup enh... |
2020 |
struct mem_cgroup *mem_cont = mem_cgroup_from_cont(cont); |
14067bb3e memcg: hierarchic... |
2021 |
struct mcs_total_stat mystat; |
d2ceb9b7d memory cgroup enh... |
2022 |
int i; |
14067bb3e memcg: hierarchic... |
2023 2024 |
memset(&mystat, 0, sizeof(mystat)); mem_cgroup_get_local_stat(mem_cont, &mystat); |
d2ceb9b7d memory cgroup enh... |
2025 |
|
14067bb3e memcg: hierarchic... |
2026 2027 |
for (i = 0; i < NR_MCS_STAT; i++) cb->fill(cb, memcg_stat_strings[i].local_name, mystat.stat[i]); |
7b854121e Unevictable LRU P... |
2028 |
|
14067bb3e memcg: hierarchic... |
2029 |
/* Hierarchical information */ |
fee7b548e memcg: show real ... |
2030 2031 2032 2033 2034 2035 2036 |
{ unsigned long long limit, memsw_limit; memcg_get_hierarchical_limit(mem_cont, &limit, &memsw_limit); cb->fill(cb, "hierarchical_memory_limit", limit); if (do_swap_account) cb->fill(cb, "hierarchical_memsw_limit", memsw_limit); } |
7f016ee8b memcg: show recla... |
2037 |
|
14067bb3e memcg: hierarchic... |
2038 2039 2040 2041 |
memset(&mystat, 0, sizeof(mystat)); mem_cgroup_get_total_stat(mem_cont, &mystat); for (i = 0; i < NR_MCS_STAT; i++) cb->fill(cb, memcg_stat_strings[i].total_name, mystat.stat[i]); |
7f016ee8b memcg: show recla... |
2042 |
#ifdef CONFIG_DEBUG_VM |
c772be939 memcg: fix calcul... |
2043 |
cb->fill(cb, "inactive_ratio", calc_inactive_ratio(mem_cont, NULL)); |
7f016ee8b memcg: show recla... |
2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 |
{ int nid, zid; struct mem_cgroup_per_zone *mz; unsigned long recent_rotated[2] = {0, 0}; unsigned long recent_scanned[2] = {0, 0}; for_each_online_node(nid) for (zid = 0; zid < MAX_NR_ZONES; zid++) { mz = mem_cgroup_zoneinfo(mem_cont, nid, zid); recent_rotated[0] += mz->reclaim_stat.recent_rotated[0]; recent_rotated[1] += mz->reclaim_stat.recent_rotated[1]; recent_scanned[0] += mz->reclaim_stat.recent_scanned[0]; recent_scanned[1] += mz->reclaim_stat.recent_scanned[1]; } cb->fill(cb, "recent_rotated_anon", recent_rotated[0]); cb->fill(cb, "recent_rotated_file", recent_rotated[1]); cb->fill(cb, "recent_scanned_anon", recent_scanned[0]); cb->fill(cb, "recent_scanned_file", recent_scanned[1]); } #endif |
d2ceb9b7d memory cgroup enh... |
2070 2071 |
return 0; } |
a7885eb8a memcg: swappiness |
2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 |
static u64 mem_cgroup_swappiness_read(struct cgroup *cgrp, struct cftype *cft) { struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); return get_swappiness(memcg); } static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft, u64 val) { struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); struct mem_cgroup *parent; |
068b38c1f memcg: fix a race... |
2084 |
|
a7885eb8a memcg: swappiness |
2085 2086 2087 2088 2089 2090 2091 |
if (val > 100) return -EINVAL; if (cgrp->parent == NULL) return -EINVAL; parent = mem_cgroup_from_cont(cgrp->parent); |
068b38c1f memcg: fix a race... |
2092 2093 |
cgroup_lock(); |
a7885eb8a memcg: swappiness |
2094 2095 |
/* If under hierarchy, only empty-root can set this value */ if ((parent->use_hierarchy) || |
068b38c1f memcg: fix a race... |
2096 2097 |
(memcg->use_hierarchy && !list_empty(&cgrp->children))) { cgroup_unlock(); |
a7885eb8a memcg: swappiness |
2098 |
return -EINVAL; |
068b38c1f memcg: fix a race... |
2099 |
} |
a7885eb8a memcg: swappiness |
2100 2101 2102 2103 |
spin_lock(&memcg->reclaim_param_lock); memcg->swappiness = val; spin_unlock(&memcg->reclaim_param_lock); |
068b38c1f memcg: fix a race... |
2104 |
cgroup_unlock(); |
a7885eb8a memcg: swappiness |
2105 2106 |
return 0; } |
c1e862c1f memcg: new force_... |
2107 |
|
8cdea7c05 Memory controller... |
2108 2109 |
static struct cftype mem_cgroup_files[] = { { |
0eea10301 Memory controller... |
2110 |
.name = "usage_in_bytes", |
8c7c6e34a memcg: mem+swap c... |
2111 |
.private = MEMFILE_PRIVATE(_MEM, RES_USAGE), |
2c3daa722 CGroup API files:... |
2112 |
.read_u64 = mem_cgroup_read, |
8cdea7c05 Memory controller... |
2113 2114 |
}, { |
c84872e16 memcgroup: add th... |
2115 |
.name = "max_usage_in_bytes", |
8c7c6e34a memcg: mem+swap c... |
2116 |
.private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE), |
29f2a4dac memcgroup: implem... |
2117 |
.trigger = mem_cgroup_reset, |
c84872e16 memcgroup: add th... |
2118 2119 2120 |
.read_u64 = mem_cgroup_read, }, { |
0eea10301 Memory controller... |
2121 |
.name = "limit_in_bytes", |
8c7c6e34a memcg: mem+swap c... |
2122 |
.private = MEMFILE_PRIVATE(_MEM, RES_LIMIT), |
856c13aa1 cgroup files: con... |
2123 |
.write_string = mem_cgroup_write, |
2c3daa722 CGroup API files:... |
2124 |
.read_u64 = mem_cgroup_read, |
8cdea7c05 Memory controller... |
2125 2126 2127 |
}, { .name = "failcnt", |
8c7c6e34a memcg: mem+swap c... |
2128 |
.private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT), |
29f2a4dac memcgroup: implem... |
2129 |
.trigger = mem_cgroup_reset, |
2c3daa722 CGroup API files:... |
2130 |
.read_u64 = mem_cgroup_read, |
8cdea7c05 Memory controller... |
2131 |
}, |
8697d3319 Memory controller... |
2132 |
{ |
d2ceb9b7d memory cgroup enh... |
2133 |
.name = "stat", |
c64745cf0 CGroup API files:... |
2134 |
.read_map = mem_control_stat_show, |
d2ceb9b7d memory cgroup enh... |
2135 |
}, |
c1e862c1f memcg: new force_... |
2136 2137 2138 2139 |
{ .name = "force_empty", .trigger = mem_cgroup_force_empty_write, }, |
18f59ea7d memcg: memory cgr... |
2140 2141 2142 2143 2144 |
{ .name = "use_hierarchy", .write_u64 = mem_cgroup_hierarchy_write, .read_u64 = mem_cgroup_hierarchy_read, }, |
a7885eb8a memcg: swappiness |
2145 2146 2147 2148 2149 |
{ .name = "swappiness", .read_u64 = mem_cgroup_swappiness_read, .write_u64 = mem_cgroup_swappiness_write, }, |
8cdea7c05 Memory controller... |
2150 |
}; |
8c7c6e34a memcg: mem+swap c... |
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
/*
 * Control files for mem+swap ("memsw") accounting, mirroring the plain
 * memory files: usage, max-usage watermark, limit and failcnt.  They are
 * only registered when swap accounting is actually enabled.
 */
static struct cftype memsw_cgroup_files[] = {
	{
		.name = "memsw.usage_in_bytes",
		.private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE),
		.read_u64 = mem_cgroup_read,
	},
	{
		.name = "memsw.max_usage_in_bytes",
		.private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE),
		.trigger = mem_cgroup_reset,
		.read_u64 = mem_cgroup_read,
	},
	{
		.name = "memsw.limit_in_bytes",
		.private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT),
		.write_string = mem_cgroup_write,
		.read_u64 = mem_cgroup_read,
	},
	{
		.name = "memsw.failcnt",
		.private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT),
		.trigger = mem_cgroup_reset,
		.read_u64 = mem_cgroup_read,
	},
};

/* Add the memsw control files iff swap accounting was enabled at boot. */
static int register_memsw_files(struct cgroup *cont, struct cgroup_subsys *ss)
{
	if (!do_swap_account)
		return 0;
	return cgroup_add_files(cont, ss, memsw_cgroup_files,
				ARRAY_SIZE(memsw_cgroup_files));
}
#else
static int register_memsw_files(struct cgroup *cont, struct cgroup_subsys *ss)
{
	return 0;
}
#endif
6d12e2d8d per-zone and recl... |
2191 2192 2193 |
static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) { struct mem_cgroup_per_node *pn; |
1ecaab2bd per-zone and recl... |
2194 |
struct mem_cgroup_per_zone *mz; |
b69408e88 vmscan: Use an in... |
2195 |
enum lru_list l; |
41e3355de memcg: fix node_s... |
2196 |
int zone, tmp = node; |
1ecaab2bd per-zone and recl... |
2197 2198 2199 2200 2201 2202 2203 2204 |
/* * This routine is called against possible nodes. * But it's BUG to call kmalloc() against offline node. * * TODO: this routine can waste much memory for nodes which will * never be onlined. It's better to use memory hotplug callback * function. */ |
41e3355de memcg: fix node_s... |
2205 2206 2207 |
if (!node_state(node, N_NORMAL_MEMORY)) tmp = -1; pn = kmalloc_node(sizeof(*pn), GFP_KERNEL, tmp); |
6d12e2d8d per-zone and recl... |
2208 2209 |
if (!pn) return 1; |
1ecaab2bd per-zone and recl... |
2210 |
|
6d12e2d8d per-zone and recl... |
2211 2212 |
mem->info.nodeinfo[node] = pn; memset(pn, 0, sizeof(*pn)); |
1ecaab2bd per-zone and recl... |
2213 2214 2215 |
for (zone = 0; zone < MAX_NR_ZONES; zone++) { mz = &pn->zoneinfo[zone]; |
b69408e88 vmscan: Use an in... |
2216 2217 |
for_each_lru(l) INIT_LIST_HEAD(&mz->lists[l]); |
1ecaab2bd per-zone and recl... |
2218 |
} |
6d12e2d8d per-zone and recl... |
2219 2220 |
return 0; } |
1ecaab2bd per-zone and recl... |
2221 2222 2223 2224 |
static void free_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) { kfree(mem->info.nodeinfo[node]); } |
c8dad2bb6 memcg: reduce siz... |
2225 2226 2227 2228 2229 |
static int mem_cgroup_size(void) { int cpustat_size = nr_cpu_ids * sizeof(struct mem_cgroup_stat_cpu); return sizeof(struct mem_cgroup) + cpustat_size; } |
333279487 memcgroup: use vm... |
2230 2231 2232 |
static struct mem_cgroup *mem_cgroup_alloc(void) { struct mem_cgroup *mem; |
c8dad2bb6 memcg: reduce siz... |
2233 |
int size = mem_cgroup_size(); |
333279487 memcgroup: use vm... |
2234 |
|
c8dad2bb6 memcg: reduce siz... |
2235 2236 |
if (size < PAGE_SIZE) mem = kmalloc(size, GFP_KERNEL); |
333279487 memcgroup: use vm... |
2237 |
else |
c8dad2bb6 memcg: reduce siz... |
2238 |
mem = vmalloc(size); |
333279487 memcgroup: use vm... |
2239 2240 |
if (mem) |
c8dad2bb6 memcg: reduce siz... |
2241 |
memset(mem, 0, size); |
333279487 memcgroup: use vm... |
2242 2243 |
return mem; } |
/*
 * At destroying mem_cgroup, references from swap_cgroup can remain.
 * (scanning all at force_empty is too costly...)
 *
 * Instead of clearing all references at force_empty, we remember
 * the number of reference from swap_cgroup and free mem_cgroup when
 * it goes down to 0.
 *
 * Removal of cgroup itself succeeds regardless of refs from swap.
 */
static void __mem_cgroup_free(struct mem_cgroup *mem)
{
	int node;

	/* return the css id for reuse before tearing the rest down */
	free_css_id(&mem_cgroup_subsys, &mem->css);

	for_each_node_state(node, N_POSSIBLE)
		free_mem_cgroup_per_zone_info(mem, node);

	/* must mirror the kmalloc/vmalloc decision in mem_cgroup_alloc() */
	if (mem_cgroup_size() < PAGE_SIZE)
		kfree(mem);
	else
		vfree(mem);
}
/* Take a reference pinning @mem (e.g. on behalf of swap_cgroup entries). */
static void mem_cgroup_get(struct mem_cgroup *mem)
{
	atomic_inc(&mem->refcnt);
}

/*
 * Drop a reference; the last put frees the mem_cgroup and then releases
 * the reference the child held on its hierarchy parent (taken at create
 * time), which may recursively free ancestors.
 */
static void mem_cgroup_put(struct mem_cgroup *mem)
{
	if (atomic_dec_and_test(&mem->refcnt)) {
		/* fetch the parent before @mem is freed */
		struct mem_cgroup *parent = parent_mem_cgroup(mem);
		__mem_cgroup_free(mem);
		if (parent)
			mem_cgroup_put(parent);
	}
}
/*
 * Returns the parent mem_cgroup in memcgroup hierarchy with hierarchy enabled.
 */
static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem)
{
	/* res.parent is only chained when use_hierarchy is set at create time */
	if (!mem->res.parent)
		return NULL;
	return mem_cgroup_from_res_counter(mem->res.parent, res);
}
333279487 memcgroup: use vm... |
2288 |
|
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
/*
 * Turn on mem+swap accounting unless it was turned off on the kernel
 * command line ("noswapaccount") or the controller itself is disabled.
 * Called once, when the root cgroup is created.
 */
static void __init enable_swap_cgroup(void)
{
	if (!mem_cgroup_disabled() && really_do_swap_account)
		do_swap_account = 1;
}
#else
static void __init enable_swap_cgroup(void)
{
}
#endif
/*
 * cgroup "create" callback: allocate and set up a new mem_cgroup.
 * Returns the embedded css on success, ERR_PTR(-ENOMEM) on failure.
 */
static struct cgroup_subsys_state * __ref
mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
{
	struct mem_cgroup *mem, *parent;
	long error = -ENOMEM;
	int node;

	mem = mem_cgroup_alloc();
	if (!mem)
		return ERR_PTR(error);

	for_each_node_state(node, N_POSSIBLE)
		if (alloc_mem_cgroup_per_zone_info(mem, node))
			goto free_out;
	/* root ? */
	if (cont->parent == NULL) {
		/* swap accounting can only be decided once, at root creation */
		enable_swap_cgroup();
		parent = NULL;
	} else {
		parent = mem_cgroup_from_cont(cont->parent);
		/* children inherit the hierarchy setting from their parent */
		mem->use_hierarchy = parent->use_hierarchy;
	}

	if (parent && parent->use_hierarchy) {
		/* chain the counters so charges propagate up the hierarchy */
		res_counter_init(&mem->res, &parent->res);
		res_counter_init(&mem->memsw, &parent->memsw);
		/*
		 * We increment refcnt of the parent to ensure that we can
		 * safely access it on res_counter_charge/uncharge.
		 * This refcnt will be decremented when freeing this
		 * mem_cgroup(see mem_cgroup_put).
		 */
		mem_cgroup_get(parent);
	} else {
		res_counter_init(&mem->res, NULL);
		res_counter_init(&mem->memsw, NULL);
	}
	mem->last_scanned_child = 0;
	spin_lock_init(&mem->reclaim_param_lock);

	if (parent)
		mem->swappiness = get_swappiness(parent);
	/* initial reference; dropped in mem_cgroup_destroy() */
	atomic_set(&mem->refcnt, 1);
	return &mem->css;
free_out:
	__mem_cgroup_free(mem);
	return ERR_PTR(error);
}
/*
 * cgroup "pre_destroy" callback: drain all charges from the group before
 * it is removed (free_all == false: reclaim/move rather than force-drop).
 */
static int mem_cgroup_pre_destroy(struct cgroup_subsys *ss,
					struct cgroup *cont)
{
	struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
	return mem_cgroup_force_empty(mem, false);
}
/*
 * cgroup "destroy" callback: drop the initial reference.  The actual
 * freeing may be deferred until remaining swap references go away
 * (see mem_cgroup_put()).
 */
static void mem_cgroup_destroy(struct cgroup_subsys *ss,
				struct cgroup *cont)
{
	struct mem_cgroup *mem = mem_cgroup_from_cont(cont);

	mem_cgroup_put(mem);
}

/*
 * cgroup "populate" callback: create the control files.  The memsw
 * files are added only when swap accounting is enabled.
 */
static int mem_cgroup_populate(struct cgroup_subsys *ss,
				struct cgroup *cont)
{
	int ret;

	ret = cgroup_add_files(cont, ss, mem_cgroup_files,
				ARRAY_SIZE(mem_cgroup_files));

	if (!ret)
		ret = register_memsw_files(cont, ss);

	return ret;
}
/*
 * cgroup "attach" callback.  Charges are not migrated yet (see FIXME);
 * the lock/unlock pair serializes attach against paths holding
 * memcg_tasklist, it is not dead code.
 */
static void mem_cgroup_move_task(struct cgroup_subsys *ss,
				struct cgroup *cont,
				struct cgroup *old_cont,
				struct task_struct *p)
{
	mutex_lock(&memcg_tasklist);
	/*
	 * FIXME: It's better to move charges of this process from old
	 * memcg to new memcg. But it's just on TODO-List now.
	 */
	mutex_unlock(&memcg_tasklist);
}
/* The memory controller's cgroup subsystem descriptor. */
struct cgroup_subsys mem_cgroup_subsys = {
	.name = "memory",
	.subsys_id = mem_cgroup_subsys_id,
	.create = mem_cgroup_create,
	.pre_destroy = mem_cgroup_pre_destroy,
	.destroy = mem_cgroup_destroy,
	.populate = mem_cgroup_populate,
	.attach = mem_cgroup_move_task,
	.early_init = 0,	/* initialized after slab/page allocators are up */
	.use_id = 1,		/* css ids in use; must be freed in __mem_cgroup_free() */
};
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
/* Boot option "noswapaccount": opt out of mem+swap accounting. */
static int __init disable_swap_account(char *s)
{
	really_do_swap_account = 0;
	return 1;	/* 1 == option consumed, per __setup() convention */
}
__setup("noswapaccount", disable_swap_account);
#endif