Commit 28dbc4b6a01fb579a9441c7b81e3d3413dc452df

Authored by Balbir Singh
Committed by Linus Torvalds
1 parent 52bc0d8210

memcg: memory cgroup resource counters for hierarchy

Add support for building hierarchies in resource counters.  Cgroups allows
us to build a deep hierarchy, but we currently don't link the resource
counters belonging to the memory controller control groups, in the same
fashion as the corresponding cgroup entries in the cgroup hierarchy.  This
patch provides the infrastructure for resource counters that have the same
hierarchy as their cgroup counterparts.

This set of patches is based on the resource counter hierarchy patches
posted by Pavel Emelianov.

NOTE: Building hierarchies is expensive, deeper hierarchies imply charging
all the way up to the root.  It is known that hierarchies are
expensive, so the user needs to be careful and aware of the trade-offs
before creating very deep ones.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Paul Menage <menage@google.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 3 changed files with 54 additions and 18 deletions Side-by-side Diff

include/linux/res_counter.h
... ... @@ -43,6 +43,10 @@
43 43 * the routines below consider this to be IRQ-safe
44 44 */
45 45 spinlock_t lock;
  46 + /*
  47 + * Parent counter, used for hierarchial resource accounting
  48 + */
  49 + struct res_counter *parent;
46 50 };
47 51  
48 52 /**
... ... @@ -87,7 +91,7 @@
87 91 * helpers for accounting
88 92 */
89 93  
90   -void res_counter_init(struct res_counter *counter);
  94 +void res_counter_init(struct res_counter *counter, struct res_counter *parent);
91 95  
92 96 /*
93 97 * charge - try to consume more resource.
... ... @@ -103,7 +107,7 @@
103 107 int __must_check res_counter_charge_locked(struct res_counter *counter,
104 108 unsigned long val);
105 109 int __must_check res_counter_charge(struct res_counter *counter,
106   - unsigned long val);
  110 + unsigned long val, struct res_counter **limit_fail_at);
107 111  
108 112 /*
109 113 * uncharge - tell that some portion of the resource is released
kernel/res_counter.c
... ... @@ -15,10 +15,11 @@
15 15 #include <linux/uaccess.h>
16 16 #include <linux/mm.h>
17 17  
18   -void res_counter_init(struct res_counter *counter)
  18 +void res_counter_init(struct res_counter *counter, struct res_counter *parent)
19 19 {
20 20 spin_lock_init(&counter->lock);
21 21 counter->limit = (unsigned long long)LLONG_MAX;
  22 + counter->parent = parent;
22 23 }
23 24  
24 25 int res_counter_charge_locked(struct res_counter *counter, unsigned long val)
25 26  
26 27  
... ... @@ -34,14 +35,34 @@
34 35 return 0;
35 36 }
36 37  
37   -int res_counter_charge(struct res_counter *counter, unsigned long val)
  38 +int res_counter_charge(struct res_counter *counter, unsigned long val,
  39 + struct res_counter **limit_fail_at)
38 40 {
39 41 int ret;
40 42 unsigned long flags;
  43 + struct res_counter *c, *u;
41 44  
42   - spin_lock_irqsave(&counter->lock, flags);
43   - ret = res_counter_charge_locked(counter, val);
44   - spin_unlock_irqrestore(&counter->lock, flags);
  45 + *limit_fail_at = NULL;
  46 + local_irq_save(flags);
  47 + for (c = counter; c != NULL; c = c->parent) {
  48 + spin_lock(&c->lock);
  49 + ret = res_counter_charge_locked(c, val);
  50 + spin_unlock(&c->lock);
  51 + if (ret < 0) {
  52 + *limit_fail_at = c;
  53 + goto undo;
  54 + }
  55 + }
  56 + ret = 0;
  57 + goto done;
  58 +undo:
  59 + for (u = counter; u != c; u = u->parent) {
  60 + spin_lock(&u->lock);
  61 + res_counter_uncharge_locked(u, val);
  62 + spin_unlock(&u->lock);
  63 + }
  64 +done:
  65 + local_irq_restore(flags);
45 66 return ret;
46 67 }
47 68  
48 69  
... ... @@ -56,10 +77,15 @@
56 77 void res_counter_uncharge(struct res_counter *counter, unsigned long val)
57 78 {
58 79 unsigned long flags;
  80 + struct res_counter *c;
59 81  
60   - spin_lock_irqsave(&counter->lock, flags);
61   - res_counter_uncharge_locked(counter, val);
62   - spin_unlock_irqrestore(&counter->lock, flags);
  82 + local_irq_save(flags);
  83 + for (c = counter; c != NULL; c = c->parent) {
  84 + spin_lock(&c->lock);
  85 + res_counter_uncharge_locked(c, val);
  86 + spin_unlock(&c->lock);
  87 + }
  88 + local_irq_restore(flags);
63 89 }
64 90  
65 91  
... ... @@ -471,6 +471,7 @@
471 471 {
472 472 struct mem_cgroup *mem;
473 473 int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
  474 + struct res_counter *fail_res;
474 475 /*
475 476 * We always charge the cgroup the mm_struct belongs to.
476 477 * The mm_struct's mem_cgroup changes on task migration if the
477 478  
... ... @@ -499,11 +500,12 @@
499 500 int ret;
500 501 bool noswap = false;
501 502  
502   - ret = res_counter_charge(&mem->res, PAGE_SIZE);
  503 + ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res);
503 504 if (likely(!ret)) {
504 505 if (!do_swap_account)
505 506 break;
506   - ret = res_counter_charge(&mem->memsw, PAGE_SIZE);
  507 + ret = res_counter_charge(&mem->memsw, PAGE_SIZE,
  508 + &fail_res);
507 509 if (likely(!ret))
508 510 break;
509 511 /* mem+swap counter fails */
510 512  
511 513  
512 514  
... ... @@ -1709,22 +1711,26 @@
1709 1711 static struct cgroup_subsys_state *
1710 1712 mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
1711 1713 {
1712   - struct mem_cgroup *mem;
  1714 + struct mem_cgroup *mem, *parent;
1713 1715 int node;
1714 1716  
1715 1717 mem = mem_cgroup_alloc();
1716 1718 if (!mem)
1717 1719 return ERR_PTR(-ENOMEM);
1718 1720  
1719   - res_counter_init(&mem->res);
1720   - res_counter_init(&mem->memsw);
1721   -
1722 1721 for_each_node_state(node, N_POSSIBLE)
1723 1722 if (alloc_mem_cgroup_per_zone_info(mem, node))
1724 1723 goto free_out;
1725 1724 /* root ? */
1726   - if (cont->parent == NULL)
  1725 + if (cont->parent == NULL) {
1727 1726 enable_swap_cgroup();
  1727 + parent = NULL;
  1728 + } else
  1729 + parent = mem_cgroup_from_cont(cont->parent);
  1730 +
  1731 + res_counter_init(&mem->res, parent ? &parent->res : NULL);
  1732 + res_counter_init(&mem->memsw, parent ? &parent->memsw : NULL);
  1733 +
1728 1734  
1729 1735 return &mem->css;
1730 1736 free_out: