Commit 8cdea7c05454260c0d4d83503949c358eb131d17

Authored by Balbir Singh
Committed by Linus Torvalds
1 parent e552b66170

Memory controller: cgroups setup

Set up the memory cgroup and add basic hooks and controls to integrate
and work with the cgroup.

Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: David Rientjes <rientjes@google.com>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 5 changed files with 161 additions and 0 deletions Side-by-side Diff

include/linux/cgroup_subsys.h
... ... @@ -36,4 +36,10 @@
36 36 #endif
37 37  
38 38 /* */
  39 +
  40 +#ifdef CONFIG_CGROUP_MEM_CONT
  41 +SUBSYS(mem_cgroup)
  42 +#endif
  43 +
  44 +/* */
include/linux/memcontrol.h
  1 +/* memcontrol.h - Memory Controller
  2 + *
  3 + * Copyright IBM Corporation, 2007
  4 + * Author Balbir Singh <balbir@linux.vnet.ibm.com>
  5 + *
  6 + * This program is free software; you can redistribute it and/or modify
  7 + * it under the terms of the GNU General Public License as published by
  8 + * the Free Software Foundation; either version 2 of the License, or
  9 + * (at your option) any later version.
  10 + *
  11 + * This program is distributed in the hope that it will be useful,
  12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14 + * GNU General Public License for more details.
  15 + */
  16 +
  17 +#ifndef _LINUX_MEMCONTROL_H
  18 +#define _LINUX_MEMCONTROL_H
  19 +
  20 +#endif /* _LINUX_MEMCONTROL_H */
... ... @@ -397,6 +397,13 @@
397 397 If you are using a distro that was released in 2006 or later,
398 398 it should be safe to say N here.
399 399  
  400 +config CGROUP_MEM_CONT
  401 + bool "Memory controller for cgroups"
  402 + depends on CGROUPS && RESOURCE_COUNTERS
  403 + help
  404 + Provides a memory controller that manages both page cache and
  405 + RSS memory.
  406 +
400 407 config PROC_PID_CPUSET
401 408 bool "Include legacy /proc/<pid>/cpuset file"
402 409 depends on CPUSETS
... ... @@ -32,4 +32,5 @@
32 32 obj-$(CONFIG_MIGRATION) += migrate.o
33 33 obj-$(CONFIG_SMP) += allocpercpu.o
34 34 obj-$(CONFIG_QUICKLIST) += quicklist.o
  35 +obj-$(CONFIG_CGROUP_MEM_CONT) += memcontrol.o
  1 +/* memcontrol.c - Memory Controller
  2 + *
  3 + * Copyright IBM Corporation, 2007
  4 + * Author Balbir Singh <balbir@linux.vnet.ibm.com>
  5 + *
  6 + * This program is free software; you can redistribute it and/or modify
  7 + * it under the terms of the GNU General Public License as published by
  8 + * the Free Software Foundation; either version 2 of the License, or
  9 + * (at your option) any later version.
  10 + *
  11 + * This program is distributed in the hope that it will be useful,
  12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14 + * GNU General Public License for more details.
  15 + */
  16 +
  17 +#include <linux/res_counter.h>
  18 +#include <linux/memcontrol.h>
  19 +#include <linux/cgroup.h>
  20 +#include <linux/err.h>
  21 +struct cgroup_subsys mem_cgroup_subsys; /* initialized at the end of this file */
  22 +
  23 +/*
  24 + * The memory controller data structure. The memory controller controls both
  25 + * page cache and RSS per cgroup. We would eventually like to provide
  26 + * statistics based on the statistics developed by Rik van Riel for clock-pro,
  27 + * to help the administrator determine which knobs to tune.
  28 + *
  29 + * TODO: Add a water mark for the memory controller. Reclaim will begin when
  30 + * we hit the water mark.
  31 + */
  32 +struct mem_cgroup {
  33 + struct cgroup_subsys_state css; /* container_of() anchor */
  34 + /*
  35 + * the counter to account for memory usage (backs the control files)
  36 + */
  37 + struct res_counter res;
  38 +};
  39 +
  40 +/*
  41 + * A page_cgroup is associated with every page descriptor. It ties the
  42 + * page to a mem_cgroup and to the per-cgroup LRU list.
  43 + */
  44 +struct page_cgroup {
  45 + struct list_head lru; /* per cgroup LRU list */
  46 + struct page *page; /* the page descriptor for this entry */
  47 + struct mem_cgroup *mem_cgroup; /* associated memory cgroup */
  48 +};
  49 +
  50 +
  51 +static inline
  52 +struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
  53 +{
  54 + return container_of(cgroup_subsys_state(cont,
  55 + mem_cgroup_subsys_id), struct mem_cgroup,
  56 + css);
  57 +}
  58 +
  59 +static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft,
  60 + struct file *file, char __user *userbuf, size_t nbytes,
  61 + loff_t *ppos)
  62 +{
  63 + return res_counter_read(&mem_cgroup_from_cont(cont)->res,
  64 + cft->private, userbuf, nbytes, ppos);
  65 +}
  66 +
  67 +static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
  68 + struct file *file, const char __user *userbuf,
  69 + size_t nbytes, loff_t *ppos)
  70 +{
  71 + return res_counter_write(&mem_cgroup_from_cont(cont)->res,
  72 + cft->private, userbuf, nbytes, ppos);
  73 +}
  74 +/* Control files registered by mem_cgroup_populate(). */
  75 +static struct cftype mem_cgroup_files[] = {
  76 + {
  77 + .name = "usage",
  78 + .private = RES_USAGE, /* forwarded by the handlers as cft->private */
  79 + .read = mem_cgroup_read,
  80 + },
  81 + {
  82 + .name = "limit",
  83 + .private = RES_LIMIT,
  84 + .write = mem_cgroup_write, /* the only entry with a write handler */
  85 + .read = mem_cgroup_read,
  86 + },
  87 + {
  88 + .name = "failcnt",
  89 + .private = RES_FAILCNT,
  90 + .read = mem_cgroup_read,
  91 + },
  92 +};
  93 +
  94 +static struct cgroup_subsys_state *
  95 +mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
  96 +{
  97 + struct mem_cgroup *mem;
  98 +
  99 + mem = kzalloc(sizeof(struct mem_cgroup), GFP_KERNEL);
  100 + if (!mem)
  101 + return -ENOMEM;
  102 +
  103 + res_counter_init(&mem->res);
  104 + return &mem->css;
  105 +}
  106 +
  107 +static void mem_cgroup_destroy(struct cgroup_subsys *ss,
  108 + struct cgroup *cont)
  109 +{
  110 + kfree(mem_cgroup_from_cont(cont));
  111 +}
  112 +
  113 +static int mem_cgroup_populate(struct cgroup_subsys *ss,
  114 + struct cgroup *cont)
  115 +{
  116 + return cgroup_add_files(cont, ss, mem_cgroup_files,
  117 + ARRAY_SIZE(mem_cgroup_files));
  118 +}
  119 +/* Subsystem descriptor tying the callbacks above to the "memory" name. */
  120 +struct cgroup_subsys mem_cgroup_subsys = {
  121 + .name = "memory",
  122 + .subsys_id = mem_cgroup_subsys_id,
  123 + .create = mem_cgroup_create,
  124 + .destroy = mem_cgroup_destroy,
  125 + .populate = mem_cgroup_populate,
  126 + .early_init = 0,
  127 +};