Commit 65c64ce8ee642eb330a4c4d94b664725f2902b44

Authored by Glauber Costa
Committed by David S. Miller
1 parent 7d6c429b26

Partial revert "Basic kernel memory functionality for the Memory Controller"

This reverts commit e5671dfae59b165e2adfd4dfbdeab11ac8db5bda.

After a follow-up discussion with Michal, it was agreed that it would
be better to leave the kmem controller with just the tcp files,
deferring the behavior of the other general memory.kmem.* files
to a later time, when more caches are controlled. This is because
the generic kmem files are not used by tcp accounting and it is
not clear how other slab caches would fit into the scheme.

We are reverting the original commit so we can track the reference.
Part of the patch is kept, because it was used by the later tcp
code. Conflicts are shown in the bottom. init/Kconfig is removed from
the revert entirely.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
CC: Kirill A. Shutemov <kirill@shutemov.name>
CC: Paul Menage <paul@paulmenage.org>
CC: Greg Thelen <gthelen@google.com>
CC: Johannes Weiner <jweiner@redhat.com>
CC: David S. Miller <davem@davemloft.net>

Conflicts:

	Documentation/cgroups/memory.txt
	mm/memcontrol.c
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 2 changed files with 8 additions and 107 deletions Side-by-side Diff

Documentation/cgroups/memory.txt
... ... @@ -44,9 +44,8 @@
44 44 - oom-killer disable knob and oom-notifier
45 45 - Root cgroup has no limit controls.
46 46  
47   - Hugepages is not under control yet. We just manage pages on LRU. To add more
48   - controls, we have to take care of performance. Kernel memory support is work
49   - in progress, and the current version provides basically functionality.
  47 + Kernel memory support is work in progress, and the current version provides
  48 + basically functionality. (See Section 2.7)
50 49  
51 50 Brief summary of control files.
52 51  
53 52  
... ... @@ -57,11 +56,8 @@
57 56 (See 5.5 for details)
58 57 memory.memsw.usage_in_bytes # show current res_counter usage for memory+Swap
59 58 (See 5.5 for details)
60   - memory.kmem.usage_in_bytes # show current res_counter usage for kmem only.
61   - (See 2.7 for details)
62 59 memory.limit_in_bytes # set/show limit of memory usage
63 60 memory.memsw.limit_in_bytes # set/show limit of memory+Swap usage
64   - memory.kmem.limit_in_bytes # if allowed, set/show limit of kernel memory
65 61 memory.failcnt # show the number of memory usage hits limits
66 62 memory.memsw.failcnt # show the number of memory+Swap hits limits
67 63 memory.max_usage_in_bytes # show max memory usage recorded
... ... @@ -76,8 +72,6 @@
76 72 memory.oom_control # set/show oom controls.
77 73 memory.numa_stat # show the number of memory usage per numa node
78 74  
79   - memory.independent_kmem_limit # select whether or not kernel memory limits are
80   - independent of user limits
81 75 memory.kmem.tcp.limit_in_bytes # set/show hard limit for tcp buf memory
82 76 memory.kmem.tcp.usage_in_bytes # show current tcp buf memory allocation
83 77  
84 78  
... ... @@ -271,20 +265,8 @@
271 265 different than user memory, since it can't be swapped out, which makes it
272 266 possible to DoS the system by consuming too much of this precious resource.
273 267  
274   -Some kernel memory resources may be accounted and limited separately from the
275   -main "kmem" resource. For instance, a slab cache that is considered important
276   -enough to be limited separately may have its own knobs.
277   -
278 268 Kernel memory limits are not imposed for the root cgroup. Usage for the root
279 269 cgroup may or may not be accounted.
280   -
281   -Memory limits as specified by the standard Memory Controller may or may not
282   -take kernel memory into consideration. This is achieved through the file
283   -memory.independent_kmem_limit. A Value different than 0 will allow for kernel
284   -memory to be controlled separately.
285   -
286   -When kernel memory limits are not independent, the limit values set in
287   -memory.kmem files are ignored.
288 270  
289 271 Currently no soft limit is implemented for kernel memory. It is future work
290 272 to trigger slab reclaim when those limits are reached.
... ... @@ -229,10 +229,6 @@
229 229 */
230 230 struct res_counter memsw;
231 231 /*
232   - * the counter to account for kmem usage.
233   - */
234   - struct res_counter kmem;
235   - /*
236 232 * Per cgroup active and inactive list, similar to the
237 233 * per zone LRU lists.
238 234 */
... ... @@ -283,11 +279,6 @@
283 279 */
284 280 unsigned long move_charge_at_immigrate;
285 281 /*
286   - * Should kernel memory limits be stabilished independently
287   - * from user memory ?
288   - */
289   - int kmem_independent_accounting;
290   - /*
291 282 * percpu counter.
292 283 */
293 284 struct mem_cgroup_stat_cpu *stat;
... ... @@ -359,14 +350,9 @@
359 350 };
360 351  
361 352 /* for encoding cft->private value on file */
362   -
363   -enum mem_type {
364   - _MEM = 0,
365   - _MEMSWAP,
366   - _OOM_TYPE,
367   - _KMEM,
368   -};
369   -
  353 +#define _MEM (0)
  354 +#define _MEMSWAP (1)
  355 +#define _OOM_TYPE (2)
370 356 #define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val))
371 357 #define MEMFILE_TYPE(val) (((val) >> 16) & 0xffff)
372 358 #define MEMFILE_ATTR(val) ((val) & 0xffff)
373 359  
374 360  
... ... @@ -3919,17 +3905,10 @@
3919 3905 u64 val;
3920 3906  
3921 3907 if (!mem_cgroup_is_root(memcg)) {
3922   - val = 0;
3923   -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
3924   - if (!memcg->kmem_independent_accounting)
3925   - val = res_counter_read_u64(&memcg->kmem, RES_USAGE);
3926   -#endif
3927 3908 if (!swap)
3928   - val += res_counter_read_u64(&memcg->res, RES_USAGE);
  3909 + return res_counter_read_u64(&memcg->res, RES_USAGE);
3929 3910 else
3930   - val += res_counter_read_u64(&memcg->memsw, RES_USAGE);
3931   -
3932   - return val;
  3911 + return res_counter_read_u64(&memcg->memsw, RES_USAGE);
3933 3912 }
3934 3913  
3935 3914 val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
... ... @@ -3962,11 +3941,6 @@
3962 3941 else
3963 3942 val = res_counter_read_u64(&memcg->memsw, name);
3964 3943 break;
3965   -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
3966   - case _KMEM:
3967   - val = res_counter_read_u64(&memcg->kmem, name);
3968   - break;
3969   -#endif
3970 3944 default:
3971 3945 BUG();
3972 3946 break;
3973 3947  
... ... @@ -4696,59 +4670,8 @@
4696 4670 #endif /* CONFIG_NUMA */
4697 4671  
4698 4672 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
4699   -static u64 kmem_limit_independent_read(struct cgroup *cgroup, struct cftype *cft)
4700   -{
4701   - return mem_cgroup_from_cont(cgroup)->kmem_independent_accounting;
4702   -}
4703   -
4704   -static int kmem_limit_independent_write(struct cgroup *cgroup, struct cftype *cft,
4705   - u64 val)
4706   -{
4707   - struct mem_cgroup *memcg = mem_cgroup_from_cont(cgroup);
4708   - struct mem_cgroup *parent = parent_mem_cgroup(memcg);
4709   -
4710   - val = !!val;
4711   -
4712   - /*
4713   - * This follows the same hierarchy restrictions than
4714   - * mem_cgroup_hierarchy_write()
4715   - */
4716   - if (!parent || !parent->use_hierarchy) {
4717   - if (list_empty(&cgroup->children))
4718   - memcg->kmem_independent_accounting = val;
4719   - else
4720   - return -EBUSY;
4721   - }
4722   - else
4723   - return -EINVAL;
4724   -
4725   - return 0;
4726   -}
4727   -static struct cftype kmem_cgroup_files[] = {
4728   - {
4729   - .name = "independent_kmem_limit",
4730   - .read_u64 = kmem_limit_independent_read,
4731   - .write_u64 = kmem_limit_independent_write,
4732   - },
4733   - {
4734   - .name = "kmem.usage_in_bytes",
4735   - .private = MEMFILE_PRIVATE(_KMEM, RES_USAGE),
4736   - .read_u64 = mem_cgroup_read,
4737   - },
4738   - {
4739   - .name = "kmem.limit_in_bytes",
4740   - .private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT),
4741   - .read_u64 = mem_cgroup_read,
4742   - },
4743   -};
4744   -
4745 4673 static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss)
4746 4674 {
4747   - int ret = 0;
4748   -
4749   - ret = cgroup_add_files(cont, ss, kmem_cgroup_files,
4750   - ARRAY_SIZE(kmem_cgroup_files));
4751   -
4752 4675 /*
4753 4676 * Part of this would be better living in a separate allocation
4754 4677 * function, leaving us with just the cgroup tree population work.
... ... @@ -4756,9 +4679,7 @@
4756 4679 * is only initialized after cgroup creation. I found the less
4757 4680 * cumbersome way to deal with it to defer it all to populate time
4758 4681 */
4759   - if (!ret)
4760   - ret = mem_cgroup_sockets_init(cont, ss);
4761   - return ret;
  4682 + return mem_cgroup_sockets_init(cont, ss);
4762 4683 };
4763 4684  
4764 4685 static void kmem_cgroup_destroy(struct cgroup_subsys *ss,
... ... @@ -5092,7 +5013,6 @@
5092 5013 if (parent && parent->use_hierarchy) {
5093 5014 res_counter_init(&memcg->res, &parent->res);
5094 5015 res_counter_init(&memcg->memsw, &parent->memsw);
5095   - res_counter_init(&memcg->kmem, &parent->kmem);
5096 5016 /*
5097 5017 * We increment refcnt of the parent to ensure that we can
5098 5018 * safely access it on res_counter_charge/uncharge.
... ... @@ -5103,7 +5023,6 @@
5103 5023 } else {
5104 5024 res_counter_init(&memcg->res, NULL);
5105 5025 res_counter_init(&memcg->memsw, NULL);
5106   - res_counter_init(&memcg->kmem, NULL);
5107 5026 }
5108 5027 memcg->last_scanned_child = 0;
5109 5028 memcg->last_scanned_node = MAX_NUMNODES;