Commit f7ce3190c4a35bf887adb7a1aa1ba899b679872d

Authored by Vladimir Davydov
Committed by Linus Torvalds
1 parent 49e7e7ff8d

slab: embed memcg_cache_params to kmem_cache

Currently, kmem_cache stores a pointer to struct memcg_cache_params
instead of embedding it.  The rationale is to save memory when kmem
accounting is disabled.  However, struct memcg_cache_params has shrivelled
drastically since it was first introduced:

* Initially:

struct memcg_cache_params {
	bool is_root_cache;
	union {
		struct kmem_cache *memcg_caches[0];
		struct {
			struct mem_cgroup *memcg;
			struct list_head list;
			struct kmem_cache *root_cache;
			bool dead;
			atomic_t nr_pages;
			struct work_struct destroy;
		};
	};
};

* Now:

struct memcg_cache_params {
	bool is_root_cache;
	union {
		struct {
			struct rcu_head rcu_head;
			struct kmem_cache *memcg_caches[0];
		};
		struct {
			struct mem_cgroup *memcg;
			struct kmem_cache *root_cache;
		};
	};
};

So the memory saving does not seem to be a clear win anymore.

OTOH, keeping a pointer to struct memcg_cache_params instead of embedding
it means touching one more cache line on the kmem alloc/free hot paths.
Besides, it makes linking kmem caches into a list chained through a field
of struct memcg_cache_params really painful due to the extra level of
indirection, and the following patch wants to link them exactly that way.
So let us embed it.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Tejun Heo <tj@kernel.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 7 changed files with 111 additions and 103 deletions

include/linux/slab.h
... ... @@ -473,14 +473,14 @@
473 473 #ifndef ARCH_SLAB_MINALIGN
474 474 #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
475 475 #endif
  476 +
  477 +struct memcg_cache_array {
  478 + struct rcu_head rcu;
  479 + struct kmem_cache *entries[0];
  480 +};
  481 +
476 482 /*
477 483 * This is the main placeholder for memcg-related information in kmem caches.
478   - * struct kmem_cache will hold a pointer to it, so the memory cost while
479   - * disabled is 1 pointer. The runtime cost while enabled, gets bigger than it
480   - * would otherwise be if that would be bundled in kmem_cache: we'll need an
481   - * extra pointer chase. But the trade off clearly lays in favor of not
482   - * penalizing non-users.
483   - *
484 484 * Both the root cache and the child caches will have it. For the root cache,
485 485 * this will hold a dynamically allocated array large enough to hold
486 486 * information about the currently limited memcgs in the system. To allow the
... ... @@ -495,10 +495,7 @@
495 495 struct memcg_cache_params {
496 496 bool is_root_cache;
497 497 union {
498   - struct {
499   - struct rcu_head rcu_head;
500   - struct kmem_cache *memcg_caches[0];
501   - };
  498 + struct memcg_cache_array __rcu *memcg_caches;
502 499 struct {
503 500 struct mem_cgroup *memcg;
504 501 struct kmem_cache *root_cache;
include/linux/slab_def.h
... ... @@ -70,7 +70,7 @@
70 70 int obj_offset;
71 71 #endif /* CONFIG_DEBUG_SLAB */
72 72 #ifdef CONFIG_MEMCG_KMEM
73   - struct memcg_cache_params *memcg_params;
  73 + struct memcg_cache_params memcg_params;
74 74 #endif
75 75  
76 76 struct kmem_cache_node *node[MAX_NUMNODES];
include/linux/slub_def.h
... ... @@ -85,7 +85,7 @@
85 85 struct kobject kobj; /* For sysfs */
86 86 #endif
87 87 #ifdef CONFIG_MEMCG_KMEM
88   - struct memcg_cache_params *memcg_params;
  88 + struct memcg_cache_params memcg_params;
89 89 int max_attr_size; /* for propagation, maximum size of a stored attr */
90 90 #ifdef CONFIG_SYSFS
91 91 struct kset *memcg_kset;
mm/memcontrol.c
... ... @@ -332,7 +332,7 @@
332 332 struct cg_proto tcp_mem;
333 333 #endif
334 334 #if defined(CONFIG_MEMCG_KMEM)
335   - /* Index in the kmem_cache->memcg_params->memcg_caches array */
  335 + /* Index in the kmem_cache->memcg_params.memcg_caches array */
336 336 int kmemcg_id;
337 337 #endif
338 338  
... ... @@ -531,7 +531,7 @@
531 531  
532 532 #ifdef CONFIG_MEMCG_KMEM
533 533 /*
534   - * This will be the memcg's index in each cache's ->memcg_params->memcg_caches.
  534 + * This will be the memcg's index in each cache's ->memcg_params.memcg_caches.
535 535 * The main reason for not using cgroup id for this:
536 536 * this works better in sparse environments, where we have a lot of memcgs,
537 537 * but only a few kmem-limited. Or also, if we have, for instance, 200
... ... @@ -2667,8 +2667,7 @@
2667 2667 struct mem_cgroup *memcg;
2668 2668 struct kmem_cache *memcg_cachep;
2669 2669  
2670   - VM_BUG_ON(!cachep->memcg_params);
2671   - VM_BUG_ON(!cachep->memcg_params->is_root_cache);
  2670 + VM_BUG_ON(!is_root_cache(cachep));
2672 2671  
2673 2672 if (current->memcg_kmem_skip_account)
2674 2673 return cachep;
... ... @@ -2702,7 +2701,7 @@
2702 2701 void __memcg_kmem_put_cache(struct kmem_cache *cachep)
2703 2702 {
2704 2703 if (!is_root_cache(cachep))
2705   - css_put(&cachep->memcg_params->memcg->css);
  2704 + css_put(&cachep->memcg_params.memcg->css);
2706 2705 }
2707 2706  
2708 2707 /*
... ... @@ -2778,7 +2777,7 @@
2778 2777 if (PageSlab(page)) {
2779 2778 cachep = page->slab_cache;
2780 2779 if (!is_root_cache(cachep))
2781   - memcg = cachep->memcg_params->memcg;
  2780 + memcg = cachep->memcg_params.memcg;
2782 2781 } else
2783 2782 /* page allocated by alloc_kmem_pages */
2784 2783 memcg = page->mem_cgroup;
mm/slab.h
... ... @@ -86,8 +86,6 @@
86 86 extern void create_boot_cache(struct kmem_cache *, const char *name,
87 87 size_t size, unsigned long flags);
88 88  
89   -struct mem_cgroup;
90   -
91 89 int slab_unmergeable(struct kmem_cache *s);
92 90 struct kmem_cache *find_mergeable(size_t size, size_t align,
93 91 unsigned long flags, const char *name, void (*ctor)(void *));
94 92  
95 93  
... ... @@ -167,14 +165,13 @@
167 165 #ifdef CONFIG_MEMCG_KMEM
168 166 static inline bool is_root_cache(struct kmem_cache *s)
169 167 {
170   - return !s->memcg_params || s->memcg_params->is_root_cache;
  168 + return s->memcg_params.is_root_cache;
171 169 }
172 170  
173 171 static inline bool slab_equal_or_root(struct kmem_cache *s,
174   - struct kmem_cache *p)
  172 + struct kmem_cache *p)
175 173 {
176   - return (p == s) ||
177   - (s->memcg_params && (p == s->memcg_params->root_cache));
  174 + return p == s || p == s->memcg_params.root_cache;
178 175 }
179 176  
180 177 /*
181 178  
182 179  
183 180  
184 181  
185 182  
186 183  
... ... @@ -185,37 +182,30 @@
185 182 static inline const char *cache_name(struct kmem_cache *s)
186 183 {
187 184 if (!is_root_cache(s))
188   - return s->memcg_params->root_cache->name;
  185 + s = s->memcg_params.root_cache;
189 186 return s->name;
190 187 }
191 188  
192 189 /*
193 190 * Note, we protect with RCU only the memcg_caches array, not per-memcg caches.
194   - * That said the caller must assure the memcg's cache won't go away. Since once
195   - * created a memcg's cache is destroyed only along with the root cache, it is
196   - * true if we are going to allocate from the cache or hold a reference to the
197   - * root cache by other means. Otherwise, we should hold either the slab_mutex
198   - * or the memcg's slab_caches_mutex while calling this function and accessing
199   - * the returned value.
  191 + * That said the caller must assure the memcg's cache won't go away by either
  192 + * taking a css reference to the owner cgroup, or holding the slab_mutex.
200 193 */
201 194 static inline struct kmem_cache *
202 195 cache_from_memcg_idx(struct kmem_cache *s, int idx)
203 196 {
204 197 struct kmem_cache *cachep;
205   - struct memcg_cache_params *params;
  198 + struct memcg_cache_array *arr;
206 199  
207   - if (!s->memcg_params)
208   - return NULL;
209   -
210 200 rcu_read_lock();
211   - params = rcu_dereference(s->memcg_params);
  201 + arr = rcu_dereference(s->memcg_params.memcg_caches);
212 202  
213 203 /*
214 204 * Make sure we will access the up-to-date value. The code updating
215 205 * memcg_caches issues a write barrier to match this (see
216   - * memcg_register_cache()).
  206 + * memcg_create_kmem_cache()).
217 207 */
218   - cachep = lockless_dereference(params->memcg_caches[idx]);
  208 + cachep = lockless_dereference(arr->entries[idx]);
219 209 rcu_read_unlock();
220 210  
221 211 return cachep;
... ... @@ -225,7 +215,7 @@
225 215 {
226 216 if (is_root_cache(s))
227 217 return s;
228   - return s->memcg_params->root_cache;
  218 + return s->memcg_params.root_cache;
229 219 }
230 220  
231 221 static __always_inline int memcg_charge_slab(struct kmem_cache *s,
... ... @@ -235,7 +225,7 @@
235 225 return 0;
236 226 if (is_root_cache(s))
237 227 return 0;
238   - return memcg_charge_kmem(s->memcg_params->memcg, gfp, 1 << order);
  228 + return memcg_charge_kmem(s->memcg_params.memcg, gfp, 1 << order);
239 229 }
240 230  
241 231 static __always_inline void memcg_uncharge_slab(struct kmem_cache *s, int order)
242 232  
... ... @@ -244,9 +234,13 @@
244 234 return;
245 235 if (is_root_cache(s))
246 236 return;
247   - memcg_uncharge_kmem(s->memcg_params->memcg, 1 << order);
  237 + memcg_uncharge_kmem(s->memcg_params.memcg, 1 << order);
248 238 }
249   -#else
  239 +
  240 +extern void slab_init_memcg_params(struct kmem_cache *);
  241 +
  242 +#else /* !CONFIG_MEMCG_KMEM */
  243 +
250 244 static inline bool is_root_cache(struct kmem_cache *s)
251 245 {
252 246 return true;
... ... @@ -282,7 +276,11 @@
282 276 static inline void memcg_uncharge_slab(struct kmem_cache *s, int order)
283 277 {
284 278 }
285   -#endif
  279 +
  280 +static inline void slab_init_memcg_params(struct kmem_cache *s)
  281 +{
  282 +}
  283 +#endif /* CONFIG_MEMCG_KMEM */
286 284  
287 285 static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
288 286 {
mm/slab_common.c
... ... @@ -106,62 +106,66 @@
106 106 #endif
107 107  
108 108 #ifdef CONFIG_MEMCG_KMEM
109   -static int memcg_alloc_cache_params(struct mem_cgroup *memcg,
110   - struct kmem_cache *s, struct kmem_cache *root_cache)
  109 +void slab_init_memcg_params(struct kmem_cache *s)
111 110 {
112   - size_t size;
  111 + s->memcg_params.is_root_cache = true;
  112 + RCU_INIT_POINTER(s->memcg_params.memcg_caches, NULL);
  113 +}
113 114  
114   - if (!memcg_kmem_enabled())
  115 +static int init_memcg_params(struct kmem_cache *s,
  116 + struct mem_cgroup *memcg, struct kmem_cache *root_cache)
  117 +{
  118 + struct memcg_cache_array *arr;
  119 +
  120 + if (memcg) {
  121 + s->memcg_params.is_root_cache = false;
  122 + s->memcg_params.memcg = memcg;
  123 + s->memcg_params.root_cache = root_cache;
115 124 return 0;
  125 + }
116 126  
117   - if (!memcg) {
118   - size = offsetof(struct memcg_cache_params, memcg_caches);
119   - size += memcg_nr_cache_ids * sizeof(void *);
120   - } else
121   - size = sizeof(struct memcg_cache_params);
  127 + slab_init_memcg_params(s);
122 128  
123   - s->memcg_params = kzalloc(size, GFP_KERNEL);
124   - if (!s->memcg_params)
  129 + if (!memcg_nr_cache_ids)
  130 + return 0;
  131 +
  132 + arr = kzalloc(sizeof(struct memcg_cache_array) +
  133 + memcg_nr_cache_ids * sizeof(void *),
  134 + GFP_KERNEL);
  135 + if (!arr)
125 136 return -ENOMEM;
126 137  
127   - if (memcg) {
128   - s->memcg_params->memcg = memcg;
129   - s->memcg_params->root_cache = root_cache;
130   - } else
131   - s->memcg_params->is_root_cache = true;
132   -
  138 + RCU_INIT_POINTER(s->memcg_params.memcg_caches, arr);
133 139 return 0;
134 140 }
135 141  
136   -static void memcg_free_cache_params(struct kmem_cache *s)
  142 +static void destroy_memcg_params(struct kmem_cache *s)
137 143 {
138   - kfree(s->memcg_params);
  144 + if (is_root_cache(s))
  145 + kfree(rcu_access_pointer(s->memcg_params.memcg_caches));
139 146 }
140 147  
141   -static int memcg_update_cache_params(struct kmem_cache *s, int num_memcgs)
  148 +static int update_memcg_params(struct kmem_cache *s, int new_array_size)
142 149 {
143   - int size;
144   - struct memcg_cache_params *new_params, *cur_params;
  150 + struct memcg_cache_array *old, *new;
145 151  
146   - BUG_ON(!is_root_cache(s));
  152 + if (!is_root_cache(s))
  153 + return 0;
147 154  
148   - size = offsetof(struct memcg_cache_params, memcg_caches);
149   - size += num_memcgs * sizeof(void *);
150   -
151   - new_params = kzalloc(size, GFP_KERNEL);
152   - if (!new_params)
  155 + new = kzalloc(sizeof(struct memcg_cache_array) +
  156 + new_array_size * sizeof(void *), GFP_KERNEL);
  157 + if (!new)
153 158 return -ENOMEM;
154 159  
155   - cur_params = s->memcg_params;
156   - memcpy(new_params->memcg_caches, cur_params->memcg_caches,
157   - memcg_nr_cache_ids * sizeof(void *));
  160 + old = rcu_dereference_protected(s->memcg_params.memcg_caches,
  161 + lockdep_is_held(&slab_mutex));
  162 + if (old)
  163 + memcpy(new->entries, old->entries,
  164 + memcg_nr_cache_ids * sizeof(void *));
158 165  
159   - new_params->is_root_cache = true;
160   -
161   - rcu_assign_pointer(s->memcg_params, new_params);
162   - if (cur_params)
163   - kfree_rcu(cur_params, rcu_head);
164   -
  166 + rcu_assign_pointer(s->memcg_params.memcg_caches, new);
  167 + if (old)
  168 + kfree_rcu(old, rcu);
165 169 return 0;
166 170 }
167 171  
... ... @@ -172,10 +176,7 @@
172 176  
173 177 mutex_lock(&slab_mutex);
174 178 list_for_each_entry(s, &slab_caches, list) {
175   - if (!is_root_cache(s))
176   - continue;
177   -
178   - ret = memcg_update_cache_params(s, num_memcgs);
  179 + ret = update_memcg_params(s, num_memcgs);
179 180 /*
180 181 * Instead of freeing the memory, we'll just leave the caches
181 182 * up to this point in an updated state.
182 183  
... ... @@ -187,13 +188,13 @@
187 188 return ret;
188 189 }
189 190 #else
190   -static inline int memcg_alloc_cache_params(struct mem_cgroup *memcg,
191   - struct kmem_cache *s, struct kmem_cache *root_cache)
  191 +static inline int init_memcg_params(struct kmem_cache *s,
  192 + struct mem_cgroup *memcg, struct kmem_cache *root_cache)
192 193 {
193 194 return 0;
194 195 }
195 196  
196   -static inline void memcg_free_cache_params(struct kmem_cache *s)
  197 +static inline void destroy_memcg_params(struct kmem_cache *s)
197 198 {
198 199 }
199 200 #endif /* CONFIG_MEMCG_KMEM */
... ... @@ -311,7 +312,7 @@
311 312 s->align = align;
312 313 s->ctor = ctor;
313 314  
314   - err = memcg_alloc_cache_params(memcg, s, root_cache);
  315 + err = init_memcg_params(s, memcg, root_cache);
315 316 if (err)
316 317 goto out_free_cache;
317 318  
... ... @@ -327,7 +328,7 @@
327 328 return s;
328 329  
329 330 out_free_cache:
330   - memcg_free_cache_params(s);
  331 + destroy_memcg_params(s);
331 332 kmem_cache_free(kmem_cache, s);
332 333 goto out;
333 334 }
334 335  
... ... @@ -439,11 +440,15 @@
439 440  
440 441 #ifdef CONFIG_MEMCG_KMEM
441 442 if (!is_root_cache(s)) {
442   - struct kmem_cache *root_cache = s->memcg_params->root_cache;
443   - int memcg_id = memcg_cache_id(s->memcg_params->memcg);
  443 + int idx;
  444 + struct memcg_cache_array *arr;
444 445  
445   - BUG_ON(root_cache->memcg_params->memcg_caches[memcg_id] != s);
446   - root_cache->memcg_params->memcg_caches[memcg_id] = NULL;
  446 + idx = memcg_cache_id(s->memcg_params.memcg);
  447 + arr = rcu_dereference_protected(s->memcg_params.root_cache->
  448 + memcg_params.memcg_caches,
  449 + lockdep_is_held(&slab_mutex));
  450 + BUG_ON(arr->entries[idx] != s);
  451 + arr->entries[idx] = NULL;
447 452 }
448 453 #endif
449 454 list_move(&s->list, release);
450 455  
451 456  
452 457  
453 458  
... ... @@ -481,27 +486,32 @@
481 486 struct kmem_cache *root_cache)
482 487 {
483 488 static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */
484   - int memcg_id = memcg_cache_id(memcg);
  489 + struct memcg_cache_array *arr;
485 490 struct kmem_cache *s = NULL;
486 491 char *cache_name;
  492 + int idx;
487 493  
488 494 get_online_cpus();
489 495 get_online_mems();
490 496  
491 497 mutex_lock(&slab_mutex);
492 498  
  499 + idx = memcg_cache_id(memcg);
  500 + arr = rcu_dereference_protected(root_cache->memcg_params.memcg_caches,
  501 + lockdep_is_held(&slab_mutex));
  502 +
493 503 /*
494 504 * Since per-memcg caches are created asynchronously on first
495 505 * allocation (see memcg_kmem_get_cache()), several threads can try to
496 506 * create the same cache, but only one of them may succeed.
497 507 */
498   - if (cache_from_memcg_idx(root_cache, memcg_id))
  508 + if (arr->entries[idx])
499 509 goto out_unlock;
500 510  
501 511 cgroup_name(mem_cgroup_css(memcg)->cgroup,
502 512 memcg_name_buf, sizeof(memcg_name_buf));
503 513 cache_name = kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name,
504   - memcg_cache_id(memcg), memcg_name_buf);
  514 + idx, memcg_name_buf);
505 515 if (!cache_name)
506 516 goto out_unlock;
507 517  
... ... @@ -525,7 +535,7 @@
525 535 * initialized.
526 536 */
527 537 smp_wmb();
528   - root_cache->memcg_params->memcg_caches[memcg_id] = s;
  538 + arr->entries[idx] = s;
529 539  
530 540 out_unlock:
531 541 mutex_unlock(&slab_mutex);
... ... @@ -545,7 +555,7 @@
545 555  
546 556 mutex_lock(&slab_mutex);
547 557 list_for_each_entry_safe(s, s2, &slab_caches, list) {
548   - if (is_root_cache(s) || s->memcg_params->memcg != memcg)
  558 + if (is_root_cache(s) || s->memcg_params.memcg != memcg)
549 559 continue;
550 560 /*
551 561 * The cgroup is about to be freed and therefore has no charges
... ... @@ -564,7 +574,7 @@
564 574  
565 575 void slab_kmem_cache_release(struct kmem_cache *s)
566 576 {
567   - memcg_free_cache_params(s);
  577 + destroy_memcg_params(s);
568 578 kfree(s->name);
569 579 kmem_cache_free(kmem_cache, s);
570 580 }
... ... @@ -640,6 +650,9 @@
640 650 s->name = name;
641 651 s->size = s->object_size = size;
642 652 s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
  653 +
  654 + slab_init_memcg_params(s);
  655 +
643 656 err = __kmem_cache_create(s, flags);
644 657  
645 658 if (err)
... ... @@ -980,7 +993,7 @@
980 993  
981 994 if (p == slab_caches.next)
982 995 print_slabinfo_header(m);
983   - if (!is_root_cache(s) && s->memcg_params->memcg == memcg)
  996 + if (!is_root_cache(s) && s->memcg_params.memcg == memcg)
984 997 cache_show(s, m);
985 998 return 0;
986 999 }
mm/slub.c
... ... @@ -3577,6 +3577,7 @@
3577 3577 p->slab_cache = s;
3578 3578 #endif
3579 3579 }
  3580 + slab_init_memcg_params(s);
3580 3581 list_add(&s->list, &slab_caches);
3581 3582 return s;
3582 3583 }
... ... @@ -4964,7 +4965,7 @@
4964 4965 if (is_root_cache(s))
4965 4966 return;
4966 4967  
4967   - root_cache = s->memcg_params->root_cache;
  4968 + root_cache = s->memcg_params.root_cache;
4968 4969  
4969 4970 /*
4970 4971 * This mean this cache had no attribute written. Therefore, no point
... ... @@ -5044,7 +5045,7 @@
5044 5045 {
5045 5046 #ifdef CONFIG_MEMCG_KMEM
5046 5047 if (!is_root_cache(s))
5047   - return s->memcg_params->root_cache->memcg_kset;
  5048 + return s->memcg_params.root_cache->memcg_kset;
5048 5049 #endif
5049 5050 return slab_kset;
5050 5051 }