Commit f7ce3190c4a35bf887adb7a1aa1ba899b679872d

Authored by Vladimir Davydov
Committed by Linus Torvalds
1 parent 49e7e7ff8d

slab: embed memcg_cache_params to kmem_cache

Currently, kmem_cache stores a pointer to struct memcg_cache_params
instead of embedding it.  The rationale is to save memory when kmem
accounting is disabled.  However, struct memcg_cache_params has shrivelled
drastically since it was first introduced:

* Initially:

struct memcg_cache_params {
	bool is_root_cache;
	union {
		struct kmem_cache *memcg_caches[0];
		struct {
			struct mem_cgroup *memcg;
			struct list_head list;
			struct kmem_cache *root_cache;
			bool dead;
			atomic_t nr_pages;
			struct work_struct destroy;
		};
	};
};

* Now:

struct memcg_cache_params {
	bool is_root_cache;
	union {
		struct {
			struct rcu_head rcu_head;
			struct kmem_cache *memcg_caches[0];
		};
		struct {
			struct mem_cgroup *memcg;
			struct kmem_cache *root_cache;
		};
	};
};

So the memory saving does not seem to be a clear win anymore.

OTOH, keeping a pointer to struct memcg_cache_params instead of embedding
it means touching one more cache line on the kmem alloc/free hot paths.
Besides, it makes linking kmem caches into a list chained through a field
of struct memcg_cache_params really painful due to the extra level of
indirection, and the following patch wants to link them exactly that way.
So let us embed it.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Tejun Heo <tj@kernel.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 7 changed files with 111 additions and 103 deletions

include/linux/slab.h
... ... @@ -473,14 +473,14 @@
473 473 #ifndef ARCH_SLAB_MINALIGN
474 474 #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
475 475 #endif
  476 +
  477 +struct memcg_cache_array {
  478 + struct rcu_head rcu;
  479 + struct kmem_cache *entries[0];
  480 +};
  481 +
476 482 /*
477 483 * This is the main placeholder for memcg-related information in kmem caches.
478   - * struct kmem_cache will hold a pointer to it, so the memory cost while
479   - * disabled is 1 pointer. The runtime cost while enabled, gets bigger than it
480   - * would otherwise be if that would be bundled in kmem_cache: we'll need an
481   - * extra pointer chase. But the trade off clearly lays in favor of not
482   - * penalizing non-users.
483   - *
484 484 * Both the root cache and the child caches will have it. For the root cache,
485 485 * this will hold a dynamically allocated array large enough to hold
486 486 * information about the currently limited memcgs in the system. To allow the
... ... @@ -495,10 +495,7 @@
495 495 struct memcg_cache_params {
496 496 bool is_root_cache;
497 497 union {
498   - struct {
499   - struct rcu_head rcu_head;
500   - struct kmem_cache *memcg_caches[0];
501   - };
  498 + struct memcg_cache_array __rcu *memcg_caches;
502 499 struct {
503 500 struct mem_cgroup *memcg;
504 501 struct kmem_cache *root_cache;
include/linux/slab_def.h
... ... @@ -70,7 +70,7 @@
70 70 int obj_offset;
71 71 #endif /* CONFIG_DEBUG_SLAB */
72 72 #ifdef CONFIG_MEMCG_KMEM
73   - struct memcg_cache_params *memcg_params;
  73 + struct memcg_cache_params memcg_params;
74 74 #endif
75 75  
76 76 struct kmem_cache_node *node[MAX_NUMNODES];
include/linux/slub_def.h
... ... @@ -85,7 +85,7 @@
85 85 struct kobject kobj; /* For sysfs */
86 86 #endif
87 87 #ifdef CONFIG_MEMCG_KMEM
88   - struct memcg_cache_params *memcg_params;
  88 + struct memcg_cache_params memcg_params;
89 89 int max_attr_size; /* for propagation, maximum size of a stored attr */
90 90 #ifdef CONFIG_SYSFS
91 91 struct kset *memcg_kset;
mm/memcontrol.c
... ... @@ -332,7 +332,7 @@
332 332 struct cg_proto tcp_mem;
333 333 #endif
334 334 #if defined(CONFIG_MEMCG_KMEM)
335   - /* Index in the kmem_cache->memcg_params->memcg_caches array */
  335 + /* Index in the kmem_cache->memcg_params.memcg_caches array */
336 336 int kmemcg_id;
337 337 #endif
338 338  
... ... @@ -531,7 +531,7 @@
531 531  
532 532 #ifdef CONFIG_MEMCG_KMEM
533 533 /*
534   - * This will be the memcg's index in each cache's ->memcg_params->memcg_caches.
  534 + * This will be the memcg's index in each cache's ->memcg_params.memcg_caches.
535 535 * The main reason for not using cgroup id for this:
536 536 * this works better in sparse environments, where we have a lot of memcgs,
537 537 * but only a few kmem-limited. Or also, if we have, for instance, 200
... ... @@ -2667,8 +2667,7 @@
2667 2667 struct mem_cgroup *memcg;
2668 2668 struct kmem_cache *memcg_cachep;
2669 2669  
2670   - VM_BUG_ON(!cachep->memcg_params);
2671   - VM_BUG_ON(!cachep->memcg_params->is_root_cache);
  2670 + VM_BUG_ON(!is_root_cache(cachep));
2672 2671  
2673 2672 if (current->memcg_kmem_skip_account)
2674 2673 return cachep;
... ... @@ -2702,7 +2701,7 @@
2702 2701 void __memcg_kmem_put_cache(struct kmem_cache *cachep)
2703 2702 {
2704 2703 if (!is_root_cache(cachep))
2705   - css_put(&cachep->memcg_params->memcg->css);
  2704 + css_put(&cachep->memcg_params.memcg->css);
2706 2705 }
2707 2706  
2708 2707 /*
... ... @@ -2778,7 +2777,7 @@
2778 2777 if (PageSlab(page)) {
2779 2778 cachep = page->slab_cache;
2780 2779 if (!is_root_cache(cachep))
2781   - memcg = cachep->memcg_params->memcg;
  2780 + memcg = cachep->memcg_params.memcg;
2782 2781 } else
2783 2782 /* page allocated by alloc_kmem_pages */
2784 2783 memcg = page->mem_cgroup;
mm/slab.h
... ... @@ -86,8 +86,6 @@
86 86 extern void create_boot_cache(struct kmem_cache *, const char *name,
87 87 size_t size, unsigned long flags);
88 88  
89   -struct mem_cgroup;
90   -
91 89 int slab_unmergeable(struct kmem_cache *s);
92 90 struct kmem_cache *find_mergeable(size_t size, size_t align,
93 91 unsigned long flags, const char *name, void (*ctor)(void *));
94 92  
95 93  
... ... @@ -167,14 +165,13 @@
167 165 #ifdef CONFIG_MEMCG_KMEM
168 166 static inline bool is_root_cache(struct kmem_cache *s)
169 167 {
170   - return !s->memcg_params || s->memcg_params->is_root_cache;
  168 + return s->memcg_params.is_root_cache;
171 169 }
172 170  
173 171 static inline bool slab_equal_or_root(struct kmem_cache *s,
174   - struct kmem_cache *p)
  172 + struct kmem_cache *p)
175 173 {
176   - return (p == s) ||
177   - (s->memcg_params && (p == s->memcg_params->root_cache));
  174 + return p == s || p == s->memcg_params.root_cache;
178 175 }
179 176  
180 177 /*
181 178  
182 179  
183 180  
184 181  
185 182  
186 183  
... ... @@ -185,37 +182,30 @@
185 182 static inline const char *cache_name(struct kmem_cache *s)
186 183 {
187 184 if (!is_root_cache(s))
188   - return s->memcg_params->root_cache->name;
  185 + s = s->memcg_params.root_cache;
189 186 return s->name;
190 187 }
191 188  
192 189 /*
193 190 * Note, we protect with RCU only the memcg_caches array, not per-memcg caches.
194   - * That said the caller must assure the memcg's cache won't go away. Since once
195   - * created a memcg's cache is destroyed only along with the root cache, it is
196   - * true if we are going to allocate from the cache or hold a reference to the
197   - * root cache by other means. Otherwise, we should hold either the slab_mutex
198   - * or the memcg's slab_caches_mutex while calling this function and accessing
199   - * the returned value.
  191 + * That said the caller must assure the memcg's cache won't go away by either
  192 + * taking a css reference to the owner cgroup, or holding the slab_mutex.
200 193 */
201 194 static inline struct kmem_cache *
202 195 cache_from_memcg_idx(struct kmem_cache *s, int idx)
203 196 {
204 197 struct kmem_cache *cachep;
205   - struct memcg_cache_params *params;
  198 + struct memcg_cache_array *arr;
206 199  
207   - if (!s->memcg_params)
208   - return NULL;
209   -
210 200 rcu_read_lock();
211   - params = rcu_dereference(s->memcg_params);
  201 + arr = rcu_dereference(s->memcg_params.memcg_caches);
212 202  
213 203 /*
214 204 * Make sure we will access the up-to-date value. The code updating
215 205 * memcg_caches issues a write barrier to match this (see
216   - * memcg_register_cache()).
  206 + * memcg_create_kmem_cache()).
217 207 */
218   - cachep = lockless_dereference(params->memcg_caches[idx]);
  208 + cachep = lockless_dereference(arr->entries[idx]);
219 209 rcu_read_unlock();
220 210  
221 211 return cachep;
... ... @@ -225,7 +215,7 @@
225 215 {
226 216 if (is_root_cache(s))
227 217 return s;
228   - return s->memcg_params->root_cache;
  218 + return s->memcg_params.root_cache;
229 219 }
230 220  
231 221 static __always_inline int memcg_charge_slab(struct kmem_cache *s,
... ... @@ -235,7 +225,7 @@
235 225 return 0;
236 226 if (is_root_cache(s))
237 227 return 0;
238   - return memcg_charge_kmem(s->memcg_params->memcg, gfp, 1 << order);
  228 + return memcg_charge_kmem(s->memcg_params.memcg, gfp, 1 << order);
239 229 }
240 230  
241 231 static __always_inline void memcg_uncharge_slab(struct kmem_cache *s, int order)
242 232  
... ... @@ -244,9 +234,13 @@
244 234 return;
245 235 if (is_root_cache(s))
246 236 return;
247   - memcg_uncharge_kmem(s->memcg_params->memcg, 1 << order);
  237 + memcg_uncharge_kmem(s->memcg_params.memcg, 1 << order);
248 238 }
249   -#else
  239 +
  240 +extern void slab_init_memcg_params(struct kmem_cache *);
  241 +
  242 +#else /* !CONFIG_MEMCG_KMEM */
  243 +
250 244 static inline bool is_root_cache(struct kmem_cache *s)
251 245 {
252 246 return true;
... ... @@ -282,7 +276,11 @@
282 276 static inline void memcg_uncharge_slab(struct kmem_cache *s, int order)
283 277 {
284 278 }
285   -#endif
  279 +
  280 +static inline void slab_init_memcg_params(struct kmem_cache *s)
  281 +{
  282 +}
  283 +#endif /* CONFIG_MEMCG_KMEM */
286 284  
287 285 static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
288 286 {
mm/slab_common.c
... ... @@ -106,62 +106,66 @@
106 106 #endif
107 107  
108 108 #ifdef CONFIG_MEMCG_KMEM
109   -static int memcg_alloc_cache_params(struct mem_cgroup *memcg,
110   - struct kmem_cache *s, struct kmem_cache *root_cache)
  109 +void slab_init_memcg_params(struct kmem_cache *s)
111 110 {
112   - size_t size;
  111 + s->memcg_params.is_root_cache = true;
  112 + RCU_INIT_POINTER(s->memcg_params.memcg_caches, NULL);
  113 +}
113 114  
114   - if (!memcg_kmem_enabled())
  115 +static int init_memcg_params(struct kmem_cache *s,
  116 + struct mem_cgroup *memcg, struct kmem_cache *root_cache)
  117 +{
  118 + struct memcg_cache_array *arr;
  119 +
  120 + if (memcg) {
  121 + s->memcg_params.is_root_cache = false;
  122 + s->memcg_params.memcg = memcg;
  123 + s->memcg_params.root_cache = root_cache;
115 124 return 0;
  125 + }
116 126  
117   - if (!memcg) {
118   - size = offsetof(struct memcg_cache_params, memcg_caches);
119   - size += memcg_nr_cache_ids * sizeof(void *);
120   - } else
121   - size = sizeof(struct memcg_cache_params);
  127 + slab_init_memcg_params(s);
122 128  
123   - s->memcg_params = kzalloc(size, GFP_KERNEL);
124   - if (!s->memcg_params)
  129 + if (!memcg_nr_cache_ids)
  130 + return 0;
  131 +
  132 + arr = kzalloc(sizeof(struct memcg_cache_array) +
  133 + memcg_nr_cache_ids * sizeof(void *),
  134 + GFP_KERNEL);
  135 + if (!arr)
125 136 return -ENOMEM;
126 137  
127   - if (memcg) {
128   - s->memcg_params->memcg = memcg;
129   - s->memcg_params->root_cache = root_cache;
130   - } else
131   - s->memcg_params->is_root_cache = true;
132   -
  138 + RCU_INIT_POINTER(s->memcg_params.memcg_caches, arr);
133 139 return 0;
134 140 }
135 141  
136   -static void memcg_free_cache_params(struct kmem_cache *s)
  142 +static void destroy_memcg_params(struct kmem_cache *s)
137 143 {
138   - kfree(s->memcg_params);
  144 + if (is_root_cache(s))
  145 + kfree(rcu_access_pointer(s->memcg_params.memcg_caches));
139 146 }
140 147  
141   -static int memcg_update_cache_params(struct kmem_cache *s, int num_memcgs)
  148 +static int update_memcg_params(struct kmem_cache *s, int new_array_size)
142 149 {
143   - int size;
144   - struct memcg_cache_params *new_params, *cur_params;
  150 + struct memcg_cache_array *old, *new;
145 151  
146   - BUG_ON(!is_root_cache(s));
  152 + if (!is_root_cache(s))
  153 + return 0;
147 154  
148   - size = offsetof(struct memcg_cache_params, memcg_caches);
149   - size += num_memcgs * sizeof(void *);
150   -
151   - new_params = kzalloc(size, GFP_KERNEL);
152   - if (!new_params)
  155 + new = kzalloc(sizeof(struct memcg_cache_array) +
  156 + new_array_size * sizeof(void *), GFP_KERNEL);
  157 + if (!new)
153 158 return -ENOMEM;
154 159  
155   - cur_params = s->memcg_params;
156   - memcpy(new_params->memcg_caches, cur_params->memcg_caches,
157   - memcg_nr_cache_ids * sizeof(void *));
  160 + old = rcu_dereference_protected(s->memcg_params.memcg_caches,
  161 + lockdep_is_held(&slab_mutex));
  162 + if (old)
  163 + memcpy(new->entries, old->entries,
  164 + memcg_nr_cache_ids * sizeof(void *));
158 165  
159   - new_params->is_root_cache = true;
160   -
161   - rcu_assign_pointer(s->memcg_params, new_params);
162   - if (cur_params)
163   - kfree_rcu(cur_params, rcu_head);
164   -
  166 + rcu_assign_pointer(s->memcg_params.memcg_caches, new);
  167 + if (old)
  168 + kfree_rcu(old, rcu);
165 169 return 0;
166 170 }
167 171  
... ... @@ -172,10 +176,7 @@
172 176  
173 177 mutex_lock(&slab_mutex);
174 178 list_for_each_entry(s, &slab_caches, list) {
175   - if (!is_root_cache(s))
176   - continue;
177   -
178   - ret = memcg_update_cache_params(s, num_memcgs);
  179 + ret = update_memcg_params(s, num_memcgs);
179 180 /*
180 181 * Instead of freeing the memory, we'll just leave the caches
181 182 * up to this point in an updated state.
182 183  
... ... @@ -187,13 +188,13 @@
187 188 return ret;
188 189 }
189 190 #else
190   -static inline int memcg_alloc_cache_params(struct mem_cgroup *memcg,
191   - struct kmem_cache *s, struct kmem_cache *root_cache)
  191 +static inline int init_memcg_params(struct kmem_cache *s,
  192 + struct mem_cgroup *memcg, struct kmem_cache *root_cache)
192 193 {
193 194 return 0;
194 195 }
195 196  
196   -static inline void memcg_free_cache_params(struct kmem_cache *s)
  197 +static inline void destroy_memcg_params(struct kmem_cache *s)
197 198 {
198 199 }
199 200 #endif /* CONFIG_MEMCG_KMEM */
... ... @@ -311,7 +312,7 @@
311 312 s->align = align;
312 313 s->ctor = ctor;
313 314  
314   - err = memcg_alloc_cache_params(memcg, s, root_cache);
  315 + err = init_memcg_params(s, memcg, root_cache);
315 316 if (err)
316 317 goto out_free_cache;
317 318  
... ... @@ -327,7 +328,7 @@
327 328 return s;
328 329  
329 330 out_free_cache:
330   - memcg_free_cache_params(s);
  331 + destroy_memcg_params(s);
331 332 kmem_cache_free(kmem_cache, s);
332 333 goto out;
333 334 }
334 335  
... ... @@ -439,11 +440,15 @@
439 440  
440 441 #ifdef CONFIG_MEMCG_KMEM
441 442 if (!is_root_cache(s)) {
442   - struct kmem_cache *root_cache = s->memcg_params->root_cache;
443   - int memcg_id = memcg_cache_id(s->memcg_params->memcg);
  443 + int idx;
  444 + struct memcg_cache_array *arr;
444 445  
445   - BUG_ON(root_cache->memcg_params->memcg_caches[memcg_id] != s);
446   - root_cache->memcg_params->memcg_caches[memcg_id] = NULL;
  446 + idx = memcg_cache_id(s->memcg_params.memcg);
  447 + arr = rcu_dereference_protected(s->memcg_params.root_cache->
  448 + memcg_params.memcg_caches,
  449 + lockdep_is_held(&slab_mutex));
  450 + BUG_ON(arr->entries[idx] != s);
  451 + arr->entries[idx] = NULL;
447 452 }
448 453 #endif
449 454 list_move(&s->list, release);
450 455  
451 456  
452 457  
453 458  
... ... @@ -481,27 +486,32 @@
481 486 struct kmem_cache *root_cache)
482 487 {
483 488 static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */
484   - int memcg_id = memcg_cache_id(memcg);
  489 + struct memcg_cache_array *arr;
485 490 struct kmem_cache *s = NULL;
486 491 char *cache_name;
  492 + int idx;
487 493  
488 494 get_online_cpus();
489 495 get_online_mems();
490 496  
491 497 mutex_lock(&slab_mutex);
492 498  
  499 + idx = memcg_cache_id(memcg);
  500 + arr = rcu_dereference_protected(root_cache->memcg_params.memcg_caches,
  501 + lockdep_is_held(&slab_mutex));
  502 +
493 503 /*
494 504 * Since per-memcg caches are created asynchronously on first
495 505 * allocation (see memcg_kmem_get_cache()), several threads can try to
496 506 * create the same cache, but only one of them may succeed.
497 507 */
498   - if (cache_from_memcg_idx(root_cache, memcg_id))
  508 + if (arr->entries[idx])
499 509 goto out_unlock;
500 510  
501 511 cgroup_name(mem_cgroup_css(memcg)->cgroup,
502 512 memcg_name_buf, sizeof(memcg_name_buf));
503 513 cache_name = kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name,
504   - memcg_cache_id(memcg), memcg_name_buf);
  514 + idx, memcg_name_buf);
505 515 if (!cache_name)
506 516 goto out_unlock;
507 517  
... ... @@ -525,7 +535,7 @@
525 535 * initialized.
526 536 */
527 537 smp_wmb();
528   - root_cache->memcg_params->memcg_caches[memcg_id] = s;
  538 + arr->entries[idx] = s;
529 539  
530 540 out_unlock:
531 541 mutex_unlock(&slab_mutex);
... ... @@ -545,7 +555,7 @@
545 555  
546 556 mutex_lock(&slab_mutex);
547 557 list_for_each_entry_safe(s, s2, &slab_caches, list) {
548   - if (is_root_cache(s) || s->memcg_params->memcg != memcg)
  558 + if (is_root_cache(s) || s->memcg_params.memcg != memcg)
549 559 continue;
550 560 /*
551 561 * The cgroup is about to be freed and therefore has no charges
... ... @@ -564,7 +574,7 @@
564 574  
565 575 void slab_kmem_cache_release(struct kmem_cache *s)
566 576 {
567   - memcg_free_cache_params(s);
  577 + destroy_memcg_params(s);
568 578 kfree(s->name);
569 579 kmem_cache_free(kmem_cache, s);
570 580 }
... ... @@ -640,6 +650,9 @@
640 650 s->name = name;
641 651 s->size = s->object_size = size;
642 652 s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
  653 +
  654 + slab_init_memcg_params(s);
  655 +
643 656 err = __kmem_cache_create(s, flags);
644 657  
645 658 if (err)
... ... @@ -980,7 +993,7 @@
980 993  
981 994 if (p == slab_caches.next)
982 995 print_slabinfo_header(m);
983   - if (!is_root_cache(s) && s->memcg_params->memcg == memcg)
  996 + if (!is_root_cache(s) && s->memcg_params.memcg == memcg)
984 997 cache_show(s, m);
985 998 return 0;
986 999 }
mm/slub.c
... ... @@ -3577,6 +3577,7 @@
3577 3577 p->slab_cache = s;
3578 3578 #endif
3579 3579 }
  3580 + slab_init_memcg_params(s);
3580 3581 list_add(&s->list, &slab_caches);
3581 3582 return s;
3582 3583 }
... ... @@ -4964,7 +4965,7 @@
4964 4965 if (is_root_cache(s))
4965 4966 return;
4966 4967  
4967   - root_cache = s->memcg_params->root_cache;
  4968 + root_cache = s->memcg_params.root_cache;
4968 4969  
4969 4970 /*
4970 4971 * This mean this cache had no attribute written. Therefore, no point
... ... @@ -5044,7 +5045,7 @@
5044 5045 {
5045 5046 #ifdef CONFIG_MEMCG_KMEM
5046 5047 if (!is_root_cache(s))
5047   - return s->memcg_params->root_cache->memcg_kset;
  5048 + return s->memcg_params.root_cache->memcg_kset;
5048 5049 #endif
5049 5050 return slab_kset;
5050 5051 }