Commit bd67314586a3d5725e60f2f6587b4cb0f659bb67

Authored by Vladimir Davydov
Committed by Linus Torvalds
1 parent c67a8a685a

memcg, slab: simplify synchronization scheme

At present, we have the following mutexes protecting data related to per
memcg kmem caches:

 - slab_mutex.  This one is held across the whole kmem cache creation
   and destruction paths.  We also take it when updating per root cache
   memcg_caches arrays (see memcg_update_all_caches).  As a result, holding
   it guarantees there will be no changes to any kmem cache (including per
   memcg ones).  Why do we need anything else then?  The point is that it
   is private to the slab implementation and has some internal dependencies
   on other locks (get_online_cpus), so we just don't want to rely upon it
   and prefer to introduce additional mutexes instead.

 - activate_kmem_mutex.  Initially it was added to synchronize kmem limit
   initialization (memcg_activate_kmem).  However, since per root cache
   memcg_caches arrays can only grow on kmem limit initialization (see
   memcg_update_all_caches), we also employ it to protect against
   relocation of those arrays (e.g.  see
   __kmem_cache_destroy_memcg_children).

 - We have a convention not to take slab_mutex in memcontrol.c, but we
   want to walk over the per memcg memcg_slab_caches lists there (e.g.  to
   destroy all memcg caches on memcg offline).  So each memcg has its own
   slab_caches_mutex protecting its list.

The mutexes are taken in the following order:

   activate_kmem_mutex -> slab_mutex -> memcg::slab_caches_mutex

Such a synchronization scheme has a number of flaws, for instance:

 - We can't call kmem_cache_{destroy,shrink} while walking over a
   memcg::memcg_slab_caches list due to the locking order.  As a result, in
   mem_cgroup_destroy_all_caches we have to schedule the
   memcg_cache_params::destroy work item, which shrinks and destroys the
   cache asynchronously (see the sketch after this list).

 - We don't have a mutex to synchronize per memcg cache destruction
   between memcg offline (mem_cgroup_destroy_all_caches) and root cache
   destruction (__kmem_cache_destroy_memcg_children).  Currently we just
   don't bother about it.
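
For reference, the deferred-destruction scheme removed by this patch looked
roughly like this (condensed from the code deleted in the diff below;
surrounding declarations omitted):

   /* memcg offline: we may only schedule the destroy work here, because
    * kmem_cache_destroy() takes slab_mutex, which must not nest inside
    * memcg->slab_caches_mutex under the old locking order */
   mutex_lock(&memcg->slab_caches_mutex);
   list_for_each_entry(params, &memcg->memcg_slab_caches, list) {
           cachep = memcg_params_to_cache(params);
           schedule_work(&cachep->memcg_params->destroy);
   }
   mutex_unlock(&memcg->slab_caches_mutex);

   /* the worker then shrinks the cache and, if it is empty, destroys it
    * outside the list lock */
   static void kmem_cache_destroy_work_func(struct work_struct *w)
   {
           struct memcg_cache_params *p =
                   container_of(w, struct memcg_cache_params, destroy);
           struct kmem_cache *cachep = memcg_params_to_cache(p);

           kmem_cache_shrink(cachep);
           if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
                   kmem_cache_destroy(cachep);
   }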

This patch simplifies the scheme by replacing the per memcg
slab_caches_mutexes with a single global memcg_slab_mutex.  It is held
whenever a per memcg cache is created or destroyed, so it protects both the
per root cache memcg_caches arrays and the per memcg memcg_slab_caches
lists.  The locking order is now as follows:

   activate_kmem_mutex -> memcg_slab_mutex -> slab_mutex

This allows us to call kmem_cache_{create,shrink,destroy} under the
memcg_slab_mutex.  As a result, we don't need the
memcg_cache_params::destroy work item any more - we can simply destroy
caches while iterating over a memcg's slab caches list.
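
Condensed from the new mem_cgroup_destroy_all_caches() in the diff below,
the memcg offline path now does the shrink and destroy inline, using the
_safe list iterator because memcg_kmem_destroy_cache() unlinks the entry it
is passed:

   mutex_lock(&memcg_slab_mutex);
   list_for_each_entry_safe(params, tmp, &memcg->memcg_slab_caches, list) {
           cachep = memcg_params_to_cache(params);
           kmem_cache_shrink(cachep);
           if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
                   memcg_kmem_destroy_cache(cachep);
   }
   mutex_unlock(&memcg_slab_mutex);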

Using the global mutex also simplifies synchronization between concurrent
per memcg cache creation/destruction, e.g.  mem_cgroup_destroy_all_caches
vs __kmem_cache_destroy_memcg_children.
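
For example, the new memcg_kmem_create_cache() (see the diff below) performs
the "already created?" check, the actual cache creation and the list/array
updates all under memcg_slab_mutex, so concurrent creators and destroyers
serialize on that single lock.  A condensed sketch:

   lockdep_assert_held(&memcg_slab_mutex);

   id = memcg_cache_id(memcg);
   if (cache_from_memcg_idx(root_cache, id))
           return;         /* another thread has already created it */

   cachep = kmem_cache_create_memcg(memcg, root_cache);
   if (!cachep)
           return;         /* not critical: we can keep using the root cache */

   list_add(&cachep->memcg_params->list, &memcg->memcg_slab_caches);
   smp_wmb();              /* commit the cache before publishing it to readers */
   root_cache->memcg_params->memcg_caches[id] = cachep;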

The downside is that we replace the per-memcg slab_caches_mutexes with a
hammer-like global mutex, but since we already take either slab_mutex or
cgroup_mutex along with a memcg::slab_caches_mutex, this shouldn't hurt
concurrency a lot.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 4 changed files with 69 additions and 120 deletions

include/linux/memcontrol.h
... ... @@ -497,8 +497,6 @@
497 497 int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s,
498 498 struct kmem_cache *root_cache);
499 499 void memcg_free_cache_params(struct kmem_cache *s);
500   -void memcg_register_cache(struct kmem_cache *s);
501   -void memcg_unregister_cache(struct kmem_cache *s);
502 500  
503 501 int memcg_update_cache_size(struct kmem_cache *s, int num_groups);
504 502 void memcg_update_array_size(int num_groups);
... ... @@ -637,14 +635,6 @@
637 635 }
638 636  
639 637 static inline void memcg_free_cache_params(struct kmem_cache *s)
640   -{
641   -}
642   -
643   -static inline void memcg_register_cache(struct kmem_cache *s)
644   -{
645   -}
646   -
647   -static inline void memcg_unregister_cache(struct kmem_cache *s)
648 638 {
649 639 }
650 640  
include/linux/slab.h
... ... @@ -116,7 +116,8 @@
116 116 unsigned long,
117 117 void (*)(void *));
118 118 #ifdef CONFIG_MEMCG_KMEM
119   -void kmem_cache_create_memcg(struct mem_cgroup *, struct kmem_cache *);
  119 +struct kmem_cache *kmem_cache_create_memcg(struct mem_cgroup *,
  120 + struct kmem_cache *);
120 121 #endif
121 122 void kmem_cache_destroy(struct kmem_cache *);
122 123 int kmem_cache_shrink(struct kmem_cache *);
... ... @@ -525,8 +526,6 @@
525 526 * @list: list_head for the list of all caches in this memcg
526 527 * @root_cache: pointer to the global, root cache, this cache was derived from
527 528 * @nr_pages: number of pages that belongs to this cache.
528   - * @destroy: worker to be called whenever we are ready, or believe we may be
529   - * ready, to destroy this cache.
530 529 */
531 530 struct memcg_cache_params {
532 531 bool is_root_cache;
... ... @@ -540,7 +539,6 @@
540 539 struct list_head list;
541 540 struct kmem_cache *root_cache;
542 541 atomic_t nr_pages;
543   - struct work_struct destroy;
544 542 };
545 543 };
546 544 };
mm/memcontrol.c
... ... @@ -357,10 +357,9 @@
357 357 struct cg_proto tcp_mem;
358 358 #endif
359 359 #if defined(CONFIG_MEMCG_KMEM)
360   - /* analogous to slab_common's slab_caches list. per-memcg */
  360 + /* analogous to slab_common's slab_caches list, but per-memcg;
  361 + * protected by memcg_slab_mutex */
361 362 struct list_head memcg_slab_caches;
362   - /* Not a spinlock, we can take a lot of time walking the list */
363   - struct mutex slab_caches_mutex;
364 363 /* Index in the kmem_cache->memcg_params->memcg_caches array */
365 364 int kmemcg_id;
366 365 #endif
... ... @@ -2913,6 +2912,12 @@
2913 2912 static DEFINE_MUTEX(set_limit_mutex);
2914 2913  
2915 2914 #ifdef CONFIG_MEMCG_KMEM
  2915 +/*
  2916 + * The memcg_slab_mutex is held whenever a per memcg kmem cache is created or
  2917 + * destroyed. It protects memcg_caches arrays and memcg_slab_caches lists.
  2918 + */
  2919 +static DEFINE_MUTEX(memcg_slab_mutex);
  2920 +
2916 2921 static DEFINE_MUTEX(activate_kmem_mutex);
2917 2922  
2918 2923 static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg)
... ... @@ -2945,10 +2950,10 @@
2945 2950  
2946 2951 print_slabinfo_header(m);
2947 2952  
2948   - mutex_lock(&memcg->slab_caches_mutex);
  2953 + mutex_lock(&memcg_slab_mutex);
2949 2954 list_for_each_entry(params, &memcg->memcg_slab_caches, list)
2950 2955 cache_show(memcg_params_to_cache(params), m);
2951   - mutex_unlock(&memcg->slab_caches_mutex);
  2956 + mutex_unlock(&memcg_slab_mutex);
2952 2957  
2953 2958 return 0;
2954 2959 }
... ... @@ -3050,8 +3055,6 @@
3050 3055 memcg_limited_groups_array_size = memcg_caches_array_size(num);
3051 3056 }
3052 3057  
3053   -static void kmem_cache_destroy_work_func(struct work_struct *w);
3054   -
3055 3058 int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
3056 3059 {
3057 3060 struct memcg_cache_params *cur_params = s->memcg_params;
... ... @@ -3148,8 +3151,6 @@
3148 3151 if (memcg) {
3149 3152 s->memcg_params->memcg = memcg;
3150 3153 s->memcg_params->root_cache = root_cache;
3151   - INIT_WORK(&s->memcg_params->destroy,
3152   - kmem_cache_destroy_work_func);
3153 3154 css_get(&memcg->css);
3154 3155 } else
3155 3156 s->memcg_params->is_root_cache = true;
... ... @@ -3166,24 +3167,34 @@
3166 3167 kfree(s->memcg_params);
3167 3168 }
3168 3169  
3169   -void memcg_register_cache(struct kmem_cache *s)
  3170 +static void memcg_kmem_create_cache(struct mem_cgroup *memcg,
  3171 + struct kmem_cache *root_cache)
3170 3172 {
3171   - struct kmem_cache *root;
3172   - struct mem_cgroup *memcg;
  3173 + struct kmem_cache *cachep;
3173 3174 int id;
3174 3175  
3175   - if (is_root_cache(s))
  3176 + lockdep_assert_held(&memcg_slab_mutex);
  3177 +
  3178 + id = memcg_cache_id(memcg);
  3179 +
  3180 + /*
  3181 + * Since per-memcg caches are created asynchronously on first
  3182 + * allocation (see memcg_kmem_get_cache()), several threads can try to
  3183 + * create the same cache, but only one of them may succeed.
  3184 + */
  3185 + if (cache_from_memcg_idx(root_cache, id))
3176 3186 return;
3177 3187  
  3188 + cachep = kmem_cache_create_memcg(memcg, root_cache);
3178 3189 /*
3179   - * Holding the slab_mutex assures nobody will touch the memcg_caches
3180   - * array while we are modifying it.
  3190 + * If we could not create a memcg cache, do not complain, because
  3191 + * that's not critical at all as we can always proceed with the root
  3192 + * cache.
3181 3193 */
3182   - lockdep_assert_held(&slab_mutex);
  3194 + if (!cachep)
  3195 + return;
3183 3196  
3184   - root = s->memcg_params->root_cache;
3185   - memcg = s->memcg_params->memcg;
3186   - id = memcg_cache_id(memcg);
  3197 + list_add(&cachep->memcg_params->list, &memcg->memcg_slab_caches);
3187 3198  
3188 3199 /*
3189 3200 * Since readers won't lock (see cache_from_memcg_idx()), we need a
... ... @@ -3192,49 +3203,30 @@
3192 3203 */
3193 3204 smp_wmb();
3194 3205  
3195   - /*
3196   - * Initialize the pointer to this cache in its parent's memcg_params
3197   - * before adding it to the memcg_slab_caches list, otherwise we can
3198   - * fail to convert memcg_params_to_cache() while traversing the list.
3199   - */
3200   - VM_BUG_ON(root->memcg_params->memcg_caches[id]);
3201   - root->memcg_params->memcg_caches[id] = s;
3202   -
3203   - mutex_lock(&memcg->slab_caches_mutex);
3204   - list_add(&s->memcg_params->list, &memcg->memcg_slab_caches);
3205   - mutex_unlock(&memcg->slab_caches_mutex);
  3206 + BUG_ON(root_cache->memcg_params->memcg_caches[id]);
  3207 + root_cache->memcg_params->memcg_caches[id] = cachep;
3206 3208 }
3207 3209  
3208   -void memcg_unregister_cache(struct kmem_cache *s)
  3210 +static void memcg_kmem_destroy_cache(struct kmem_cache *cachep)
3209 3211 {
3210   - struct kmem_cache *root;
  3212 + struct kmem_cache *root_cache;
3211 3213 struct mem_cgroup *memcg;
3212 3214 int id;
3213 3215  
3214   - if (is_root_cache(s))
3215   - return;
  3216 + lockdep_assert_held(&memcg_slab_mutex);
3216 3217  
3217   - /*
3218   - * Holding the slab_mutex assures nobody will touch the memcg_caches
3219   - * array while we are modifying it.
3220   - */
3221   - lockdep_assert_held(&slab_mutex);
  3218 + BUG_ON(is_root_cache(cachep));
3222 3219  
3223   - root = s->memcg_params->root_cache;
3224   - memcg = s->memcg_params->memcg;
  3220 + root_cache = cachep->memcg_params->root_cache;
  3221 + memcg = cachep->memcg_params->memcg;
3225 3222 id = memcg_cache_id(memcg);
3226 3223  
3227   - mutex_lock(&memcg->slab_caches_mutex);
3228   - list_del(&s->memcg_params->list);
3229   - mutex_unlock(&memcg->slab_caches_mutex);
  3224 + BUG_ON(root_cache->memcg_params->memcg_caches[id] != cachep);
  3225 + root_cache->memcg_params->memcg_caches[id] = NULL;
3230 3226  
3231   - /*
3232   - * Clear the pointer to this cache in its parent's memcg_params only
3233   - * after removing it from the memcg_slab_caches list, otherwise we can
3234   - * fail to convert memcg_params_to_cache() while traversing the list.
3235   - */
3236   - VM_BUG_ON(root->memcg_params->memcg_caches[id] != s);
3237   - root->memcg_params->memcg_caches[id] = NULL;
  3227 + list_del(&cachep->memcg_params->list);
  3228 +
  3229 + kmem_cache_destroy(cachep);
3238 3230 }
3239 3231  
3240 3232 /*
... ... @@ -3268,70 +3260,42 @@
3268 3260 current->memcg_kmem_skip_account--;
3269 3261 }
3270 3262  
3271   -static void kmem_cache_destroy_work_func(struct work_struct *w)
3272   -{
3273   - struct kmem_cache *cachep;
3274   - struct memcg_cache_params *p;
3275   -
3276   - p = container_of(w, struct memcg_cache_params, destroy);
3277   -
3278   - cachep = memcg_params_to_cache(p);
3279   -
3280   - kmem_cache_shrink(cachep);
3281   - if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
3282   - kmem_cache_destroy(cachep);
3283   -}
3284   -
3285 3263 int __kmem_cache_destroy_memcg_children(struct kmem_cache *s)
3286 3264 {
3287 3265 struct kmem_cache *c;
3288 3266 int i, failed = 0;
3289 3267  
3290   - /*
3291   - * If the cache is being destroyed, we trust that there is no one else
3292   - * requesting objects from it. Even if there are, the sanity checks in
3293   - * kmem_cache_destroy should caught this ill-case.
3294   - *
3295   - * Still, we don't want anyone else freeing memcg_caches under our
3296   - * noses, which can happen if a new memcg comes to life. As usual,
3297   - * we'll take the activate_kmem_mutex to protect ourselves against
3298   - * this.
3299   - */
3300   - mutex_lock(&activate_kmem_mutex);
  3268 + mutex_lock(&memcg_slab_mutex);
3301 3269 for_each_memcg_cache_index(i) {
3302 3270 c = cache_from_memcg_idx(s, i);
3303 3271 if (!c)
3304 3272 continue;
3305 3273  
3306   - /*
3307   - * We will now manually delete the caches, so to avoid races
3308   - * we need to cancel all pending destruction workers and
3309   - * proceed with destruction ourselves.
3310   - */
3311   - cancel_work_sync(&c->memcg_params->destroy);
3312   - kmem_cache_destroy(c);
  3274 + memcg_kmem_destroy_cache(c);
3313 3275  
3314 3276 if (cache_from_memcg_idx(s, i))
3315 3277 failed++;
3316 3278 }
3317   - mutex_unlock(&activate_kmem_mutex);
  3279 + mutex_unlock(&memcg_slab_mutex);
3318 3280 return failed;
3319 3281 }
3320 3282  
3321 3283 static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
3322 3284 {
3323 3285 struct kmem_cache *cachep;
3324   - struct memcg_cache_params *params;
  3286 + struct memcg_cache_params *params, *tmp;
3325 3287  
3326 3288 if (!memcg_kmem_is_active(memcg))
3327 3289 return;
3328 3290  
3329   - mutex_lock(&memcg->slab_caches_mutex);
3330   - list_for_each_entry(params, &memcg->memcg_slab_caches, list) {
  3291 + mutex_lock(&memcg_slab_mutex);
  3292 + list_for_each_entry_safe(params, tmp, &memcg->memcg_slab_caches, list) {
3331 3293 cachep = memcg_params_to_cache(params);
3332   - schedule_work(&cachep->memcg_params->destroy);
  3294 + kmem_cache_shrink(cachep);
  3295 + if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
  3296 + memcg_kmem_destroy_cache(cachep);
3333 3297 }
3334   - mutex_unlock(&memcg->slab_caches_mutex);
  3298 + mutex_unlock(&memcg_slab_mutex);
3335 3299 }
3336 3300  
3337 3301 struct create_work {
... ... @@ -3346,7 +3310,10 @@
3346 3310 struct mem_cgroup *memcg = cw->memcg;
3347 3311 struct kmem_cache *cachep = cw->cachep;
3348 3312  
3349   - kmem_cache_create_memcg(memcg, cachep);
  3313 + mutex_lock(&memcg_slab_mutex);
  3314 + memcg_kmem_create_cache(memcg, cachep);
  3315 + mutex_unlock(&memcg_slab_mutex);
  3316 +
3350 3317 css_put(&memcg->css);
3351 3318 kfree(cw);
3352 3319 }
... ... @@ -5022,13 +4989,14 @@
5022 4989 * Make sure we have enough space for this cgroup in each root cache's
5023 4990 * memcg_params.
5024 4991 */
  4992 + mutex_lock(&memcg_slab_mutex);
5025 4993 err = memcg_update_all_caches(memcg_id + 1);
  4994 + mutex_unlock(&memcg_slab_mutex);
5026 4995 if (err)
5027 4996 goto out_rmid;
5028 4997  
5029 4998 memcg->kmemcg_id = memcg_id;
5030 4999 INIT_LIST_HEAD(&memcg->memcg_slab_caches);
5031   - mutex_init(&memcg->slab_caches_mutex);
5032 5000  
5033 5001 /*
5034 5002 * We couldn't have accounted to this cgroup, because it hasn't got the
mm/slab_common.c
... ... @@ -160,7 +160,6 @@
160 160  
161 161 s->refcount = 1;
162 162 list_add(&s->list, &slab_caches);
163   - memcg_register_cache(s);
164 163 out:
165 164 if (err)
166 165 return ERR_PTR(err);
... ... @@ -270,9 +269,10 @@
270 269 * requests going from @memcg to @root_cache. The new cache inherits properties
271 270 * from its parent.
272 271 */
273   -void kmem_cache_create_memcg(struct mem_cgroup *memcg, struct kmem_cache *root_cache)
  272 +struct kmem_cache *kmem_cache_create_memcg(struct mem_cgroup *memcg,
  273 + struct kmem_cache *root_cache)
274 274 {
275   - struct kmem_cache *s;
  275 + struct kmem_cache *s = NULL;
276 276 char *cache_name;
277 277  
278 278 get_online_cpus();
... ... @@ -280,14 +280,6 @@
280 280  
281 281 mutex_lock(&slab_mutex);
282 282  
283   - /*
284   - * Since per-memcg caches are created asynchronously on first
285   - * allocation (see memcg_kmem_get_cache()), several threads can try to
286   - * create the same cache, but only one of them may succeed.
287   - */
288   - if (cache_from_memcg_idx(root_cache, memcg_cache_id(memcg)))
289   - goto out_unlock;
290   -
291 283 cache_name = memcg_create_cache_name(memcg, root_cache);
292 284 if (!cache_name)
293 285 goto out_unlock;
... ... @@ -296,14 +288,18 @@
296 288 root_cache->size, root_cache->align,
297 289 root_cache->flags, root_cache->ctor,
298 290 memcg, root_cache);
299   - if (IS_ERR(s))
  291 + if (IS_ERR(s)) {
300 292 kfree(cache_name);
  293 + s = NULL;
  294 + }
301 295  
302 296 out_unlock:
303 297 mutex_unlock(&slab_mutex);
304 298  
305 299 put_online_mems();
306 300 put_online_cpus();
  301 +
  302 + return s;
307 303 }
308 304  
309 305 static int kmem_cache_destroy_memcg_children(struct kmem_cache *s)
... ... @@ -348,11 +344,8 @@
348 344 goto out_unlock;
349 345  
350 346 list_del(&s->list);
351   - memcg_unregister_cache(s);
352   -
353 347 if (__kmem_cache_shutdown(s) != 0) {
354 348 list_add(&s->list, &slab_caches);
355   - memcg_register_cache(s);
356 349 printk(KERN_ERR "kmem_cache_destroy %s: "
357 350 "Slab cache still has objects\n", s->name);
358 351 dump_stack();