Commit f8570263ee16eb1d5038b8e20d7db3a68bbb2b49

Authored by Vladimir Davydov
Committed by Linus Torvalds
1 parent f717eb3abb

memcg, slab: RCU protect memcg_params for root caches

We relocate root cache's memcg_params whenever we need to grow the
memcg_caches array to accommodate all kmem-active memory cgroups.
Currently, on relocation we free the old version immediately, which can
lead to a use-after-free, because the memcg_caches array is accessed
lock-free (see cache_from_memcg_idx()).  Fix this by making memcg_params
RCU-protected for root caches.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Christoph Lameter <cl@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 3 changed files with 30 additions and 10 deletions
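
For context on the race being closed: the memcg_caches array hangs off the
root cache's memcg_params and is read with no lock held, while
memcg_update_cache_size() may replace the array and free the old one at any
time. The sketch below is condensed from the pre-patch code shown in the
diffs; the name old_lookup() and the interleaving comments are illustrative,
not from the source.

/* Pre-patch reader, as in the old cache_from_memcg_idx(): no lock, no RCU. */
static struct kmem_cache *old_lookup(struct kmem_cache *s, int idx)
{
        return s->memcg_params->memcg_caches[idx];
}

/*
 * Possible interleaving with the pre-patch memcg_update_cache_size():
 *
 *      reader                              resizer
 *      ------                              -------
 *      p = s->memcg_params;
 *                                          s->memcg_params = new_params;
 *                                          kfree(cur_params);      <- array freed
 *      p->memcg_caches[idx];               <- use-after-free
 *
 * The patch closes this by deferring the kfree() until after an RCU grace
 * period and by making the reader an RCU read-side critical section.
 */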

include/linux/slab.h

@@ -513,7 +513,9 @@
  *
  * Both the root cache and the child caches will have it. For the root cache,
  * this will hold a dynamically allocated array large enough to hold
- * information about the currently limited memcgs in the system.
+ * information about the currently limited memcgs in the system. To allow the
+ * array to be accessed without taking any locks, on relocation we free the old
+ * version only after a grace period.
  *
  * Child caches will hold extra metadata needed for its operation. Fields are:
  *
@@ -528,7 +530,10 @@
 struct memcg_cache_params {
         bool is_root_cache;
         union {
-                struct kmem_cache *memcg_caches[0];
+                struct {
+                        struct rcu_head rcu_head;
+                        struct kmem_cache *memcg_caches[0];
+                };
                 struct {
                         struct mem_cgroup *memcg;
                         struct list_head list;
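
A note on the layout chosen above, with a hedged sketch: the rcu_head sits in
the root-cache arm of the union, so it overlays the child-cache-only fields
and costs child caches nothing, while memcg_caches[] stays last so it remains
a valid flexible array; kfree_rcu() requires exactly such an rcu_head embedded
in the object being freed. The kernel-style sketch below uses hypothetical
names (my_params, slots, my_params_alloc) to show the same construct and how a
root block with nr entries would be sized.

#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

/* Hypothetical stand-in for struct memcg_cache_params (illustration only). */
struct my_params {
        bool is_root_cache;
        union {
                struct {                                /* root cache only */
                        struct rcu_head rcu_head;       /* consumed by kfree_rcu() */
                        void *slots[0];                 /* flexible array, keep last */
                };
                struct {                                /* child caches only */
                        void *owner;
                        struct list_head list;
                };
        };
};

/* A root block holding nr slots: header up to 'slots', then nr pointers. */
static struct my_params *my_params_alloc(int nr)
{
        return kzalloc(offsetof(struct my_params, slots) +
                       nr * sizeof(void *), GFP_KERNEL);
}

/* Defer the free to a grace period; works because rcu_head is embedded. */
static void my_params_free(struct my_params *p)
{
        kfree_rcu(p, rcu_head);
}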

mm/memcontrol.c

@@ -3178,18 +3178,17 @@

         if (num_groups > memcg_limited_groups_array_size) {
                 int i;
+                struct memcg_cache_params *new_params;
                 ssize_t size = memcg_caches_array_size(num_groups);

                 size *= sizeof(void *);
                 size += offsetof(struct memcg_cache_params, memcg_caches);

-                s->memcg_params = kzalloc(size, GFP_KERNEL);
-                if (!s->memcg_params) {
-                        s->memcg_params = cur_params;
+                new_params = kzalloc(size, GFP_KERNEL);
+                if (!new_params)
                         return -ENOMEM;
-                }

-                s->memcg_params->is_root_cache = true;
+                new_params->is_root_cache = true;

                 /*
                  * There is the chance it will be bigger than
@@ -3203,7 +3202,7 @@
                 for (i = 0; i < memcg_limited_groups_array_size; i++) {
                         if (!cur_params->memcg_caches[i])
                                 continue;
-                        s->memcg_params->memcg_caches[i] =
+                        new_params->memcg_caches[i] =
                                         cur_params->memcg_caches[i];
                 }

@@ -3216,7 +3215,9 @@
                  * bigger than the others. And all updates will reset this
                  * anyway.
                  */
-                kfree(cur_params);
+                rcu_assign_pointer(s->memcg_params, new_params);
+                if (cur_params)
+                        kfree_rcu(cur_params, rcu_head);
         }
         return 0;
 }
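
Condensed, the updater above does: allocate the larger array, copy the live
slots, publish with rcu_assign_pointer() (which orders the copies before the
pointer store), and hand the old array to kfree_rcu() instead of kfree(). A
hedged sketch of that sequence, reusing the hypothetical my_params and
my_params_alloc() from the previous example rather than the real
memcg_update_cache_size():

/* Grow *paramsp from old_nr to new_nr slots; caller serializes updaters
 * (the kernel code does this under slab_mutex). Sketch only. */
static int my_params_grow(struct my_params __rcu **paramsp,
                          int old_nr, int new_nr)
{
        struct my_params *cur = rcu_dereference_protected(*paramsp, 1);
        struct my_params *new_params;
        int i;

        new_params = my_params_alloc(new_nr);
        if (!new_params)
                return -ENOMEM;

        new_params->is_root_cache = true;

        /* Copy slots that are already populated in the old array. */
        for (i = 0; cur && i < old_nr; i++)
                new_params->slots[i] = cur->slots[i];

        /* Publish: readers now see either the old or the fully copied array. */
        rcu_assign_pointer(*paramsp, new_params);

        /* The old array may still be in use by lock-free readers; free it
         * only after every current RCU read-side section has finished. */
        if (cur)
                kfree_rcu(cur, rcu_head);
        return 0;
}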

mm/slab.h

@@ -160,14 +160,28 @@
         return s->name;
 }

+/*
+ * Note, we protect with RCU only the memcg_caches array, not per-memcg caches.
+ * That said the caller must assure the memcg's cache won't go away. Since once
+ * created a memcg's cache is destroyed only along with the root cache, it is
+ * true if we are going to allocate from the cache or hold a reference to the
+ * root cache by other means. Otherwise, we should hold either the slab_mutex
+ * or the memcg's slab_caches_mutex while calling this function and accessing
+ * the returned value.
+ */
 static inline struct kmem_cache *
 cache_from_memcg_idx(struct kmem_cache *s, int idx)
 {
         struct kmem_cache *cachep;
+        struct memcg_cache_params *params;

         if (!s->memcg_params)
                 return NULL;
-        cachep = s->memcg_params->memcg_caches[idx];
+
+        rcu_read_lock();
+        params = rcu_dereference(s->memcg_params);
+        cachep = params->memcg_caches[idx];
+        rcu_read_unlock();

         /*
          * Make sure we will access the up-to-date value. The code updating
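
On the read side, the only change is that the array pointer is now fetched
inside rcu_read_lock()/rcu_dereference(), so a concurrent kfree_rcu() of the
old array cannot complete while the lookup is in flight. As the new comment
stresses, RCU protects only the array itself; keeping the returned cache alive
is still the caller's job. A matching lookup sketch for the hypothetical
structure from the earlier examples:

/* Lock-free lookup of slot idx; pairs with my_params_grow() above. */
static void *my_params_lookup(struct my_params __rcu **paramsp, int idx)
{
        struct my_params *p;
        void *entry;

        rcu_read_lock();
        p = rcu_dereference(*paramsp);  /* old or new array, never freed here */
        entry = p ? p->slots[idx] : NULL;
        rcu_read_unlock();

        /*
         * 'entry' itself is not RCU-protected: as in mm/slab.h, the caller
         * must pin the object it refers to by other means (a reference, or
         * the appropriate mutex) before using it.
         */
        return entry;
}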