Commit 146aa1bd0511f88ddb4e92fafa2b8aad4f2f65f3

Authored by Lai Jiangshan
Committed by Linus Torvalds
1 parent 248736c2a5

cgroups: fix probable race with put_css_set[_taskexit] and find_css_set

put_css_set_taskexit() may be called while find_css_set() is running on
another cpu, and the following race can occur:

put_css_set_taskexit side                    find_css_set side

                                        |
atomic_dec_and_test(&kref->refcount)    |
    /* kref->refcount = 0 */            |
....................................................................
                                        |  read_lock(&css_set_lock)
                                        |  find_existing_css_set
                                        |  get_css_set
                                        |  read_unlock(&css_set_lock);
....................................................................
__release_css_set                       |
....................................................................
                                        | /* use a released css_set */
                                        |

[put_css_set() has the same problem, but in the current code every call to
put_css_set() happens inside the cgroup_mutex critical section, just like
find_css_set().]
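For illustration only, here is a user-space sketch of the locking scheme the
patch below adopts: the final decrement may only happen under the writer side
of the lock that lookups hold while taking new references, so a refcount that
has reached zero can never be observed by a lookup in progress.  The names
(obj, obj_lock, obj_get, obj_put) are invented for this sketch, and C11
atomics plus a pthread rwlock stand in for the kernel's atomic_t and
css_set_lock; this is not the kernel code itself.

/*
 * Purely illustrative user-space analogue of the "dec-and-lock, but
 * for an rwlock" scheme used by __put_css_set() in the diff below.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct obj {
	atomic_int refcount;			/* plays the role of css_set->refcount */
};

static pthread_rwlock_t obj_lock = PTHREAD_RWLOCK_INITIALIZER;	/* ~ css_set_lock */

/* Lookup side: a new reference may only be taken under the read lock. */
static void obj_get(struct obj *o)
{
	pthread_rwlock_rdlock(&obj_lock);
	/* ...look the object up in some shared structure... */
	atomic_fetch_add(&o->refcount, 1);
	pthread_rwlock_unlock(&obj_lock);
}

/* Put side: readers must never observe a refcount that already hit zero. */
static void obj_put(struct obj *o)
{
	/*
	 * Fast path, mirroring atomic_add_unless(&cg->refcount, -1, 1):
	 * drop the reference without the lock as long as we are provably
	 * not the last holder.
	 */
	int old = atomic_load(&o->refcount);
	while (old > 1) {
		if (atomic_compare_exchange_weak(&o->refcount, &old, old - 1))
			return;
	}

	/* Slow path: take the writer lock before the final decrement. */
	pthread_rwlock_wrlock(&obj_lock);
	if (atomic_fetch_sub(&o->refcount, 1) != 1) {
		/* A reader re-took a reference under the lock; not the last put. */
		pthread_rwlock_unlock(&obj_lock);
		return;
	}
	/* ...unlink the object from the shared structure while still write-locked... */
	pthread_rwlock_unlock(&obj_lock);
	free(o);
}

int main(void)
{
	struct obj *o = calloc(1, sizeof(*o));

	atomic_init(&o->refcount, 1);	/* creator's reference, like atomic_set(&res->refcount, 1) */
	obj_get(o);			/* a concurrent lookup takes a reference under the read lock */
	obj_put(o);			/* drops the lookup's reference */
	obj_put(o);			/* drops the last reference, unlinks and frees */
	return 0;
}

The fast path avoids taking the lock in the common case where the caller is
clearly not dropping the last reference, which is why the patch describes the
scheme as "similar to atomic_dec_and_lock(), but for an rwlock".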

[akpm@linux-foundation.org: repair comments]
[menage@google.com: eliminate race in css_set refcounting]
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 3 changed files with 23 additions and 27 deletions

include/linux/cgroup.h
... ... @@ -9,7 +9,6 @@
9 9 */
10 10  
11 11 #include <linux/sched.h>
12   -#include <linux/kref.h>
13 12 #include <linux/cpumask.h>
14 13 #include <linux/nodemask.h>
15 14 #include <linux/rcupdate.h>
... ... @@ -149,7 +148,7 @@
149 148 struct css_set {
150 149  
151 150 /* Reference count */
152   - struct kref ref;
  151 + atomic_t refcount;
153 152  
154 153 /*
155 154 * List running through all cgroup groups in the same hash
kernel/cgroup.c
... ... @@ -241,7 +241,6 @@
241 241 struct cg_cgroup_link *link;
242 242 struct cg_cgroup_link *saved_link;
243 243  
244   - write_lock(&css_set_lock);
245 244 hlist_del(&cg->hlist);
246 245 css_set_count--;
247 246  
... ... @@ -251,16 +250,25 @@
251 250 list_del(&link->cgrp_link_list);
252 251 kfree(link);
253 252 }
254   -
255   - write_unlock(&css_set_lock);
256 253 }
257 254  
258   -static void __release_css_set(struct kref *k, int taskexit)
  255 +static void __put_css_set(struct css_set *cg, int taskexit)
259 256 {
260 257 int i;
261   - struct css_set *cg = container_of(k, struct css_set, ref);
262   -
  258 + /*
  259 + * Ensure that the refcount doesn't hit zero while any readers
  260 + * can see it. Similar to atomic_dec_and_lock(), but for an
  261 + * rwlock
  262 + */
  263 + if (atomic_add_unless(&cg->refcount, -1, 1))
  264 + return;
  265 + write_lock(&css_set_lock);
  266 + if (!atomic_dec_and_test(&cg->refcount)) {
  267 + write_unlock(&css_set_lock);
  268 + return;
  269 + }
263 270 unlink_css_set(cg);
  271 + write_unlock(&css_set_lock);
264 272  
265 273 rcu_read_lock();
266 274 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
... ... @@ -276,32 +284,22 @@
276 284 kfree(cg);
277 285 }
278 286  
279   -static void release_css_set(struct kref *k)
280   -{
281   - __release_css_set(k, 0);
282   -}
283   -
284   -static void release_css_set_taskexit(struct kref *k)
285   -{
286   - __release_css_set(k, 1);
287   -}
288   -
289 287 /*
290 288 * refcounted get/put for css_set objects
291 289 */
292 290 static inline void get_css_set(struct css_set *cg)
293 291 {
294   - kref_get(&cg->ref);
  292 + atomic_inc(&cg->refcount);
295 293 }
296 294  
297 295 static inline void put_css_set(struct css_set *cg)
298 296 {
299   - kref_put(&cg->ref, release_css_set);
  297 + __put_css_set(cg, 0);
300 298 }
301 299  
302 300 static inline void put_css_set_taskexit(struct css_set *cg)
303 301 {
304   - kref_put(&cg->ref, release_css_set_taskexit);
  302 + __put_css_set(cg, 1);
305 303 }
306 304  
307 305 /*
... ... @@ -427,7 +425,7 @@
427 425 return NULL;
428 426 }
429 427  
430   - kref_init(&res->ref);
  428 + atomic_set(&res->refcount, 1);
431 429 INIT_LIST_HEAD(&res->cg_links);
432 430 INIT_LIST_HEAD(&res->tasks);
433 431 INIT_HLIST_NODE(&res->hlist);
... ... @@ -1728,7 +1726,7 @@
1728 1726  
1729 1727 read_lock(&css_set_lock);
1730 1728 list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
1731   - count += atomic_read(&link->cg->ref.refcount);
  1729 + count += atomic_read(&link->cg->refcount);
1732 1730 }
1733 1731 read_unlock(&css_set_lock);
1734 1732 return count;
... ... @@ -2495,8 +2493,7 @@
2495 2493 int __init cgroup_init_early(void)
2496 2494 {
2497 2495 int i;
2498   - kref_init(&init_css_set.ref);
2499   - kref_get(&init_css_set.ref);
  2496 + atomic_set(&init_css_set.refcount, 1);
2500 2497 INIT_LIST_HEAD(&init_css_set.cg_links);
2501 2498 INIT_LIST_HEAD(&init_css_set.tasks);
2502 2499 INIT_HLIST_NODE(&init_css_set.hlist);
kernel/cgroup_debug.c
... ... @@ -57,7 +57,7 @@
57 57 u64 count;
58 58  
59 59 rcu_read_lock();
60   - count = atomic_read(&current->cgroups->ref.refcount);
  60 + count = atomic_read(&current->cgroups->refcount);
61 61 rcu_read_unlock();
62 62 return count;
63 63 }
... ... @@ -90,7 +90,7 @@
90 90 {
91 91 .name = "releasable",
92 92 .read_u64 = releasable_read,
93   - }
  93 + },
94 94 };
95 95  
96 96 static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)