Commit 146aa1bd0511f88ddb4e92fafa2b8aad4f2f65f3
Committed by
Linus Torvalds
1 parent
248736c2a5
Exists in
master
and in
4 other branches
cgroups: fix probable race with put_css_set[_taskexit] and find_css_set
put_css_set_taskexit may be called when find_css_set is called on another cpu, and the race will occur:

    put_css_set_taskexit side              find_css_set side
    ---------------------------------------------------------------
    atomic_dec_and_test(&kref->refcount)
    /* kref->refcount = 0 */
    ...............................................................
                                           read_lock(&css_set_lock)
                                           find_existing_css_set
                                           get_css_set
                                           read_unlock(&css_set_lock)
    ...............................................................
    __release_css_set
    ...............................................................
                                           /* use a released css_set */

[put_css_set is the same. But in the current code, all calls to put_css_set are placed inside the cgroup mutex critical region, the same as find_css_set.]

[akpm@linux-foundation.org: repair comments]
[menage@google.com: eliminate race in css_set refcounting]

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 3 changed files with 23 additions and 27 deletions Side-by-side Diff
include/linux/cgroup.h
... | ... | @@ -9,7 +9,6 @@ |
9 | 9 | */ |
10 | 10 | |
11 | 11 | #include <linux/sched.h> |
12 | -#include <linux/kref.h> | |
13 | 12 | #include <linux/cpumask.h> |
14 | 13 | #include <linux/nodemask.h> |
15 | 14 | #include <linux/rcupdate.h> |
... | ... | @@ -149,7 +148,7 @@ |
149 | 148 | struct css_set { |
150 | 149 | |
151 | 150 | /* Reference count */ |
152 | - struct kref ref; | |
151 | + atomic_t refcount; | |
153 | 152 | |
154 | 153 | /* |
155 | 154 | * List running through all cgroup groups in the same hash |
kernel/cgroup.c
... | ... | @@ -241,7 +241,6 @@ |
241 | 241 | struct cg_cgroup_link *link; |
242 | 242 | struct cg_cgroup_link *saved_link; |
243 | 243 | |
244 | - write_lock(&css_set_lock); | |
245 | 244 | hlist_del(&cg->hlist); |
246 | 245 | css_set_count--; |
247 | 246 | |
248 | 247 | |
249 | 248 | |
250 | 249 | |
... | ... | @@ -251,16 +250,25 @@ |
251 | 250 | list_del(&link->cgrp_link_list); |
252 | 251 | kfree(link); |
253 | 252 | } |
254 | - | |
255 | - write_unlock(&css_set_lock); | |
256 | 253 | } |
257 | 254 | |
258 | -static void __release_css_set(struct kref *k, int taskexit) | |
255 | +static void __put_css_set(struct css_set *cg, int taskexit) | |
259 | 256 | { |
260 | 257 | int i; |
261 | - struct css_set *cg = container_of(k, struct css_set, ref); | |
262 | - | |
258 | + /* | |
259 | + * Ensure that the refcount doesn't hit zero while any readers | |
260 | + * can see it. Similar to atomic_dec_and_lock(), but for an | |
261 | + * rwlock | |
262 | + */ | |
263 | + if (atomic_add_unless(&cg->refcount, -1, 1)) | |
264 | + return; | |
265 | + write_lock(&css_set_lock); | |
266 | + if (!atomic_dec_and_test(&cg->refcount)) { | |
267 | + write_unlock(&css_set_lock); | |
268 | + return; | |
269 | + } | |
263 | 270 | unlink_css_set(cg); |
271 | + write_unlock(&css_set_lock); | |
264 | 272 | |
265 | 273 | rcu_read_lock(); |
266 | 274 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
267 | 275 | |
268 | 276 | |
269 | 277 | |
... | ... | @@ -276,32 +284,22 @@ |
276 | 284 | kfree(cg); |
277 | 285 | } |
278 | 286 | |
279 | -static void release_css_set(struct kref *k) | |
280 | -{ | |
281 | - __release_css_set(k, 0); | |
282 | -} | |
283 | - | |
284 | -static void release_css_set_taskexit(struct kref *k) | |
285 | -{ | |
286 | - __release_css_set(k, 1); | |
287 | -} | |
288 | - | |
289 | 287 | /* |
290 | 288 | * refcounted get/put for css_set objects |
291 | 289 | */ |
292 | 290 | static inline void get_css_set(struct css_set *cg) |
293 | 291 | { |
294 | - kref_get(&cg->ref); | |
292 | + atomic_inc(&cg->refcount); | |
295 | 293 | } |
296 | 294 | |
297 | 295 | static inline void put_css_set(struct css_set *cg) |
298 | 296 | { |
299 | - kref_put(&cg->ref, release_css_set); | |
297 | + __put_css_set(cg, 0); | |
300 | 298 | } |
301 | 299 | |
302 | 300 | static inline void put_css_set_taskexit(struct css_set *cg) |
303 | 301 | { |
304 | - kref_put(&cg->ref, release_css_set_taskexit); | |
302 | + __put_css_set(cg, 1); | |
305 | 303 | } |
306 | 304 | |
307 | 305 | /* |
... | ... | @@ -427,7 +425,7 @@ |
427 | 425 | return NULL; |
428 | 426 | } |
429 | 427 | |
430 | - kref_init(&res->ref); | |
428 | + atomic_set(&res->refcount, 1); | |
431 | 429 | INIT_LIST_HEAD(&res->cg_links); |
432 | 430 | INIT_LIST_HEAD(&res->tasks); |
433 | 431 | INIT_HLIST_NODE(&res->hlist); |
... | ... | @@ -1728,7 +1726,7 @@ |
1728 | 1726 | |
1729 | 1727 | read_lock(&css_set_lock); |
1730 | 1728 | list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) { |
1731 | - count += atomic_read(&link->cg->ref.refcount); | |
1729 | + count += atomic_read(&link->cg->refcount); | |
1732 | 1730 | } |
1733 | 1731 | read_unlock(&css_set_lock); |
1734 | 1732 | return count; |
... | ... | @@ -2495,8 +2493,7 @@ |
2495 | 2493 | int __init cgroup_init_early(void) |
2496 | 2494 | { |
2497 | 2495 | int i; |
2498 | - kref_init(&init_css_set.ref); | |
2499 | - kref_get(&init_css_set.ref); | |
2496 | + atomic_set(&init_css_set.refcount, 1); | |
2500 | 2497 | INIT_LIST_HEAD(&init_css_set.cg_links); |
2501 | 2498 | INIT_LIST_HEAD(&init_css_set.tasks); |
2502 | 2499 | INIT_HLIST_NODE(&init_css_set.hlist); |
kernel/cgroup_debug.c
... | ... | @@ -57,7 +57,7 @@ |
57 | 57 | u64 count; |
58 | 58 | |
59 | 59 | rcu_read_lock(); |
60 | - count = atomic_read(¤t->cgroups->ref.refcount); | |
60 | + count = atomic_read(¤t->cgroups->refcount); | |
61 | 61 | rcu_read_unlock(); |
62 | 62 | return count; |
63 | 63 | } |
... | ... | @@ -90,7 +90,7 @@ |
90 | 90 | { |
91 | 91 | .name = "releasable", |
92 | 92 | .read_u64 = releasable_read, |
93 | - } | |
93 | + }, | |
94 | 94 | }; |
95 | 95 | |
96 | 96 | static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont) |