Commit e7c5ec9193d32b9559a3bb8893ceedbda85201ff
Committed by
Linus Torvalds
1 parent
2cb378c862
Exists in
master
and in
39 other branches
cgroups: add css_tryget()
Add css_tryget(), which obtains a counted reference on a CSS. It is used in situations where the caller has a "weak" reference to the CSS, i.e. one that does not protect the cgroup from removal via a reference count, but would instead be cleaned up by a destroy() callback.

css_tryget() will return true on success, or false if the cgroup is being removed.

This is similar to Kamezawa Hiroyuki's patch from a week or two ago, but with the difference that in the event of css_tryget() racing with a cgroup_rmdir(), css_tryget() will only return false if the cgroup really does get removed.

This implementation is done by biasing css->refcnt, so that a refcnt of 1 means "releasable" and 0 means "released or releasing". In the event of a race, css_tryget() distinguishes between "released" and "releasing" by checking for the CSS_REMOVED flag in css->flags.

Signed-off-by: Paul Menage <menage@google.com>
Tested-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 2 changed files with 88 additions and 11 deletions Side-by-side Diff
include/linux/cgroup.h
... | ... | @@ -52,9 +52,9 @@ |
52 | 52 | * hierarchy structure */ |
53 | 53 | struct cgroup *cgroup; |
54 | 54 | |
55 | - /* State maintained by the cgroup system to allow | |
56 | - * subsystems to be "busy". Should be accessed via css_get() | |
57 | - * and css_put() */ | |
55 | + /* State maintained by the cgroup system to allow subsystems | |
56 | + * to be "busy". Should be accessed via css_get(), | |
57 | + * css_tryget() and css_put(). */ | |
58 | 58 | |
59 | 59 | atomic_t refcnt; |
60 | 60 | |
61 | 61 | |
... | ... | @@ -64,11 +64,14 @@ |
64 | 64 | /* bits in struct cgroup_subsys_state flags field */ |
65 | 65 | enum { |
66 | 66 | CSS_ROOT, /* This CSS is the root of the subsystem */ |
67 | + CSS_REMOVED, /* This CSS is dead */ | |
67 | 68 | }; |
68 | 69 | |
69 | 70 | /* |
70 | - * Call css_get() to hold a reference on the cgroup; | |
71 | - * | |
71 | + * Call css_get() to hold a reference on the css; it can be used | |
72 | + * for a reference obtained via: | |
73 | + * - an existing ref-counted reference to the css | |
74 | + * - task->cgroups for a locked task | |
72 | 75 | */ |
73 | 76 | |
74 | 77 | static inline void css_get(struct cgroup_subsys_state *css) |
75 | 78 | |
76 | 79 | |
... | ... | @@ -77,9 +80,32 @@ |
77 | 80 | if (!test_bit(CSS_ROOT, &css->flags)) |
78 | 81 | atomic_inc(&css->refcnt); |
79 | 82 | } |
83 | + | |
84 | +static inline bool css_is_removed(struct cgroup_subsys_state *css) | |
85 | +{ | |
86 | + return test_bit(CSS_REMOVED, &css->flags); | |
87 | +} | |
88 | + | |
80 | 89 | /* |
90 | + * Call css_tryget() to take a reference on a css if your existing | |
91 | + * (known-valid) reference isn't already ref-counted. Returns false if | |
92 | + * the css has been destroyed. | |
93 | + */ | |
94 | + | |
95 | +static inline bool css_tryget(struct cgroup_subsys_state *css) | |
96 | +{ | |
97 | + if (test_bit(CSS_ROOT, &css->flags)) | |
98 | + return true; | |
99 | + while (!atomic_inc_not_zero(&css->refcnt)) { | |
100 | + if (test_bit(CSS_REMOVED, &css->flags)) | |
101 | + return false; | |
102 | + } | |
103 | + return true; | |
104 | +} | |
105 | + | |
106 | +/* | |
81 | 107 | * css_put() should be called to release a reference taken by |
82 | - * css_get() | |
108 | + * css_get() or css_tryget() | |
83 | 109 | */ |
84 | 110 | |
85 | 111 | extern void __css_put(struct cgroup_subsys_state *css); |
kernel/cgroup.c
... | ... | @@ -2333,7 +2333,7 @@ |
2333 | 2333 | struct cgroup *cgrp) |
2334 | 2334 | { |
2335 | 2335 | css->cgroup = cgrp; |
2336 | - atomic_set(&css->refcnt, 0); | |
2336 | + atomic_set(&css->refcnt, 1); | |
2337 | 2337 | css->flags = 0; |
2338 | 2338 | if (cgrp == dummytop) |
2339 | 2339 | set_bit(CSS_ROOT, &css->flags); |
... | ... | @@ -2465,7 +2465,7 @@ |
2465 | 2465 | { |
2466 | 2466 | /* Check the reference count on each subsystem. Since we |
2467 | 2467 | * already established that there are no tasks in the |
2468 | - * cgroup, if the css refcount is also 0, then there should | |
2468 | + * cgroup, if the css refcount is also 1, then there should | |
2469 | 2469 | * be no outstanding references, so the subsystem is safe to |
2470 | 2470 | * destroy. We scan across all subsystems rather than using |
2471 | 2471 | * the per-hierarchy linked list of mounted subsystems since |
2472 | 2472 | |
... | ... | @@ -2486,12 +2486,62 @@ |
2486 | 2486 | * matter, since it can only happen if the cgroup |
2487 | 2487 | * has been deleted and hence no longer needs the |
2488 | 2488 | * release agent to be called anyway. */ |
2489 | - if (css && atomic_read(&css->refcnt)) | |
2489 | + if (css && (atomic_read(&css->refcnt) > 1)) | |
2490 | 2490 | return 1; |
2491 | 2491 | } |
2492 | 2492 | return 0; |
2493 | 2493 | } |
2494 | 2494 | |
2495 | +/* | |
2496 | + * Atomically mark all (or else none) of the cgroup's CSS objects as | |
2497 | + * CSS_REMOVED. Return true on success, or false if the cgroup has | |
2498 | + * busy subsystems. Call with cgroup_mutex held | |
2499 | + */ | |
2500 | + | |
2501 | +static int cgroup_clear_css_refs(struct cgroup *cgrp) | |
2502 | +{ | |
2503 | + struct cgroup_subsys *ss; | |
2504 | + unsigned long flags; | |
2505 | + bool failed = false; | |
2506 | + local_irq_save(flags); | |
2507 | + for_each_subsys(cgrp->root, ss) { | |
2508 | + struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; | |
2509 | + int refcnt; | |
2510 | + do { | |
2511 | + /* We can only remove a CSS with a refcnt==1 */ | |
2512 | + refcnt = atomic_read(&css->refcnt); | |
2513 | + if (refcnt > 1) { | |
2514 | + failed = true; | |
2515 | + goto done; | |
2516 | + } | |
2517 | + BUG_ON(!refcnt); | |
2518 | + /* | |
2519 | + * Drop the refcnt to 0 while we check other | |
2520 | + * subsystems. This will cause any racing | |
2521 | + * css_tryget() to spin until we set the | |
2522 | + * CSS_REMOVED bits or abort | |
2523 | + */ | |
2524 | + } while (atomic_cmpxchg(&css->refcnt, refcnt, 0) != refcnt); | |
2525 | + } | |
2526 | + done: | |
2527 | + for_each_subsys(cgrp->root, ss) { | |
2528 | + struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; | |
2529 | + if (failed) { | |
2530 | + /* | |
2531 | + * Restore old refcnt if we previously managed | |
2532 | + * to clear it from 1 to 0 | |
2533 | + */ | |
2534 | + if (!atomic_read(&css->refcnt)) | |
2535 | + atomic_set(&css->refcnt, 1); | |
2536 | + } else { | |
2537 | + /* Commit the fact that the CSS is removed */ | |
2538 | + set_bit(CSS_REMOVED, &css->flags); | |
2539 | + } | |
2540 | + } | |
2541 | + local_irq_restore(flags); | |
2542 | + return !failed; | |
2543 | +} | |
2544 | + | |
2495 | 2545 | static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) |
2496 | 2546 | { |
2497 | 2547 | struct cgroup *cgrp = dentry->d_fsdata; |
... | ... | @@ -2522,7 +2572,7 @@ |
2522 | 2572 | |
2523 | 2573 | if (atomic_read(&cgrp->count) |
2524 | 2574 | || !list_empty(&cgrp->children) |
2525 | - || cgroup_has_css_refs(cgrp)) { | |
2575 | + || !cgroup_clear_css_refs(cgrp)) { | |
2526 | 2576 | mutex_unlock(&cgroup_mutex); |
2527 | 2577 | return -EBUSY; |
2528 | 2578 | } |
... | ... | @@ -3078,7 +3128,8 @@ |
3078 | 3128 | { |
3079 | 3129 | struct cgroup *cgrp = css->cgroup; |
3080 | 3130 | rcu_read_lock(); |
3081 | - if (atomic_dec_and_test(&css->refcnt) && notify_on_release(cgrp)) { | |
3131 | + if ((atomic_dec_return(&css->refcnt) == 1) && | |
3132 | + notify_on_release(cgrp)) { | |
3082 | 3133 | set_bit(CGRP_RELEASABLE, &cgrp->flags); |
3083 | 3134 | check_for_release(cgrp); |
3084 | 3135 | } |