Commit e7c5ec9193d32b9559a3bb8893ceedbda85201ff

Authored by Paul Menage
Committed by Linus Torvalds
1 parent 2cb378c862

cgroups: add css_tryget()

Add css_tryget(), which obtains a counted reference on a CSS.  It is used
in situations where the caller has a "weak" reference to the CSS, i.e.
one that does not protect the cgroup from removal via a reference count,
but would instead be cleaned up by a destroy() callback.

css_tryget() will return true on success, or false if the cgroup is being
removed.

This is similar to Kamezawa Hiroyuki's patch from a week or two ago, but
with the difference that in the event of css_tryget() racing with a
cgroup_rmdir(), css_tryget() will only return false if the cgroup really
does get removed.

This implementation is done by biasing css->refcnt, so that a refcnt of 1
means "releasable" and 0 means "released or releasing".  In the event of a
race, css_tryget() distinguishes between "released" and "releasing" by
checking for the CSS_REMOVED flag in css->flags.

Signed-off-by: Paul Menage <menage@google.com>
Tested-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 2 changed files with 88 additions and 11 deletions Side-by-side Diff

include/linux/cgroup.h
... ... @@ -52,9 +52,9 @@
52 52 * hierarchy structure */
53 53 struct cgroup *cgroup;
54 54  
55   - /* State maintained by the cgroup system to allow
56   - * subsystems to be "busy". Should be accessed via css_get()
57   - * and css_put() */
  55 + /* State maintained by the cgroup system to allow subsystems
  56 + * to be "busy". Should be accessed via css_get(),
  57 + * css_tryget() and css_put(). */
58 58  
59 59 atomic_t refcnt;
60 60  
61 61  
... ... @@ -64,11 +64,14 @@
64 64 /* bits in struct cgroup_subsys_state flags field */
65 65 enum {
66 66 CSS_ROOT, /* This CSS is the root of the subsystem */
  67 + CSS_REMOVED, /* This CSS is dead */
67 68 };
68 69  
69 70 /*
70   - * Call css_get() to hold a reference on the cgroup;
71   - *
  71 + * Call css_get() to hold a reference on the css; it can be used
  72 + * for a reference obtained via:
  73 + * - an existing ref-counted reference to the css
  74 + * - task->cgroups for a locked task
72 75 */
73 76  
74 77 static inline void css_get(struct cgroup_subsys_state *css)
75 78  
76 79  
... ... @@ -77,9 +80,32 @@
77 80 if (!test_bit(CSS_ROOT, &css->flags))
78 81 atomic_inc(&css->refcnt);
79 82 }
  83 +
  84 +static inline bool css_is_removed(struct cgroup_subsys_state *css)
  85 +{
  86 + return test_bit(CSS_REMOVED, &css->flags);
  87 +}
  88 +
80 89 /*
  90 + * Call css_tryget() to take a reference on a css if your existing
  91 + * (known-valid) reference isn't already ref-counted. Returns false if
  92 + * the css has been destroyed.
  93 + */
  94 +
  95 +static inline bool css_tryget(struct cgroup_subsys_state *css)
  96 +{
  97 + if (test_bit(CSS_ROOT, &css->flags))
  98 + return true;
  99 + while (!atomic_inc_not_zero(&css->refcnt)) {
  100 + if (test_bit(CSS_REMOVED, &css->flags))
  101 + return false;
  102 + }
  103 + return true;
  104 +}
  105 +
  106 +/*
81 107 * css_put() should be called to release a reference taken by
82   - * css_get()
  108 + * css_get() or css_tryget()
83 109 */
84 110  
85 111 extern void __css_put(struct cgroup_subsys_state *css);
... ... @@ -2333,7 +2333,7 @@
2333 2333 struct cgroup *cgrp)
2334 2334 {
2335 2335 css->cgroup = cgrp;
2336   - atomic_set(&css->refcnt, 0);
  2336 + atomic_set(&css->refcnt, 1);
2337 2337 css->flags = 0;
2338 2338 if (cgrp == dummytop)
2339 2339 set_bit(CSS_ROOT, &css->flags);
... ... @@ -2465,7 +2465,7 @@
2465 2465 {
2466 2466 /* Check the reference count on each subsystem. Since we
2467 2467 * already established that there are no tasks in the
2468   - * cgroup, if the css refcount is also 0, then there should
  2468 + * cgroup, if the css refcount is also 1, then there should
2469 2469 * be no outstanding references, so the subsystem is safe to
2470 2470 * destroy. We scan across all subsystems rather than using
2471 2471 * the per-hierarchy linked list of mounted subsystems since
2472 2472  
... ... @@ -2486,12 +2486,62 @@
2486 2486 * matter, since it can only happen if the cgroup
2487 2487 * has been deleted and hence no longer needs the
2488 2488 * release agent to be called anyway. */
2489   - if (css && atomic_read(&css->refcnt))
  2489 + if (css && (atomic_read(&css->refcnt) > 1))
2490 2490 return 1;
2491 2491 }
2492 2492 return 0;
2493 2493 }
2494 2494  
  2495 +/*
  2496 + * Atomically mark all (or else none) of the cgroup's CSS objects as
  2497 + * CSS_REMOVED. Return true on success, or false if the cgroup has
  2498 + * busy subsystems. Call with cgroup_mutex held
  2499 + */
  2500 +
  2501 +static int cgroup_clear_css_refs(struct cgroup *cgrp)
  2502 +{
  2503 + struct cgroup_subsys *ss;
  2504 + unsigned long flags;
  2505 + bool failed = false;
  2506 + local_irq_save(flags);
  2507 + for_each_subsys(cgrp->root, ss) {
  2508 + struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
  2509 + int refcnt;
  2510 + do {
  2511 + /* We can only remove a CSS with a refcnt==1 */
  2512 + refcnt = atomic_read(&css->refcnt);
  2513 + if (refcnt > 1) {
  2514 + failed = true;
  2515 + goto done;
  2516 + }
  2517 + BUG_ON(!refcnt);
  2518 + /*
  2519 + * Drop the refcnt to 0 while we check other
  2520 + * subsystems. This will cause any racing
  2521 + * css_tryget() to spin until we set the
  2522 + * CSS_REMOVED bits or abort
  2523 + */
  2524 + } while (atomic_cmpxchg(&css->refcnt, refcnt, 0) != refcnt);
  2525 + }
  2526 + done:
  2527 + for_each_subsys(cgrp->root, ss) {
  2528 + struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
  2529 + if (failed) {
  2530 + /*
  2531 + * Restore old refcnt if we previously managed
  2532 + * to clear it from 1 to 0
  2533 + */
  2534 + if (!atomic_read(&css->refcnt))
  2535 + atomic_set(&css->refcnt, 1);
  2536 + } else {
  2537 + /* Commit the fact that the CSS is removed */
  2538 + set_bit(CSS_REMOVED, &css->flags);
  2539 + }
  2540 + }
  2541 + local_irq_restore(flags);
  2542 + return !failed;
  2543 +}
  2544 +
2495 2545 static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
2496 2546 {
2497 2547 struct cgroup *cgrp = dentry->d_fsdata;
... ... @@ -2522,7 +2572,7 @@
2522 2572  
2523 2573 if (atomic_read(&cgrp->count)
2524 2574 || !list_empty(&cgrp->children)
2525   - || cgroup_has_css_refs(cgrp)) {
  2575 + || !cgroup_clear_css_refs(cgrp)) {
2526 2576 mutex_unlock(&cgroup_mutex);
2527 2577 return -EBUSY;
2528 2578 }
... ... @@ -3078,7 +3128,8 @@
3078 3128 {
3079 3129 struct cgroup *cgrp = css->cgroup;
3080 3130 rcu_read_lock();
3081   - if (atomic_dec_and_test(&css->refcnt) && notify_on_release(cgrp)) {
  3131 + if ((atomic_dec_return(&css->refcnt) == 1) &&
  3132 + notify_on_release(cgrp)) {
3082 3133 set_bit(CGRP_RELEASABLE, &cgrp->flags);
3083 3134 check_for_release(cgrp);
3084 3135 }