Commit a47295e6bc42ad35f9c15ac66f598aa24debd4e2

Authored by Paul Menage
Committed by Linus Torvalds
1 parent e7b80bb695

cgroups: make cgroup_path() RCU-safe

Fix races between readers of /proc/sched_debug and cgroup destruction by
freeing cgroup objects via an RCU callback.  Thus any cgroup reference
obtained from an RCU-safe source will remain valid for the duration of the
RCU read-side critical section.  Since dentries are also RCU-safe, this
allows us to traverse up the tree safely.

Additionally, make cgroup_path() check for a NULL cgrp->dentry to avoid
trying to report a path for a partially-created cgroup.

[lizf@cn.fujitsu.com: call deactivate_super() in cgroup_diput()]
Signed-off-by: Paul Menage <menage@google.com>
Reviewed-by: Li Zefan <lizf@cn.fujitsu.com>
Tested-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 2 changed files with 25 additions and 10 deletions Side-by-side Diff

include/linux/cgroup.h
... ... @@ -116,7 +116,7 @@
116 116 struct list_head children; /* my children */
117 117  
118 118 struct cgroup *parent; /* my parent */
119   - struct dentry *dentry; /* cgroup fs entry */
  119 + struct dentry *dentry; /* cgroup fs entry, RCU protected */
120 120  
121 121 /* Private pointers for each registered subsystem */
122 122 struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
... ... @@ -145,6 +145,9 @@
145 145 int pids_use_count;
146 146 /* Length of the current tasks_pids array */
147 147 int pids_length;
  148 +
  149 + /* For RCU-protected deletion */
  150 + struct rcu_head rcu_head;
148 151 };
149 152  
150 153 /* A css_set is a structure holding pointers to a set of
kernel/cgroup.c
... ... @@ -271,7 +271,7 @@
271 271  
272 272 rcu_read_lock();
273 273 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
274   - struct cgroup *cgrp = cg->subsys[i]->cgroup;
  274 + struct cgroup *cgrp = rcu_dereference(cg->subsys[i]->cgroup);
275 275 if (atomic_dec_and_test(&cgrp->count) &&
276 276 notify_on_release(cgrp)) {
277 277 if (taskexit)
... ... @@ -594,6 +594,13 @@
594 594 return;
595 595 }
596 596  
  597 +static void free_cgroup_rcu(struct rcu_head *obj)
  598 +{
  599 + struct cgroup *cgrp = container_of(obj, struct cgroup, rcu_head);
  600 +
  601 + kfree(cgrp);
  602 +}
  603 +
597 604 static void cgroup_diput(struct dentry *dentry, struct inode *inode)
598 605 {
599 606 /* is dentry a directory ? if so, kfree() associated cgroup */
600 607  
... ... @@ -619,11 +626,13 @@
619 626 cgrp->root->number_of_cgroups--;
620 627 mutex_unlock(&cgroup_mutex);
621 628  
622   - /* Drop the active superblock reference that we took when we
623   - * created the cgroup */
  629 + /*
  630 + * Drop the active superblock reference that we took when we
  631 + * created the cgroup
  632 + */
624 633 deactivate_super(cgrp->root->sb);
625 634  
626   - kfree(cgrp);
  635 + call_rcu(&cgrp->rcu_head, free_cgroup_rcu);
627 636 }
628 637 iput(inode);
629 638 }
630 639  
631 640  
... ... @@ -1134,14 +1143,16 @@
1134 1143 * @buf: the buffer to write the path into
1135 1144 * @buflen: the length of the buffer
1136 1145 *
1137   - * Called with cgroup_mutex held. Writes path of cgroup into buf.
1138   - * Returns 0 on success, -errno on error.
  1146 + * Called with cgroup_mutex held or else with an RCU-protected cgroup
  1147 + * reference. Writes path of cgroup into buf. Returns 0 on success,
  1148 + * -errno on error.
1139 1149 */
1140 1150 int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
1141 1151 {
1142 1152 char *start;
  1153 + struct dentry *dentry = rcu_dereference(cgrp->dentry);
1143 1154  
1144   - if (cgrp == dummytop) {
  1155 + if (!dentry || cgrp == dummytop) {
1145 1156 /*
1146 1157 * Inactive subsystems have no dentry for their root
1147 1158 * cgroup
1148 1159  
... ... @@ -1154,13 +1165,14 @@
1154 1165  
1155 1166 *--start = '\0';
1156 1167 for (;;) {
1157   - int len = cgrp->dentry->d_name.len;
  1168 + int len = dentry->d_name.len;
1158 1169 if ((start -= len) < buf)
1159 1170 return -ENAMETOOLONG;
1160 1171 memcpy(start, cgrp->dentry->d_name.name, len);
1161 1172 cgrp = cgrp->parent;
1162 1173 if (!cgrp)
1163 1174 break;
  1175 + dentry = rcu_dereference(cgrp->dentry);
1164 1176 if (!cgrp->parent)
1165 1177 continue;
1166 1178 if (--start < buf)
... ... @@ -1663,7 +1675,7 @@
1663 1675 if (!error) {
1664 1676 dentry->d_fsdata = cgrp;
1665 1677 inc_nlink(parent->d_inode);
1666   - cgrp->dentry = dentry;
  1678 + rcu_assign_pointer(cgrp->dentry, dentry);
1667 1679 dget(dentry);
1668 1680 }
1669 1681 dput(dentry);