Commit e885dcde75685e09f23cffae1f6d5169c105b8a0

Authored by Serge E. Hallyn
Committed by Linus Torvalds
1 parent 856c13aa1f

cgroup_clone: use pid of newly created task for new cgroup

cgroup_clone creates a new cgroup with the pid of the task.  This works
correctly for unshare, but for clone cgroup_clone is called from
copy_namespaces inside copy_process, which happens before the new pid is
created.  As a result, the new cgroup was created with current's pid.
This patch:

	1. Moves the call inside copy_process to after the new pid
	   is created
	2. Passes the struct pid into ns_cgroup_clone (as it is not
	   yet attached to the task)
	3. Passes a name from ns_cgroup_clone() into cgroup_clone()
	   so as to keep cgroup_clone() itself simpler
	4. Uses pid_vnr() to get the process id value, so that the
	   pid used to name the new cgroup is always the pid as it
	   would be known to the task which did the cloning or
	   unsharing.  I think that is the most intuitive thing to
	   do.  This way, task t1 does clone(CLONE_NEWPID) to get
	   t2, which does clone(CLONE_NEWPID) to get t3, then the
	   cgroup for t3 will be named for the pid by which t2 knows
	   t3.

(Thanks to Dan Smith for finding the main bug)

Changelog:
	June 11: Incorporate Paul Menage's feedback:  don't pass
	         NULL to ns_cgroup_clone from unshare, and reduce
		 patch size by using 'nodename' in cgroup_clone.
	June 10: Original version

[akpm@linux-foundation.org: build fix]
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Serge Hallyn <serge@us.ibm.com>
Acked-by: Paul Menage <menage@google.com>
Tested-by: Dan Smith <danms@us.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 6 changed files with 23 additions and 16 deletions Side-by-side Diff

include/linux/cgroup.h
... ... @@ -364,7 +364,8 @@
364 364 return task_subsys_state(task, subsys_id)->cgroup;
365 365 }
366 366  
367   -int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss);
  367 +int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss,
  368 + char *nodename);
368 369  
369 370 /* A cgroup_iter should be treated as an opaque object */
370 371 struct cgroup_iter {
include/linux/nsproxy.h
... ... @@ -82,9 +82,12 @@
82 82 }
83 83  
84 84 #ifdef CONFIG_CGROUP_NS
85   -int ns_cgroup_clone(struct task_struct *tsk);
  85 +int ns_cgroup_clone(struct task_struct *tsk, struct pid *pid);
86 86 #else
87   -static inline int ns_cgroup_clone(struct task_struct *tsk) { return 0; }
  87 +static inline int ns_cgroup_clone(struct task_struct *tsk, struct pid *pid)
  88 +{
  89 + return 0;
  90 +}
88 91 #endif
89 92  
90 93 #endif
... ... @@ -2848,16 +2848,17 @@
2848 2848 * cgroup_clone - clone the cgroup the given subsystem is attached to
2849 2849 * @tsk: the task to be moved
2850 2850 * @subsys: the given subsystem
  2851 + * @nodename: the name for the new cgroup
2851 2852 *
2852 2853 * Duplicate the current cgroup in the hierarchy that the given
2853 2854 * subsystem is attached to, and move this task into the new
2854 2855 * child.
2855 2856 */
2856   -int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys)
  2857 +int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
  2858 + char *nodename)
2857 2859 {
2858 2860 struct dentry *dentry;
2859 2861 int ret = 0;
2860   - char nodename[MAX_CGROUP_TYPE_NAMELEN];
2861 2862 struct cgroup *parent, *child;
2862 2863 struct inode *inode;
2863 2864 struct css_set *cg;
... ... @@ -2881,8 +2882,6 @@
2881 2882 }
2882 2883 cg = tsk->cgroups;
2883 2884 parent = task_cgroup(tsk, subsys->subsys_id);
2884   -
2885   - snprintf(nodename, MAX_CGROUP_TYPE_NAMELEN, "%d", tsk->pid);
2886 2885  
2887 2886 /* Pin the hierarchy */
2888 2887 atomic_inc(&parent->root->sb->s_active);
... ... @@ -1107,6 +1107,12 @@
1107 1107 if (clone_flags & CLONE_THREAD)
1108 1108 p->tgid = current->tgid;
1109 1109  
  1110 + if (current->nsproxy != p->nsproxy) {
  1111 + retval = ns_cgroup_clone(p, pid);
  1112 + if (retval)
  1113 + goto bad_fork_free_pid;
  1114 + }
  1115 +
1110 1116 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
1111 1117 /*
1112 1118 * Clear TID on mm_release()?
... ... @@ -7,6 +7,7 @@
7 7 #include <linux/module.h>
8 8 #include <linux/cgroup.h>
9 9 #include <linux/fs.h>
  10 +#include <linux/proc_fs.h>
10 11 #include <linux/slab.h>
11 12 #include <linux/nsproxy.h>
12 13  
13 14  
... ... @@ -24,9 +25,12 @@
24 25 struct ns_cgroup, css);
25 26 }
26 27  
27   -int ns_cgroup_clone(struct task_struct *task)
  28 +int ns_cgroup_clone(struct task_struct *task, struct pid *pid)
28 29 {
29   - return cgroup_clone(task, &ns_subsys);
  30 + char name[PROC_NUMBUF];
  31 +
  32 + snprintf(name, PROC_NUMBUF, "%d", pid_vnr(pid));
  33 + return cgroup_clone(task, &ns_subsys, name);
30 34 }
31 35  
32 36 /*
... ... @@ -157,12 +157,6 @@
157 157 goto out;
158 158 }
159 159  
160   - err = ns_cgroup_clone(tsk);
161   - if (err) {
162   - put_nsproxy(new_ns);
163   - goto out;
164   - }
165   -
166 160 tsk->nsproxy = new_ns;
167 161  
168 162 out:
... ... @@ -209,7 +203,7 @@
209 203 goto out;
210 204 }
211 205  
212   - err = ns_cgroup_clone(current);
  206 + err = ns_cgroup_clone(current, task_pid(current));
213 207 if (err)
214 208 put_nsproxy(*new_nsp);
215 209