Commit 84d737866e2babdeab0c6b18ea155c6a649663b8

Authored by Sukadev Bhattiprolu
Committed by Linus Torvalds
1 parent 6cc1b22a4a

[PATCH] add child reaper to pid_namespace

Add a per pid_namespace child-reaper.  This is needed so processes are reaped
within the same pid space and do not spill over to the parent pid space.  It's
also needed so containers preserve the existing semantic that pid == 1 would
reap orphaned children.

This is based on Eric Biederman's patch: http://lkml.org/lkml/2006/2/6/285

Signed-off-by: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
Cc: Kirill Korotaev <dev@openvz.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Showing 8 changed files with 40 additions and 19 deletions Side-by-side Diff

... ... @@ -38,6 +38,7 @@
38 38 #include <linux/binfmts.h>
39 39 #include <linux/swap.h>
40 40 #include <linux/utsname.h>
  41 +#include <linux/pid_namespace.h>
41 42 #include <linux/module.h>
42 43 #include <linux/namei.h>
43 44 #include <linux/proc_fs.h>
... ... @@ -620,8 +621,8 @@
620 621 * Reparenting needs write_lock on tasklist_lock,
621 622 * so it is safe to do it under read_lock.
622 623 */
623   - if (unlikely(tsk->group_leader == child_reaper))
624   - child_reaper = tsk;
  624 + if (unlikely(tsk->group_leader == child_reaper(tsk)))
  625 + tsk->nsproxy->pid_ns->child_reaper = tsk;
625 626  
626 627 zap_other_threads(tsk);
627 628 read_unlock(&tasklist_lock);
... ... @@ -35,8 +35,9 @@
35 35 *
36 36 * Holding a reference to struct pid solves both of these problems.
37 37 * It is small so holding a reference does not consume a lot of
38   - * resources, and since a new struct pid is allocated when the numeric
39   - * pid value is reused we don't mistakenly refer to new processes.
  38 + * resources, and since a new struct pid is allocated when the numeric pid
  39 + * value is reused (when pids wrap around) we don't mistakenly refer to new
  40 + * processes.
40 41 */
41 42  
42 43 struct pid
include/linux/pid_namespace.h
... ... @@ -19,6 +19,7 @@
19 19 struct kref kref;
20 20 struct pidmap pidmap[PIDMAP_ENTRIES];
21 21 int last_pid;
  22 + struct task_struct *child_reaper;
22 23 };
23 24  
24 25 extern struct pid_namespace init_pid_ns;
... ... @@ -34,6 +35,11 @@
34 35 static inline void put_pid_ns(struct pid_namespace *ns)
35 36 {
36 37 kref_put(&ns->kref, free_pid_ns);
  38 +}
  39 +
  40 +static inline struct task_struct *child_reaper(struct task_struct *tsk)
  41 +{
  42 + return tsk->nsproxy->pid_ns->child_reaper;
37 43 }
38 44  
39 45 #endif /* _LINUX_PID_NS_H */
include/linux/sched.h
... ... @@ -1400,7 +1400,6 @@
1400 1400 extern void daemonize(const char *, ...);
1401 1401 extern int allow_signal(int);
1402 1402 extern int disallow_signal(int);
1403   -extern struct task_struct *child_reaper;
1404 1403  
1405 1404 extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *);
1406 1405 extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
... ... @@ -51,6 +51,7 @@
51 51 #include <linux/debug_locks.h>
52 52 #include <linux/lockdep.h>
53 53 #include <linux/utsrelease.h>
  54 +#include <linux/pid_namespace.h>
54 55 #include <linux/compile.h>
55 56  
56 57 #include <asm/io.h>
... ... @@ -626,8 +627,6 @@
626 627 }
627 628 __setup("initcall_debug", initcall_debug_setup);
628 629  
629   -struct task_struct *child_reaper = &init_task;
630   -
631 630 extern initcall_t __initcall_start[], __initcall_end[];
632 631  
633 632 static void __init do_initcalls(void)
... ... @@ -727,7 +726,7 @@
727 726 * assumptions about where in the task array this
728 727 * can be found.
729 728 */
730   - child_reaper = current;
  729 + init_pid_ns.child_reaper = current;
731 730  
732 731 cad_pid = task_pid(current);
733 732  
... ... @@ -22,6 +22,7 @@
22 22 #include <linux/file.h>
23 23 #include <linux/binfmts.h>
24 24 #include <linux/nsproxy.h>
  25 +#include <linux/pid_namespace.h>
25 26 #include <linux/ptrace.h>
26 27 #include <linux/profile.h>
27 28 #include <linux/mount.h>
... ... @@ -48,7 +49,6 @@
48 49 #include <asm/mmu_context.h>
49 50  
50 51 extern void sem_exit (void);
51   -extern struct task_struct *child_reaper;
52 52  
53 53 static void exit_mm(struct task_struct * tsk);
54 54  
... ... @@ -260,7 +260,8 @@
260 260 }
261 261  
262 262 /**
263   - * reparent_to_init - Reparent the calling kernel thread to the init task.
  263 + * reparent_to_init - Reparent the calling kernel thread to the init task
  264 + * of the pid space that the thread belongs to.
264 265 *
265 266 * If a kernel thread is launched as a result of a system call, or if
266 267 * it ever exits, it should generally reparent itself to init so that
... ... @@ -278,8 +279,8 @@
278 279 ptrace_unlink(current);
279 280 /* Reparent to init */
280 281 remove_parent(current);
281   - current->parent = child_reaper;
282   - current->real_parent = child_reaper;
  282 + current->parent = child_reaper(current);
  283 + current->real_parent = child_reaper(current);
283 284 add_parent(current);
284 285  
285 286 /* Set the exit signal to SIGCHLD so we signal init on exit */
... ... @@ -662,7 +663,8 @@
662 663 * When we die, we re-parent all our children.
663 664 * Try to give them to another thread in our thread
664 665 * group, and if no such member exists, give it to
665   - * the global child reaper process (ie "init")
  666 + * the child reaper process (ie "init") in our pid
  667 + * space.
666 668 */
667 669 static void
668 670 forget_original_parent(struct task_struct *father, struct list_head *to_release)
... ... @@ -673,7 +675,7 @@
673 675 do {
674 676 reaper = next_thread(reaper);
675 677 if (reaper == father) {
676   - reaper = child_reaper;
  678 + reaper = child_reaper(father);
677 679 break;
678 680 }
679 681 } while (reaper->exit_state);
... ... @@ -859,8 +861,13 @@
859 861 panic("Aiee, killing interrupt handler!");
860 862 if (unlikely(!tsk->pid))
861 863 panic("Attempted to kill the idle task!");
862   - if (unlikely(tsk == child_reaper))
863   - panic("Attempted to kill init!");
  864 + if (unlikely(tsk == child_reaper(tsk))) {
  865 + if (tsk->nsproxy->pid_ns != &init_pid_ns)
  866 + tsk->nsproxy->pid_ns->child_reaper = init_pid_ns.child_reaper;
  867 + else
  868 + panic("Attempted to kill init!");
  869 + }
  870 +
864 871  
865 872 if (unlikely(current->ptrace & PT_TRACE_EXIT)) {
866 873 current->ptrace_message = code;
... ... @@ -65,7 +65,8 @@
65 65 .pidmap = {
66 66 [ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
67 67 },
68   - .last_pid = 0
  68 + .last_pid = 0,
  69 + .child_reaper = &init_task
69 70 };
70 71  
71 72 /*
... ... @@ -24,6 +24,9 @@
24 24 #include <linux/signal.h>
25 25 #include <linux/capability.h>
26 26 #include <linux/freezer.h>
  27 +#include <linux/pid_namespace.h>
  28 +#include <linux/nsproxy.h>
  29 +
27 30 #include <asm/param.h>
28 31 #include <asm/uaccess.h>
29 32 #include <asm/unistd.h>
... ... @@ -1877,8 +1880,12 @@
1877 1880 if (sig_kernel_ignore(signr)) /* Default is nothing. */
1878 1881 continue;
1879 1882  
1880   - /* Init gets no signals it doesn't want. */
1881   - if (current == child_reaper)
  1883 + /*
  1884 + * Init of a pid space gets no signals it doesn't want from
  1885 + * within that pid space. It can of course get signals from
  1886 + * its parent pid space.
  1887 + */
  1888 + if (current == child_reaper(current))
1882 1889 continue;
1883 1890  
1884 1891 if (sig_kernel_stop(signr)) {