Commit 74bd59bb39eb08b4379e2590c5f160748d83f812

Authored by Pavel Emelyanov
Committed by Linus Torvalds
1 parent aee16ce73c

namespaces: cleanup the code managed with PID_NS option

Just like with the user namespaces, move the PID namespace management code
into a separate .c file and mark the (already existing) PID_NS option as
"depends on NAMESPACES".

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Cc: Cedric Le Goater <clg@fr.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 6 changed files with 220 additions and 194 deletions Side-by-side Diff

... ... @@ -118,10 +118,10 @@
118 118 */
119 119 extern struct pid *find_get_pid(int nr);
120 120 extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
  121 +int next_pidmap(struct pid_namespace *pid_ns, int last);
121 122  
122 123 extern struct pid *alloc_pid(struct pid_namespace *ns);
123 124 extern void FASTCALL(free_pid(struct pid *pid));
124   -extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
125 125  
126 126 /*
127 127 * the helpers to get the pid's id seen from different namespaces
include/linux/pid_namespace.h
... ... @@ -39,6 +39,7 @@
39 39  
40 40 extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns);
41 41 extern void free_pid_ns(struct kref *kref);
  42 +extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
42 43  
43 44 static inline void put_pid_ns(struct pid_namespace *ns)
44 45 {
... ... @@ -66,6 +67,11 @@
66 67 {
67 68 }
68 69  
  70 +
  71 +static inline void zap_pid_ns_processes(struct pid_namespace *ns)
  72 +{
  73 + BUG();
  74 +}
69 75 #endif /* CONFIG_PID_NS */
70 76  
71 77 static inline struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
... ... @@ -214,18 +214,6 @@
214 214  
215 215 Say N if unsure.
216 216  
217   -config PID_NS
218   - bool "PID Namespaces (EXPERIMENTAL)"
219   - default n
220   - depends on EXPERIMENTAL
221   - help
222   - Support process id namespaces. This allows having multiple
223   - processes with the same pid as long as they are in different
224   - pid namespaces. This is a building block of containers.
225   -
226   - Unless you want to work with an experimental feature
227   - say N here.
228   -
229 217 config AUDIT
230 218 bool "Auditing support"
231 219 depends on NET
... ... @@ -441,6 +429,18 @@
441 429 This allows containers, i.e. vservers, to use user namespaces
442 430 to provide different user info for different servers.
443 431 If unsure, say N.
  432 +
  433 +config PID_NS
  434 + bool "PID Namespaces (EXPERIMENTAL)"
  435 + default n
  436 + depends on NAMESPACES && EXPERIMENTAL
  437 + help
  438 + Support process id namespaces. This allows having multiple
  439 + processes with the same pid as long as they are in different
  440 + pid namespaces. This is a building block of containers.
  441 +
  442 + Unless you want to work with an experimental feature
  443 + say N here.
444 444  
445 445 config BLK_DEV_INITRD
446 446 bool "Initial RAM filesystem and RAM disk (initramfs/initrd) support"
... ... @@ -44,6 +44,7 @@
44 44 obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
45 45 obj-$(CONFIG_UTS_NS) += utsname.o
46 46 obj-$(CONFIG_USER_NS) += user_namespace.o
  47 +obj-$(CONFIG_PID_NS) += pid_namespace.o
47 48 obj-$(CONFIG_IKCONFIG) += configs.o
48 49 obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o
49 50 obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
... ... @@ -41,7 +41,6 @@
41 41 static struct hlist_head *pid_hash;
42 42 static int pidhash_shift;
43 43 struct pid init_struct_pid = INIT_STRUCT_PID;
44   -static struct kmem_cache *pid_ns_cachep;
45 44  
46 45 int pid_max = PID_MAX_DEFAULT;
47 46  
... ... @@ -181,7 +180,7 @@
181 180 return -1;
182 181 }
183 182  
184   -static int next_pidmap(struct pid_namespace *pid_ns, int last)
  183 +int next_pidmap(struct pid_namespace *pid_ns, int last)
185 184 {
186 185 int offset;
187 186 struct pidmap *map, *end;
188 187  
... ... @@ -488,181 +487,7 @@
488 487 }
489 488 EXPORT_SYMBOL_GPL(find_get_pid);
490 489  
491   -struct pid_cache {
492   - int nr_ids;
493   - char name[16];
494   - struct kmem_cache *cachep;
495   - struct list_head list;
496   -};
497   -
498   -static LIST_HEAD(pid_caches_lh);
499   -static DEFINE_MUTEX(pid_caches_mutex);
500   -
501 490 /*
502   - * creates the kmem cache to allocate pids from.
503   - * @nr_ids: the number of numerical ids this pid will have to carry
504   - */
505   -
506   -static struct kmem_cache *create_pid_cachep(int nr_ids)
507   -{
508   - struct pid_cache *pcache;
509   - struct kmem_cache *cachep;
510   -
511   - mutex_lock(&pid_caches_mutex);
512   - list_for_each_entry (pcache, &pid_caches_lh, list)
513   - if (pcache->nr_ids == nr_ids)
514   - goto out;
515   -
516   - pcache = kmalloc(sizeof(struct pid_cache), GFP_KERNEL);
517   - if (pcache == NULL)
518   - goto err_alloc;
519   -
520   - snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids);
521   - cachep = kmem_cache_create(pcache->name,
522   - sizeof(struct pid) + (nr_ids - 1) * sizeof(struct upid),
523   - 0, SLAB_HWCACHE_ALIGN, NULL);
524   - if (cachep == NULL)
525   - goto err_cachep;
526   -
527   - pcache->nr_ids = nr_ids;
528   - pcache->cachep = cachep;
529   - list_add(&pcache->list, &pid_caches_lh);
530   -out:
531   - mutex_unlock(&pid_caches_mutex);
532   - return pcache->cachep;
533   -
534   -err_cachep:
535   - kfree(pcache);
536   -err_alloc:
537   - mutex_unlock(&pid_caches_mutex);
538   - return NULL;
539   -}
540   -
541   -#ifdef CONFIG_PID_NS
542   -static struct pid_namespace *create_pid_namespace(int level)
543   -{
544   - struct pid_namespace *ns;
545   - int i;
546   -
547   - ns = kmem_cache_alloc(pid_ns_cachep, GFP_KERNEL);
548   - if (ns == NULL)
549   - goto out;
550   -
551   - ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
552   - if (!ns->pidmap[0].page)
553   - goto out_free;
554   -
555   - ns->pid_cachep = create_pid_cachep(level + 1);
556   - if (ns->pid_cachep == NULL)
557   - goto out_free_map;
558   -
559   - kref_init(&ns->kref);
560   - ns->last_pid = 0;
561   - ns->child_reaper = NULL;
562   - ns->level = level;
563   -
564   - set_bit(0, ns->pidmap[0].page);
565   - atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
566   -
567   - for (i = 1; i < PIDMAP_ENTRIES; i++) {
568   - ns->pidmap[i].page = 0;
569   - atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
570   - }
571   -
572   - return ns;
573   -
574   -out_free_map:
575   - kfree(ns->pidmap[0].page);
576   -out_free:
577   - kmem_cache_free(pid_ns_cachep, ns);
578   -out:
579   - return ERR_PTR(-ENOMEM);
580   -}
581   -
582   -static void destroy_pid_namespace(struct pid_namespace *ns)
583   -{
584   - int i;
585   -
586   - for (i = 0; i < PIDMAP_ENTRIES; i++)
587   - kfree(ns->pidmap[i].page);
588   - kmem_cache_free(pid_ns_cachep, ns);
589   -}
590   -
591   -struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns)
592   -{
593   - struct pid_namespace *new_ns;
594   -
595   - BUG_ON(!old_ns);
596   - new_ns = get_pid_ns(old_ns);
597   - if (!(flags & CLONE_NEWPID))
598   - goto out;
599   -
600   - new_ns = ERR_PTR(-EINVAL);
601   - if (flags & CLONE_THREAD)
602   - goto out_put;
603   -
604   - new_ns = create_pid_namespace(old_ns->level + 1);
605   - if (!IS_ERR(new_ns))
606   - new_ns->parent = get_pid_ns(old_ns);
607   -
608   -out_put:
609   - put_pid_ns(old_ns);
610   -out:
611   - return new_ns;
612   -}
613   -
614   -void free_pid_ns(struct kref *kref)
615   -{
616   - struct pid_namespace *ns, *parent;
617   -
618   - ns = container_of(kref, struct pid_namespace, kref);
619   -
620   - parent = ns->parent;
621   - destroy_pid_namespace(ns);
622   -
623   - if (parent != NULL)
624   - put_pid_ns(parent);
625   -}
626   -#endif /* CONFIG_PID_NS */
627   -
628   -void zap_pid_ns_processes(struct pid_namespace *pid_ns)
629   -{
630   - int nr;
631   - int rc;
632   -
633   - /*
634   - * The last thread in the cgroup-init thread group is terminating.
635   - * Find remaining pid_ts in the namespace, signal and wait for them
636   - * to exit.
637   - *
638   - * Note: This signals each thread in the namespace - even those that
639   - * belong to the same thread group. To avoid this, we would have
640   - * to walk the entire tasklist looking for processes in this
641   - * namespace, but that could be unnecessarily expensive if the
642   - * pid namespace has just a few processes. Or we need to
643   - * maintain a tasklist for each pid namespace.
644   - *
645   - */
646   - read_lock(&tasklist_lock);
647   - nr = next_pidmap(pid_ns, 1);
648   - while (nr > 0) {
649   - kill_proc_info(SIGKILL, SEND_SIG_PRIV, nr);
650   - nr = next_pidmap(pid_ns, nr);
651   - }
652   - read_unlock(&tasklist_lock);
653   -
654   - do {
655   - clear_thread_flag(TIF_SIGPENDING);
656   - rc = sys_wait4(-1, NULL, __WALL, NULL);
657   - } while (rc != -ECHILD);
658   -
659   -
660   - /* Child reaper for the pid namespace is going away */
661   - pid_ns->child_reaper = NULL;
662   - return;
663   -}
664   -
665   -/*
666 491 * The pid hash table is scaled according to the amount of memory in the
667 492 * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or
668 493 * more.
... ... @@ -694,10 +519,7 @@
694 519 set_bit(0, init_pid_ns.pidmap[0].page);
695 520 atomic_dec(&init_pid_ns.pidmap[0].nr_free);
696 521  
697   - init_pid_ns.pid_cachep = create_pid_cachep(1);
698   - if (init_pid_ns.pid_cachep == NULL)
699   - panic("Can't create pid_1 cachep\n");
700   -
701   - pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
  522 + init_pid_ns.pid_cachep = KMEM_CACHE(pid,
  523 + SLAB_HWCACHE_ALIGN | SLAB_PANIC);
702 524 }
kernel/pid_namespace.c
  1 +/*
  2 + * Pid namespaces
  3 + *
  4 + * Authors:
  5 + * (C) 2007 Pavel Emelyanov <xemul@openvz.org>, OpenVZ, SWsoft Inc.
  6 + * (C) 2007 Sukadev Bhattiprolu <sukadev@us.ibm.com>, IBM
  7 + * Many thanks to Oleg Nesterov for comments and help
  8 + *
  9 + */
  10 +
  11 +#include <linux/pid.h>
  12 +#include <linux/pid_namespace.h>
  13 +#include <linux/syscalls.h>
  14 +#include <linux/err.h>
  15 +
  16 +#define BITS_PER_PAGE (PAGE_SIZE*8)
  17 +
  18 +struct pid_cache {
  19 + int nr_ids;
  20 + char name[16];
  21 + struct kmem_cache *cachep;
  22 + struct list_head list;
  23 +};
  24 +
  25 +static LIST_HEAD(pid_caches_lh);
  26 +static DEFINE_MUTEX(pid_caches_mutex);
  27 +static struct kmem_cache *pid_ns_cachep;
  28 +
  29 +/*
  30 + * creates the kmem cache to allocate pids from.
  31 + * @nr_ids: the number of numerical ids this pid will have to carry
  32 + */
  33 +
  34 +static struct kmem_cache *create_pid_cachep(int nr_ids)
  35 +{
  36 + struct pid_cache *pcache;
  37 + struct kmem_cache *cachep;
  38 +
  39 + mutex_lock(&pid_caches_mutex);
  40 + list_for_each_entry(pcache, &pid_caches_lh, list)
  41 + if (pcache->nr_ids == nr_ids)
  42 + goto out;
  43 +
  44 + pcache = kmalloc(sizeof(struct pid_cache), GFP_KERNEL);
  45 + if (pcache == NULL)
  46 + goto err_alloc;
  47 +
  48 + snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids);
  49 + cachep = kmem_cache_create(pcache->name,
  50 + sizeof(struct pid) + (nr_ids - 1) * sizeof(struct upid),
  51 + 0, SLAB_HWCACHE_ALIGN, NULL);
  52 + if (cachep == NULL)
  53 + goto err_cachep;
  54 +
  55 + pcache->nr_ids = nr_ids;
  56 + pcache->cachep = cachep;
  57 + list_add(&pcache->list, &pid_caches_lh);
  58 +out:
  59 + mutex_unlock(&pid_caches_mutex);
  60 + return pcache->cachep;
  61 +
  62 +err_cachep:
  63 + kfree(pcache);
  64 +err_alloc:
  65 + mutex_unlock(&pid_caches_mutex);
  66 + return NULL;
  67 +}
  68 +
  69 +static struct pid_namespace *create_pid_namespace(int level)
  70 +{
  71 + struct pid_namespace *ns;
  72 + int i;
  73 +
  74 + ns = kmem_cache_alloc(pid_ns_cachep, GFP_KERNEL);
  75 + if (ns == NULL)
  76 + goto out;
  77 +
  78 + ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
  79 + if (!ns->pidmap[0].page)
  80 + goto out_free;
  81 +
  82 + ns->pid_cachep = create_pid_cachep(level + 1);
  83 + if (ns->pid_cachep == NULL)
  84 + goto out_free_map;
  85 +
  86 + kref_init(&ns->kref);
  87 + ns->last_pid = 0;
  88 + ns->child_reaper = NULL;
  89 + ns->level = level;
  90 +
  91 + set_bit(0, ns->pidmap[0].page);
  92 + atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
  93 +
  94 + for (i = 1; i < PIDMAP_ENTRIES; i++) {
  95 + ns->pidmap[i].page = 0;
  96 + atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
  97 + }
  98 +
  99 + return ns;
  100 +
  101 +out_free_map:
  102 + kfree(ns->pidmap[0].page);
  103 +out_free:
  104 + kmem_cache_free(pid_ns_cachep, ns);
  105 +out:
  106 + return ERR_PTR(-ENOMEM);
  107 +}
  108 +
  109 +static void destroy_pid_namespace(struct pid_namespace *ns)
  110 +{
  111 + int i;
  112 +
  113 + for (i = 0; i < PIDMAP_ENTRIES; i++)
  114 + kfree(ns->pidmap[i].page);
  115 + kmem_cache_free(pid_ns_cachep, ns);
  116 +}
  117 +
  118 +struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns)
  119 +{
  120 + struct pid_namespace *new_ns;
  121 +
  122 + BUG_ON(!old_ns);
  123 + new_ns = get_pid_ns(old_ns);
  124 + if (!(flags & CLONE_NEWPID))
  125 + goto out;
  126 +
  127 + new_ns = ERR_PTR(-EINVAL);
  128 + if (flags & CLONE_THREAD)
  129 + goto out_put;
  130 +
  131 + new_ns = create_pid_namespace(old_ns->level + 1);
  132 + if (!IS_ERR(new_ns))
  133 + new_ns->parent = get_pid_ns(old_ns);
  134 +
  135 +out_put:
  136 + put_pid_ns(old_ns);
  137 +out:
  138 + return new_ns;
  139 +}
  140 +
  141 +void free_pid_ns(struct kref *kref)
  142 +{
  143 + struct pid_namespace *ns, *parent;
  144 +
  145 + ns = container_of(kref, struct pid_namespace, kref);
  146 +
  147 + parent = ns->parent;
  148 + destroy_pid_namespace(ns);
  149 +
  150 + if (parent != NULL)
  151 + put_pid_ns(parent);
  152 +}
  153 +
  154 +void zap_pid_ns_processes(struct pid_namespace *pid_ns)
  155 +{
  156 + int nr;
  157 + int rc;
  158 +
  159 + /*
  160 + * The last thread in the cgroup-init thread group is terminating.
  161 + * Find remaining pid_ts in the namespace, signal and wait for them
  162 + * to exit.
  163 + *
  164 + * Note: This signals each thread in the namespace - even those that
  165 + * belong to the same thread group. To avoid this, we would have
  166 + * to walk the entire tasklist looking for processes in this
  167 + * namespace, but that could be unnecessarily expensive if the
  168 + * pid namespace has just a few processes. Or we need to
  169 + * maintain a tasklist for each pid namespace.
  170 + *
  171 + */
  172 + read_lock(&tasklist_lock);
  173 + nr = next_pidmap(pid_ns, 1);
  174 + while (nr > 0) {
  175 + kill_proc_info(SIGKILL, SEND_SIG_PRIV, nr);
  176 + nr = next_pidmap(pid_ns, nr);
  177 + }
  178 + read_unlock(&tasklist_lock);
  179 +
  180 + do {
  181 + clear_thread_flag(TIF_SIGPENDING);
  182 + rc = sys_wait4(-1, NULL, __WALL, NULL);
  183 + } while (rc != -ECHILD);
  184 +
  185 +
  186 + /* Child reaper for the pid namespace is going away */
  187 + pid_ns->child_reaper = NULL;
  188 + return;
  189 +}
  190 +
  191 +static __init int pid_namespaces_init(void)
  192 +{
  193 + pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
  194 + return 0;
  195 +}
  196 +
  197 +__initcall(pid_namespaces_init);