Commit 74bd59bb39eb08b4379e2590c5f160748d83f812
Committed by
Linus Torvalds
1 parent
aee16ce73c
Exists in
master
and in
20 other branches
namespaces: cleanup the code managed with PID_NS option
Just like with the user namespaces, move the namespace management code into the separate .c file and mark the (already existing) PID_NS option as "depend on NAMESPACES"

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Cc: Cedric Le Goater <clg@fr.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 6 changed files with 220 additions and 194 deletions Side-by-side Diff
include/linux/pid.h
... | ... | @@ -118,10 +118,10 @@ |
118 | 118 | */ |
119 | 119 | extern struct pid *find_get_pid(int nr); |
120 | 120 | extern struct pid *find_ge_pid(int nr, struct pid_namespace *); |
121 | +int next_pidmap(struct pid_namespace *pid_ns, int last); | |
121 | 122 | |
122 | 123 | extern struct pid *alloc_pid(struct pid_namespace *ns); |
123 | 124 | extern void FASTCALL(free_pid(struct pid *pid)); |
124 | -extern void zap_pid_ns_processes(struct pid_namespace *pid_ns); | |
125 | 125 | |
126 | 126 | /* |
127 | 127 | * the helpers to get the pid's id seen from different namespaces |
include/linux/pid_namespace.h
... | ... | @@ -39,6 +39,7 @@ |
39 | 39 | |
40 | 40 | extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns); |
41 | 41 | extern void free_pid_ns(struct kref *kref); |
42 | +extern void zap_pid_ns_processes(struct pid_namespace *pid_ns); | |
42 | 43 | |
43 | 44 | static inline void put_pid_ns(struct pid_namespace *ns) |
44 | 45 | { |
... | ... | @@ -66,6 +67,11 @@ |
66 | 67 | { |
67 | 68 | } |
68 | 69 | |
70 | + | |
71 | +static inline void zap_pid_ns_processes(struct pid_namespace *ns) | |
72 | +{ | |
73 | + BUG(); | |
74 | +} | |
69 | 75 | #endif /* CONFIG_PID_NS */ |
70 | 76 | |
71 | 77 | static inline struct pid_namespace *task_active_pid_ns(struct task_struct *tsk) |
init/Kconfig
... | ... | @@ -214,18 +214,6 @@ |
214 | 214 | |
215 | 215 | Say N if unsure. |
216 | 216 | |
217 | -config PID_NS | |
218 | - bool "PID Namespaces (EXPERIMENTAL)" | |
219 | - default n | |
220 | - depends on EXPERIMENTAL | |
221 | - help | |
222 | - Suport process id namespaces. This allows having multiple | |
223 | - process with the same pid as long as they are in different | |
224 | - pid namespaces. This is a building block of containers. | |
225 | - | |
226 | - Unless you want to work with an experimental feature | |
227 | - say N here. | |
228 | - | |
229 | 217 | config AUDIT |
230 | 218 | bool "Auditing support" |
231 | 219 | depends on NET |
... | ... | @@ -441,6 +429,18 @@ |
441 | 429 | This allows containers, i.e. vservers, to use user namespaces |
442 | 430 | to provide different user info for different servers. |
443 | 431 | If unsure, say N. |
432 | + | |
433 | +config PID_NS | |
434 | + bool "PID Namespaces (EXPERIMENTAL)" | |
435 | + default n | |
436 | + depends on NAMESPACES && EXPERIMENTAL | |
437 | + help | |
438 | + Support process id namespaces. This allows having multiple | |
439 | + processes with the same pid as long as they are in different | |
440 | + pid namespaces. This is a building block of containers. | |
441 | + | |
442 | + Unless you want to work with an experimental feature | |
443 | + say N here. | |
444 | 444 | |
445 | 445 | config BLK_DEV_INITRD |
446 | 446 | bool "Initial RAM filesystem and RAM disk (initramfs/initrd) support" |
kernel/Makefile
... | ... | @@ -44,6 +44,7 @@ |
44 | 44 | obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o |
45 | 45 | obj-$(CONFIG_UTS_NS) += utsname.o |
46 | 46 | obj-$(CONFIG_USER_NS) += user_namespace.o |
47 | +obj-$(CONFIG_PID_NS) += pid_namespace.o | |
47 | 48 | obj-$(CONFIG_IKCONFIG) += configs.o |
48 | 49 | obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o |
49 | 50 | obj-$(CONFIG_STOP_MACHINE) += stop_machine.o |
kernel/pid.c
... | ... | @@ -41,7 +41,6 @@ |
41 | 41 | static struct hlist_head *pid_hash; |
42 | 42 | static int pidhash_shift; |
43 | 43 | struct pid init_struct_pid = INIT_STRUCT_PID; |
44 | -static struct kmem_cache *pid_ns_cachep; | |
45 | 44 | |
46 | 45 | int pid_max = PID_MAX_DEFAULT; |
47 | 46 | |
... | ... | @@ -181,7 +180,7 @@ |
181 | 180 | return -1; |
182 | 181 | } |
183 | 182 | |
184 | -static int next_pidmap(struct pid_namespace *pid_ns, int last) | |
183 | +int next_pidmap(struct pid_namespace *pid_ns, int last) | |
185 | 184 | { |
186 | 185 | int offset; |
187 | 186 | struct pidmap *map, *end; |
188 | 187 | |
... | ... | @@ -488,181 +487,7 @@ |
488 | 487 | } |
489 | 488 | EXPORT_SYMBOL_GPL(find_get_pid); |
490 | 489 | |
491 | -struct pid_cache { | |
492 | - int nr_ids; | |
493 | - char name[16]; | |
494 | - struct kmem_cache *cachep; | |
495 | - struct list_head list; | |
496 | -}; | |
497 | - | |
498 | -static LIST_HEAD(pid_caches_lh); | |
499 | -static DEFINE_MUTEX(pid_caches_mutex); | |
500 | - | |
501 | 490 | /* |
502 | - * creates the kmem cache to allocate pids from. | |
503 | - * @nr_ids: the number of numerical ids this pid will have to carry | |
504 | - */ | |
505 | - | |
506 | -static struct kmem_cache *create_pid_cachep(int nr_ids) | |
507 | -{ | |
508 | - struct pid_cache *pcache; | |
509 | - struct kmem_cache *cachep; | |
510 | - | |
511 | - mutex_lock(&pid_caches_mutex); | |
512 | - list_for_each_entry (pcache, &pid_caches_lh, list) | |
513 | - if (pcache->nr_ids == nr_ids) | |
514 | - goto out; | |
515 | - | |
516 | - pcache = kmalloc(sizeof(struct pid_cache), GFP_KERNEL); | |
517 | - if (pcache == NULL) | |
518 | - goto err_alloc; | |
519 | - | |
520 | - snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids); | |
521 | - cachep = kmem_cache_create(pcache->name, | |
522 | - sizeof(struct pid) + (nr_ids - 1) * sizeof(struct upid), | |
523 | - 0, SLAB_HWCACHE_ALIGN, NULL); | |
524 | - if (cachep == NULL) | |
525 | - goto err_cachep; | |
526 | - | |
527 | - pcache->nr_ids = nr_ids; | |
528 | - pcache->cachep = cachep; | |
529 | - list_add(&pcache->list, &pid_caches_lh); | |
530 | -out: | |
531 | - mutex_unlock(&pid_caches_mutex); | |
532 | - return pcache->cachep; | |
533 | - | |
534 | -err_cachep: | |
535 | - kfree(pcache); | |
536 | -err_alloc: | |
537 | - mutex_unlock(&pid_caches_mutex); | |
538 | - return NULL; | |
539 | -} | |
540 | - | |
541 | -#ifdef CONFIG_PID_NS | |
542 | -static struct pid_namespace *create_pid_namespace(int level) | |
543 | -{ | |
544 | - struct pid_namespace *ns; | |
545 | - int i; | |
546 | - | |
547 | - ns = kmem_cache_alloc(pid_ns_cachep, GFP_KERNEL); | |
548 | - if (ns == NULL) | |
549 | - goto out; | |
550 | - | |
551 | - ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL); | |
552 | - if (!ns->pidmap[0].page) | |
553 | - goto out_free; | |
554 | - | |
555 | - ns->pid_cachep = create_pid_cachep(level + 1); | |
556 | - if (ns->pid_cachep == NULL) | |
557 | - goto out_free_map; | |
558 | - | |
559 | - kref_init(&ns->kref); | |
560 | - ns->last_pid = 0; | |
561 | - ns->child_reaper = NULL; | |
562 | - ns->level = level; | |
563 | - | |
564 | - set_bit(0, ns->pidmap[0].page); | |
565 | - atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); | |
566 | - | |
567 | - for (i = 1; i < PIDMAP_ENTRIES; i++) { | |
568 | - ns->pidmap[i].page = 0; | |
569 | - atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); | |
570 | - } | |
571 | - | |
572 | - return ns; | |
573 | - | |
574 | -out_free_map: | |
575 | - kfree(ns->pidmap[0].page); | |
576 | -out_free: | |
577 | - kmem_cache_free(pid_ns_cachep, ns); | |
578 | -out: | |
579 | - return ERR_PTR(-ENOMEM); | |
580 | -} | |
581 | - | |
582 | -static void destroy_pid_namespace(struct pid_namespace *ns) | |
583 | -{ | |
584 | - int i; | |
585 | - | |
586 | - for (i = 0; i < PIDMAP_ENTRIES; i++) | |
587 | - kfree(ns->pidmap[i].page); | |
588 | - kmem_cache_free(pid_ns_cachep, ns); | |
589 | -} | |
590 | - | |
591 | -struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns) | |
592 | -{ | |
593 | - struct pid_namespace *new_ns; | |
594 | - | |
595 | - BUG_ON(!old_ns); | |
596 | - new_ns = get_pid_ns(old_ns); | |
597 | - if (!(flags & CLONE_NEWPID)) | |
598 | - goto out; | |
599 | - | |
600 | - new_ns = ERR_PTR(-EINVAL); | |
601 | - if (flags & CLONE_THREAD) | |
602 | - goto out_put; | |
603 | - | |
604 | - new_ns = create_pid_namespace(old_ns->level + 1); | |
605 | - if (!IS_ERR(new_ns)) | |
606 | - new_ns->parent = get_pid_ns(old_ns); | |
607 | - | |
608 | -out_put: | |
609 | - put_pid_ns(old_ns); | |
610 | -out: | |
611 | - return new_ns; | |
612 | -} | |
613 | - | |
614 | -void free_pid_ns(struct kref *kref) | |
615 | -{ | |
616 | - struct pid_namespace *ns, *parent; | |
617 | - | |
618 | - ns = container_of(kref, struct pid_namespace, kref); | |
619 | - | |
620 | - parent = ns->parent; | |
621 | - destroy_pid_namespace(ns); | |
622 | - | |
623 | - if (parent != NULL) | |
624 | - put_pid_ns(parent); | |
625 | -} | |
626 | -#endif /* CONFIG_PID_NS */ | |
627 | - | |
628 | -void zap_pid_ns_processes(struct pid_namespace *pid_ns) | |
629 | -{ | |
630 | - int nr; | |
631 | - int rc; | |
632 | - | |
633 | - /* | |
634 | - * The last thread in the cgroup-init thread group is terminating. | |
635 | - * Find remaining pid_ts in the namespace, signal and wait for them | |
636 | - * to exit. | |
637 | - * | |
638 | - * Note: This signals each threads in the namespace - even those that | |
639 | - * belong to the same thread group, To avoid this, we would have | |
640 | - * to walk the entire tasklist looking a processes in this | |
641 | - * namespace, but that could be unnecessarily expensive if the | |
642 | - * pid namespace has just a few processes. Or we need to | |
643 | - * maintain a tasklist for each pid namespace. | |
644 | - * | |
645 | - */ | |
646 | - read_lock(&tasklist_lock); | |
647 | - nr = next_pidmap(pid_ns, 1); | |
648 | - while (nr > 0) { | |
649 | - kill_proc_info(SIGKILL, SEND_SIG_PRIV, nr); | |
650 | - nr = next_pidmap(pid_ns, nr); | |
651 | - } | |
652 | - read_unlock(&tasklist_lock); | |
653 | - | |
654 | - do { | |
655 | - clear_thread_flag(TIF_SIGPENDING); | |
656 | - rc = sys_wait4(-1, NULL, __WALL, NULL); | |
657 | - } while (rc != -ECHILD); | |
658 | - | |
659 | - | |
660 | - /* Child reaper for the pid namespace is going away */ | |
661 | - pid_ns->child_reaper = NULL; | |
662 | - return; | |
663 | -} | |
664 | - | |
665 | -/* | |
666 | 491 | * The pid hash table is scaled according to the amount of memory in the |
667 | 492 | * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or |
668 | 493 | * more. |
... | ... | @@ -694,10 +519,7 @@ |
694 | 519 | set_bit(0, init_pid_ns.pidmap[0].page); |
695 | 520 | atomic_dec(&init_pid_ns.pidmap[0].nr_free); |
696 | 521 | |
697 | - init_pid_ns.pid_cachep = create_pid_cachep(1); | |
698 | - if (init_pid_ns.pid_cachep == NULL) | |
699 | - panic("Can't create pid_1 cachep\n"); | |
700 | - | |
701 | - pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); | |
522 | + init_pid_ns.pid_cachep = KMEM_CACHE(pid, | |
523 | + SLAB_HWCACHE_ALIGN | SLAB_PANIC); | |
702 | 524 | } |
kernel/pid_namespace.c
1 | +/* | |
2 | + * Pid namespaces | |
3 | + * | |
4 | + * Authors: | |
5 | + * (C) 2007 Pavel Emelyanov <xemul@openvz.org>, OpenVZ, SWsoft Inc. | |
6 | + * (C) 2007 Sukadev Bhattiprolu <sukadev@us.ibm.com>, IBM | |
7 | + * Many thanks to Oleg Nesterov for comments and help | |
8 | + * | |
9 | + */ | |
10 | + | |
11 | +#include <linux/pid.h> | |
12 | +#include <linux/pid_namespace.h> | |
13 | +#include <linux/syscalls.h> | |
14 | +#include <linux/err.h> | |
15 | + | |
16 | +#define BITS_PER_PAGE (PAGE_SIZE*8) | |
17 | + | |
18 | +struct pid_cache { | |
19 | + int nr_ids; | |
20 | + char name[16]; | |
21 | + struct kmem_cache *cachep; | |
22 | + struct list_head list; | |
23 | +}; | |
24 | + | |
25 | +static LIST_HEAD(pid_caches_lh); | |
26 | +static DEFINE_MUTEX(pid_caches_mutex); | |
27 | +static struct kmem_cache *pid_ns_cachep; | |
28 | + | |
29 | +/* | |
30 | + * creates the kmem cache to allocate pids from. | |
31 | + * @nr_ids: the number of numerical ids this pid will have to carry | |
32 | + */ | |
33 | + | |
34 | +static struct kmem_cache *create_pid_cachep(int nr_ids) | |
35 | +{ | |
36 | + struct pid_cache *pcache; | |
37 | + struct kmem_cache *cachep; | |
38 | + | |
39 | + mutex_lock(&pid_caches_mutex); | |
40 | + list_for_each_entry(pcache, &pid_caches_lh, list) | |
41 | + if (pcache->nr_ids == nr_ids) | |
42 | + goto out; | |
43 | + | |
44 | + pcache = kmalloc(sizeof(struct pid_cache), GFP_KERNEL); | |
45 | + if (pcache == NULL) | |
46 | + goto err_alloc; | |
47 | + | |
48 | + snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids); | |
49 | + cachep = kmem_cache_create(pcache->name, | |
50 | + sizeof(struct pid) + (nr_ids - 1) * sizeof(struct upid), | |
51 | + 0, SLAB_HWCACHE_ALIGN, NULL); | |
52 | + if (cachep == NULL) | |
53 | + goto err_cachep; | |
54 | + | |
55 | + pcache->nr_ids = nr_ids; | |
56 | + pcache->cachep = cachep; | |
57 | + list_add(&pcache->list, &pid_caches_lh); | |
58 | +out: | |
59 | + mutex_unlock(&pid_caches_mutex); | |
60 | + return pcache->cachep; | |
61 | + | |
62 | +err_cachep: | |
63 | + kfree(pcache); | |
64 | +err_alloc: | |
65 | + mutex_unlock(&pid_caches_mutex); | |
66 | + return NULL; | |
67 | +} | |
68 | + | |
69 | +static struct pid_namespace *create_pid_namespace(int level) | |
70 | +{ | |
71 | + struct pid_namespace *ns; | |
72 | + int i; | |
73 | + | |
74 | + ns = kmem_cache_alloc(pid_ns_cachep, GFP_KERNEL); | |
75 | + if (ns == NULL) | |
76 | + goto out; | |
77 | + | |
78 | + ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL); | |
79 | + if (!ns->pidmap[0].page) | |
80 | + goto out_free; | |
81 | + | |
82 | + ns->pid_cachep = create_pid_cachep(level + 1); | |
83 | + if (ns->pid_cachep == NULL) | |
84 | + goto out_free_map; | |
85 | + | |
86 | + kref_init(&ns->kref); | |
87 | + ns->last_pid = 0; | |
88 | + ns->child_reaper = NULL; | |
89 | + ns->level = level; | |
90 | + | |
91 | + set_bit(0, ns->pidmap[0].page); | |
92 | + atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); | |
93 | + | |
94 | + for (i = 1; i < PIDMAP_ENTRIES; i++) { | |
95 | + ns->pidmap[i].page = 0; | |
96 | + atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); | |
97 | + } | |
98 | + | |
99 | + return ns; | |
100 | + | |
101 | +out_free_map: | |
102 | + kfree(ns->pidmap[0].page); | |
103 | +out_free: | |
104 | + kmem_cache_free(pid_ns_cachep, ns); | |
105 | +out: | |
106 | + return ERR_PTR(-ENOMEM); | |
107 | +} | |
108 | + | |
109 | +static void destroy_pid_namespace(struct pid_namespace *ns) | |
110 | +{ | |
111 | + int i; | |
112 | + | |
113 | + for (i = 0; i < PIDMAP_ENTRIES; i++) | |
114 | + kfree(ns->pidmap[i].page); | |
115 | + kmem_cache_free(pid_ns_cachep, ns); | |
116 | +} | |
117 | + | |
118 | +struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns) | |
119 | +{ | |
120 | + struct pid_namespace *new_ns; | |
121 | + | |
122 | + BUG_ON(!old_ns); | |
123 | + new_ns = get_pid_ns(old_ns); | |
124 | + if (!(flags & CLONE_NEWPID)) | |
125 | + goto out; | |
126 | + | |
127 | + new_ns = ERR_PTR(-EINVAL); | |
128 | + if (flags & CLONE_THREAD) | |
129 | + goto out_put; | |
130 | + | |
131 | + new_ns = create_pid_namespace(old_ns->level + 1); | |
132 | + if (!IS_ERR(new_ns)) | |
133 | + new_ns->parent = get_pid_ns(old_ns); | |
134 | + | |
135 | +out_put: | |
136 | + put_pid_ns(old_ns); | |
137 | +out: | |
138 | + return new_ns; | |
139 | +} | |
140 | + | |
141 | +void free_pid_ns(struct kref *kref) | |
142 | +{ | |
143 | + struct pid_namespace *ns, *parent; | |
144 | + | |
145 | + ns = container_of(kref, struct pid_namespace, kref); | |
146 | + | |
147 | + parent = ns->parent; | |
148 | + destroy_pid_namespace(ns); | |
149 | + | |
150 | + if (parent != NULL) | |
151 | + put_pid_ns(parent); | |
152 | +} | |
153 | + | |
154 | +void zap_pid_ns_processes(struct pid_namespace *pid_ns) | |
155 | +{ | |
156 | + int nr; | |
157 | + int rc; | |
158 | + | |
159 | + /* | |
160 | + * The last thread in the cgroup-init thread group is terminating. | |
161 | + * Find remaining pid_ts in the namespace, signal and wait for them | |
162 | + * to exit. | |
163 | + * | |
164 | + * Note: This signals each thread in the namespace - even those that | |
165 | + * belong to the same thread group. To avoid this, we would have | |
166 | + * to walk the entire tasklist looking for processes in this | |
167 | + * namespace, but that could be unnecessarily expensive if the | |
168 | + * pid namespace has just a few processes. Or we need to | |
169 | + * maintain a tasklist for each pid namespace. | |
170 | + * | |
171 | + */ | |
172 | + read_lock(&tasklist_lock); | |
173 | + nr = next_pidmap(pid_ns, 1); | |
174 | + while (nr > 0) { | |
175 | + kill_proc_info(SIGKILL, SEND_SIG_PRIV, nr); | |
176 | + nr = next_pidmap(pid_ns, nr); | |
177 | + } | |
178 | + read_unlock(&tasklist_lock); | |
179 | + | |
180 | + do { | |
181 | + clear_thread_flag(TIF_SIGPENDING); | |
182 | + rc = sys_wait4(-1, NULL, __WALL, NULL); | |
183 | + } while (rc != -ECHILD); | |
184 | + | |
185 | + | |
186 | + /* Child reaper for the pid namespace is going away */ | |
187 | + pid_ns->child_reaper = NULL; | |
188 | + return; | |
189 | +} | |
190 | + | |
191 | +static __init int pid_namespaces_init(void) | |
192 | +{ | |
193 | + pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); | |
194 | + return 0; | |
195 | +} | |
196 | + | |
197 | +__initcall(pid_namespaces_init); |