Blame view

kernel/pid_namespace.c 11.8 KB
74bd59bb3   Pavel Emelyanov   namespaces: clean...
1
2
3
4
5
6
7
8
9
10
11
12
  /*
   * Pid namespaces
   *
   * Authors:
   *    (C) 2007 Pavel Emelyanov <xemul@openvz.org>, OpenVZ, SWsoft Inc.
   *    (C) 2007 Sukadev Bhattiprolu <sukadev@us.ibm.com>, IBM
   *     Many thanks to Oleg Nesterov for comments and help
   *
   */
  
  #include <linux/pid.h>
  #include <linux/pid_namespace.h>
49f4d8b93   Eric W. Biederman   pidns: Capture th...
13
  #include <linux/user_namespace.h>
74bd59bb3   Pavel Emelyanov   namespaces: clean...
14
  #include <linux/syscalls.h>
5b825c3af   Ingo Molnar   sched/headers: Pr...
15
  #include <linux/cred.h>
74bd59bb3   Pavel Emelyanov   namespaces: clean...
16
  #include <linux/err.h>
0b6b030fc   Pavel Emelyanov   bsdacct: switch f...
17
  #include <linux/acct.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
18
  #include <linux/slab.h>
0bb80f240   David Howells   proc: Split the n...
19
  #include <linux/proc_ns.h>
cf3f89214   Daniel Lezcano   pidns: add reboot...
20
  #include <linux/reboot.h>
523a6a945   Eric W. Biederman   pidns: Export fre...
21
  #include <linux/export.h>
299300258   Ingo Molnar   sched/headers: Pr...
22
  #include <linux/sched/task.h>
f361bf4a6   Ingo Molnar   sched/headers: Pr...
23
  #include <linux/sched/signal.h>
74bd59bb3   Pavel Emelyanov   namespaces: clean...
24

74bd59bb3   Pavel Emelyanov   namespaces: clean...
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
  /*
   * Bookkeeping entry for one slab cache built by create_pid_cachep().
   * There is one entry per distinct nr_ids value, so a later request for
   * the same nr_ids reuses the existing cache instead of creating another.
   */
  struct pid_cache {
  	/* lookup key: the nr_ids value this cache was created for */
  	int nr_ids;
  	/* cache name, formatted as "pid_<nr_ids>" */
  	char name[16];
  	/* the slab cache handed back to callers */
  	struct kmem_cache *cachep;
  	/* link on pid_caches_lh */
  	struct list_head list;
  };
  
  /* Every pid_cache entry created so far; searched and extended by create_pid_cachep(). */
  static LIST_HEAD(pid_caches_lh);
  /* Held by create_pid_cachep() around the lookup and insertion on pid_caches_lh. */
  static DEFINE_MUTEX(pid_caches_mutex);
  /* Slab cache for struct pid_namespace objects; created in pid_namespaces_init(). */
  static struct kmem_cache *pid_ns_cachep;
  
  /*
   * creates the kmem cache to allocate pids from.
   * @nr_ids: the number of numerical ids this pid will have to carry
   */
  
  static struct kmem_cache *create_pid_cachep(int nr_ids)
  {
  	struct pid_cache *pcache;
  	struct kmem_cache *cachep;
  
  	mutex_lock(&pid_caches_mutex);
  	list_for_each_entry(pcache, &pid_caches_lh, list)
  		if (pcache->nr_ids == nr_ids)
  			goto out;
  
  	pcache = kmalloc(sizeof(struct pid_cache), GFP_KERNEL);
  	if (pcache == NULL)
  		goto err_alloc;
  
  	snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids);
  	cachep = kmem_cache_create(pcache->name,
  			sizeof(struct pid) + (nr_ids - 1) * sizeof(struct upid),
  			0, SLAB_HWCACHE_ALIGN, NULL);
  	if (cachep == NULL)
  		goto err_cachep;
  
  	pcache->nr_ids = nr_ids;
  	pcache->cachep = cachep;
  	list_add(&pcache->list, &pid_caches_lh);
  out:
  	mutex_unlock(&pid_caches_mutex);
  	return pcache->cachep;
  
  err_cachep:
  	kfree(pcache);
  err_alloc:
  	mutex_unlock(&pid_caches_mutex);
  	return NULL;
  }
0a01f2cc3   Eric W. Biederman   pidns: Make the p...
75
76
77
78
79
  /*
   * Work item handler installed on pid_namespace::proc_work by
   * create_pid_namespace(): recovers the owning namespace from @work and
   * passes it to pid_ns_release_proc().
   */
  static void proc_cleanup_work(struct work_struct *work)
  {
  	pid_ns_release_proc(container_of(work, struct pid_namespace, proc_work));
  }
f23025057   Andrew Vagin   pidns: limit the ...
80
81
  /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */
  #define MAX_PID_NS_LEVEL 32
f333c700c   Eric W. Biederman   pidns: Add a limi...
82
83
84
85
86
87
88
89
90
  /*
   * Account one more pid namespace (UCOUNT_PID_NAMESPACES) to the current
   * task's effective uid in user namespace @ns.
   *
   * Returns the result of inc_ucount(); create_pid_namespace() treats a
   * NULL return as failure and reports -ENOSPC.
   */
  static struct ucounts *inc_pid_namespaces(struct user_namespace *ns)
  {
  	return inc_ucount(ns, current_euid(), UCOUNT_PID_NAMESPACES);
  }
  
  /*
   * Undo inc_pid_namespaces(): drop one UCOUNT_PID_NAMESPACES charge from
   * @ucounts.
   */
  static void dec_pid_namespaces(struct ucounts *ucounts)
  {
  	dec_ucount(ucounts, UCOUNT_PID_NAMESPACES);
  }
49f4d8b93   Eric W. Biederman   pidns: Capture th...
91
92
  static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns,
  	struct pid_namespace *parent_pid_ns)
74bd59bb3   Pavel Emelyanov   namespaces: clean...
93
94
  {
  	struct pid_namespace *ns;
ed469a63c   Alexey Dobriyan   pidns: make creat...
95
  	unsigned int level = parent_pid_ns->level + 1;
f333c700c   Eric W. Biederman   pidns: Add a limi...
96
  	struct ucounts *ucounts;
f23025057   Andrew Vagin   pidns: limit the ...
97
98
  	int i;
  	int err;
a2b426267   Eric W. Biederman   userns,pidns: Ver...
99
100
101
  	err = -EINVAL;
  	if (!in_userns(parent_pid_ns->user_ns, user_ns))
  		goto out;
df75e7748   Eric W. Biederman   userns: When the ...
102
  	err = -ENOSPC;
f333c700c   Eric W. Biederman   pidns: Add a limi...
103
104
105
106
  	if (level > MAX_PID_NS_LEVEL)
  		goto out;
  	ucounts = inc_pid_namespaces(user_ns);
  	if (!ucounts)
f23025057   Andrew Vagin   pidns: limit the ...
107
  		goto out;
74bd59bb3   Pavel Emelyanov   namespaces: clean...
108

f23025057   Andrew Vagin   pidns: limit the ...
109
  	err = -ENOMEM;
84406c153   Pavel Emelyanov   pidns: use kzallo...
110
  	ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
111
  	if (ns == NULL)
f333c700c   Eric W. Biederman   pidns: Add a limi...
112
  		goto out_dec;
74bd59bb3   Pavel Emelyanov   namespaces: clean...
113
114
115
116
117
118
119
120
  
  	ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
  	if (!ns->pidmap[0].page)
  		goto out_free;
  
  	ns->pid_cachep = create_pid_cachep(level + 1);
  	if (ns->pid_cachep == NULL)
  		goto out_free_map;
6344c433a   Al Viro   new helpers: ns_a...
121
  	err = ns_alloc_inum(&ns->ns);
98f842e67   Eric W. Biederman   proc: Usable inod...
122
123
  	if (err)
  		goto out_free_map;
33c429405   Al Viro   copy address of p...
124
  	ns->ns.ops = &pidns_operations;
98f842e67   Eric W. Biederman   proc: Usable inod...
125

74bd59bb3   Pavel Emelyanov   namespaces: clean...
126
  	kref_init(&ns->kref);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
127
  	ns->level = level;
ed469a63c   Alexey Dobriyan   pidns: make creat...
128
  	ns->parent = get_pid_ns(parent_pid_ns);
49f4d8b93   Eric W. Biederman   pidns: Capture th...
129
  	ns->user_ns = get_user_ns(user_ns);
f333c700c   Eric W. Biederman   pidns: Add a limi...
130
  	ns->ucounts = ucounts;
c876ad768   Eric W. Biederman   pidns: Stop pid a...
131
  	ns->nr_hashed = PIDNS_HASH_ADDING;
0a01f2cc3   Eric W. Biederman   pidns: Make the p...
132
  	INIT_WORK(&ns->proc_work, proc_cleanup_work);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
133
134
135
  
  	set_bit(0, ns->pidmap[0].page);
  	atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
84406c153   Pavel Emelyanov   pidns: use kzallo...
136
  	for (i = 1; i < PIDMAP_ENTRIES; i++)
74bd59bb3   Pavel Emelyanov   namespaces: clean...
137
  		atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
138
139
140
141
142
143
144
  
  	return ns;
  
  out_free_map:
  	kfree(ns->pidmap[0].page);
  out_free:
  	kmem_cache_free(pid_ns_cachep, ns);
f333c700c   Eric W. Biederman   pidns: Add a limi...
145
146
  out_dec:
  	dec_pid_namespaces(ucounts);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
147
  out:
4308eebbe   Eric W. Biederman   pidns: call pid_n...
148
  	return ERR_PTR(err);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
149
  }
1adfcb03e   Al Viro   pid_namespace: ma...
150
151
  static void delayed_free_pidns(struct rcu_head *p)
  {
add7c65ca   Andrei Vagin   pid: fix lockdep ...
152
153
154
155
156
157
  	struct pid_namespace *ns = container_of(p, struct pid_namespace, rcu);
  
  	dec_pid_namespaces(ns->ucounts);
  	put_user_ns(ns->user_ns);
  
  	kmem_cache_free(pid_ns_cachep, ns);
1adfcb03e   Al Viro   pid_namespace: ma...
158
  }
74bd59bb3   Pavel Emelyanov   namespaces: clean...
159
160
161
  static void destroy_pid_namespace(struct pid_namespace *ns)
  {
  	int i;
6344c433a   Al Viro   new helpers: ns_a...
162
  	ns_free_inum(&ns->ns);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
163
164
  	for (i = 0; i < PIDMAP_ENTRIES; i++)
  		kfree(ns->pidmap[i].page);
1adfcb03e   Al Viro   pid_namespace: ma...
165
  	call_rcu(&ns->rcu, delayed_free_pidns);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
166
  }
49f4d8b93   Eric W. Biederman   pidns: Capture th...
167
168
  struct pid_namespace *copy_pid_ns(unsigned long flags,
  	struct user_namespace *user_ns, struct pid_namespace *old_ns)
74bd59bb3   Pavel Emelyanov   namespaces: clean...
169
  {
74bd59bb3   Pavel Emelyanov   namespaces: clean...
170
  	if (!(flags & CLONE_NEWPID))
dca4a9796   Alexey Dobriyan   pidns: rewrite co...
171
  		return get_pid_ns(old_ns);
225778d68   Eric W. Biederman   pidns: Deny stran...
172
173
  	if (task_active_pid_ns(current) != old_ns)
  		return ERR_PTR(-EINVAL);
49f4d8b93   Eric W. Biederman   pidns: Capture th...
174
  	return create_pid_namespace(user_ns, old_ns);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
175
  }
bbc2e3ef8   Cyrill Gorcunov   pidns: remove rec...
176
  static void free_pid_ns(struct kref *kref)
74bd59bb3   Pavel Emelyanov   namespaces: clean...
177
  {
bbc2e3ef8   Cyrill Gorcunov   pidns: remove rec...
178
  	struct pid_namespace *ns;
74bd59bb3   Pavel Emelyanov   namespaces: clean...
179
180
  
  	ns = container_of(kref, struct pid_namespace, kref);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
181
  	destroy_pid_namespace(ns);
bbc2e3ef8   Cyrill Gorcunov   pidns: remove rec...
182
  }
74bd59bb3   Pavel Emelyanov   namespaces: clean...
183

bbc2e3ef8   Cyrill Gorcunov   pidns: remove rec...
184
185
186
187
188
189
190
191
192
193
  /*
   * Drop a reference on @ns.  When that was the last reference the
   * namespace is destroyed and the same is repeated for its parent, walking
   * up iteratively.  init_pid_ns is never put.
   */
  void put_pid_ns(struct pid_namespace *ns)
  {
  	while (ns != &init_pid_ns) {
  		/* Fetch the parent first: the final put hands ns to free_pid_ns(). */
  		struct pid_namespace *up = ns->parent;
  
  		if (!kref_put(&ns->kref, free_pid_ns))
  			return;
  		ns = up;
  	}
  }
bbc2e3ef8   Cyrill Gorcunov   pidns: remove rec...
195
  EXPORT_SYMBOL_GPL(put_pid_ns);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
196
197
198
199
200
  
  void zap_pid_ns_processes(struct pid_namespace *pid_ns)
  {
  	int nr;
  	int rc;
00c10bc13   Eric W. Biederman   pidns: make kille...
201
  	struct task_struct *task, *me = current;
751c644b9   Eric W. Biederman   pid: Handle the e...
202
  	int init_pids = thread_group_leader(me) ? 1 : 2;
00c10bc13   Eric W. Biederman   pidns: make kille...
203

c876ad768   Eric W. Biederman   pidns: Stop pid a...
204
205
  	/* Don't allow any more processes into the pid namespace */
  	disable_pid_allocation(pid_ns);
a53b83154   Oleg Nesterov   exit: pidns: fix/...
206
207
208
209
210
  	/*
  	 * Ignore SIGCHLD causing any terminated children to autoreap.
  	 * This speeds up the namespace shutdown, plus see the comment
  	 * below.
  	 */
00c10bc13   Eric W. Biederman   pidns: make kille...
211
212
213
  	spin_lock_irq(&me->sighand->siglock);
  	me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN;
  	spin_unlock_irq(&me->sighand->siglock);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
  
  	/*
  	 * The last thread in the cgroup-init thread group is terminating.
  	 * Find remaining pid_ts in the namespace, signal and wait for them
  	 * to exit.
  	 *
  	 * Note:  This signals each threads in the namespace - even those that
  	 * 	  belong to the same thread group, To avoid this, we would have
  	 * 	  to walk the entire tasklist looking a processes in this
  	 * 	  namespace, but that could be unnecessarily expensive if the
  	 * 	  pid namespace has just a few processes. Or we need to
  	 * 	  maintain a tasklist for each pid namespace.
  	 *
  	 */
  	read_lock(&tasklist_lock);
  	nr = next_pidmap(pid_ns, 1);
  	while (nr > 0) {
e4da026f9   Sukadev Bhattiprolu   signals: zap_pid_...
231
  		rcu_read_lock();
e4da026f9   Sukadev Bhattiprolu   signals: zap_pid_...
232
  		task = pid_task(find_vpid(nr), PIDTYPE_PID);
a02d6fd64   Oleg Nesterov   signal: zap_pid_n...
233
234
  		if (task && !__fatal_signal_pending(task))
  			send_sig_info(SIGKILL, SEND_SIG_FORCED, task);
e4da026f9   Sukadev Bhattiprolu   signals: zap_pid_...
235
236
  
  		rcu_read_unlock();
74bd59bb3   Pavel Emelyanov   namespaces: clean...
237
238
239
  		nr = next_pidmap(pid_ns, nr);
  	}
  	read_unlock(&tasklist_lock);
a53b83154   Oleg Nesterov   exit: pidns: fix/...
240
241
242
243
244
  	/*
  	 * Reap the EXIT_ZOMBIE children we had before we ignored SIGCHLD.
  	 * sys_wait4() will also block until our children traced from the
  	 * parent namespace are detached and become EXIT_DEAD.
  	 */
74bd59bb3   Pavel Emelyanov   namespaces: clean...
245
246
247
248
  	do {
  		clear_thread_flag(TIF_SIGPENDING);
  		rc = sys_wait4(-1, NULL, __WALL, NULL);
  	} while (rc != -ECHILD);
6347e9009   Eric W. Biederman   pidns: guarantee ...
249
  	/*
a53b83154   Oleg Nesterov   exit: pidns: fix/...
250
251
252
253
254
255
256
257
258
259
260
261
262
263
  	 * sys_wait4() above can't reap the EXIT_DEAD children but we do not
  	 * really care, we could reparent them to the global init. We could
  	 * exit and reap ->child_reaper even if it is not the last thread in
  	 * this pid_ns, free_pid(nr_hashed == 0) calls proc_cleanup_work(),
  	 * pid_ns can not go away until proc_kill_sb() drops the reference.
  	 *
  	 * But this ns can also have other tasks injected by setns()+fork().
  	 * Again, ignoring the user visible semantics we do not really need
  	 * to wait until they are all reaped, but they can be reparented to
  	 * us and thus we need to ensure that pid->child_reaper stays valid
  	 * until they all go away. See free_pid()->wake_up_process().
  	 *
  	 * We rely on ignored SIGCHLD, an injected zombie must be autoreaped
  	 * if reparented.
6347e9009   Eric W. Biederman   pidns: guarantee ...
264
265
  	 */
  	for (;;) {
b9a985db9   Eric W. Biederman   pid_ns: Sleep in ...
266
  		set_current_state(TASK_INTERRUPTIBLE);
751c644b9   Eric W. Biederman   pid: Handle the e...
267
  		if (pid_ns->nr_hashed == init_pids)
6347e9009   Eric W. Biederman   pidns: guarantee ...
268
269
270
  			break;
  		schedule();
  	}
af4b8a83a   Eric W. Biederman   pidns: Wait in za...
271
  	__set_current_state(TASK_RUNNING);
6347e9009   Eric W. Biederman   pidns: guarantee ...
272

cf3f89214   Daniel Lezcano   pidns: add reboot...
273
274
  	if (pid_ns->reboot)
  		current->signal->group_exit_code = pid_ns->reboot;
0b6b030fc   Pavel Emelyanov   bsdacct: switch f...
275
  	acct_exit_ns(pid_ns);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
276
277
  	return;
  }
98ed57eef   Cyrill Gorcunov   sysctl: make kern...
278
  #ifdef CONFIG_CHECKPOINT_RESTORE
b8f566b04   Pavel Emelyanov   sysctl: add the k...
279
280
281
  /*
   * sysctl handler for "ns_last_pid": reads or writes the last_pid field
   * of the caller's active pid namespace.
   *
   * Writes require CAP_SYS_ADMIN in the user namespace that owns that pid
   * namespace and fail with -EPERM otherwise.  The value itself is
   * processed by proc_dointvec_minmax() on a private copy of @table whose
   * data pointer is redirected to the namespace's last_pid.
   */
  static int pid_ns_ctl_handler(struct ctl_table *table, int write,
  		void __user *buffer, size_t *lenp, loff_t *ppos)
  {
  	struct pid_namespace *active_ns = task_active_pid_ns(current);
  	struct ctl_table ns_table;
  
  	if (write && !ns_capable(active_ns->user_ns, CAP_SYS_ADMIN))
  		return -EPERM;
  
  	/*
  	 * Writing directly to ns' last_pid field is OK, since this field
  	 * is volatile in a living namespace anyway and a code writing to
  	 * it should synchronize its usage with external means.
  	 */
  	ns_table = *table;
  	ns_table.data = &active_ns->last_pid;
  
  	return proc_dointvec_minmax(&ns_table, write, buffer, lenp, ppos);
  }
579035dc5   Andrew Vagin   pid-namespace: li...
295
296
  extern int pid_max;
  static int zero = 0;
b8f566b04   Pavel Emelyanov   sysctl: add the k...
297
298
299
300
301
302
  static struct ctl_table pid_ns_ctl_table[] = {
  	{
  		.procname = "ns_last_pid",
  		.maxlen = sizeof(int),
  		.mode = 0666, /* permissions are checked in the handler */
  		.proc_handler = pid_ns_ctl_handler,
579035dc5   Andrew Vagin   pid-namespace: li...
303
304
  		.extra1 = &zero,
  		.extra2 = &pid_max,
b8f566b04   Pavel Emelyanov   sysctl: add the k...
305
306
307
  	},
  	{ }
  };
b8f566b04   Pavel Emelyanov   sysctl: add the k...
308
  /* sysctl path ("kernel") that pid_namespaces_init() registers pid_ns_ctl_table under. */
  static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } };
98ed57eef   Cyrill Gorcunov   sysctl: make kern...
309
  #endif	/* CONFIG_CHECKPOINT_RESTORE */
b8f566b04   Pavel Emelyanov   sysctl: add the k...
310

cf3f89214   Daniel Lezcano   pidns: add reboot...
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
  /*
   * Handle reboot command @cmd for @pid_ns.
   *
   * For init_pid_ns nothing is done and 0 is returned.  Otherwise the
   * command is translated into a signal number stored in pid_ns->reboot
   * (SIGHUP for the restart commands, SIGINT for halt/power-off), the
   * namespace's child_reaper is sent SIGKILL and the calling task exits.
   * Any other command yields -EINVAL without side effects.
   */
  int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
  {
  	int exit_sig;
  
  	if (pid_ns == &init_pid_ns)
  		return 0;
  
  	if (cmd == LINUX_REBOOT_CMD_RESTART2 ||
  	    cmd == LINUX_REBOOT_CMD_RESTART)
  		exit_sig = SIGHUP;
  	else if (cmd == LINUX_REBOOT_CMD_POWER_OFF ||
  		 cmd == LINUX_REBOOT_CMD_HALT)
  		exit_sig = SIGINT;
  	else
  		return -EINVAL;
  
  	pid_ns->reboot = exit_sig;
  
  	read_lock(&tasklist_lock);
  	force_sig(SIGKILL, pid_ns->child_reaper);
  	read_unlock(&tasklist_lock);
  
  	do_exit(0);
  
  	/* Not reached */
  	return 0;
  }
3c0411846   Al Viro   switch the rest o...
339
340
341
342
  /* Map an embedded ns_common back to the pid_namespace that contains it. */
  static inline struct pid_namespace *to_pid_ns(struct ns_common *ns)
  {
  	return container_of(ns, struct pid_namespace, ns);
  }
64964528b   Al Viro   make proc_ns_oper...
343
  static struct ns_common *pidns_get(struct task_struct *task)
57e8391d3   Eric W. Biederman   pidns: Add setns ...
344
345
346
347
  {
  	struct pid_namespace *ns;
  
  	rcu_read_lock();
d23082257   Oleg Nesterov   pid_namespace: pi...
348
349
350
  	ns = task_active_pid_ns(task);
  	if (ns)
  		get_pid_ns(ns);
57e8391d3   Eric W. Biederman   pidns: Add setns ...
351
  	rcu_read_unlock();
3c0411846   Al Viro   switch the rest o...
352
  	return ns ? &ns->ns : NULL;
57e8391d3   Eric W. Biederman   pidns: Add setns ...
353
  }
eaa0d190b   Kirill Tkhai   pidns: expose tas...
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
  /*
   * .get operation for the "pid_for_children" entry: return the ns_common
   * of @task->nsproxy->pid_ns_for_children after get_pid_ns() on it.
   *
   * Returns NULL if the task has no nsproxy, or if the namespace has no
   * child_reaper when checked under tasklist_lock (the reference taken
   * here is dropped again in that case).
   */
  static struct ns_common *pidns_for_children_get(struct task_struct *task)
  {
  	struct pid_namespace *pid_ns = NULL;
  
  	task_lock(task);
  	if (task->nsproxy) {
  		pid_ns = task->nsproxy->pid_ns_for_children;
  		get_pid_ns(pid_ns);
  	}
  	task_unlock(task);
  
  	if (!pid_ns)
  		return NULL;
  
  	read_lock(&tasklist_lock);
  	if (!pid_ns->child_reaper) {
  		put_pid_ns(pid_ns);
  		pid_ns = NULL;
  	}
  	read_unlock(&tasklist_lock);
  
  	if (!pid_ns)
  		return NULL;
  	return &pid_ns->ns;
  }
64964528b   Al Viro   make proc_ns_oper...
376
  /* .put operation: drop the pid namespace reference behind @ns. */
  static void pidns_put(struct ns_common *ns)
  {
  	struct pid_namespace *pid_ns = to_pid_ns(ns);
  
  	put_pid_ns(pid_ns);
  }
64964528b   Al Viro   make proc_ns_oper...
380
  static int pidns_install(struct nsproxy *nsproxy, struct ns_common *ns)
57e8391d3   Eric W. Biederman   pidns: Add setns ...
381
382
  {
  	struct pid_namespace *active = task_active_pid_ns(current);
3c0411846   Al Viro   switch the rest o...
383
  	struct pid_namespace *ancestor, *new = to_pid_ns(ns);
57e8391d3   Eric W. Biederman   pidns: Add setns ...
384

5e4a08476   Eric W. Biederman   userns: Require C...
385
  	if (!ns_capable(new->user_ns, CAP_SYS_ADMIN) ||
c7b96acf1   Eric W. Biederman   userns: Kill nso...
386
  	    !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
57e8391d3   Eric W. Biederman   pidns: Add setns ...
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
  		return -EPERM;
  
  	/*
  	 * Only allow entering the current active pid namespace
  	 * or a child of the current active pid namespace.
  	 *
  	 * This is required for fork to return a usable pid value and
  	 * this maintains the property that processes and their
  	 * children can not escape their current pid namespace.
  	 */
  	if (new->level < active->level)
  		return -EINVAL;
  
  	ancestor = new;
  	while (ancestor->level > active->level)
  		ancestor = ancestor->parent;
  	if (ancestor != active)
  		return -EINVAL;
c2b1df2eb   Andy Lutomirski   Rename nsproxy.pi...
405
406
  	put_pid_ns(nsproxy->pid_ns_for_children);
  	nsproxy->pid_ns_for_children = get_pid_ns(new);
57e8391d3   Eric W. Biederman   pidns: Add setns ...
407
408
  	return 0;
  }
a7306ed8d   Andrey Vagin   nsfs: add ioctl t...
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
  /*
   * .get_parent operation: return the parent of the pid namespace behind
   * @ns, with a reference taken via get_pid_ns().
   *
   * The parent is only handed out if it is the caller's active pid
   * namespace or one of that namespace's descendants; otherwise, or if
   * there is no parent at all, ERR_PTR(-EPERM) is returned.
   */
  static struct ns_common *pidns_get_parent(struct ns_common *ns)
  {
  	struct pid_namespace *active = task_active_pid_ns(current);
  	struct pid_namespace *parent = to_pid_ns(ns)->parent;
  	struct pid_namespace *walk;
  
  	/* See if the parent is in the current namespace */
  	for (walk = parent; walk; walk = walk->parent) {
  		if (walk == active)
  			return &get_pid_ns(parent)->ns;
  	}
  
  	return ERR_PTR(-EPERM);
  }
bcac25a58   Andrey Vagin   kernel: add a hel...
426
427
428
429
  /*
   * .owner operation: return the user_ns pointer stored in the pid
   * namespace behind @ns.  No reference is taken here.
   */
  static struct user_namespace *pidns_owner(struct ns_common *ns)
  {
  	return to_pid_ns(ns)->user_ns;
  }
57e8391d3   Eric W. Biederman   pidns: Add setns ...
430
431
432
433
434
435
  const struct proc_ns_operations pidns_operations = {
  	.name		= "pid",
  	.type		= CLONE_NEWPID,
  	.get		= pidns_get,
  	.put		= pidns_put,
  	.install	= pidns_install,
bcac25a58   Andrey Vagin   kernel: add a hel...
436
  	.owner		= pidns_owner,
a7306ed8d   Andrey Vagin   nsfs: add ioctl t...
437
  	.get_parent	= pidns_get_parent,
57e8391d3   Eric W. Biederman   pidns: Add setns ...
438
  };
eaa0d190b   Kirill Tkhai   pidns: expose tas...
439
440
441
442
443
444
445
446
447
448
  /*
   * Namespace operations for the "pid_for_children" entry.  Uses the same
   * put/install/owner/get_parent callbacks as the "pid" entry, but .get
   * resolves task->nsproxy->pid_ns_for_children instead of the task's
   * active pid namespace, and real_ns_name is "pid".
   */
  const struct proc_ns_operations pidns_for_children_operations = {
  	.name		= "pid_for_children",
  	.real_ns_name	= "pid",
  	.type		= CLONE_NEWPID,
  	.get		= pidns_for_children_get,
  	.put		= pidns_put,
  	.install	= pidns_install,
  	.owner		= pidns_owner,
  	.get_parent	= pidns_get_parent,
  };
74bd59bb3   Pavel Emelyanov   namespaces: clean...
449
450
451
  /*
   * Boot-time setup: create the slab cache for struct pid_namespace
   * (SLAB_PANIC is passed, so there is no failure return to check) and,
   * with CONFIG_CHECKPOINT_RESTORE, register the "kernel/ns_last_pid"
   * sysctl.  Always returns 0.
   */
  static __init int pid_namespaces_init(void)
  {
  	pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
  
  #ifdef CONFIG_CHECKPOINT_RESTORE
  	register_sysctl_paths(kern_path, pid_ns_ctl_table);
  #endif
  
  	return 0;
  }
  
  __initcall(pid_namespaces_init);