Blame view

kernel/pid_namespace.c 11.4 KB
74bd59bb3   Pavel Emelyanov   namespaces: clean...
1
2
3
4
5
6
7
8
9
10
11
12
  /*
   * Pid namespaces
   *
   * Authors:
   *    (C) 2007 Pavel Emelyanov <xemul@openvz.org>, OpenVZ, SWsoft Inc.
   *    (C) 2007 Sukadev Bhattiprolu <sukadev@us.ibm.com>, IBM
   *     Many thanks to Oleg Nesterov for comments and help
   *
   */
  
  #include <linux/pid.h>
  #include <linux/pid_namespace.h>
49f4d8b93   Eric W. Biederman   pidns: Capture th...
13
  #include <linux/user_namespace.h>
74bd59bb3   Pavel Emelyanov   namespaces: clean...
14
  #include <linux/syscalls.h>
5b825c3af   Ingo Molnar   sched/headers: Pr...
15
  #include <linux/cred.h>
74bd59bb3   Pavel Emelyanov   namespaces: clean...
16
  #include <linux/err.h>
0b6b030fc   Pavel Emelyanov   bsdacct: switch f...
17
  #include <linux/acct.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
18
  #include <linux/slab.h>
0bb80f240   David Howells   proc: Split the n...
19
  #include <linux/proc_ns.h>
cf3f89214   Daniel Lezcano   pidns: add reboot...
20
  #include <linux/reboot.h>
523a6a945   Eric W. Biederman   pidns: Export fre...
21
  #include <linux/export.h>
299300258   Ingo Molnar   sched/headers: Pr...
22
  #include <linux/sched/task.h>
f361bf4a6   Ingo Molnar   sched/headers: Pr...
23
  #include <linux/sched/signal.h>
95846ecf9   Gargi Sharma   pid: replace pid ...
24
  #include <linux/idr.h>
74bd59bb3   Pavel Emelyanov   namespaces: clean...
25

74bd59bb3   Pavel Emelyanov   namespaces: clean...
26
27
  static DEFINE_MUTEX(pid_caches_mutex);
  static struct kmem_cache *pid_ns_cachep;
dd206bec9   Alexey Dobriyan   pidns: simpler al...
28
29
30
31
  /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */
  #define MAX_PID_NS_LEVEL 32
  /* Write once array, filled from the beginning. */
  static struct kmem_cache *pid_cache[MAX_PID_NS_LEVEL];
74bd59bb3   Pavel Emelyanov   namespaces: clean...
32
33
34
  
  /*
   * Return the kmem cache that 'struct pid' objects for a namespace at
   * @level are allocated from, creating the cache on first use.
dd206bec9   Alexey Dobriyan   pidns: simpler al...
35
   * @level: pid namespace level; used as index (level - 1) into pid_cache[]
   *
   * Returns the cache, or NULL if no cache exists yet and creating one failed.
74bd59bb3   Pavel Emelyanov   namespaces: clean...
36
   */
dd206bec9   Alexey Dobriyan   pidns: simpler al...
37
  static struct kmem_cache *create_pid_cachep(unsigned int level)
74bd59bb3   Pavel Emelyanov   namespaces: clean...
38
  {
dd206bec9   Alexey Dobriyan   pidns: simpler al...
39
40
41
42
43
44
45
46
47
48
49
50
  	/* Level 0 is init_pid_ns.pid_cachep */
  	struct kmem_cache **pkc = &pid_cache[level - 1];
  	struct kmem_cache *kc;
  	/* "pid_" (4 chars) + up to 10 decimal digits + terminating NUL */
  	char name[4 + 10 + 1];
  	unsigned int len;
  
  	/* Lockless fast path: pid_cache[] slots are written at most once. */
  	kc = READ_ONCE(*pkc);
  	if (kc)
  		return kc;
  
  	snprintf(name, sizeof(name), "pid_%u", level + 1);
  	/* Object size: one struct pid plus 'level' additional struct upid. */
  	len = sizeof(struct pid) + level * sizeof(struct upid);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
51
  	mutex_lock(&pid_caches_mutex);
dd206bec9   Alexey Dobriyan   pidns: simpler al...
52
53
54
  	/* Name collision forces to do allocation under mutex. */
  	/* Re-check under the mutex: another caller may have filled the slot. */
  	if (!*pkc)
  		*pkc = kmem_cache_create(name, len, 0, SLAB_HWCACHE_ALIGN, 0);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
55
  	mutex_unlock(&pid_caches_mutex);
dd206bec9   Alexey Dobriyan   pidns: simpler al...
56
57
  	/* current can fail, but someone else can succeed. */
  	return READ_ONCE(*pkc);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
58
  }
0a01f2cc3   Eric W. Biederman   pidns: Make the p...
59
60
61
62
63
  /*
   * Work item handler installed on pid_namespace.proc_work: recovers the
   * owning namespace from the embedded work_struct and hands it to
   * pid_ns_release_proc().
   */
  static void proc_cleanup_work(struct work_struct *work)
  {
  	pid_ns_release_proc(container_of(work, struct pid_namespace, proc_work));
  }
f333c700c   Eric W. Biederman   pidns: Add a limi...
64
65
66
67
68
69
70
71
72
  /*
   * Charge one pid namespace (UCOUNT_PID_NAMESPACES) to the current
   * effective uid in @ns and return the ucounts handle obtained from
   * inc_ucount(). create_pid_namespace() treats a NULL return as failure.
   */
  static struct ucounts *inc_pid_namespaces(struct user_namespace *ns)
  {
  	struct ucounts *charged;

  	charged = inc_ucount(ns, current_euid(), UCOUNT_PID_NAMESPACES);
  	return charged;
  }
  
  /*
   * Undo inc_pid_namespaces(): drop one UCOUNT_PID_NAMESPACES charge from
   * @ucounts.
   */
  static void dec_pid_namespaces(struct ucounts *ucounts)
  {
  	dec_ucount(ucounts, UCOUNT_PID_NAMESPACES);
  }
49f4d8b93   Eric W. Biederman   pidns: Capture th...
73
74
  /*
   * Allocate and initialise a new pid namespace one level below
   * @parent_pid_ns, owned by @user_ns.
   *
   * Returns the new namespace, or an ERR_PTR():
   *   -EINVAL  in_userns(parent_pid_ns->user_ns, user_ns) is false
   *   -ENOSPC  the new level would exceed MAX_PID_NS_LEVEL, or
   *            inc_pid_namespaces() returned NULL
   *   -ENOMEM  the namespace or its pid cache could not be allocated
   *   other    whatever ns_alloc_inum() returned
   *
   * On success the namespace holds a reference on @parent_pid_ns and on
   * @user_ns and keeps the ucounts charge taken here.
   */
  static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns,
  	struct pid_namespace *parent_pid_ns)
74bd59bb3   Pavel Emelyanov   namespaces: clean...
75
76
  {
  	struct pid_namespace *ns;
ed469a63c   Alexey Dobriyan   pidns: make creat...
77
  	unsigned int level = parent_pid_ns->level + 1;
f333c700c   Eric W. Biederman   pidns: Add a limi...
78
  	struct ucounts *ucounts;
f23025057   Andrew Vagin   pidns: limit the ...
79
  	int err;
a2b426267   Eric W. Biederman   userns,pidns: Ver...
80
81
82
  	err = -EINVAL;
  	if (!in_userns(parent_pid_ns->user_ns, user_ns))
  		goto out;
df75e7748   Eric W. Biederman   userns: When the ...
83
  	/* -ENOSPC covers both the nesting limit and the ucount limit. */
  	err = -ENOSPC;
f333c700c   Eric W. Biederman   pidns: Add a limi...
84
85
86
87
  	if (level > MAX_PID_NS_LEVEL)
  		goto out;
  	ucounts = inc_pid_namespaces(user_ns);
  	if (!ucounts)
f23025057   Andrew Vagin   pidns: limit the ...
88
  		goto out;
74bd59bb3   Pavel Emelyanov   namespaces: clean...
89

f23025057   Andrew Vagin   pidns: limit the ...
90
  	/* -ENOMEM also applies to a create_pid_cachep() failure below. */
  	err = -ENOMEM;
84406c153   Pavel Emelyanov   pidns: use kzallo...
91
  	ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
92
  	if (ns == NULL)
f333c700c   Eric W. Biederman   pidns: Add a limi...
93
  		goto out_dec;
74bd59bb3   Pavel Emelyanov   namespaces: clean...
94

95846ecf9   Gargi Sharma   pid: replace pid ...
95
  	idr_init(&ns->idr);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
96

dd206bec9   Alexey Dobriyan   pidns: simpler al...
97
  	ns->pid_cachep = create_pid_cachep(level);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
98
  	if (ns->pid_cachep == NULL)
95846ecf9   Gargi Sharma   pid: replace pid ...
99
  		goto out_free_idr;
74bd59bb3   Pavel Emelyanov   namespaces: clean...
100

6344c433a   Al Viro   new helpers: ns_a...
101
  	err = ns_alloc_inum(&ns->ns);
98f842e67   Eric W. Biederman   proc: Usable inod...
102
  	if (err)
95846ecf9   Gargi Sharma   pid: replace pid ...
103
  		goto out_free_idr;
33c429405   Al Viro   copy address of p...
104
  	ns->ns.ops = &pidns_operations;
98f842e67   Eric W. Biederman   proc: Usable inod...
105

74bd59bb3   Pavel Emelyanov   namespaces: clean...
106
  	/* Nothing below can fail; fill in the remaining fields. */
  	kref_init(&ns->kref);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
107
  	ns->level = level;
ed469a63c   Alexey Dobriyan   pidns: make creat...
108
  	ns->parent = get_pid_ns(parent_pid_ns);
49f4d8b93   Eric W. Biederman   pidns: Capture th...
109
  	ns->user_ns = get_user_ns(user_ns);
f333c700c   Eric W. Biederman   pidns: Add a limi...
110
  	ns->ucounts = ucounts;
e8cfbc245   Gargi Sharma   pid: remove pidhash
111
  	ns->pid_allocated = PIDNS_ADDING;
0a01f2cc3   Eric W. Biederman   pidns: Make the p...
112
  	INIT_WORK(&ns->proc_work, proc_cleanup_work);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
113

74bd59bb3   Pavel Emelyanov   namespaces: clean...
114
  	return ns;
95846ecf9   Gargi Sharma   pid: replace pid ...
115
116
  /* Error unwinding: each label undoes one step and falls through. */
  out_free_idr:
  	idr_destroy(&ns->idr);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
117
  	kmem_cache_free(pid_ns_cachep, ns);
f333c700c   Eric W. Biederman   pidns: Add a limi...
118
119
  out_dec:
  	dec_pid_namespaces(ucounts);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
120
  out:
4308eebbe   Eric W. Biederman   pidns: call pid_n...
121
  	return ERR_PTR(err);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
122
  }
1adfcb03e   Al Viro   pid_namespace: ma...
123
124
  /*
   * RCU callback queued by destroy_pid_namespace(): drops the ucounts
   * charge and the user namespace reference held by the pid namespace,
   * then returns the structure to pid_ns_cachep.
   */
  static void delayed_free_pidns(struct rcu_head *p)
  {
add7c65ca   Andrei Vagin   pid: fix lockdep ...
125
126
127
128
129
130
  	struct pid_namespace *ns = container_of(p, struct pid_namespace, rcu);
  
  	dec_pid_namespaces(ns->ucounts);
  	put_user_ns(ns->user_ns);
  
  	kmem_cache_free(pid_ns_cachep, ns);
1adfcb03e   Al Viro   pid_namespace: ma...
131
  }
74bd59bb3   Pavel Emelyanov   namespaces: clean...
132
133
  /*
   * Tear down @ns: release its inode number and idr immediately, and defer
   * the rest (ucounts, user namespace reference, freeing the structure) to
   * delayed_free_pidns() via call_rcu().
   */
  static void destroy_pid_namespace(struct pid_namespace *ns)
  {
6344c433a   Al Viro   new helpers: ns_a...
134
  	ns_free_inum(&ns->ns);
95846ecf9   Gargi Sharma   pid: replace pid ...
135
136
  
  	idr_destroy(&ns->idr);
1adfcb03e   Al Viro   pid_namespace: ma...
137
  	call_rcu(&ns->rcu, delayed_free_pidns);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
138
  }
49f4d8b93   Eric W. Biederman   pidns: Capture th...
139
140
  /*
   * Return the pid namespace to use given clone @flags.
   *
   * Without CLONE_NEWPID this takes a reference on @old_ns and returns it.
   * With CLONE_NEWPID a new namespace is created as a child of @old_ns,
   * but only if @old_ns is the caller's active pid namespace; otherwise
   * ERR_PTR(-EINVAL) is returned. Errors from create_pid_namespace() are
   * passed through unchanged.
   */
  struct pid_namespace *copy_pid_ns(unsigned long flags,
  	struct user_namespace *user_ns, struct pid_namespace *old_ns)
74bd59bb3   Pavel Emelyanov   namespaces: clean...
141
  {
74bd59bb3   Pavel Emelyanov   namespaces: clean...
142
  	if (!(flags & CLONE_NEWPID))
dca4a9796   Alexey Dobriyan   pidns: rewrite co...
143
  		return get_pid_ns(old_ns);
225778d68   Eric W. Biederman   pidns: Deny stran...
144
145
  	if (task_active_pid_ns(current) != old_ns)
  		return ERR_PTR(-EINVAL);
49f4d8b93   Eric W. Biederman   pidns: Capture th...
146
  	return create_pid_namespace(user_ns, old_ns);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
147
  }
bbc2e3ef8   Cyrill Gorcunov   pidns: remove rec...
148
  /*
   * kref release callback passed to kref_put() by put_pid_ns(): maps the
   * embedded kref back to its pid namespace and destroys it.
   */
  static void free_pid_ns(struct kref *kref)
74bd59bb3   Pavel Emelyanov   namespaces: clean...
149
  {
bbc2e3ef8   Cyrill Gorcunov   pidns: remove rec...
150
  	struct pid_namespace *ns;
74bd59bb3   Pavel Emelyanov   namespaces: clean...
151
152
  
  	ns = container_of(kref, struct pid_namespace, kref);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
153
  	destroy_pid_namespace(ns);
bbc2e3ef8   Cyrill Gorcunov   pidns: remove rec...
154
  }
74bd59bb3   Pavel Emelyanov   namespaces: clean...
155

bbc2e3ef8   Cyrill Gorcunov   pidns: remove rec...
156
157
158
159
160
161
162
163
164
165
  /*
   * Drop a reference on @ns.
   *
   * init_pid_ns is never released. When the reference dropped here was the
   * last one, free_pid_ns() destroys the namespace and the loop moves on to
   * drop the reference that namespace held on its parent, walking up the
   * hierarchy iteratively.
   */
  void put_pid_ns(struct pid_namespace *ns)
  {
  	struct pid_namespace *parent;
  
  	while (ns != &init_pid_ns) {
  		/* Read ->parent first: kref_put() may destroy ns. */
  		parent = ns->parent;
  		if (!kref_put(&ns->kref, free_pid_ns))
  			break;
  		ns = parent;
  	}
74bd59bb3   Pavel Emelyanov   namespaces: clean...
166
  }
bbc2e3ef8   Cyrill Gorcunov   pidns: remove rec...
167
  EXPORT_SYMBOL_GPL(put_pid_ns);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
168
169
170
171
172
  
  /*
   * Shut down @pid_ns on behalf of the calling task: stop further pid
   * allocation, SIGKILL the tasks found in the namespace's idr, reap
   * children, and sleep until only the caller's own pid(s) remain
   * allocated. Finally propagate a pending reboot signal into the exit
   * code and call acct_exit_ns().
   */
  void zap_pid_ns_processes(struct pid_namespace *pid_ns)
  {
  	int nr;
  	int rc;
00c10bc13   Eric W. Biederman   pidns: make kille...
173
  	struct task_struct *task, *me = current;
751c644b9   Eric W. Biederman   pid: Handle the e...
174
  	/*
  	 * Value of pid_allocated at which the wait loop below stops: 1 when
  	 * the caller is the thread group leader, otherwise 2 (presumably the
  	 * leader's pid is still allocated as well - confirm).
  	 */
  	int init_pids = thread_group_leader(me) ? 1 : 2;
95846ecf9   Gargi Sharma   pid: replace pid ...
175
  	struct pid *pid;
00c10bc13   Eric W. Biederman   pidns: make kille...
176

c876ad768   Eric W. Biederman   pidns: Stop pid a...
177
178
  	/* Don't allow any more processes into the pid namespace */
  	disable_pid_allocation(pid_ns);
a53b83154   Oleg Nesterov   exit: pidns: fix/...
179
180
181
182
183
  	/*
  	 * Ignore SIGCHLD causing any terminated children to autoreap.
  	 * This speeds up the namespace shutdown, plus see the comment
  	 * below.
  	 */
00c10bc13   Eric W. Biederman   pidns: make kille...
184
185
186
  	spin_lock_irq(&me->sighand->siglock);
  	me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN;
  	spin_unlock_irq(&me->sighand->siglock);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
187
188
189
190
191
192
193
194
195
196
197
198
199
200
  
  	/*
  	 * The last thread in the cgroup-init thread group is terminating.
  	 * Find remaining pid_ts in the namespace, signal and wait for them
  	 * to exit.
  	 *
  	 * Note:  This signals each thread in the namespace - even those that
  	 * 	  belong to the same thread group. To avoid this, we would have
  	 * 	  to walk the entire tasklist looking for processes in this
  	 * 	  namespace, but that could be unnecessarily expensive if the
  	 * 	  pid namespace has just a few processes. Or we need to
  	 * 	  maintain a tasklist for each pid namespace.
  	 *
  	 */
95846ecf9   Gargi Sharma   pid: replace pid ...
201
  	rcu_read_lock();
74bd59bb3   Pavel Emelyanov   namespaces: clean...
202
  	read_lock(&tasklist_lock);
95846ecf9   Gargi Sharma   pid: replace pid ...
203
204
205
  	/* Iteration starts at id 2, so the entry with id 1 is not signalled. */
  	nr = 2;
  	idr_for_each_entry_continue(&pid_ns->idr, pid, nr) {
  		task = pid_task(pid, PIDTYPE_PID);
a02d6fd64   Oleg Nesterov   signal: zap_pid_n...
206
207
  		/* Skip tasks that already have a fatal signal pending. */
  		if (task && !__fatal_signal_pending(task))
  			send_sig_info(SIGKILL, SEND_SIG_FORCED, task);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
208
209
  	}
  	read_unlock(&tasklist_lock);
95846ecf9   Gargi Sharma   pid: replace pid ...
210
  	rcu_read_unlock();
74bd59bb3   Pavel Emelyanov   namespaces: clean...
211

a53b83154   Oleg Nesterov   exit: pidns: fix/...
212
213
  	/*
  	 * Reap the EXIT_ZOMBIE children we had before we ignored SIGCHLD.
d300b6108   Dominik Brodowski   kernel: use kerne...
214
  	 * kernel_wait4() will also block until our children traced from the
a53b83154   Oleg Nesterov   exit: pidns: fix/...
215
216
  	 * parent namespace are detached and become EXIT_DEAD.
  	 */
74bd59bb3   Pavel Emelyanov   namespaces: clean...
217
218
  	/* Loop until kernel_wait4() reports that no children are left. */
  	do {
  		clear_thread_flag(TIF_SIGPENDING);
d300b6108   Dominik Brodowski   kernel: use kerne...
219
  		rc = kernel_wait4(-1, NULL, __WALL, NULL);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
220
  	} while (rc != -ECHILD);
6347e9009   Eric W. Biederman   pidns: guarantee ...
221
  	/*
d300b6108   Dominik Brodowski   kernel: use kerne...
222
  	 * kernel_wait4() above can't reap the EXIT_DEAD children but we do not
a53b83154   Oleg Nesterov   exit: pidns: fix/...
223
224
  	 * really care, we could reparent them to the global init. We could
  	 * exit and reap ->child_reaper even if it is not the last thread in
e8cfbc245   Gargi Sharma   pid: remove pidhash
225
  	 * this pid_ns, free_pid(pid_allocated == 0) calls proc_cleanup_work(),
a53b83154   Oleg Nesterov   exit: pidns: fix/...
226
227
228
229
230
231
232
233
234
235
  	 * pid_ns can not go away until proc_kill_sb() drops the reference.
  	 *
  	 * But this ns can also have other tasks injected by setns()+fork().
  	 * Again, ignoring the user visible semantics we do not really need
  	 * to wait until they are all reaped, but they can be reparented to
  	 * us and thus we need to ensure that pid->child_reaper stays valid
  	 * until they all go away. See free_pid()->wake_up_process().
  	 *
  	 * We rely on ignored SIGCHLD, an injected zombie must be autoreaped
  	 * if reparented.
6347e9009   Eric W. Biederman   pidns: guarantee ...
236
237
  	 */
  	for (;;) {
b9a985db9   Eric W. Biederman   pid_ns: Sleep in ...
238
  		/* Set the sleeping state before testing the exit condition. */
  		set_current_state(TASK_INTERRUPTIBLE);
e8cfbc245   Gargi Sharma   pid: remove pidhash
239
  		if (pid_ns->pid_allocated == init_pids)
6347e9009   Eric W. Biederman   pidns: guarantee ...
240
241
242
  			break;
  		schedule();
  	}
af4b8a83a   Eric W. Biederman   pidns: Wait in za...
243
  	__set_current_state(TASK_RUNNING);
6347e9009   Eric W. Biederman   pidns: guarantee ...
244

cf3f89214   Daniel Lezcano   pidns: add reboot...
245
246
  	/* reboot_pid_ns() stores SIGHUP or SIGINT in ->reboot. */
  	if (pid_ns->reboot)
  		current->signal->group_exit_code = pid_ns->reboot;
0b6b030fc   Pavel Emelyanov   bsdacct: switch f...
247
  	acct_exit_ns(pid_ns);
74bd59bb3   Pavel Emelyanov   namespaces: clean...
248
249
  	return;
  }
98ed57eef   Cyrill Gorcunov   sysctl: make kern...
250
  #ifdef CONFIG_CHECKPOINT_RESTORE
b8f566b04   Pavel Emelyanov   sysctl: add the k...
251
252
253
  /*
   * sysctl handler for "ns_last_pid" in the caller's active pid namespace.
   *
   * The value exposed is the namespace idr cursor minus one; a successful
   * write stores the written value plus one back as the cursor. Writes
   * require CAP_SYS_ADMIN in the user namespace owning the pid namespace
   * and fail with -EPERM otherwise. Range checking and user-buffer
   * handling are delegated to proc_dointvec_minmax(), whose result is
   * returned.
   */
  static int pid_ns_ctl_handler(struct ctl_table *table, int write,
  		void __user *buffer, size_t *lenp, loff_t *ppos)
  {
49f4d8b93   Eric W. Biederman   pidns: Capture th...
254
  	struct pid_namespace *pid_ns = task_active_pid_ns(current);
b8f566b04   Pavel Emelyanov   sysctl: add the k...
255
  	/* Work on a copy so ->data can point at the local 'next'. */
  	struct ctl_table tmp = *table;
95846ecf9   Gargi Sharma   pid: replace pid ...
256
  	int ret, next;
b8f566b04   Pavel Emelyanov   sysctl: add the k...
257

49f4d8b93   Eric W. Biederman   pidns: Capture th...
258
  	if (write && !ns_capable(pid_ns->user_ns, CAP_SYS_ADMIN))
b8f566b04   Pavel Emelyanov   sysctl: add the k...
259
260
261
262
263
264
265
  		return -EPERM;
  
  	/*
  	 * Writing directly to ns' last_pid field is OK, since this field
  	 * is volatile in a living namespace anyway and a code writing to
  	 * it should synchronize its usage with external means.
  	 *
  	 * NOTE(review): the code below goes through the idr cursor rather
  	 * than a last_pid field; the wording above looks historical.
  	 */
95846ecf9   Gargi Sharma   pid: replace pid ...
266
267
268
269
270
271
272
273
  	next = idr_get_cursor(&pid_ns->idr) - 1;
  
  	tmp.data = &next;
  	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
  	if (!ret && write)
  		idr_set_cursor(&pid_ns->idr, next + 1);
  
  	return ret;
b8f566b04   Pavel Emelyanov   sysctl: add the k...
274
  }
579035dc5   Andrew Vagin   pid-namespace: li...
275
276
  /* Bounds handed to the sysctl entry below through .extra1/.extra2. */
  extern int pid_max;
  static int zero = 0;
b8f566b04   Pavel Emelyanov   sysctl: add the k...
277
278
279
280
281
282
  /* sysctl table with the single "ns_last_pid" entry. */
  static struct ctl_table pid_ns_ctl_table[] = {
  	{
  		.procname = "ns_last_pid",
  		.maxlen = sizeof(int),
  		.mode = 0666, /* permissions are checked in the handler */
  		.proc_handler = pid_ns_ctl_handler,
579035dc5   Andrew Vagin   pid-namespace: li...
283
284
  		.extra1 = &zero,
  		.extra2 = &pid_max,
b8f566b04   Pavel Emelyanov   sysctl: add the k...
285
286
287
  	},
  	{ }
  };
b8f566b04   Pavel Emelyanov   sysctl: add the k...
288
  /* Path ("kernel") the table is registered under in pid_namespaces_init(). */
  static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } };
98ed57eef   Cyrill Gorcunov   sysctl: make kern...
289
  #endif	/* CONFIG_CHECKPOINT_RESTORE */
b8f566b04   Pavel Emelyanov   sysctl: add the k...
290

cf3f89214   Daniel Lezcano   pidns: add reboot...
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
  /*
   * Handle a reboot command @cmd directed at @pid_ns.
   *
   * For init_pid_ns nothing is done and 0 is returned. For any other
   * namespace, restart commands record SIGHUP and halt/power-off commands
   * record SIGINT in pid_ns->reboot; any other command yields -EINVAL.
   * The namespace's child_reaper is then sent SIGKILL and the calling
   * task exits, so the function does not return in that case.
   */
  int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
  {
  	int reboot_sig;

  	if (pid_ns == &init_pid_ns)
  		return 0;

  	if (cmd == LINUX_REBOOT_CMD_RESTART2 ||
  	    cmd == LINUX_REBOOT_CMD_RESTART)
  		reboot_sig = SIGHUP;
  	else if (cmd == LINUX_REBOOT_CMD_POWER_OFF ||
  		 cmd == LINUX_REBOOT_CMD_HALT)
  		reboot_sig = SIGINT;
  	else
  		return -EINVAL;

  	/* Picked up by zap_pid_ns_processes() as the group exit code. */
  	pid_ns->reboot = reboot_sig;

  	read_lock(&tasklist_lock);
  	force_sig(SIGKILL, pid_ns->child_reaper);
  	read_unlock(&tasklist_lock);

  	do_exit(0);

  	/* Not reached */
  	return 0;
  }
3c0411846   Al Viro   switch the rest o...
319
320
321
322
  /* Map an embedded ns_common back to the pid_namespace that contains it. */
  static inline struct pid_namespace *to_pid_ns(struct ns_common *ns)
  {
  	struct pid_namespace *pid_ns;

  	pid_ns = container_of(ns, struct pid_namespace, ns);
  	return pid_ns;
  }
64964528b   Al Viro   make proc_ns_oper...
323
  /*
   * .get operation of pidns_operations: take a reference on @task's active
   * pid namespace and return its ns_common, or NULL if
   * task_active_pid_ns() yields no namespace. The lookup and the
   * reference grab happen inside an RCU read-side section.
   */
  static struct ns_common *pidns_get(struct task_struct *task)
57e8391d3   Eric W. Biederman   pidns: Add setns ...
324
325
326
327
  {
  	struct pid_namespace *ns;
  
  	rcu_read_lock();
d23082257   Oleg Nesterov   pid_namespace: pi...
328
329
330
  	ns = task_active_pid_ns(task);
  	if (ns)
  		get_pid_ns(ns);
57e8391d3   Eric W. Biederman   pidns: Add setns ...
331
  	rcu_read_unlock();
3c0411846   Al Viro   switch the rest o...
332
  	return ns ? &ns->ns : NULL;
57e8391d3   Eric W. Biederman   pidns: Add setns ...
333
  }
eaa0d190b   Kirill Tkhai   pidns: expose tas...
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
  /*
   * .get operation of pidns_for_children_operations.
   *
   * Takes a reference on @task's nsproxy->pid_ns_for_children (under
   * task_lock) and returns its ns_common. Returns NULL when the task has
   * no nsproxy, or when the namespace has no child_reaper; in the latter
   * case the reference is dropped again.
   */
  static struct ns_common *pidns_for_children_get(struct task_struct *task)
  {
  	struct pid_namespace *child_ns = NULL;

  	task_lock(task);
  	if (task->nsproxy)
  		child_ns = get_pid_ns(task->nsproxy->pid_ns_for_children);
  	task_unlock(task);

  	if (!child_ns)
  		return NULL;

  	/* child_reaper is inspected with tasklist_lock held for reading. */
  	read_lock(&tasklist_lock);
  	if (!child_ns->child_reaper) {
  		put_pid_ns(child_ns);
  		child_ns = NULL;
  	}
  	read_unlock(&tasklist_lock);

  	if (!child_ns)
  		return NULL;
  	return &child_ns->ns;
  }
64964528b   Al Viro   make proc_ns_oper...
356
  /* .put operation: drop the pid namespace reference behind @ns. */
  static void pidns_put(struct ns_common *ns)
57e8391d3   Eric W. Biederman   pidns: Add setns ...
357
  {
3c0411846   Al Viro   switch the rest o...
358
  	put_pid_ns(to_pid_ns(ns));
57e8391d3   Eric W. Biederman   pidns: Add setns ...
359
  }
64964528b   Al Viro   make proc_ns_oper...
360
  /*
   * .install operation: make @ns the pid namespace that @nsproxy will use
   * for children (nsproxy->pid_ns_for_children).
   *
   * Returns 0 on success, -EPERM if the caller lacks CAP_SYS_ADMIN in
   * either the target namespace's user namespace or its own user
   * namespace, and -EINVAL if the target is neither the caller's active
   * pid namespace nor a descendant of it.
   */
  static int pidns_install(struct nsproxy *nsproxy, struct ns_common *ns)
57e8391d3   Eric W. Biederman   pidns: Add setns ...
361
362
  {
  	struct pid_namespace *active = task_active_pid_ns(current);
3c0411846   Al Viro   switch the rest o...
363
  	struct pid_namespace *ancestor, *new = to_pid_ns(ns);
57e8391d3   Eric W. Biederman   pidns: Add setns ...
364

5e4a08476   Eric W. Biederman   userns: Require C...
365
  	if (!ns_capable(new->user_ns, CAP_SYS_ADMIN) ||
c7b96acf1   Eric W. Biederman   userns: Kill nso...
366
  	    !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
57e8391d3   Eric W. Biederman   pidns: Add setns ...
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
  		return -EPERM;
  
  	/*
  	 * Only allow entering the current active pid namespace
  	 * or a child of the current active pid namespace.
  	 *
  	 * This is required for fork to return a usable pid value and
  	 * this maintains the property that processes and their
  	 * children can not escape their current pid namespace.
  	 */
  	if (new->level < active->level)
  		return -EINVAL;
  
  	/* Walk up from 'new' to the level of 'active'; they must meet. */
  	ancestor = new;
  	while (ancestor->level > active->level)
  		ancestor = ancestor->parent;
  	if (ancestor != active)
  		return -EINVAL;
c2b1df2eb   Andy Lutomirski   Rename nsproxy.pi...
385
386
  	/* Swap the reference held by nsproxy from the old namespace to 'new'. */
  	put_pid_ns(nsproxy->pid_ns_for_children);
  	nsproxy->pid_ns_for_children = get_pid_ns(new);
57e8391d3   Eric W. Biederman   pidns: Add setns ...
387
388
  	return 0;
  }
a7306ed8d   Andrey Vagin   nsfs: add ioctl t...
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
  /*
   * .get_parent operation: return a referenced ns_common for the parent of
   * @ns, provided that the parent is the caller's active pid namespace or
   * one of its descendants. Otherwise (including when @ns has no parent)
   * ERR_PTR(-EPERM) is returned.
   */
  static struct ns_common *pidns_get_parent(struct ns_common *ns)
  {
  	struct pid_namespace *active = task_active_pid_ns(current);
  	struct pid_namespace *parent = to_pid_ns(ns)->parent;
  	struct pid_namespace *walk = parent;

  	/* Climb from the parent until we hit 'active' or run out of ancestors. */
  	while (walk && walk != active)
  		walk = walk->parent;

  	if (!walk)
  		return ERR_PTR(-EPERM);

  	return &get_pid_ns(parent)->ns;
  }
bcac25a58   Andrey Vagin   kernel: add a hel...
406
407
408
409
  static struct user_namespace *pidns_owner(struct ns_common *ns)
  {
  	return to_pid_ns(ns)->user_ns;
  }
57e8391d3   Eric W. Biederman   pidns: Add setns ...
410
411
412
413
414
415
  /*
   * Namespace operations for the "pid" entry; .get resolves a task's
   * active pid namespace (see pidns_get()).
   */
  const struct proc_ns_operations pidns_operations = {
  	.name		= "pid",
  	.type		= CLONE_NEWPID,
  	.get		= pidns_get,
  	.put		= pidns_put,
  	.install	= pidns_install,
bcac25a58   Andrey Vagin   kernel: add a hel...
416
  	.owner		= pidns_owner,
a7306ed8d   Andrey Vagin   nsfs: add ioctl t...
417
  	.get_parent	= pidns_get_parent,
57e8391d3   Eric W. Biederman   pidns: Add setns ...
418
  };
eaa0d190b   Kirill Tkhai   pidns: expose tas...
419
420
421
422
423
424
425
426
427
428
  /*
   * Namespace operations for the "pid_for_children" entry. Identical to
   * pidns_operations except for the names and for .get, which resolves
   * nsproxy->pid_ns_for_children instead of the task's active namespace.
   */
  const struct proc_ns_operations pidns_for_children_operations = {
  	/* identification */
  	.type		= CLONE_NEWPID,
  	.name		= "pid_for_children",
  	.real_ns_name	= "pid",

  	/* reference handling */
  	.get		= pidns_for_children_get,
  	.put		= pidns_put,

  	/* installation and relationship queries */
  	.install	= pidns_install,
  	.get_parent	= pidns_get_parent,
  	.owner		= pidns_owner,
  };
74bd59bb3   Pavel Emelyanov   namespaces: clean...
429
430
431
  /*
   * Boot-time initialisation: create the slab cache for struct
   * pid_namespace (SLAB_PANIC is passed to KMEM_CACHE) and, with
   * CONFIG_CHECKPOINT_RESTORE, register the "kernel" / "ns_last_pid"
   * sysctl. Always returns 0.
   */
  static __init int pid_namespaces_init(void)
  {
  	pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
98ed57eef   Cyrill Gorcunov   sysctl: make kern...
432
433
  
  #ifdef CONFIG_CHECKPOINT_RESTORE
b8f566b04   Pavel Emelyanov   sysctl: add the k...
434
  	/* NOTE(review): the return value of the registration is not checked. */
  	register_sysctl_paths(kern_path, pid_ns_ctl_table);
98ed57eef   Cyrill Gorcunov   sysctl: make kern...
435
  #endif
74bd59bb3   Pavel Emelyanov   namespaces: clean...
436
437
438
439
  	return 0;
  }
  
  __initcall(pid_namespaces_init);