Blame view

kernel/pid.c 17 KB
457c89965   Thomas Gleixner   treewide: Add SPD...
1
  // SPDX-License-Identifier: GPL-2.0-only
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2
3
4
  /*
   * Generic pidhash and scalable, time-bounded PID allocator
   *
6d49e352a   Nadia Yvette Chambers   propagate name ch...
5
6
   * (C) 2002-2003 Nadia Yvette Chambers, IBM
   * (C) 2004 Nadia Yvette Chambers, Oracle
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
   * (C) 2002-2004 Ingo Molnar, Red Hat
   *
   * pid-structures are backing objects for tasks sharing a given ID to chain
   * against. There is very little to them aside from hashing them and
   * parking tasks using given ID's on a list.
   *
   * The hash is always changed with the tasklist_lock write-acquired,
   * and the hash is only accessed with the tasklist_lock at least
   * read-acquired, so there's no additional SMP locking needed here.
   *
   * We have a list of bitmap pages, which bitmaps represent the PID space.
   * Allocating and freeing PIDs is completely lockless. The worst-case
   * allocation scenario when all but one out of 1 million PIDs possible are
   * allocated already: the scanning of 32 list entries and at most PAGE_SIZE
   * bytes. The typical fastpath is a single successful setbit. Freeing is O(1).
30e49c263   Pavel Emelyanov   pid namespaces: a...
22
23
24
25
26
27
   *
   * Pid namespaces:
   *    (C) 2007 Pavel Emelyanov <xemul@openvz.org>, OpenVZ, SWsoft Inc.
   *    (C) 2007 Sukadev Bhattiprolu <sukadev@us.ibm.com>, IBM
   *     Many thanks to Oleg Nesterov for comments and help
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
28
29
30
   */
  
  #include <linux/mm.h>
9984de1a5   Paul Gortmaker   kernel: Map most ...
31
  #include <linux/export.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
32
33
  #include <linux/slab.h>
  #include <linux/init.h>
82524746c   Franck Bui-Huu   rcu: split list.h...
34
  #include <linux/rculist.h>
57c8a661d   Mike Rapoport   mm: remove includ...
35
  #include <linux/memblock.h>
61a58c6c2   Sukadev Bhattiprolu   [PATCH] rename st...
36
  #include <linux/pid_namespace.h>
820e45db2   Sukadev Bhattiprolu   statically initia...
37
  #include <linux/init_task.h>
3eb07c8c8   Sukadev Bhattiprolu   pid namespaces: d...
38
  #include <linux/syscalls.h>
0bb80f240   David Howells   proc: Split the n...
39
  #include <linux/proc_ns.h>
f57e515a1   Joel Fernandes (Google)   kernel/pid.c: con...
40
  #include <linux/refcount.h>
32fcb426e   Christian Brauner   pid: add pidfd_op...
41
42
  #include <linux/anon_inodes.h>
  #include <linux/sched/signal.h>
299300258   Ingo Molnar   sched/headers: Pr...
43
  #include <linux/sched/task.h>
95846ecf9   Gargi Sharma   pid: replace pid ...
44
  #include <linux/idr.h>
4969f8a07   Kees Cook   pidfd: Add missin...
45
  #include <net/sock.h>
6da73d152   Christian Brauner   pidfd: support PI...
46
  #include <uapi/linux/pidfd.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
47

e1e871aff   David Howells   Expand INIT_STRUC...
48
  struct pid init_struct_pid = {
f57e515a1   Joel Fernandes (Google)   kernel/pid.c: con...
49
  	.count		= REFCOUNT_INIT(1),
e1e871aff   David Howells   Expand INIT_STRUC...
50
51
52
53
54
55
56
57
58
59
60
  	.tasks		= {
  		{ .first = NULL },
  		{ .first = NULL },
  		{ .first = NULL },
  	},
  	.level		= 0,
  	.numbers	= { {
  		.nr		= 0,
  		.ns		= &init_pid_ns,
  	}, }
  };
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
61
62
  
  /*
   * Upper limit used by alloc_pid(): requested tids >= pid_max are rejected
   * and pid_max is passed as the end of the range searched in the IDR.
   * pid_idr_init() may raise the default based on the number of possible CPUs.
   */
  int pid_max = PID_MAX_DEFAULT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
63
64
65
66
67
  
  /*
   * Once the allocation cursor of a namespace has moved past this value,
   * alloc_pid() starts its cyclic search at RESERVED_PIDS instead of 1.
   */
  #define RESERVED_PIDS		300
  
  /*
   * Bounds for pid_max. pid_idr_init() raises pid_max_min according to the
   * number of possible CPUs and never lets pid_max exceed pid_max_max.
   * NOTE(review): presumably also used as sysctl limits elsewhere - confirm.
   */
  int pid_max_min = RESERVED_PIDS + 1;
  int pid_max_max = PID_MAX_LIMIT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
68
69
70
71
72
73
  /*
   * PID-map pages start out as NULL, they get allocated upon
   * first use and are never deallocated. This way a low pid_max
   * value does not cause lots of bitmaps to be allocated, but
   * the scheme scales to up to 4 million PIDs, runtime.
   */
61a58c6c2   Sukadev Bhattiprolu   [PATCH] rename st...
74
  struct pid_namespace init_pid_ns = {
1e24edca0   Peter Zijlstra   locking/atomic, k...
75
  	.kref = KREF_INIT(2),
f6bb2a2c0   Matthew Wilcox   xarray: add the x...
76
  	.idr = IDR_INIT(init_pid_ns.idr),
e8cfbc245   Gargi Sharma   pid: remove pidhash
77
  	.pid_allocated = PIDNS_ADDING,
faacbfd3a   Pavel Emelyanov   pid namespaces: a...
78
79
  	.level = 0,
  	.child_reaper = &init_task,
49f4d8b93   Eric W. Biederman   pidns: Capture th...
80
  	.user_ns = &init_user_ns,
435d5f4bb   Al Viro   common object emb...
81
  	.ns.inum = PROC_PID_INIT_INO,
33c429405   Al Viro   copy address of p...
82
83
84
  #ifdef CONFIG_PID_NS
  	.ns.ops = &pidns_operations,
  #endif
3fbc96486   Sukadev Bhattiprolu   [PATCH] Define st...
85
  };
198fe21b0   Pavel Emelyanov   pid namespaces: h...
86
  EXPORT_SYMBOL_GPL(init_pid_ns);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
87

92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
88
89
90
91
92
93
94
95
96
97
98
99
100
  /*
   * Note: disable interrupts while the pidmap_lock is held as an
   * interrupt might come in and do read_lock(&tasklist_lock).
   *
   * If we don't disable interrupts there is a nasty deadlock between
   * detach_pid()->free_pid() and another cpu that does
   * spin_lock(&pidmap_lock) followed by an interrupt routine that does
   * read_lock(&tasklist_lock);
   *
   * After we clean up the tasklist_lock and know there are no
   * irq handlers that take it we can leave the interrupts enabled.
   * For now it is easier to be safe than to prove it can't happen.
   */
3fbc96486   Sukadev Bhattiprolu   [PATCH] Define st...
101

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
102
  static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
7ad5b3a50   Harvey Harrison   kernel: remove fa...
103
  void put_pid(struct pid *pid)
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
104
  {
baf8f0f82   Pavel Emelianov   pid namespaces: d...
105
  	struct pid_namespace *ns;
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
106
107
  	if (!pid)
  		return;
baf8f0f82   Pavel Emelianov   pid namespaces: d...
108

8ef047aaa   Pavel Emelyanov   pid namespaces: m...
109
  	ns = pid->numbers[pid->level].ns;
f57e515a1   Joel Fernandes (Google)   kernel/pid.c: con...
110
  	if (refcount_dec_and_test(&pid->count)) {
baf8f0f82   Pavel Emelianov   pid namespaces: d...
111
  		kmem_cache_free(ns->pid_cachep, pid);
b461cc038   Pavel Emelyanov   pid namespaces: m...
112
  		put_pid_ns(ns);
8ef047aaa   Pavel Emelyanov   pid namespaces: m...
113
  	}
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
114
  }
bbf73147e   Eric W. Biederman   [PATCH] pid: expo...
115
  EXPORT_SYMBOL_GPL(put_pid);
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
116
117
118
119
120
121
  
  /* RCU callback queued by free_pid(): drops a reference on the embedding pid. */
  static void delayed_put_pid(struct rcu_head *rhp)
  {
  	put_pid(container_of(rhp, struct pid, rcu));
  }
7ad5b3a50   Harvey Harrison   kernel: remove fa...
122
  void free_pid(struct pid *pid)
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
123
124
  {
  	/* We can be called with write_lock_irq(&tasklist_lock) held */
8ef047aaa   Pavel Emelyanov   pid namespaces: m...
125
  	int i;
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
126
127
128
  	unsigned long flags;
  
  	spin_lock_irqsave(&pidmap_lock, flags);
0a01f2cc3   Eric W. Biederman   pidns: Make the p...
129
130
  	for (i = 0; i <= pid->level; i++) {
  		struct upid *upid = pid->numbers + i;
af4b8a83a   Eric W. Biederman   pidns: Wait in za...
131
  		struct pid_namespace *ns = upid->ns;
e8cfbc245   Gargi Sharma   pid: remove pidhash
132
  		switch (--ns->pid_allocated) {
a60648851   Eric W. Biederman   pidns: Fix hang i...
133
  		case 2:
af4b8a83a   Eric W. Biederman   pidns: Wait in za...
134
135
136
137
138
139
140
  		case 1:
  			/* When all that is left in the pid namespace
  			 * is the reaper wake up the reaper.  The reaper
  			 * may be sleeping in zap_pid_ns_processes().
  			 */
  			wake_up_process(ns->child_reaper);
  			break;
e8cfbc245   Gargi Sharma   pid: remove pidhash
141
  		case PIDNS_ADDING:
314a8ad0f   Oleg Nesterov   pidns: fix free_p...
142
143
  			/* Handle a fork failure of the first process */
  			WARN_ON(ns->child_reaper);
e8cfbc245   Gargi Sharma   pid: remove pidhash
144
  			ns->pid_allocated = 0;
af4b8a83a   Eric W. Biederman   pidns: Wait in za...
145
  			break;
5e1182deb   Eric W. Biederman   pidns: Don't allo...
146
  		}
95846ecf9   Gargi Sharma   pid: replace pid ...
147
148
  
  		idr_remove(&ns->idr, upid->nr);
0a01f2cc3   Eric W. Biederman   pidns: Make the p...
149
  	}
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
150
  	spin_unlock_irqrestore(&pidmap_lock, flags);
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
151
152
  	call_rcu(&pid->rcu, delayed_put_pid);
  }
49cb2fc42   Adrian Reber   fork: extend clon...
153
154
  /*
   * alloc_pid() - allocate a struct pid with a number in @ns and in each of
   * its ancestor namespaces.
   *
   * @ns:           most nested namespace the new pid lives in
   * @set_tid:      optional array of requested numbers, innermost level first
   * @set_tid_size: number of valid entries in @set_tid
   *
   * Returns the new pid, or an ERR_PTR(): -EINVAL for a bad @set_tid request,
   * -EPERM if the caller lacks the checkpoint/restore capability for a
   * requested number, -EEXIST if a requested number is taken, -EAGAIN if the
   * number space is exhausted, and -ENOMEM otherwise.
   */
  struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
  		      size_t set_tid_size)
  {
  	struct pid_namespace *cur_ns;
  	struct upid *slot;
  	struct pid *pid;
  	enum pid_type type;
  	int level, id, publish;
  	int err = -ENOMEM;

  	/*
  	 * set_tid holds one requested PID per namespace, starting at the
  	 * most nested active one. Not every level needs a request, but there
  	 * can never be more requests than levels (ns->level + 1).
  	 */
  	if (set_tid_size > ns->level + 1)
  		return ERR_PTR(-EINVAL);

  	pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL);
  	if (!pid)
  		return ERR_PTR(-ENOMEM);

  	pid->level = ns->level;
  	cur_ns = ns;

  	/* Walk from the innermost namespace up to the root. */
  	for (level = ns->level; level >= 0; level--) {
  		int tid = 0;

  		if (set_tid_size) {
  			tid = set_tid[ns->level - level];

  			if (tid < 1 || tid >= pid_max) {
  				err = -EINVAL;
  				goto out_free;
  			}
  			/*
  			 * A PID other than 1 cannot be requested while the
  			 * namespace has no PID 1 yet.
  			 */
  			if (tid != 1 && !cur_ns->child_reaper) {
  				err = -EINVAL;
  				goto out_free;
  			}
  			if (!checkpoint_restore_ns_capable(cur_ns->user_ns)) {
  				err = -EPERM;
  				goto out_free;
  			}
  			set_tid_size--;
  		}

  		idr_preload(GFP_KERNEL);
  		spin_lock_irq(&pidmap_lock);

  		if (tid) {
  			id = idr_alloc(&cur_ns->idr, NULL, tid,
  				       tid + 1, GFP_ATOMIC);
  			/*
  			 * ENOSPC here means the requested PID is already
  			 * in use; report that as EEXIST.
  			 */
  			if (id == -ENOSPC)
  				id = -EEXIST;
  		} else {
  			/*
  			 * init really needs pid 1, but after reaching the
  			 * maximum wrap back to RESERVED_PIDS
  			 */
  			int pid_min = (idr_get_cursor(&cur_ns->idr) >
  				       RESERVED_PIDS) ? RESERVED_PIDS : 1;

  			/*
  			 * A NULL pointer is stored for now so find_pid_ns()
  			 * cannot return a partially initialized PID.
  			 */
  			id = idr_alloc_cyclic(&cur_ns->idr, NULL, pid_min,
  					      pid_max, GFP_ATOMIC);
  		}

  		spin_unlock_irq(&pidmap_lock);
  		idr_preload_end();

  		if (id < 0) {
  			err = (id == -ENOSPC) ? -EAGAIN : id;
  			goto out_free;
  		}

  		pid->numbers[level].nr = id;
  		pid->numbers[level].ns = cur_ns;
  		cur_ns = cur_ns->parent;
  	}

  	/*
  	 * ENOMEM is not the most obvious choice especially for the case
  	 * where the child subreaper has already exited and the pid
  	 * namespace denies the creation of any new processes. But ENOMEM
  	 * is what we have exposed to userspace for a long time and it is
  	 * documented behavior for pid namespaces. So we can't easily
  	 * change it even if there were an error code better suited.
  	 */
  	err = -ENOMEM;

  	get_pid_ns(ns);
  	refcount_set(&pid->count, 1);
  	spin_lock_init(&pid->lock);
  	for (type = 0; type < PIDTYPE_MAX; ++type)
  		INIT_HLIST_HEAD(&pid->tasks[type]);

  	init_waitqueue_head(&pid->wait_pidfd);
  	INIT_HLIST_HEAD(&pid->inodes);

  	spin_lock_irq(&pidmap_lock);
  	if (!(ns->pid_allocated & PIDNS_ADDING))
  		goto out_unlock;
  	for (publish = ns->level; publish >= 0; publish--) {
  		slot = &pid->numbers[publish];
  		/* Make the PID visible to find_pid_ns. */
  		idr_replace(&slot->ns->idr, pid, slot->nr);
  		slot->ns->pid_allocated++;
  	}
  	spin_unlock_irq(&pidmap_lock);

  	return pid;

  out_unlock:
  	spin_unlock_irq(&pidmap_lock);
  	put_pid_ns(ns);

  out_free:
  	spin_lock_irq(&pidmap_lock);
  	/* Give back the numbers of every level that was already allocated. */
  	for (level++; level <= ns->level; level++) {
  		slot = &pid->numbers[level];
  		idr_remove(&slot->ns->idr, slot->nr);
  	}

  	/* On failure to allocate the first pid, reset the state */
  	if (ns->pid_allocated == PIDNS_ADDING)
  		idr_set_cursor(&ns->idr, 0);

  	spin_unlock_irq(&pidmap_lock);

  	kmem_cache_free(ns->pid_cachep, pid);
  	return ERR_PTR(err);
  }
c876ad768   Eric W. Biederman   pidns: Stop pid a...
285
286
287
  void disable_pid_allocation(struct pid_namespace *ns)
  {
  	spin_lock_irq(&pidmap_lock);
e8cfbc245   Gargi Sharma   pid: remove pidhash
288
  	ns->pid_allocated &= ~PIDNS_ADDING;
c876ad768   Eric W. Biederman   pidns: Stop pid a...
289
290
  	spin_unlock_irq(&pidmap_lock);
  }
7ad5b3a50   Harvey Harrison   kernel: remove fa...
291
  struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
292
  {
e8cfbc245   Gargi Sharma   pid: remove pidhash
293
  	return idr_find(&ns->idr, nr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
294
  }
198fe21b0   Pavel Emelyanov   pid namespaces: h...
295
  EXPORT_SYMBOL_GPL(find_pid_ns);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
296

8990571eb   Pavel Emelyanov   Uninline find_pid...
297
298
  struct pid *find_vpid(int nr)
  {
17cf22c33   Eric W. Biederman   pidns: Use task_a...
299
  	return find_pid_ns(nr, task_active_pid_ns(current));
8990571eb   Pavel Emelyanov   Uninline find_pid...
300
301
  }
  EXPORT_SYMBOL_GPL(find_vpid);
2c4704756   Eric W. Biederman   pids: Move the pg...
302
303
304
305
  /*
   * Return the location of @task's pid pointer for @type: PIDTYPE_PID lives
   * in the task itself, every other type in the shared signal struct.
   */
  static struct pid **task_pid_ptr(struct task_struct *task, enum pid_type type)
  {
  	if (type == PIDTYPE_PID)
  		return &task->thread_pid;

  	return &task->signal->pids[type];
  }
e713d0dab   Sukadev Bhattiprolu   attach_pid() with...
308
309
310
  /*
   * attach_pid() must be called with the tasklist_lock write-held.
   */
819077398   Oleg Nesterov   kernel/fork.c:cop...
311
  void attach_pid(struct task_struct *task, enum pid_type type)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
312
  {
2c4704756   Eric W. Biederman   pids: Move the pg...
313
314
  	struct pid *pid = *task_pid_ptr(task, type);
  	hlist_add_head_rcu(&task->pid_links[type], &pid->tasks[type]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
315
  }
24336eaee   Oleg Nesterov   pids: introduce c...
316
317
  static void __change_pid(struct task_struct *task, enum pid_type type,
  			struct pid *new)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
318
  {
2c4704756   Eric W. Biederman   pids: Move the pg...
319
  	struct pid **pid_ptr = task_pid_ptr(task, type);
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
320
321
  	struct pid *pid;
  	int tmp;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
322

2c4704756   Eric W. Biederman   pids: Move the pg...
323
  	pid = *pid_ptr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
324

2c4704756   Eric W. Biederman   pids: Move the pg...
325
326
  	hlist_del_rcu(&task->pid_links[type]);
  	*pid_ptr = new;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
327

92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
328
  	for (tmp = PIDTYPE_MAX; --tmp >= 0; )
1d416a113   Christian Brauner   pid: use pid_has_...
329
  		if (pid_has_task(pid, tmp))
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
330
  			return;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
331

92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
332
  	free_pid(pid);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
333
  }
24336eaee   Oleg Nesterov   pids: introduce c...
334
335
336
337
338
339
340
341
342
  /*
   * Detach @task from its @type pid, leaving the task's pid pointer for that
   * type set to NULL.
   */
  void detach_pid(struct task_struct *task, enum pid_type type)
  {
  	__change_pid(task, type, NULL);
  }
  
  /*
   * Move @task from its current @type pid to @pid: first swap the pointer and
   * unlink from the old pid, then link the task into the new pid's list.
   */
  void change_pid(struct task_struct *task, enum pid_type type,
  		struct pid *pid)
  {
  	/* Drop the old association and install @pid ... */
  	__change_pid(task, type, pid);
  	/* ... then hook the task onto the new pid's task list. */
  	attach_pid(task, type);
  }
6b03d1304   Eric W. Biederman   proc: Ensure we s...
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
  /*
   * Swap the thread ids of @left and @right: the PIDTYPE_PID task lists of
   * their two struct pids, the tasks' thread_pid pointers and the cached
   * numeric ->pid values are all exchanged.
   * NOTE(review): callers presumably hold tasklist_lock for writing - confirm.
   */
  void exchange_tids(struct task_struct *left, struct task_struct *right)
  {
  	struct pid *pid1 = left->thread_pid;
  	struct pid *pid2 = right->thread_pid;
  	struct hlist_head *head1 = &pid1->tasks[PIDTYPE_PID];
  	struct hlist_head *head2 = &pid2->tasks[PIDTYPE_PID];
  
  	/* Swap the single entry tid lists */
  	hlists_swap_heads_rcu(head1, head2);
  
  	/* Swap the per task_struct pid */
  	rcu_assign_pointer(left->thread_pid, pid2);
  	rcu_assign_pointer(right->thread_pid, pid1);
  
  	/* Swap the cached value */
  	WRITE_ONCE(left->pid, pid_nr(pid2));
  	WRITE_ONCE(right->pid, pid_nr(pid1));
  }
c18258c6f   Eric W. Biederman   [PATCH] pid: Impl...
363
  /* transfer_pid is an optimization of attach_pid(new), detach_pid(old) */
7ad5b3a50   Harvey Harrison   kernel: remove fa...
364
  void transfer_pid(struct task_struct *old, struct task_struct *new,
c18258c6f   Eric W. Biederman   [PATCH] pid: Impl...
365
366
  			   enum pid_type type)
  {
2c4704756   Eric W. Biederman   pids: Move the pg...
367
368
369
  	if (type == PIDTYPE_PID)
  		new->thread_pid = old->thread_pid;
  	hlist_replace_rcu(&old->pid_links[type], &new->pid_links[type]);
c18258c6f   Eric W. Biederman   [PATCH] pid: Impl...
370
  }
7ad5b3a50   Harvey Harrison   kernel: remove fa...
371
  struct task_struct *pid_task(struct pid *pid, enum pid_type type)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
372
  {
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
373
374
375
  	struct task_struct *result = NULL;
  	if (pid) {
  		struct hlist_node *first;
67bdbffd6   Arnd Bergmann   rculist: avoid __...
376
  		first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]),
db1466b3e   Paul E. McKenney   rcu: Use wrapper ...
377
  					      lockdep_tasklist_lock_is_held());
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
378
  		if (first)
2c4704756   Eric W. Biederman   pids: Move the pg...
379
  			result = hlist_entry(first, struct task_struct, pid_links[(type)]);
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
380
381
382
  	}
  	return result;
  }
eccba0689   Pavel Emelyanov   gfs2: make gfs2_g...
383
  EXPORT_SYMBOL(pid_task);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
384

92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
385
  /*
9728e5d6e   Tetsuo Handa   kernel/pid.c: upd...
386
   * Must be called under rcu_read_lock().
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
387
   */
17f98dcf6   Christoph Hellwig   pids: clean up fi...
388
  struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
  {
  	struct pid *pid;

  	/* Complain (under lockdep) if the caller is outside an RCU read section. */
  	RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
  			 "find_task_by_pid_ns() needs rcu_read_lock() protection");

  	pid = find_pid_ns(nr, ns);
  	return pid_task(pid, PIDTYPE_PID);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
394

228ebcbe6   Pavel Emelyanov   Uninline find_tas...
395
396
  /* Find the task with number @vnr in the current task's active pid namespace. */
  struct task_struct *find_task_by_vpid(pid_t vnr)
  {
  	struct pid_namespace *active = task_active_pid_ns(current);

  	return find_task_by_pid_ns(vnr, active);
  }
  EXPORT_SYMBOL_GPL(find_task_by_vpid);
228ebcbe6   Pavel Emelyanov   Uninline find_tas...
400

2ee082608   Mike Rapoport   pids: introduce f...
401
402
403
404
405
406
407
408
409
410
411
412
  /*
   * Like find_task_by_vpid(), but takes the RCU read lock itself and returns
   * the task with a reference held, or NULL if no such task exists.
   */
  struct task_struct *find_get_task_by_vpid(pid_t nr)
  {
  	struct task_struct *found;

  	rcu_read_lock();
  	found = find_task_by_vpid(nr);
  	if (found != NULL)
  		get_task_struct(found);
  	rcu_read_unlock();

  	return found;
  }
1a657f78d   Oleg Nesterov   [PATCH] introduce...
413
414
415
416
  /*
   * Return @task's pid of the given @type after passing it through get_pid(),
   * reading the pointer inside an RCU read-side critical section.
   */
  struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
  {
  	struct pid **slot;
  	struct pid *pid;

  	rcu_read_lock();
  	slot = task_pid_ptr(task, type);
  	pid = get_pid(rcu_dereference(*slot));
  	rcu_read_unlock();

  	return pid;
  }
  EXPORT_SYMBOL_GPL(get_task_pid);
1a657f78d   Oleg Nesterov   [PATCH] introduce...
422

7ad5b3a50   Harvey Harrison   kernel: remove fa...
423
  struct task_struct *get_pid_task(struct pid *pid, enum pid_type type)
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
424
425
426
427
428
429
430
431
  {
  	struct task_struct *result;
  	rcu_read_lock();
  	result = pid_task(pid, type);
  	if (result)
  		get_task_struct(result);
  	rcu_read_unlock();
  	return result;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
432
  }
77c100c83   Rik van Riel   export pid symbol...
433
  EXPORT_SYMBOL_GPL(get_pid_task);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
434

92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
435
  /*
   * Look @nr up in the caller's active pid namespace and pass the result
   * through get_pid(); the lookup runs under the RCU read lock.
   */
  struct pid *find_get_pid(pid_t nr)
  {
  	struct pid *found;

  	rcu_read_lock();
  	found = find_vpid(nr);
  	found = get_pid(found);
  	rcu_read_unlock();

  	return found;
  }
  EXPORT_SYMBOL_GPL(find_get_pid);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
445

7af572947   Pavel Emelyanov   pid namespaces: h...
446
447
448
449
450
451
452
453
454
455
456
457
  /*
   * pid_nr_ns() - number of @pid as seen from namespace @ns.
   *
   * Returns 0 when @pid is NULL, when @ns is nested deeper than @pid, or when
   * @pid's entry at @ns's level belongs to a different namespace.
   */
  pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
  {
  	struct upid *upid;

  	if (!pid || ns->level > pid->level)
  		return 0;

  	upid = &pid->numbers[ns->level];
  	if (upid->ns != ns)
  		return 0;

  	return upid->nr;
  }
  EXPORT_SYMBOL_GPL(pid_nr_ns);
7af572947   Pavel Emelyanov   pid namespaces: h...
459

44c4e1b25   Eric W. Biederman   pid: Extend/Fix p...
460
461
  pid_t pid_vnr(struct pid *pid)
  {
17cf22c33   Eric W. Biederman   pidns: Use task_a...
462
  	return pid_nr_ns(pid, task_active_pid_ns(current));
44c4e1b25   Eric W. Biederman   pid: Extend/Fix p...
463
464
  }
  EXPORT_SYMBOL_GPL(pid_vnr);
52ee2dfdd   Oleg Nesterov   pids: refactor vn...
465
466
  pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
  			struct pid_namespace *ns)
2f2a3a46f   Pavel Emelyanov   Uninline the task...
467
  {
52ee2dfdd   Oleg Nesterov   pids: refactor vn...
468
469
470
471
  	pid_t nr = 0;
  
  	rcu_read_lock();
  	if (!ns)
17cf22c33   Eric W. Biederman   pidns: Use task_a...
472
  		ns = task_active_pid_ns(current);
1dd694a1b   Oleg Nesterov   remove the no lon...
473
  	nr = pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns);
52ee2dfdd   Oleg Nesterov   pids: refactor vn...
474
475
476
  	rcu_read_unlock();
  
  	return nr;
2f2a3a46f   Pavel Emelyanov   Uninline the task...
477
  }
52ee2dfdd   Oleg Nesterov   pids: refactor vn...
478
  EXPORT_SYMBOL(__task_pid_nr_ns);
2f2a3a46f   Pavel Emelyanov   Uninline the task...
479

61bce0f13   Eric W. Biederman   pid: generalize t...
480
481
482
483
484
  /* Namespace of @tsk's pid, as reported by ns_of_pid(). */
  struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
  {
  	struct pid *pid = task_pid(tsk);

  	return ns_of_pid(pid);
  }
  EXPORT_SYMBOL_GPL(task_active_pid_ns);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
485
  /*
025dfdafe   Frederik Schwarzer   trivial: fix then...
486
   * Used by proc to find the first pid that is greater than or equal to nr.
0804ef4b0   Eric W. Biederman   [PATCH] proc: rea...
487
   *
e49859e71   Pavel Emelyanov   pidns: remove now...
488
   * If there is a pid at nr this function is exactly the same as find_pid_ns.
0804ef4b0   Eric W. Biederman   [PATCH] proc: rea...
489
   */
198fe21b0   Pavel Emelyanov   pid namespaces: h...
490
  struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
0804ef4b0   Eric W. Biederman   [PATCH] proc: rea...
491
  {
95846ecf9   Gargi Sharma   pid: replace pid ...
492
  	return idr_get_next(&ns->idr, &nr);
0804ef4b0   Eric W. Biederman   [PATCH] proc: rea...
493
  }
1aa92cd31   Minchan Kim   pid: move pidfd_g...
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
  struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags)
  {
  	struct fd f;
  	struct pid *pid;
  
  	f = fdget(fd);
  	if (!f.file)
  		return ERR_PTR(-EBADF);
  
  	pid = pidfd_pid(f.file);
  	if (!IS_ERR(pid)) {
  		get_pid(pid);
  		*flags = f.file->f_flags;
  	}
  
  	fdput(f);
  	return pid;
  }
32fcb426e   Christian Brauner   pid: add pidfd_op...
512
513
514
  /**
   * pidfd_create() - Create a new pid file descriptor.
   *
6da73d152   Christian Brauner   pidfd: support PI...
515
516
   * @pid:   struct pid that the pidfd will reference
   * @flags: flags to pass
32fcb426e   Christian Brauner   pid: add pidfd_op...
517
518
519
520
521
522
523
524
525
   *
   * This creates a new pid file descriptor with the O_CLOEXEC flag set.
   *
   * Note, that this function can only be called after the fd table has
   * been unshared to avoid leaking the pidfd to the new process.
   *
   * Return: On success, a cloexec pidfd is returned.
   *         On error, a negative errno number will be returned.
   */
6da73d152   Christian Brauner   pidfd: support PI...
526
  static int pidfd_create(struct pid *pid, unsigned int flags)
32fcb426e   Christian Brauner   pid: add pidfd_op...
527
528
529
530
  {
  	int fd;
  
  	fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid),
6da73d152   Christian Brauner   pidfd: support PI...
531
  			      flags | O_RDWR | O_CLOEXEC);
32fcb426e   Christian Brauner   pid: add pidfd_op...
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
  	if (fd < 0)
  		put_pid(pid);
  
  	return fd;
  }
  
  /**
   * pidfd_open() - Open new pid file descriptor.
   *
   * @pid:   pid for which to retrieve a pidfd
   * @flags: flags to pass
   *
   * This creates a new pid file descriptor with the O_CLOEXEC flag set for
   * the process identified by @pid. Currently, the process identified by
   * @pid must be a thread-group leader. This restriction currently exists
   * for all aspects of pidfds including pidfd creation (CLONE_PIDFD cannot
   * be used with CLONE_THREAD) and pidfd polling (only supports thread group
   * leaders).
   *
   * Return: On success, a cloexec pidfd is returned.
   *         On error, a negative errno number will be returned.
   */
  SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags)
  {
  	struct pid *p;
  	int fd = -EINVAL;

  	/* Only PIDFD_NONBLOCK is accepted, and the pid must be positive. */
  	if ((flags & ~PIDFD_NONBLOCK) || pid <= 0)
  		return -EINVAL;

  	p = find_get_pid(pid);
  	if (!p)
  		return -ESRCH;

  	/* Without a PIDTYPE_TGID task on the pid, fd stays -EINVAL. */
  	if (pid_has_task(p, PIDTYPE_TGID))
  		fd = pidfd_create(p, flags);

  	put_pid(p);
  	return fd;
  }
95846ecf9   Gargi Sharma   pid: replace pid ...
575
  /*
   * pid_idr_init() - boot-time setup of the pid allocator.
   *
   * Scales pid_max and pid_max_min with the number of possible CPUs, logs the
   * resulting values, initialises the IDR of init_pid_ns and creates the slab
   * cache struct pid objects of init_pid_ns are allocated from.
   */
  void __init pid_idr_init(void)
  {
  	/*
  	 * Verify no one has done anything silly: PIDNS_ADDING is kept as a
  	 * flag bit in pid_allocated next to the count of allocated pids, so
  	 * it must lie above every possible pid number.
  	 */
  	BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_ADDING);

  	/* bump default and minimum pid_max based on number of cpus */
  	pid_max = min(pid_max_max, max_t(int, pid_max,
  				PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
  	pid_max_min = max_t(int, pid_max_min,
  				PIDS_PER_CPU_MIN * num_possible_cpus());
  	/* The format string must stay on one line and end in a "\n" escape. */
  	pr_info("pid_max: default: %u minimum: %u\n", pid_max, pid_max_min);

  	idr_init(&init_pid_ns.idr);

  	/* SLAB_PANIC: failing to create this cache at boot is fatal. */
  	init_pid_ns.pid_cachep = KMEM_CACHE(pid,
  			SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT);
  }
8649c322f   Sargun Dhillon   pid: Implement pi...
592
593
594
595
596
  
  static struct file *__pidfd_fget(struct task_struct *task, int fd)
  {
  	struct file *file;
  	int ret;
ab7709b55   Eric W. Biederman   exec: Transform e...
597
  	ret = down_read_killable(&task->signal->exec_update_lock);
8649c322f   Sargun Dhillon   pid: Implement pi...
598
599
600
601
602
603
604
  	if (ret)
  		return ERR_PTR(ret);
  
  	if (ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS))
  		file = fget_task(task, fd);
  	else
  		file = ERR_PTR(-EPERM);
ab7709b55   Eric W. Biederman   exec: Transform e...
605
  	up_read(&task->signal->exec_update_lock);
8649c322f   Sargun Dhillon   pid: Implement pi...
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
  
  	return file ?: ERR_PTR(-EBADF);
  }
  
  static int pidfd_getfd(struct pid *pid, int fd)
  {
  	struct task_struct *task;
  	struct file *file;
  	int ret;
  
  	task = get_pid_task(pid, PIDTYPE_PID);
  	if (!task)
  		return -ESRCH;
  
  	file = __pidfd_fget(task, fd);
  	put_task_struct(task);
  	if (IS_ERR(file))
  		return PTR_ERR(file);
910d2f16a   Kees Cook   pidfd: Replace op...
624
625
  	ret = receive_fd(file, O_CLOEXEC);
  	fput(file);
8649c322f   Sargun Dhillon   pid: Implement pi...
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
  
  	return ret;
  }
  
  /**
   * sys_pidfd_getfd() - Get a file descriptor from another process
   *
   * @pidfd:	the pidfd file descriptor of the process
   * @fd:		the file descriptor number to get
   * @flags:	flags on how to get the fd (reserved)
   *
   * This syscall gets a copy of a file descriptor from another process
   * based on the pidfd, and file descriptor number. It requires that
   * the calling process has the ability to ptrace the process represented
   * by the pidfd. The process which is having its file descriptor copied
   * is otherwise unaffected.
   *
   * Return: On success, a cloexec file descriptor is returned.
   *         On error, a negative errno number will be returned.
   */
  SYSCALL_DEFINE3(pidfd_getfd, int, pidfd, int, fd,
  		unsigned int, flags)
  {
  	struct pid *pid;
  	struct fd f;
  	int ret;

  	/* No flags are defined yet, so any set bit is rejected. */
  	if (flags != 0)
  		return -EINVAL;

  	f = fdget(pidfd);
  	if (!f.file)
  		return -EBADF;

  	pid = pidfd_pid(f.file);
  	ret = IS_ERR(pid) ? PTR_ERR(pid) : pidfd_getfd(pid, fd);

  	fdput(f);
  	return ret;
  }