Blame view

kernel/pid.c 13.8 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
  /*
   * Generic pidhash and scalable, time-bounded PID allocator
   *
   * (C) 2002-2003 William Irwin, IBM
   * (C) 2004 William Irwin, Oracle
   * (C) 2002-2004 Ingo Molnar, Red Hat
   *
   * pid-structures are backing objects for tasks sharing a given ID to chain
   * against. There is very little to them aside from hashing them and
   * parking tasks using given ID's on a list.
   *
   * The hash is modified with pidmap_lock held (see alloc_pid() and
   * free_pid()) and is walked with the RCU hlist primitives, so lookups
   * are expected to run inside an RCU read-side critical section.
   *
   * We have a list of bitmap pages, which bitmaps represent the PID space.
   * Allocating and freeing PIDs is completely lockless. The worst-case
   * allocation scenario when all but one out of 1 million PIDs possible are
   * allocated already: the scanning of 32 list entries and at most PAGE_SIZE
   * bytes. The typical fastpath is a single successful setbit. Freeing is O(1).
30e49c263   Pavel Emelyanov   pid namespaces: a...
21
22
23
24
25
26
   *
   * Pid namespaces:
   *    (C) 2007 Pavel Emelyanov <xemul@openvz.org>, OpenVZ, SWsoft Inc.
   *    (C) 2007 Sukadev Bhattiprolu <sukadev@us.ibm.com>, IBM
   *     Many thanks to Oleg Nesterov for comments and help
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
27
28
29
30
31
32
   */
  
  #include <linux/mm.h>
  #include <linux/module.h>
  #include <linux/slab.h>
  #include <linux/init.h>
82524746c   Franck Bui-Huu   rcu: split list.h...
33
  #include <linux/rculist.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
34
35
  #include <linux/bootmem.h>
  #include <linux/hash.h>
61a58c6c2   Sukadev Bhattiprolu   [PATCH] rename st...
36
  #include <linux/pid_namespace.h>
820e45db2   Sukadev Bhattiprolu   statically initia...
37
  #include <linux/init_task.h>
3eb07c8c8   Sukadev Bhattiprolu   pid namespaces: d...
38
  #include <linux/syscalls.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
39

8ef047aaa   Pavel Emelyanov   pid namespaces: m...
40
41
  /*
   * Map a (pid number, namespace pointer) pair to a bucket index in
   * pid_hash.  Both values are converted to unsigned long and summed, so
   * the namespace takes part in the hash.  Each argument is parenthesized
   * so the cast applies to the whole argument expression rather than to
   * its first operand only.
   */
  #define pid_hashfn(nr, ns)	\
  	hash_long((unsigned long)(nr) + (unsigned long)(ns), pidhash_shift)
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
42
  static struct hlist_head *pid_hash;
2c85f51d2   Jan Beulich   mm: also use allo...
43
  static unsigned int pidhash_shift = 4;
820e45db2   Sukadev Bhattiprolu   statically initia...
44
  struct pid init_struct_pid = INIT_STRUCT_PID;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
45
46
  
  int pid_max = PID_MAX_DEFAULT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
47
48
49
50
51
  
  /*
   * When the allocator wraps around it restarts its search at this pid
   * number instead of at 0 (see alloc_pidmap()).
   */
  #define RESERVED_PIDS		300
  
  /*
   * Bounds for pid_max.  pid_max_min starts just above RESERVED_PIDS and
   * may be raised in pidmap_init(); pid_max_max is the compile-time limit.
   */
  int pid_max_min = RESERVED_PIDS + 1;
  int pid_max_max = PID_MAX_LIMIT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
52
53
  /* Number of pid bits one bitmap page holds (8 bits per byte of PAGE_SIZE). */
  #define BITS_PER_PAGE		(PAGE_SIZE*8)
  /* Mask that extracts a pid's bit offset inside its bitmap page. */
  #define BITS_PER_PAGE_MASK	(BITS_PER_PAGE-1)
3fbc96486   Sukadev Bhattiprolu   [PATCH] Define st...
54

61a58c6c2   Sukadev Bhattiprolu   [PATCH] rename st...
55
56
  static inline int mk_pid(struct pid_namespace *pid_ns,
  		struct pidmap *map, int off)
3fbc96486   Sukadev Bhattiprolu   [PATCH] Define st...
57
  {
61a58c6c2   Sukadev Bhattiprolu   [PATCH] rename st...
58
  	return (map - pid_ns->pidmap)*BITS_PER_PAGE + off;
3fbc96486   Sukadev Bhattiprolu   [PATCH] Define st...
59
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
60
61
62
63
64
65
66
67
68
  /*
   * Offset of the next clear bit in @map's page at or after @off, as
   * reported by find_next_zero_bit() over BITS_PER_PAGE bits.
   */
  #define find_next_offset(map, off)					\
  		find_next_zero_bit((map)->page, BITS_PER_PAGE, off)
  
  /*
   * PID-map pages start out as NULL, they get allocated upon
   * first use and are never deallocated. This way a low pid_max
   * value does not cause lots of bitmaps to be allocated, but
   * the scheme scales to up to 4 million PIDs, runtime.
   */
61a58c6c2   Sukadev Bhattiprolu   [PATCH] rename st...
69
  /* Statically initialized pid namespace at level 0 (the initial one). */
  struct pid_namespace init_pid_ns = {
9a575a92d   Cedric Le Goater   [PATCH] to nsproxy
70
71
72
  	/* NOTE(review): the reason for an initial count of 2 is not visible here */
  	.kref = {
  		.refcount       = ATOMIC_INIT(2),
  	},
3fbc96486   Sukadev Bhattiprolu   [PATCH] Define st...
73
74
75
  	/*
  	 * Every map entry starts with a counter of BITS_PER_PAGE and a NULL
  	 * pointer - presumably nr_free and page, in struct pidmap's field
  	 * order (confirm against the struct definition).
  	 */
  	.pidmap = {
  		[ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
  	},
84d737866   Sukadev Bhattiprolu   [PATCH] add child...
76
  	.last_pid = 0,
faacbfd3a   Pavel Emelyanov   pid namespaces: a...
77
78
  	.level = 0,
  	.child_reaper = &init_task,
3fbc96486   Sukadev Bhattiprolu   [PATCH] Define st...
79
  };
198fe21b0   Pavel Emelyanov   pid namespaces: h...
80
  EXPORT_SYMBOL_GPL(init_pid_ns);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
81

b461cc038   Pavel Emelyanov   pid namespaces: m...
82
  int is_container_init(struct task_struct *tsk)
b460cbc58   Serge E. Hallyn   pid namespaces: d...
83
  {
b461cc038   Pavel Emelyanov   pid namespaces: m...
84
85
86
87
88
89
90
91
92
93
  	int ret = 0;
  	struct pid *pid;
  
  	rcu_read_lock();
  	pid = task_pid(tsk);
  	if (pid != NULL && pid->numbers[pid->level].nr == 1)
  		ret = 1;
  	rcu_read_unlock();
  
  	return ret;
b460cbc58   Serge E. Hallyn   pid namespaces: d...
94
  }
b461cc038   Pavel Emelyanov   pid namespaces: m...
95
  EXPORT_SYMBOL(is_container_init);
b460cbc58   Serge E. Hallyn   pid namespaces: d...
96

92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
97
98
99
100
101
102
103
104
105
106
107
108
109
  /*
   * Note: disable interrupts while the pidmap_lock is held as an
   * interrupt might come in and do read_lock(&tasklist_lock).
   *
   * If we don't disable interrupts there is a nasty deadlock between
   * detach_pid()->free_pid() and another cpu that does
   * spin_lock(&pidmap_lock) followed by an interrupt routine that does
   * read_lock(&tasklist_lock);
   *
   * After we clean up the tasklist_lock and know there are no
   * irq handlers that take it we can leave the interrupts enabled.
   * For now it is easier to be safe than to prove it can't happen.
   */
3fbc96486   Sukadev Bhattiprolu   [PATCH] Define st...
110

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
111
  static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
b7127aa45   Oleg Nesterov   free_pidmap: turn...
112
  static void free_pidmap(struct upid *upid)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
113
  {
b7127aa45   Oleg Nesterov   free_pidmap: turn...
114
115
116
  	int nr = upid->nr;
  	struct pidmap *map = upid->ns->pidmap + nr / BITS_PER_PAGE;
  	int offset = nr & BITS_PER_PAGE_MASK;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
117
118
119
120
  
  	clear_bit(offset, map->page);
  	atomic_inc(&map->nr_free);
  }
5fdee8c4a   Salman   pids: fix a race ...
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
  /*
   * Walking the pid space upward from 'base' (wrapping around), is 'a'
   * reached strictly before 'b'?  Returns 1 if so, 0 otherwise.
   */
  static int pid_before(int base, int a, int b)
  {
  	/*
  	 * Measure how far each pid lies ahead of 'base' as an unsigned
  	 * distance.  A pid numerically below 'base' becomes a large value,
  	 * i.e. it is only reached after wrapping, which is equivalent to
  	 *
  	 * (a - base + MAXUINT) % MAXUINT < (b - base + MAXUINT) % MAXUINT
  	 */
  	const unsigned int steps_to_a = (unsigned)(a - base);
  	const unsigned int steps_to_b = (unsigned)(b - base);

  	return steps_to_a < steps_to_b;
  }
  
  /*
   * We might be racing with someone else trying to set pid_ns->last_pid.
   * We want the winner to have the "later" value, because if the
   * "earlier" value prevails, then a pid may get reused immediately.
   *
   * Since pids rollover, it is not sufficient to just pick the bigger
   * value.  We have to consider where we started counting from.
   *
   * 'base' is the value of pid_ns->last_pid that we observed when
   * we started looking for a pid.
   *
   * 'pid' is the pid that we eventually found.
   */
  static void set_last_pid(struct pid_namespace *pid_ns, int base, int pid)
  {
  	int prev;
  	/* First attempt assumes last_pid still holds the value we started from. */
  	int last_write = base;
  	do {
  		prev = last_write;
  		/* Try to swap 'prev' for 'pid'; keep the value cmpxchg reports back. */
  		last_write = cmpxchg(&pid_ns->last_pid, prev, pid);
  		/*
  		 * Stop once the reported value equals 'prev' (the swap went
  		 * through), or once the value now stored is not before 'pid'
  		 * when counting from 'base' (a later value already prevails).
  		 */
  	} while ((prev != last_write) && (pid_before(base, last_write, pid)));
  }
61a58c6c2   Sukadev Bhattiprolu   [PATCH] rename st...
157
  /*
   * Find and claim a free pid number in @pid_ns.
   *
   * The search starts at last_pid + 1 (or at RESERVED_PIDS once that
   * reaches pid_max), walks the bitmap pages, allocating a page on first
   * use, and claims the first clear bit it can set.
   *
   * Returns the claimed pid number, or -1 when a needed bitmap page could
   * not be allocated or the scan ended without finding a free bit.
   */
  static int alloc_pidmap(struct pid_namespace *pid_ns)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
158
  {
61a58c6c2   Sukadev Bhattiprolu   [PATCH] rename st...
159
  	int i, offset, max_scan, pid, last = pid_ns->last_pid;
6a1f3b845   Sukadev Bhattiprolu   [PATCH] pids: cod...
160
  	struct pidmap *map;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
161
162
163
164
165
  
  	/* First candidate: the number after the last one handed out. */
  	pid = last + 1;
  	if (pid >= pid_max)
  		pid = RESERVED_PIDS;
  	offset = pid & BITS_PER_PAGE_MASK;
61a58c6c2   Sukadev Bhattiprolu   [PATCH] rename st...
166
  	map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
c52b0b91b   Oleg Nesterov   pids: alloc_pidma...
167
168
169
170
171
172
  	/*
  	 * If last_pid points into the middle of the map->page we
  	 * want to scan this bitmap block twice, the second time
  	 * we start with offset == 0 (or RESERVED_PIDS).
  	 */
  	max_scan = DIV_ROUND_UP(pid_max, BITS_PER_PAGE) - !offset;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
173
174
  	for (i = 0; i <= max_scan; ++i) {
  		if (unlikely(!map->page)) {
3fbc96486   Sukadev Bhattiprolu   [PATCH] Define st...
175
  			void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
176
177
178
179
  			/*
  			 * Free the page if someone raced with us
  			 * installing it:
  			 */
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
180
  			spin_lock_irq(&pidmap_lock);
7be6d991b   André Goddard Rosa   pid: tighten pidm...
181
  			if (!map->page) {
3fbc96486   Sukadev Bhattiprolu   [PATCH] Define st...
182
  				map->page = page;
7be6d991b   André Goddard Rosa   pid: tighten pidm...
183
184
  				/* ownership moved to the map; nothing left to free */
  				page = NULL;
  			}
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
185
  			spin_unlock_irq(&pidmap_lock);
7be6d991b   André Goddard Rosa   pid: tighten pidm...
186
  			kfree(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
187
188
189
190
191
192
193
  			/* Still no page: the allocation failed, give up (-1). */
  			if (unlikely(!map->page))
  				break;
  		}
  		if (likely(atomic_read(&map->nr_free))) {
  			do {
  				/* Claim the bit; a zero old value means this pid is ours. */
  				if (!test_and_set_bit(offset, map->page)) {
  					atomic_dec(&map->nr_free);
5fdee8c4a   Salman   pids: fix a race ...
194
  					set_last_pid(pid_ns, last, pid);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
195
196
197
  					return pid;
  				}
  				/* Bit was taken: jump to the next clear bit in this page. */
  				offset = find_next_offset(map, offset);
61a58c6c2   Sukadev Bhattiprolu   [PATCH] rename st...
198
  				pid = mk_pid(pid_ns, map, offset);
c52b0b91b   Oleg Nesterov   pids: alloc_pidma...
199
  			} while (offset < BITS_PER_PAGE && pid < pid_max);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
200
  		}
61a58c6c2   Sukadev Bhattiprolu   [PATCH] rename st...
201
  		/* Advance to the next page, or wrap to the first one. */
  		if (map < &pid_ns->pidmap[(pid_max-1)/BITS_PER_PAGE]) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
202
203
204
  			++map;
  			offset = 0;
  		} else {
61a58c6c2   Sukadev Bhattiprolu   [PATCH] rename st...
205
  			map = &pid_ns->pidmap[0];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
206
207
208
209
  			/* After wrapping, restart above the reserved range. */
  			offset = RESERVED_PIDS;
  			/* NOTE(review): looks like a "back where we started" stop - confirm */
  			if (unlikely(last == offset))
  				break;
  		}
61a58c6c2   Sukadev Bhattiprolu   [PATCH] rename st...
210
  		pid = mk_pid(pid_ns, map, offset);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
211
212
213
  	}
  	return -1;
  }
c78193e9c   Linus Torvalds   next_pidmap: fix ...
214
  int next_pidmap(struct pid_namespace *pid_ns, unsigned int last)
0804ef4b0   Eric W. Biederman   [PATCH] proc: rea...
215
216
  {
  	int offset;
f40f50d3b   Eric W. Biederman   [PATCH] Use struc...
217
  	struct pidmap *map, *end;
0804ef4b0   Eric W. Biederman   [PATCH] proc: rea...
218

c78193e9c   Linus Torvalds   next_pidmap: fix ...
219
220
  	if (last >= PID_MAX_LIMIT)
  		return -1;
0804ef4b0   Eric W. Biederman   [PATCH] proc: rea...
221
  	offset = (last + 1) & BITS_PER_PAGE_MASK;
61a58c6c2   Sukadev Bhattiprolu   [PATCH] rename st...
222
223
  	map = &pid_ns->pidmap[(last + 1)/BITS_PER_PAGE];
  	end = &pid_ns->pidmap[PIDMAP_ENTRIES];
f40f50d3b   Eric W. Biederman   [PATCH] Use struc...
224
  	for (; map < end; map++, offset = 0) {
0804ef4b0   Eric W. Biederman   [PATCH] proc: rea...
225
226
227
228
  		if (unlikely(!map->page))
  			continue;
  		offset = find_next_bit((map)->page, BITS_PER_PAGE, offset);
  		if (offset < BITS_PER_PAGE)
61a58c6c2   Sukadev Bhattiprolu   [PATCH] rename st...
229
  			return mk_pid(pid_ns, map, offset);
0804ef4b0   Eric W. Biederman   [PATCH] proc: rea...
230
231
232
  	}
  	return -1;
  }
7ad5b3a50   Harvey Harrison   kernel: remove fa...
233
  void put_pid(struct pid *pid)
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
234
  {
baf8f0f82   Pavel Emelianov   pid namespaces: d...
235
  	struct pid_namespace *ns;
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
236
237
  	if (!pid)
  		return;
baf8f0f82   Pavel Emelianov   pid namespaces: d...
238

8ef047aaa   Pavel Emelyanov   pid namespaces: m...
239
  	ns = pid->numbers[pid->level].ns;
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
240
  	if ((atomic_read(&pid->count) == 1) ||
8ef047aaa   Pavel Emelyanov   pid namespaces: m...
241
  	     atomic_dec_and_test(&pid->count)) {
baf8f0f82   Pavel Emelianov   pid namespaces: d...
242
  		kmem_cache_free(ns->pid_cachep, pid);
b461cc038   Pavel Emelyanov   pid namespaces: m...
243
  		put_pid_ns(ns);
8ef047aaa   Pavel Emelyanov   pid namespaces: m...
244
  	}
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
245
  }
bbf73147e   Eric W. Biederman   [PATCH] pid: expo...
246
  EXPORT_SYMBOL_GPL(put_pid);
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
247
248
249
250
251
252
  
  /*
   * RCU callback (queued by free_pid()): recover the struct pid that
   * embeds @rhp and drop a reference on it.
   */
  static void delayed_put_pid(struct rcu_head *rhp)
  {
  	put_pid(container_of(rhp, struct pid, rcu));
  }
7ad5b3a50   Harvey Harrison   kernel: remove fa...
253
  void free_pid(struct pid *pid)
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
254
255
  {
  	/* We can be called with write_lock_irq(&tasklist_lock) held */
8ef047aaa   Pavel Emelyanov   pid namespaces: m...
256
  	int i;
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
257
258
259
  	unsigned long flags;
  
  	spin_lock_irqsave(&pidmap_lock, flags);
198fe21b0   Pavel Emelyanov   pid namespaces: h...
260
261
  	for (i = 0; i <= pid->level; i++)
  		hlist_del_rcu(&pid->numbers[i].pid_chain);
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
262
  	spin_unlock_irqrestore(&pidmap_lock, flags);
8ef047aaa   Pavel Emelyanov   pid namespaces: m...
263
  	for (i = 0; i <= pid->level; i++)
b7127aa45   Oleg Nesterov   free_pidmap: turn...
264
  		free_pidmap(pid->numbers + i);
8ef047aaa   Pavel Emelyanov   pid namespaces: m...
265

92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
266
267
  	call_rcu(&pid->rcu, delayed_put_pid);
  }
8ef047aaa   Pavel Emelyanov   pid namespaces: m...
268
  struct pid *alloc_pid(struct pid_namespace *ns)
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
269
270
271
  {
  	struct pid *pid;
  	enum pid_type type;
8ef047aaa   Pavel Emelyanov   pid namespaces: m...
272
273
  	int i, nr;
  	struct pid_namespace *tmp;
198fe21b0   Pavel Emelyanov   pid namespaces: h...
274
  	struct upid *upid;
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
275

baf8f0f82   Pavel Emelianov   pid namespaces: d...
276
  	pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL);
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
277
278
  	if (!pid)
  		goto out;
8ef047aaa   Pavel Emelyanov   pid namespaces: m...
279
280
281
282
283
  	tmp = ns;
  	for (i = ns->level; i >= 0; i--) {
  		nr = alloc_pidmap(tmp);
  		if (nr < 0)
  			goto out_free;
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
284

8ef047aaa   Pavel Emelyanov   pid namespaces: m...
285
286
287
288
  		pid->numbers[i].nr = nr;
  		pid->numbers[i].ns = tmp;
  		tmp = tmp->parent;
  	}
b461cc038   Pavel Emelyanov   pid namespaces: m...
289
  	get_pid_ns(ns);
8ef047aaa   Pavel Emelyanov   pid namespaces: m...
290
  	pid->level = ns->level;
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
291
  	atomic_set(&pid->count, 1);
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
292
293
  	for (type = 0; type < PIDTYPE_MAX; ++type)
  		INIT_HLIST_HEAD(&pid->tasks[type]);
417e31524   André Goddard Rosa   pid: reduce code ...
294
  	upid = pid->numbers + ns->level;
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
295
  	spin_lock_irq(&pidmap_lock);
417e31524   André Goddard Rosa   pid: reduce code ...
296
  	for ( ; upid >= pid->numbers; --upid)
198fe21b0   Pavel Emelyanov   pid namespaces: h...
297
298
  		hlist_add_head_rcu(&upid->pid_chain,
  				&pid_hash[pid_hashfn(upid->nr, upid->ns)]);
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
299
300
301
302
303
304
  	spin_unlock_irq(&pidmap_lock);
  
  out:
  	return pid;
  
  out_free:
b7127aa45   Oleg Nesterov   free_pidmap: turn...
305
306
  	while (++i <= ns->level)
  		free_pidmap(pid->numbers + i);
8ef047aaa   Pavel Emelyanov   pid namespaces: m...
307

baf8f0f82   Pavel Emelianov   pid namespaces: d...
308
  	kmem_cache_free(ns->pid_cachep, pid);
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
309
310
311
  	pid = NULL;
  	goto out;
  }
7ad5b3a50   Harvey Harrison   kernel: remove fa...
312
  struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
313
314
  {
  	struct hlist_node *elem;
198fe21b0   Pavel Emelyanov   pid namespaces: h...
315
316
317
318
319
320
321
  	struct upid *pnr;
  
  	hlist_for_each_entry_rcu(pnr, elem,
  			&pid_hash[pid_hashfn(nr, ns)], pid_chain)
  		if (pnr->nr == nr && pnr->ns == ns)
  			return container_of(pnr, struct pid,
  					numbers[ns->level]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
322

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
323
324
  	return NULL;
  }
198fe21b0   Pavel Emelyanov   pid namespaces: h...
325
  EXPORT_SYMBOL_GPL(find_pid_ns);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
326

8990571eb   Pavel Emelyanov   Uninline find_pid...
327
328
329
330
331
  /*
   * find_pid_ns() for number @nr in the pid namespace referenced by the
   * current task's nsproxy.
   */
  struct pid *find_vpid(int nr)
  {
  	struct pid_namespace *ns = current->nsproxy->pid_ns;

  	return find_pid_ns(nr, ns);
  }
  EXPORT_SYMBOL_GPL(find_vpid);
e713d0dab   Sukadev Bhattiprolu   attach_pid() with...
332
333
334
  /*
   * attach_pid() must be called with the tasklist_lock write-held.
   */
24336eaee   Oleg Nesterov   pids: introduce c...
335
  void attach_pid(struct task_struct *task, enum pid_type type,
e713d0dab   Sukadev Bhattiprolu   attach_pid() with...
336
  		struct pid *pid)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
337
  {
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
338
  	struct pid_link *link;
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
339

92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
340
  	link = &task->pids[type];
e713d0dab   Sukadev Bhattiprolu   attach_pid() with...
341
  	link->pid = pid;
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
342
  	hlist_add_head_rcu(&link->node, &pid->tasks[type]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
343
  }
24336eaee   Oleg Nesterov   pids: introduce c...
344
345
  static void __change_pid(struct task_struct *task, enum pid_type type,
  			struct pid *new)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
346
  {
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
347
348
349
  	struct pid_link *link;
  	struct pid *pid;
  	int tmp;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
350

92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
351
352
  	link = &task->pids[type];
  	pid = link->pid;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
353

92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
354
  	hlist_del_rcu(&link->node);
24336eaee   Oleg Nesterov   pids: introduce c...
355
  	link->pid = new;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
356

92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
357
358
359
  	for (tmp = PIDTYPE_MAX; --tmp >= 0; )
  		if (!hlist_empty(&pid->tasks[tmp]))
  			return;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
360

92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
361
  	free_pid(pid);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
362
  }
24336eaee   Oleg Nesterov   pids: introduce c...
363
364
365
366
367
368
369
370
371
372
373
  /*
   * Unlink @task from its pid of the given @type, leaving the link's pid
   * pointer NULL.  The old pid is freed by __change_pid() if it has no
   * tasks left.
   */
  void detach_pid(struct task_struct *task, enum pid_type type)
  {
  	__change_pid(task, type, NULL);
  }
  
  /*
   * Move @task from its current pid of the given @type to @pid: unlink it
   * from the old pid first, then attach it to the new one.
   */
  void change_pid(struct task_struct *task, enum pid_type type,
  		struct pid *pid)
  {
  	/* Unlinks from the old pid (possibly freeing it) and stores @pid. */
  	__change_pid(task, type, pid);
  	/* Adds the link to @pid's task list for @type. */
  	attach_pid(task, type, pid);
  }
c18258c6f   Eric W. Biederman   [PATCH] pid: Impl...
374
  /* transfer_pid is an optimization of attach_pid(new), detach_pid(old) */
7ad5b3a50   Harvey Harrison   kernel: remove fa...
375
  void transfer_pid(struct task_struct *old, struct task_struct *new,
c18258c6f   Eric W. Biederman   [PATCH] pid: Impl...
376
377
378
379
  			   enum pid_type type)
  {
  	new->pids[type].pid = old->pids[type].pid;
  	hlist_replace_rcu(&old->pids[type].node, &new->pids[type].node);
c18258c6f   Eric W. Biederman   [PATCH] pid: Impl...
380
  }
7ad5b3a50   Harvey Harrison   kernel: remove fa...
381
  struct task_struct *pid_task(struct pid *pid, enum pid_type type)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
382
  {
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
383
384
385
  	struct task_struct *result = NULL;
  	if (pid) {
  		struct hlist_node *first;
67bdbffd6   Arnd Bergmann   rculist: avoid __...
386
  		first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]),
db1466b3e   Paul E. McKenney   rcu: Use wrapper ...
387
388
  					      rcu_read_lock_held() ||
  					      lockdep_tasklist_lock_is_held());
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
389
390
391
392
393
  		if (first)
  			result = hlist_entry(first, struct task_struct, pids[(type)].node);
  	}
  	return result;
  }
eccba0689   Pavel Emelyanov   gfs2: make gfs2_g...
394
  EXPORT_SYMBOL(pid_task);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
395

92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
396
  /*
9728e5d6e   Tetsuo Handa   kernel/pid.c: upd...
397
   * Must be called under rcu_read_lock().
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
398
   */
17f98dcf6   Christoph Hellwig   pids: clean up fi...
399
  /*
   * Find the task whose PIDTYPE_PID pid has number @nr in namespace @ns.
   * Returns NULL when there is no such pid or no task attached to it.
   *
   * Callers must hold rcu_read_lock(); this is asserted via lockdep.
   */
  struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
  {
  	struct pid *pid;

  	rcu_lockdep_assert(rcu_read_lock_held());
  	pid = find_pid_ns(nr, ns);
  	return pid_task(pid, PIDTYPE_PID);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
404

228ebcbe6   Pavel Emelyanov   Uninline find_tas...
405
406
  /*
   * find_task_by_pid_ns() for number @vnr in the pid namespace referenced
   * by the current task's nsproxy.
   */
  struct task_struct *find_task_by_vpid(pid_t vnr)
  {
  	struct pid_namespace *ns = current->nsproxy->pid_ns;

  	return find_task_by_pid_ns(vnr, ns);
  }
228ebcbe6   Pavel Emelyanov   Uninline find_tas...
409

1a657f78d   Oleg Nesterov   [PATCH] introduce...
410
411
412
413
  struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
  {
  	struct pid *pid;
  	rcu_read_lock();
2ae448efc   Oleg Nesterov   pids: improve get...
414
415
  	if (type != PIDTYPE_PID)
  		task = task->group_leader;
1a657f78d   Oleg Nesterov   [PATCH] introduce...
416
417
418
419
  	pid = get_pid(task->pids[type].pid);
  	rcu_read_unlock();
  	return pid;
  }
77c100c83   Rik van Riel   export pid symbol...
420
  EXPORT_SYMBOL_GPL(get_task_pid);
1a657f78d   Oleg Nesterov   [PATCH] introduce...
421

7ad5b3a50   Harvey Harrison   kernel: remove fa...
422
  struct task_struct *get_pid_task(struct pid *pid, enum pid_type type)
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
423
424
425
426
427
428
429
430
  {
  	struct task_struct *result;
  	rcu_read_lock();
  	result = pid_task(pid, type);
  	if (result)
  		get_task_struct(result);
  	rcu_read_unlock();
  	return result;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
431
  }
77c100c83   Rik van Riel   export pid symbol...
432
  EXPORT_SYMBOL_GPL(get_pid_task);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
433

92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
434
  /*
   * Look up number @nr with find_vpid() and pass the result through
   * get_pid(), all under rcu_read_lock().  Returns what get_pid() returns.
   */
  struct pid *find_get_pid(pid_t nr)
  {
  	struct pid *found;

  	rcu_read_lock();
  	found = find_vpid(nr);
  	found = get_pid(found);
  	rcu_read_unlock();

  	return found;
  }
339caf2a2   David Sterba   proc: misplaced e...
443
  EXPORT_SYMBOL_GPL(find_get_pid);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
444

7af572947   Pavel Emelyanov   pid namespaces: h...
445
446
447
448
449
450
451
452
453
454
455
456
  /*
   * Return the number @pid has in namespace @ns.
   *
   * Returns 0 when @pid is NULL, when @ns is deeper than the pid's own
   * level, or when the upid stored at index ns->level belongs to a
   * different namespace.
   */
  pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
  {
  	struct upid *upid;

  	if (!pid)
  		return 0;
  	if (ns->level > pid->level)
  		return 0;

  	upid = &pid->numbers[ns->level];
  	return (upid->ns == ns) ? upid->nr : 0;
  }
44c4e1b25   Eric W. Biederman   pid: Extend/Fix p...
457
458
459
460
461
  /*
   * pid_nr_ns() for @pid in the pid namespace referenced by the current
   * task's nsproxy.
   */
  pid_t pid_vnr(struct pid *pid)
  {
  	struct pid_namespace *ns = current->nsproxy->pid_ns;

  	return pid_nr_ns(pid, ns);
  }
  EXPORT_SYMBOL_GPL(pid_vnr);
52ee2dfdd   Oleg Nesterov   pids: refactor vn...
462
463
  pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
  			struct pid_namespace *ns)
2f2a3a46f   Pavel Emelyanov   Uninline the task...
464
  {
52ee2dfdd   Oleg Nesterov   pids: refactor vn...
465
466
467
468
469
470
471
472
473
474
475
476
477
  	pid_t nr = 0;
  
  	rcu_read_lock();
  	if (!ns)
  		ns = current->nsproxy->pid_ns;
  	if (likely(pid_alive(task))) {
  		if (type != PIDTYPE_PID)
  			task = task->group_leader;
  		nr = pid_nr_ns(task->pids[type].pid, ns);
  	}
  	rcu_read_unlock();
  
  	return nr;
2f2a3a46f   Pavel Emelyanov   Uninline the task...
478
  }
52ee2dfdd   Oleg Nesterov   pids: refactor vn...
479
  EXPORT_SYMBOL(__task_pid_nr_ns);
2f2a3a46f   Pavel Emelyanov   Uninline the task...
480
481
482
483
484
485
  
  /* Number of @tsk's thread-group pid (task_tgid()) as seen from @ns. */
  pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
  {
  	struct pid *tgid = task_tgid(tsk);

  	return pid_nr_ns(tgid, ns);
  }
  EXPORT_SYMBOL(task_tgid_nr_ns);
61bce0f13   Eric W. Biederman   pid: generalize t...
486
487
488
489
490
  /* Namespace reported by ns_of_pid() for @tsk's pid (task_pid()). */
  struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
  {
  	struct pid *pid = task_pid(tsk);

  	return ns_of_pid(pid);
  }
  EXPORT_SYMBOL_GPL(task_active_pid_ns);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
491
  /*
025dfdafe   Frederik Schwarzer   trivial: fix then...
492
   * Used by proc to find the first pid that is greater than or equal to nr.
0804ef4b0   Eric W. Biederman   [PATCH] proc: rea...
493
   *
e49859e71   Pavel Emelyanov   pidns: remove now...
494
   * If there is a pid at nr this function is exactly the same as find_pid_ns.
0804ef4b0   Eric W. Biederman   [PATCH] proc: rea...
495
   */
198fe21b0   Pavel Emelyanov   pid namespaces: h...
496
  struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
0804ef4b0   Eric W. Biederman   [PATCH] proc: rea...
497
498
499
500
  {
  	struct pid *pid;
  
  	do {
198fe21b0   Pavel Emelyanov   pid namespaces: h...
501
  		pid = find_pid_ns(nr, ns);
0804ef4b0   Eric W. Biederman   [PATCH] proc: rea...
502
503
  		if (pid)
  			break;
198fe21b0   Pavel Emelyanov   pid namespaces: h...
504
  		nr = next_pidmap(ns, nr);
0804ef4b0   Eric W. Biederman   [PATCH] proc: rea...
505
506
507
508
509
510
  	} while (nr > 0);
  
  	return pid;
  }
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
511
512
513
514
515
516
   * The pid hash table is scaled according to the amount of memory in the
   * machine.  From a minimum of 16 slots up to 4096 slots at one gigabyte or
   * more.
   */
  /*
   * Allocate the pid hash table with alloc_large_system_hash(), which
   * also stores the chosen shift in pidhash_shift, then initialize every
   * one of the (1 << pidhash_shift) bucket heads as empty.
   */
  void __init pidhash_init(void)
  {
  	int slot, nr_slots;

  	pid_hash = alloc_large_system_hash("PID", sizeof(*pid_hash), 0, 18,
  					   HASH_EARLY | HASH_SMALL,
  					   &pidhash_shift, NULL, 4096);
  	nr_slots = 1 << pidhash_shift;

  	slot = 0;
  	while (slot < nr_slots) {
  		INIT_HLIST_HEAD(&pid_hash[slot]);
  		slot++;
  	}
  }
  
  /*
   * Boot-time setup for pid allocation in the initial namespace:
   *  - scale pid_max and pid_max_min with the number of possible cpus,
   *    keeping pid_max at or below pid_max_max, and log both values;
   *  - allocate the first bitmap page of init_pid_ns and mark pid 0 used;
   *  - create the slab cache that struct pid objects are allocated from.
   */
  void __init pidmap_init(void)
  {
  	/* bump default and minimum pid_max based on number of cpus */
  	pid_max = min(pid_max_max, max_t(int, pid_max,
  				PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
  	pid_max_min = max_t(int, pid_max_min,
  				PIDS_PER_CPU_MIN * num_possible_cpus());
  	/* The message must end in an escaped newline, not a raw line break. */
  	pr_info("pid_max: default: %u minimum: %u\n", pid_max, pid_max_min);

  	/* NOTE(review): the kzalloc() result is used below without a NULL check */
  	init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
  	/* Reserve PID 0. We never call free_pidmap(0) */
  	set_bit(0, init_pid_ns.pidmap[0].page);
  	atomic_dec(&init_pid_ns.pidmap[0].nr_free);

  	init_pid_ns.pid_cachep = KMEM_CACHE(pid,
  			SLAB_HWCACHE_ALIGN | SLAB_PANIC);
  }