Blame view

kernel/exit.c 43.2 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
  /*
   *  linux/kernel/exit.c
   *
   *  Copyright (C) 1991, 1992  Linus Torvalds
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
6
7
8
  #include <linux/mm.h>
  #include <linux/slab.h>
  #include <linux/interrupt.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
9
  #include <linux/module.h>
c59ede7b7   Randy.Dunlap   [PATCH] move capa...
10
  #include <linux/capability.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
11
12
13
  #include <linux/completion.h>
  #include <linux/personality.h>
  #include <linux/tty.h>
da9cbc873   Jens Axboe   block: blkdev.h c...
14
  #include <linux/iocontext.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
15
16
17
18
  #include <linux/key.h>
  #include <linux/security.h>
  #include <linux/cpu.h>
  #include <linux/acct.h>
8f0ab5147   Jay Lan   [PATCH] csa: conv...
19
  #include <linux/tsacct_kern.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
20
  #include <linux/file.h>
9f3acc314   Al Viro   [PATCH] split lin...
21
  #include <linux/fdtable.h>
80d26af89   Mandeep Singh Baines   coredump: use a f...
22
  #include <linux/freezer.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
23
  #include <linux/binfmts.h>
ab516013a   Serge E. Hallyn   [PATCH] namespace...
24
  #include <linux/nsproxy.h>
84d737866   Sukadev Bhattiprolu   [PATCH] add child...
25
  #include <linux/pid_namespace.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
26
27
28
29
  #include <linux/ptrace.h>
  #include <linux/profile.h>
  #include <linux/mount.h>
  #include <linux/proc_fs.h>
49d769d52   Eric W. Biederman   Change reparent_t...
30
  #include <linux/kthread.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
31
  #include <linux/mempolicy.h>
c757249af   Shailabh Nagar   [PATCH] per-task-...
32
  #include <linux/taskstats_kern.h>
ca74e92b4   Shailabh Nagar   [PATCH] per-task-...
33
  #include <linux/delayacct.h>
b4f48b636   Paul Menage   Task Control Grou...
34
  #include <linux/cgroup.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
35
  #include <linux/syscalls.h>
7ed20e1ad   Jesper Juhl   [PATCH] convert t...
36
  #include <linux/signal.h>
6a14c5c9d   Oleg Nesterov   [PATCH] move __ex...
37
  #include <linux/posix-timers.h>
9f46080c4   Matt Helsley   [PATCH] Process E...
38
  #include <linux/cn_proc.h>
de5097c2e   Ingo Molnar   [PATCH] mutex sub...
39
  #include <linux/mutex.h>
0771dfefc   Ingo Molnar   [PATCH] lightweig...
40
  #include <linux/futex.h>
b92ce5589   Jens Axboe   [PATCH] splice: a...
41
  #include <linux/pipe_fs_i.h>
fa84cb935   Al Viro   [PATCH] move call...
42
  #include <linux/audit.h> /* for audit_free() */
83cc5ed3c   Adrian Bunk   [PATCH] kernel/sy...
43
  #include <linux/resource.h>
0d67a46df   David Howells   [PATCH] BLOCK: Re...
44
  #include <linux/blkdev.h>
6eaeeaba3   Eric Dumazet   getrusage(): fill...
45
  #include <linux/task_io_accounting_ops.h>
30199f5a4   Roland McGrath   tracehook: exit
46
  #include <linux/tracehook.h>
5ad4e53bd   Al Viro   Get rid of indire...
47
  #include <linux/fs_struct.h>
d84f4f992   David Howells   CRED: Inaugurate ...
48
  #include <linux/init_task.h>
cdd6c482c   Ingo Molnar   perf: Do the big ...
49
  #include <linux/perf_event.h>
ad8d75fff   Steven Rostedt   tracing/events: m...
50
  #include <trace/events/sched.h>
24f1e32c6   Frederic Weisbecker   hw-breakpoints: R...
51
  #include <linux/hw_breakpoint.h>
3d5992d2a   Ying Han   oom: add per-mm o...
52
  #include <linux/oom.h>
54848d73f   Wu Fengguang   writeback: charge...
53
  #include <linux/writeback.h>
404015308   Al Viro   security: trim se...
54
  #include <linux/shm.h>
5c9a8750a   Dmitry Vyukov   kernel: add kcov ...
55
  #include <linux/kcov.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
56
57
58
59
60
  
  #include <asm/uaccess.h>
  #include <asm/unistd.h>
  #include <asm/pgtable.h>
  #include <asm/mmu_context.h>
d40e48e02   Oleg Nesterov   exit: __exit_sign...
61
  /*
   * Unlink @p from the pid hashes and from the task lists.
   *
   * @p:          task being removed
   * @group_dead: when true, the process-wide links (PGID/SID pids, ->tasks,
   *              ->sibling) are dropped as well and the per-cpu
   *              process_counts counter is decremented
   */
  static void __unhash_process(struct task_struct *p, bool group_dead)
  {
  	nr_threads--;
  	detach_pid(p, PIDTYPE_PID);

  	if (group_dead) {
  		/* Links that exist once per process, not once per thread. */
  		detach_pid(p, PIDTYPE_PGID);
  		detach_pid(p, PIDTYPE_SID);

  		list_del_rcu(&p->tasks);
  		list_del_init(&p->sibling);
  		__this_cpu_dec(process_counts);
  	}

  	/* Per-thread links are removed unconditionally. */
  	list_del_rcu(&p->thread_group);
  	list_del_rcu(&p->thread_node);
  }
6a14c5c9d   Oleg Nesterov   [PATCH] move __ex...
76
77
78
79
80
81
  /*
   * This function expects the tasklist_lock write-locked.
   */
  static void __exit_signal(struct task_struct *tsk)
  {
  	struct signal_struct *sig = tsk->signal;
d40e48e02   Oleg Nesterov   exit: __exit_sign...
82
  	bool group_dead = thread_group_leader(tsk);
6a14c5c9d   Oleg Nesterov   [PATCH] move __ex...
83
  	struct sighand_struct *sighand;
4ada856fb   Oleg Nesterov   signals: clear si...
84
  	struct tty_struct *uninitialized_var(tty);
6fac4829c   Frederic Weisbecker   cputime: Use acce...
85
  	cputime_t utime, stime;
6a14c5c9d   Oleg Nesterov   [PATCH] move __ex...
86

d11c563dd   Paul E. McKenney   sched: Use lockde...
87
  	sighand = rcu_dereference_check(tsk->sighand,
db1466b3e   Paul E. McKenney   rcu: Use wrapper ...
88
  					lockdep_tasklist_lock_is_held());
6a14c5c9d   Oleg Nesterov   [PATCH] move __ex...
89
90
91
  	spin_lock(&sighand->siglock);
  
  	posix_cpu_timers_exit(tsk);
d40e48e02   Oleg Nesterov   exit: __exit_sign...
92
  	if (group_dead) {
6a14c5c9d   Oleg Nesterov   [PATCH] move __ex...
93
  		posix_cpu_timers_exit_group(tsk);
4ada856fb   Oleg Nesterov   signals: clear si...
94
95
  		tty = sig->tty;
  		sig->tty = NULL;
4a5999429   Oleg Nesterov   exit: avoid sig->...
96
  	} else {
6a14c5c9d   Oleg Nesterov   [PATCH] move __ex...
97
  		/*
e0a702171   Oleg Nesterov   posix-cpu-timers:...
98
99
100
101
102
103
104
105
  		 * This can only happen if the caller is de_thread().
  		 * FIXME: this is the temporary hack, we should teach
  		 * posix-cpu-timers to handle this case correctly.
  		 */
  		if (unlikely(has_group_leader_pid(tsk)))
  			posix_cpu_timers_exit_group(tsk);
  
  		/*
6a14c5c9d   Oleg Nesterov   [PATCH] move __ex...
106
107
108
  		 * If there is any task waiting for the group exit
  		 * then notify it:
  		 */
d344193a0   Oleg Nesterov   exit: avoid sig->...
109
  		if (sig->notify_count > 0 && !--sig->notify_count)
6a14c5c9d   Oleg Nesterov   [PATCH] move __ex...
110
  			wake_up_process(sig->group_exit_task);
6db840fa7   Oleg Nesterov   exec: RT sub-thre...
111

6a14c5c9d   Oleg Nesterov   [PATCH] move __ex...
112
113
  		if (tsk == sig->curr_target)
  			sig->curr_target = next_thread(tsk);
6a14c5c9d   Oleg Nesterov   [PATCH] move __ex...
114
  	}
90ed9cbe7   Rik van Riel   exit: Always reap...
115
  	/*
26e75b5c3   Oleg Nesterov   exit: release_tas...
116
117
118
119
  	 * Accumulate here the counters for all threads as they die. We could
  	 * skip the group leader because it is the last user of signal_struct,
  	 * but we want to avoid the race with thread_group_cputime() which can
  	 * see the empty ->thread_head list.
90ed9cbe7   Rik van Riel   exit: Always reap...
120
121
  	 */
  	task_cputime(tsk, &utime, &stime);
e78c34967   Rik van Riel   time, signal: Pro...
122
  	write_seqlock(&sig->stats_lock);
90ed9cbe7   Rik van Riel   exit: Always reap...
123
124
125
126
127
128
129
130
131
132
133
  	sig->utime += utime;
  	sig->stime += stime;
  	sig->gtime += task_gtime(tsk);
  	sig->min_flt += tsk->min_flt;
  	sig->maj_flt += tsk->maj_flt;
  	sig->nvcsw += tsk->nvcsw;
  	sig->nivcsw += tsk->nivcsw;
  	sig->inblock += task_io_get_inblock(tsk);
  	sig->oublock += task_io_get_oublock(tsk);
  	task_io_accounting_add(&sig->ioac, &tsk->ioac);
  	sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
b3ac022cb   Oleg Nesterov   proc: turn signal...
134
  	sig->nr_threads--;
d40e48e02   Oleg Nesterov   exit: __exit_sign...
135
  	__unhash_process(tsk, group_dead);
e78c34967   Rik van Riel   time, signal: Pro...
136
  	write_sequnlock(&sig->stats_lock);
5876700cd   Oleg Nesterov   [PATCH] do __unha...
137

da7978b03   Oleg Nesterov   signals: fix sigq...
138
139
140
141
142
  	/*
  	 * Do this under ->siglock, we can race with another thread
  	 * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals.
  	 */
  	flush_sigqueue(&tsk->pending);
a7e5328a0   Oleg Nesterov   [PATCH] cleanup _...
143
  	tsk->sighand = NULL;
6a14c5c9d   Oleg Nesterov   [PATCH] move __ex...
144
  	spin_unlock(&sighand->siglock);
6a14c5c9d   Oleg Nesterov   [PATCH] move __ex...
145

a7e5328a0   Oleg Nesterov   [PATCH] cleanup _...
146
  	__cleanup_sighand(sighand);
a0be55dee   Ionut Alexa   kernel/exit.c: fi...
147
  	clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
d40e48e02   Oleg Nesterov   exit: __exit_sign...
148
  	if (group_dead) {
6a14c5c9d   Oleg Nesterov   [PATCH] move __ex...
149
  		flush_sigqueue(&sig->shared_pending);
4ada856fb   Oleg Nesterov   signals: clear si...
150
  		tty_kref_put(tty);
6a14c5c9d   Oleg Nesterov   [PATCH] move __ex...
151
152
  	}
  }
8c7904a00   Eric W. Biederman   [PATCH] task: RCU...
153
154
  static void delayed_put_task_struct(struct rcu_head *rhp)
  {
0a16b6075   Mathieu Desnoyers   tracing, sched: L...
155
  	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
4e231c796   Peter Zijlstra   perf: Fix up dela...
156
  	perf_event_delayed_put(tsk);
0a16b6075   Mathieu Desnoyers   tracing, sched: L...
157
158
  	trace_sched_process_free(tsk);
  	put_task_struct(tsk);
8c7904a00   Eric W. Biederman   [PATCH] task: RCU...
159
  }
f470021ad   Roland McGrath   ptrace children r...
160

a0be55dee   Ionut Alexa   kernel/exit.c: fi...
161
  void release_task(struct task_struct *p)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
162
  {
36c8b5868   Ingo Molnar   [PATCH] sched: cl...
163
  	struct task_struct *leader;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
164
  	int zap_leader;
1f09f9749   Oleg Nesterov   [PATCH] release_t...
165
  repeat:
c69e8d9c0   David Howells   CRED: Use RCU to ...
166
  	/* don't need to get the RCU readlock here - the process is dead and
d11c563dd   Paul E. McKenney   sched: Use lockde...
167
168
  	 * can't be modifying its own credentials. But shut RCU-lockdep up */
  	rcu_read_lock();
c69e8d9c0   David Howells   CRED: Use RCU to ...
169
  	atomic_dec(&__task_cred(p)->user->processes);
d11c563dd   Paul E. McKenney   sched: Use lockde...
170
  	rcu_read_unlock();
c69e8d9c0   David Howells   CRED: Use RCU to ...
171

60347f671   Pavel Emelyanov   pid namespaces: p...
172
  	proc_flush_task(p);
0203026b5   Ingo Molnar   perf_counter: fix...
173

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
174
  	write_lock_irq(&tasklist_lock);
a288eecce   Tejun Heo   ptrace: kill triv...
175
  	ptrace_release_task(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
176
  	__exit_signal(p);
35f5cad8c   Oleg Nesterov   [PATCH] revert "O...
177

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
178
179
180
181
182
183
184
  	/*
  	 * If we are the last non-leader member of the thread
  	 * group, and the leader is zombie, then notify the
  	 * group leader's parent process. (if it wants notification.)
  	 */
  	zap_leader = 0;
  	leader = p->group_leader;
a0be55dee   Ionut Alexa   kernel/exit.c: fi...
185
186
  	if (leader != p && thread_group_empty(leader)
  			&& leader->exit_state == EXIT_ZOMBIE) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
187
188
189
190
  		/*
  		 * If we were the last child thread and the leader has
  		 * exited already, and the leader's parent ignores SIGCHLD,
  		 * then we are the one who should release the leader.
dae33574d   Roland McGrath   tracehook: releas...
191
  		 */
867734737   Oleg Nesterov   make do_notify_pa...
192
  		zap_leader = do_notify_parent(leader, leader->exit_signal);
dae33574d   Roland McGrath   tracehook: releas...
193
194
  		if (zap_leader)
  			leader->exit_state = EXIT_DEAD;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
195
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
196
  	write_unlock_irq(&tasklist_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
197
  	release_thread(p);
8c7904a00   Eric W. Biederman   [PATCH] task: RCU...
198
  	call_rcu(&p->rcu, delayed_put_task_struct);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
199
200
201
202
203
  
  	p = leader;
  	if (unlikely(zap_leader))
  		goto repeat;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
204
  /*
150593bf8   Oleg Nesterov   sched/api: Introd...
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
   * Note that if this function returns a valid task_struct pointer (!NULL)
   * task->usage must remain >0 for the duration of the RCU critical section.
   */
  struct task_struct *task_rcu_dereference(struct task_struct **ptask)
  {
  	struct sighand_struct *sighand;
  	struct task_struct *task;
  
  	/*
  	 * We need to verify that release_task() was not called and thus
  	 * delayed_put_task_struct() can't run and drop the last reference
  	 * before rcu_read_unlock(). We check task->sighand != NULL,
  	 * but we can read the already freed and reused memory.
  	 */
  retry:
  	task = rcu_dereference(*ptask);
  	if (!task)
  		return NULL;
  
  	/*
  	 * Read task->sighand through the probing accessor rather than a
  	 * plain load, since the memory may already have been freed.
  	 * NOTE(review): the result of probe_kernel_address() is not checked;
  	 * presumably a failed read is caught by the *ptask re-check below -
  	 * confirm.
  	 */
  	probe_kernel_address(&task->sighand, sighand);
  
  	/*
  	 * Pairs with atomic_dec_and_test() in put_task_struct(). If this task
  	 * was already freed we can not miss the preceding update of this
  	 * pointer.
  	 */
  	smp_rmb();
  	if (unlikely(task != READ_ONCE(*ptask)))
  		goto retry;
  
  	/*
  	 * We've re-checked that "task == *ptask", now we have two different
  	 * cases:
  	 *
  	 * 1. This is actually the same task/task_struct. In this case
  	 *    sighand != NULL tells us it is still alive.
  	 *
  	 * 2. This is another task which got the same memory for task_struct.
  	 *    We can't know this of course, and we can not trust
  	 *    sighand != NULL.
  	 *
  	 *    In this case we actually return a random value, but this is
  	 *    correct.
  	 *
  	 *    If we return NULL - we can pretend that we actually noticed that
  	 *    *ptask was updated when the previous task has exited. Or pretend
  	 *    that probe_kernel_address(&task->sighand, sighand) reads NULL.
  	 *
  	 *    If we return the new task (because sighand is not NULL for any
  	 *    reason) - this is fine too. This (new) task can't go away before
  	 *    another gp pass.
  	 *
  	 *    And note: We could even eliminate the false positive if we
  	 *    re-read task->sighand once again to avoid the falsely NULL. But
  	 *    this case is very unlikely so we don't care.
  	 */
  	if (!sighand)
  		return NULL;
  
  	return task;
  }
  
  struct task_struct *try_get_task_struct(struct task_struct **ptask)
  {
  	struct task_struct *task;
  
  	rcu_read_lock();
  	task = task_rcu_dereference(ptask);
  	if (task)
  		get_task_struct(task);
  	rcu_read_unlock();
  
  	return task;
  }
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
281
282
283
284
285
286
287
   * Determine if a process group is "orphaned", according to the POSIX
   * definition in 2.2.2.52.  Orphaned process groups are not to be affected
   * by terminal-generated stop signals.  Newly orphaned process groups are
   * to receive a SIGHUP and a SIGCONT.
   *
   * "I ask you, have you ever known what it is to be an orphan?"
   */
a0be55dee   Ionut Alexa   kernel/exit.c: fi...
288
289
  static int will_become_orphaned_pgrp(struct pid *pgrp,
  					struct task_struct *ignored_task)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
290
291
  {
  	struct task_struct *p;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
292

0475ac084   Eric W. Biederman   [PATCH] pid: use ...
293
  	do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
05e83df62   Oleg Nesterov   will_become_orpha...
294
295
296
  		if ((p == ignored_task) ||
  		    (p->exit_state && thread_group_empty(p)) ||
  		    is_global_init(p->real_parent))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
297
  			continue;
05e83df62   Oleg Nesterov   will_become_orpha...
298

0475ac084   Eric W. Biederman   [PATCH] pid: use ...
299
  		if (task_pgrp(p->real_parent) != pgrp &&
05e83df62   Oleg Nesterov   will_become_orpha...
300
301
  		    task_session(p->real_parent) == task_session(p))
  			return 0;
0475ac084   Eric W. Biederman   [PATCH] pid: use ...
302
  	} while_each_pid_task(pgrp, PIDTYPE_PGID, p);
05e83df62   Oleg Nesterov   will_become_orpha...
303
304
  
  	return 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
305
  }
3e7cd6c41   Eric W. Biederman   [PATCH] pid: repl...
306
  int is_current_pgrp_orphaned(void)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
307
308
309
310
  {
  	int retval;
  
  	read_lock(&tasklist_lock);
3e7cd6c41   Eric W. Biederman   [PATCH] pid: repl...
311
  	retval = will_become_orphaned_pgrp(task_pgrp(current), NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
312
313
314
315
  	read_unlock(&tasklist_lock);
  
  	return retval;
  }
961c4675c   Oleg Nesterov   has_stopped_jobs:...
316
  static bool has_stopped_jobs(struct pid *pgrp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
317
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
318
  	struct task_struct *p;
0475ac084   Eric W. Biederman   [PATCH] pid: use ...
319
  	do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
961c4675c   Oleg Nesterov   has_stopped_jobs:...
320
321
  		if (p->signal->flags & SIGNAL_STOP_STOPPED)
  			return true;
0475ac084   Eric W. Biederman   [PATCH] pid: use ...
322
  	} while_each_pid_task(pgrp, PIDTYPE_PGID, p);
961c4675c   Oleg Nesterov   has_stopped_jobs:...
323
324
  
  	return false;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
325
  }
f49ee505b   Oleg Nesterov   introduce kill_or...
326
327
328
329
330
331
332
333
334
335
336
337
  /*
   * Check to see if any process groups have become orphaned as
   * a result of our exiting, and if they have any stopped jobs,
   * send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
   */
  static void
  kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)
  {
  	struct pid *pgrp = task_pgrp(tsk);
  	struct task_struct *ignored_task = tsk;
  
  	if (!parent)
a0be55dee   Ionut Alexa   kernel/exit.c: fi...
338
339
340
  		/* exit: our father is in a different pgrp than
  		 * we are and we were the only connection outside.
  		 */
f49ee505b   Oleg Nesterov   introduce kill_or...
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
  		parent = tsk->real_parent;
  	else
  		/* reparent: our child is in a different pgrp than
  		 * we are, and it was the only connection outside.
  		 */
  		ignored_task = NULL;
  
  	if (task_pgrp(parent) != pgrp &&
  	    task_session(parent) == task_session(tsk) &&
  	    will_become_orphaned_pgrp(pgrp, ignored_task) &&
  	    has_stopped_jobs(pgrp)) {
  		__kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp);
  		__kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp);
  	}
  }
f98bafa06   Oleg Nesterov   memcg: kill CONFI...
356
  #ifdef CONFIG_MEMCG
cf475ad28   Balbir Singh   cgroups: add an o...
357
  /*
733eda7ac   KAMEZAWA Hiroyuki   memcg: clear mm->...
358
   * A task is exiting. If it owned this mm, find a new owner for the mm.
cf475ad28   Balbir Singh   cgroups: add an o...
359
   */
cf475ad28   Balbir Singh   cgroups: add an o...
360
361
362
363
364
  void mm_update_next_owner(struct mm_struct *mm)
  {
  	struct task_struct *c, *g, *p = current;
  
  retry:
733eda7ac   KAMEZAWA Hiroyuki   memcg: clear mm->...
365
366
367
368
369
  	/*
  	 * If the exiting or execing task is not the owner, it's
  	 * someone else's problem.
  	 */
  	if (mm->owner != p)
cf475ad28   Balbir Singh   cgroups: add an o...
370
  		return;
733eda7ac   KAMEZAWA Hiroyuki   memcg: clear mm->...
371
372
373
374
375
376
377
378
379
  	/*
  	 * The current owner is exiting/execing and there are no other
  	 * candidates.  Do not leave the mm pointing to a possibly
  	 * freed task structure.
  	 */
  	if (atomic_read(&mm->mm_users) <= 1) {
  		mm->owner = NULL;
  		return;
  	}
cf475ad28   Balbir Singh   cgroups: add an o...
380
381
382
383
384
385
386
387
388
389
390
391
392
  
  	read_lock(&tasklist_lock);
  	/*
  	 * Search in the children
  	 */
  	list_for_each_entry(c, &p->children, sibling) {
  		if (c->mm == mm)
  			goto assign_new_owner;
  	}
  
  	/*
  	 * Search in the siblings
  	 */
dea33cfd9   Oleg Nesterov   ptrace: mm_need_n...
393
  	list_for_each_entry(c, &p->real_parent->children, sibling) {
cf475ad28   Balbir Singh   cgroups: add an o...
394
395
396
397
398
  		if (c->mm == mm)
  			goto assign_new_owner;
  	}
  
  	/*
f87fb599a   Oleg Nesterov   memcg: mm_update_...
399
  	 * Search through everything else, we should not get here often.
cf475ad28   Balbir Singh   cgroups: add an o...
400
  	 */
39af1765f   Oleg Nesterov   memcg: optimize t...
401
402
403
404
405
406
407
408
409
  	for_each_process(g) {
  		if (g->flags & PF_KTHREAD)
  			continue;
  		for_each_thread(g, c) {
  			if (c->mm == mm)
  				goto assign_new_owner;
  			if (c->mm)
  				break;
  		}
f87fb599a   Oleg Nesterov   memcg: mm_update_...
410
  	}
cf475ad28   Balbir Singh   cgroups: add an o...
411
  	read_unlock(&tasklist_lock);
31a78f23b   Balbir Singh   mm owner: fix rac...
412
413
414
  	/*
  	 * We found no owner yet mm_users > 1: this implies that we are
  	 * most likely racing with swapoff (try_to_unuse()) or /proc or
e5991371e   Hugh Dickins   mm: remove cgroup...
415
  	 * ptrace or page migration (get_task_mm()).  Mark owner as NULL.
31a78f23b   Balbir Singh   mm owner: fix rac...
416
  	 */
31a78f23b   Balbir Singh   mm owner: fix rac...
417
  	mm->owner = NULL;
cf475ad28   Balbir Singh   cgroups: add an o...
418
419
420
421
422
423
424
425
426
427
  	return;
  
  assign_new_owner:
  	BUG_ON(c == p);
  	get_task_struct(c);
  	/*
  	 * The task_lock protects c->mm from changing.
  	 * We always want mm->owner->mm == mm
  	 */
  	task_lock(c);
e5991371e   Hugh Dickins   mm: remove cgroup...
428
429
430
431
432
  	/*
  	 * Delay read_unlock() till we have the task_lock()
  	 * to ensure that c does not slip away underneath us
  	 */
  	read_unlock(&tasklist_lock);
cf475ad28   Balbir Singh   cgroups: add an o...
433
434
435
436
437
  	if (c->mm != mm) {
  		task_unlock(c);
  		put_task_struct(c);
  		goto retry;
  	}
cf475ad28   Balbir Singh   cgroups: add an o...
438
439
440
441
  	mm->owner = c;
  	task_unlock(c);
  	put_task_struct(c);
  }
f98bafa06   Oleg Nesterov   memcg: kill CONFI...
442
  #endif /* CONFIG_MEMCG */
cf475ad28   Balbir Singh   cgroups: add an o...
443

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
444
445
446
447
  /*
   * Turn us into a lazy TLB process if we
   * aren't already..
   */
a0be55dee   Ionut Alexa   kernel/exit.c: fi...
448
  static void exit_mm(struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
449
450
  {
  	struct mm_struct *mm = tsk->mm;
b564daf80   Oleg Nesterov   coredump: constru...
451
  	struct core_state *core_state;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
452

48d212a2e   Linus Torvalds   Revert "mm: corre...
453
  	mm_release(tsk, mm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
454
455
  	if (!mm)
  		return;
4fe7efdbd   Konstantin Khlebnikov   mm: correctly syn...
456
  	sync_mm_rss(mm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
457
458
  	/*
  	 * Serialize with any possible pending coredump.
999d9fc16   Oleg Nesterov   coredump: move mm...
459
  	 * We must hold mmap_sem around checking core_state
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
460
  	 * and clearing tsk->mm.  The core-inducing thread
999d9fc16   Oleg Nesterov   coredump: move mm...
461
  	 * will increment ->nr_threads for each thread in the
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
462
463
464
  	 * group with ->mm != NULL.
  	 */
  	down_read(&mm->mmap_sem);
b564daf80   Oleg Nesterov   coredump: constru...
465
466
467
  	core_state = mm->core_state;
  	if (core_state) {
  		struct core_thread self;
a0be55dee   Ionut Alexa   kernel/exit.c: fi...
468

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
469
  		up_read(&mm->mmap_sem);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
470

b564daf80   Oleg Nesterov   coredump: constru...
471
472
473
474
475
476
477
478
  		self.task = tsk;
  		self.next = xchg(&core_state->dumper.next, &self);
  		/*
  		 * Implies mb(), the result of xchg() must be visible
  		 * to core_state->dumper.
  		 */
  		if (atomic_dec_and_test(&core_state->nr_threads))
  			complete(&core_state->startup);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
479

a94e2d408   Oleg Nesterov   coredump: kill mm...
480
481
482
483
  		for (;;) {
  			set_task_state(tsk, TASK_UNINTERRUPTIBLE);
  			if (!self.task) /* see coredump_finish() */
  				break;
80d26af89   Mandeep Singh Baines   coredump: use a f...
484
  			freezable_schedule();
a94e2d408   Oleg Nesterov   coredump: kill mm...
485
486
  		}
  		__set_task_state(tsk, TASK_RUNNING);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
487
488
489
  		down_read(&mm->mmap_sem);
  	}
  	atomic_inc(&mm->mm_count);
125e18745   Eric Sesterhenn   [PATCH] More BUG_...
490
  	BUG_ON(mm != tsk->active_mm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
491
492
493
494
495
496
  	/* more a memory barrier than a real lock */
  	task_lock(tsk);
  	tsk->mm = NULL;
  	up_read(&mm->mmap_sem);
  	enter_lazy_tlb(mm, current);
  	task_unlock(tsk);
cf475ad28   Balbir Singh   cgroups: add an o...
497
  	mm_update_next_owner(mm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
498
  	mmput(mm);
c32b3cbe0   Michal Hocko   oom, PM: make OOM...
499
  	if (test_thread_flag(TIF_MEMDIE))
38531201c   Tetsuo Handa   mm, oom: enforce ...
500
  		exit_oom_victim();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
501
  }
c9dc05bfd   Oleg Nesterov   exit: reparent: i...
502
503
504
505
506
507
508
509
510
511
  /*
   * Walk the threads of @p and return the first one that does not have
   * PF_EXITING set in ->flags, or NULL if every thread has it set.
   */
  static struct task_struct *find_alive_thread(struct task_struct *p)
  {
  	struct task_struct *candidate;

  	for_each_thread(p, candidate) {
  		if (candidate->flags & PF_EXITING)
  			continue;
  		return candidate;
  	}

  	return NULL;
  }
1109909c7   Oleg Nesterov   exit: reparent: i...
512
513
514
515
516
517
518
519
520
  /*
   * Pick the child reaper of @father's active pid namespace.
   *
   * If @father is not itself the namespace's child_reaper, that reaper is
   * returned unchanged.  Otherwise another thread of @father without
   * PF_EXITING takes over as child_reaper.  If there is none, tasklist_lock
   * is dropped, the namespace's processes are zapped (or the kernel panics
   * when this is init_pid_ns), the lock is re-taken and @father is returned.
   *
   * Called with tasklist_lock write-locked; may drop and reacquire it.
   */
  static struct task_struct *find_child_reaper(struct task_struct *father)
  	__releases(&tasklist_lock)
  	__acquires(&tasklist_lock)
  {
  	struct pid_namespace *pid_ns = task_active_pid_ns(father);
  	struct task_struct *reaper = pid_ns->child_reaper;

  	if (likely(reaper != father))
  		return reaper;

  	/* @father is the reaper: hand the role to a live sibling thread. */
  	reaper = find_alive_thread(father);
  	if (reaper) {
  		pid_ns->child_reaper = reaper;
  		return reaper;
  	}

  	write_unlock_irq(&tasklist_lock);
  	if (unlikely(pid_ns == &init_pid_ns)) {
  		/* The format string must stay on one line and end in "\n". */
  		panic("Attempted to kill init! exitcode=0x%08x\n",
  			father->signal->group_exit_code ?: father->exit_code);
  	}
  	zap_pid_ns_processes(pid_ns);
  	write_lock_irq(&tasklist_lock);

  	return father;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
538
  /*
ebec18a6d   Lennart Poettering   prctl: add PR_{SE...
539
540
541
542
543
   * When we die, we re-parent all our children, and try to:
   * 1. give them to another thread in our thread group, if such a member exists
   * 2. give it to the first ancestor process which prctl'd itself as a
   *    child_subreaper for its children (like a service manager)
   * 3. give it to the init process (PID 1) in our pid namespace
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
544
   */
1109909c7   Oleg Nesterov   exit: reparent: i...
545
546
  static struct task_struct *find_new_reaper(struct task_struct *father,
  					   struct task_struct *child_reaper)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
547
  {
c9dc05bfd   Oleg Nesterov   exit: reparent: i...
548
  	struct task_struct *thread, *reaper;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
549

c9dc05bfd   Oleg Nesterov   exit: reparent: i...
550
551
  	thread = find_alive_thread(father);
  	if (thread)
950bbabb5   Oleg Nesterov   pid_ns: (BUG 1139...
552
  		return thread;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
553

7d24e2df5   Oleg Nesterov   exit: reparent: f...
554
  	if (father->signal->has_child_subreaper) {
ebec18a6d   Lennart Poettering   prctl: add PR_{SE...
555
  		/*
175aed3f8   Oleg Nesterov   exit: reparent: d...
556
557
558
  		 * Find the first ->is_child_subreaper ancestor in our pid_ns.
  		 * We start from father to ensure we can not look into another
  		 * namespace, this is safe because all its threads are dead.
ebec18a6d   Lennart Poettering   prctl: add PR_{SE...
559
  		 */
7d24e2df5   Oleg Nesterov   exit: reparent: f...
560
  		for (reaper = father;
1109909c7   Oleg Nesterov   exit: reparent: i...
561
  		     !same_thread_group(reaper, child_reaper);
ebec18a6d   Lennart Poettering   prctl: add PR_{SE...
562
  		     reaper = reaper->real_parent) {
175aed3f8   Oleg Nesterov   exit: reparent: d...
563
564
  			/* call_usermodehelper() descendants need this check */
  			if (reaper == &init_task)
ebec18a6d   Lennart Poettering   prctl: add PR_{SE...
565
566
567
  				break;
  			if (!reaper->signal->is_child_subreaper)
  				continue;
c9dc05bfd   Oleg Nesterov   exit: reparent: i...
568
569
570
  			thread = find_alive_thread(reaper);
  			if (thread)
  				return thread;
ebec18a6d   Lennart Poettering   prctl: add PR_{SE...
571
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
572
  	}
762a24bee   Oleg Nesterov   pid namespaces: r...
573

1109909c7   Oleg Nesterov   exit: reparent: i...
574
  	return child_reaper;
950bbabb5   Oleg Nesterov   pid_ns: (BUG 1139...
575
  }
5dfc80be7   Oleg Nesterov   forget_original_p...
576
577
578
  /*
   * Any that need to be release_task'd are put on the @dead list.
   */
9cd80bbb0   Oleg Nesterov   do_wait() optimiz...
579
  static void reparent_leader(struct task_struct *father, struct task_struct *p,
5dfc80be7   Oleg Nesterov   forget_original_p...
580
581
  				struct list_head *dead)
  {
2831096e2   Oleg Nesterov   exit: reparent: c...
582
  	if (unlikely(p->exit_state == EXIT_DEAD))
5dfc80be7   Oleg Nesterov   forget_original_p...
583
  		return;
abd50b39e   Oleg Nesterov   wait: introduce E...
584
  	/* We don't want people slaying init. */
5dfc80be7   Oleg Nesterov   forget_original_p...
585
586
587
  	p->exit_signal = SIGCHLD;
  
  	/* If it has exited notify the new parent about this child's death. */
d21142ece   Tejun Heo   ptrace: kill task...
588
  	if (!p->ptrace &&
5dfc80be7   Oleg Nesterov   forget_original_p...
589
  	    p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) {
867734737   Oleg Nesterov   make do_notify_pa...
590
  		if (do_notify_parent(p, p->exit_signal)) {
5dfc80be7   Oleg Nesterov   forget_original_p...
591
  			p->exit_state = EXIT_DEAD;
dc2fd4b00   Oleg Nesterov   exit: reparent: u...
592
  			list_add(&p->ptrace_entry, dead);
5dfc80be7   Oleg Nesterov   forget_original_p...
593
594
595
596
597
  		}
  	}
  
  	kill_orphaned_pgrp(p, father);
  }
482a3767e   Oleg Nesterov   exit: reparent: c...
598
599
600
601
602
603
604
605
606
607
  /*
   * This does two things:
   *
   * A.  Make init inherit all the child processes
   * B.  Check to see if any process groups have become orphaned
   *	as a result of our exiting, and if they have any stopped
   *	jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
   */
  static void forget_original_parent(struct task_struct *father,
  					struct list_head *dead)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
608
  {
482a3767e   Oleg Nesterov   exit: reparent: c...
609
  	struct task_struct *p, *t, *reaper;
762a24bee   Oleg Nesterov   pid namespaces: r...
610

7c8bd2322   Oleg Nesterov   exit: ptrace: shi...
611
  	if (unlikely(!list_empty(&father->ptraced)))
482a3767e   Oleg Nesterov   exit: reparent: c...
612
  		exit_ptrace(father, dead);
f470021ad   Roland McGrath   ptrace children r...
613

7c8bd2322   Oleg Nesterov   exit: ptrace: shi...
614
  	/* Can drop and reacquire tasklist_lock */
1109909c7   Oleg Nesterov   exit: reparent: i...
615
  	reaper = find_child_reaper(father);
ad9e206ae   Oleg Nesterov   exit: reparent: a...
616
  	if (list_empty(&father->children))
482a3767e   Oleg Nesterov   exit: reparent: c...
617
  		return;
1109909c7   Oleg Nesterov   exit: reparent: i...
618
619
  
  	reaper = find_new_reaper(father, reaper);
2831096e2   Oleg Nesterov   exit: reparent: c...
620
  	list_for_each_entry(p, &father->children, sibling) {
57a059187   Oleg Nesterov   exit: reparent: c...
621
  		for_each_thread(p, t) {
9cd80bbb0   Oleg Nesterov   do_wait() optimiz...
622
  			t->real_parent = reaper;
57a059187   Oleg Nesterov   exit: reparent: c...
623
624
  			BUG_ON((!t->ptrace) != (t->parent == father));
  			if (likely(!t->ptrace))
9cd80bbb0   Oleg Nesterov   do_wait() optimiz...
625
  				t->parent = t->real_parent;
9cd80bbb0   Oleg Nesterov   do_wait() optimiz...
626
627
628
  			if (t->pdeath_signal)
  				group_send_sig_info(t->pdeath_signal,
  						    SEND_SIG_NOINFO, t);
57a059187   Oleg Nesterov   exit: reparent: c...
629
  		}
2831096e2   Oleg Nesterov   exit: reparent: c...
630
631
632
633
634
  		/*
  		 * If this is a threaded reparent there is no need to
  		 * notify anyone anything has happened.
  		 */
  		if (!same_thread_group(reaper, father))
482a3767e   Oleg Nesterov   exit: reparent: c...
635
  			reparent_leader(father, p, dead);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
636
  	}
2831096e2   Oleg Nesterov   exit: reparent: c...
637
  	list_splice_tail_init(&father->children, &reaper->children);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
638
639
640
641
642
643
  }
  
  /*
   * Send signals to all our closest relatives so that they know
   * to properly mourn us..
   */
821c7de71   Oleg Nesterov   exit_notify: fix ...
644
  static void exit_notify(struct task_struct *tsk, int group_dead)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
645
  {
53c8f9f19   Oleg Nesterov   make do_notify_pa...
646
  	bool autoreap;
482a3767e   Oleg Nesterov   exit: reparent: c...
647
648
  	struct task_struct *p, *n;
  	LIST_HEAD(dead);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
649

762a24bee   Oleg Nesterov   pid namespaces: r...
650
  	write_lock_irq(&tasklist_lock);
482a3767e   Oleg Nesterov   exit: reparent: c...
651
  	forget_original_parent(tsk, &dead);
821c7de71   Oleg Nesterov   exit_notify: fix ...
652
653
  	if (group_dead)
  		kill_orphaned_pgrp(tsk->group_leader, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
654

45cdf5cc0   Oleg Nesterov   kill tracehook_no...
655
656
657
658
659
660
661
662
663
664
665
666
  	if (unlikely(tsk->ptrace)) {
  		int sig = thread_group_leader(tsk) &&
  				thread_group_empty(tsk) &&
  				!ptrace_reparented(tsk) ?
  			tsk->exit_signal : SIGCHLD;
  		autoreap = do_notify_parent(tsk, sig);
  	} else if (thread_group_leader(tsk)) {
  		autoreap = thread_group_empty(tsk) &&
  			do_notify_parent(tsk, tsk->exit_signal);
  	} else {
  		autoreap = true;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
667

53c8f9f19   Oleg Nesterov   make do_notify_pa...
668
  	tsk->exit_state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE;
6c66e7dba   Oleg Nesterov   exit: exit_notify...
669
670
  	if (tsk->exit_state == EXIT_DEAD)
  		list_add(&tsk->ptrace_entry, &dead);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
671

9c3391684   Oleg Nesterov   exit: exit_notify...
672
673
  	/* mt-exec, de_thread() is waiting for group leader */
  	if (unlikely(tsk->signal->notify_count < 0))
6db840fa7   Oleg Nesterov   exec: RT sub-thre...
674
  		wake_up_process(tsk->signal->group_exit_task);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
675
  	write_unlock_irq(&tasklist_lock);
482a3767e   Oleg Nesterov   exit: reparent: c...
676
677
678
679
  	list_for_each_entry_safe(p, n, &dead, ptrace_entry) {
  		list_del_init(&p->ptrace_entry);
  		release_task(p);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
680
  }
e18eecb8b   Jeff Dike   Add generic exit-...
681
682
683
684
685
  #ifdef CONFIG_DEBUG_STACK_USAGE
  /*
   * check_stack_usage - report a new record for least free stack at exit
   *
   * Compares the unused stack of the current task against the lowest value
   * recorded so far and logs a message when a new low-water mark is reached.
   * The unlocked comparison is a fast path; the value is re-checked under
   * low_water_lock before it is updated.
   */
  static void check_stack_usage(void)
  {
  	static DEFINE_SPINLOCK(low_water_lock);
  	static int lowest_to_date = THREAD_SIZE;
  	unsigned long free;

  	free = stack_not_used(current);

  	if (free >= lowest_to_date)
  		return;

  	spin_lock(&low_water_lock);
  	if (free < lowest_to_date) {
  		pr_info("%s (%d) used greatest stack depth: %lu bytes left\n",
  			current->comm, task_pid_nr(current), free);
  		lowest_to_date = free;
  	}
  	spin_unlock(&low_water_lock);
  }
  #else
  static inline void check_stack_usage(void) {}
  #endif
9af6528ee   Peter Zijlstra   sched/core: Optim...
704
  /*
   * do_exit - terminate the current task
   * @code: exit code stored in tsk->exit_code
   *
   * Tears down the resources of the current task in a fixed order, notifies
   * its relatives through exit_notify(), and finally calls do_task_dead().
   * Declared __noreturn.
   */
  void __noreturn do_exit(long code)
  {
  	struct task_struct *tsk = current;
  	int group_dead;
  	TASKS_RCU(int tasks_rcu_i);

  	profile_task_exit(tsk);
  	kcov_task_exit(tsk);

  	WARN_ON(blk_needs_flush_plug(tsk));

  	if (unlikely(in_interrupt()))
  		panic("Aiee, killing interrupt handler!");
  	if (unlikely(!tsk->pid))
  		panic("Attempted to kill the idle task!");

  	/*
  	 * If do_exit is called because this process oopsed, it's possible
  	 * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before
  	 * continuing. Amongst other possible reasons, this is to prevent
  	 * mm_release()->clear_child_tid() from writing to a user-controlled
  	 * kernel address.
  	 */
  	set_fs(USER_DS);

  	ptrace_event(PTRACE_EVENT_EXIT, code);

  	validate_creds_for_do_exit(tsk);

  	/*
  	 * We're taking recursive faults here in do_exit. Safest is to just
  	 * leave this task alone and wait for reboot.
  	 */
  	if (unlikely(tsk->flags & PF_EXITING)) {
  		pr_alert("Fixing recursive fault but reboot is needed!\n");
  		/*
  		 * We can do this unlocked here. The futex code uses
  		 * this flag just to verify whether the pi state
  		 * cleanup has been done or not. In the worst case it
  		 * loops once more. We pretend that the cleanup was
  		 * done as there is no way to return. Either the
  		 * OWNER_DIED bit is set by now or we push the blocked
  		 * task into the wait-forever nirvana as well.
  		 */
  		tsk->flags |= PF_EXITPIDONE;
  		set_current_state(TASK_UNINTERRUPTIBLE);
  		schedule();
  	}

  	exit_signals(tsk);  /* sets PF_EXITING */
  	/*
  	 * Ensure that all new tsk->pi_lock acquisitions must observe
  	 * PF_EXITING. Serializes against futex.c:attach_to_pi_owner().
  	 */
  	smp_mb();
  	/*
  	 * Ensure that we must observe the pi_state in exit_mm() ->
  	 * mm_release() -> exit_pi_state_list().
  	 */
  	raw_spin_unlock_wait(&tsk->pi_lock);

  	if (unlikely(in_atomic())) {
  		pr_info("note: %s[%d] exited with preempt_count %d\n",
  			current->comm, task_pid_nr(current),
  			preempt_count());
  		preempt_count_set(PREEMPT_ENABLED);
  	}

  	/* sync mm's RSS info before statistics gathering */
  	if (tsk->mm)
  		sync_mm_rss(tsk->mm);
  	acct_update_integrals(tsk);
  	/* True only for the thread that drops signal->live to zero. */
  	group_dead = atomic_dec_and_test(&tsk->signal->live);
  	if (group_dead) {
  		hrtimer_cancel(&tsk->signal->real_timer);
  		exit_itimers(tsk->signal);
  		if (tsk->mm)
  			setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm);
  	}
  	acct_collect(code, group_dead);
  	if (group_dead)
  		tty_audit_exit();
  	audit_free(tsk);

  	tsk->exit_code = code;
  	taskstats_exit(tsk, group_dead);

  	exit_mm(tsk);
  	if (group_dead)
  		acct_process();
  	trace_sched_process_exit(tsk);

  	exit_sem(tsk);
  	exit_shm(tsk);
  	exit_files(tsk);
  	exit_fs(tsk);
  	if (group_dead)
  		disassociate_ctty(1);
  	exit_task_namespaces(tsk);
  	exit_task_work(tsk);
  	exit_thread(tsk);

  	/*
  	 * Flush inherited counters to the parent - before the parent
  	 * gets woken up by child-exit notifications.
  	 *
  	 * because of cgroup mode, must be called before cgroup_exit()
  	 */
  	perf_event_exit_task(tsk);

  	sched_autogroup_exit_task(tsk);
  	cgroup_exit(tsk);

  	/*
  	 * FIXME: do that only when needed, using sched_exit tracepoint
  	 */
  	flush_ptrace_hw_breakpoint(tsk);

  	TASKS_RCU(preempt_disable());
  	TASKS_RCU(tasks_rcu_i = __srcu_read_lock(&tasks_rcu_exit_srcu));
  	TASKS_RCU(preempt_enable());
  	exit_notify(tsk, group_dead);
  	proc_exit_connector(tsk);
  	mpol_put_task_policy(tsk);
  #ifdef CONFIG_FUTEX
  	if (unlikely(current->pi_state_cache))
  		kfree(current->pi_state_cache);
  #endif
  	/*
  	 * Make sure we are holding no locks:
  	 */
  	debug_check_no_locks_held();
  	/*
  	 * We can do this unlocked here. The futex code uses this flag
  	 * just to verify whether the pi state cleanup has been done
  	 * or not. In the worst case it loops once more.
  	 */
  	tsk->flags |= PF_EXITPIDONE;

  	if (tsk->io_context)
  		exit_io_context(tsk);

  	if (tsk->splice_pipe)
  		free_pipe_info(tsk->splice_pipe);

  	if (tsk->task_frag.page)
  		put_page(tsk->task_frag.page);

  	validate_creds_for_do_exit(tsk);

  	check_stack_usage();
  	preempt_disable();
  	if (tsk->nr_dirtied)
  		__this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
  	exit_rcu();
  	TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i));

  	do_task_dead();
  }
012914dad   Russ Anderson   [patch] MCA recov...
858
  EXPORT_SYMBOL_GPL(do_exit);
9402c95f3   Joe Perches   treewide: remove ...
859
  /*
   * complete_and_exit - signal a completion, then exit the current task
   * @comp: completion to signal; may be NULL, in which case it is skipped
   * @code: exit code handed to do_exit()
   */
  void complete_and_exit(struct completion *comp, long code)
  {
  	if (comp)
  		complete(comp);

  	do_exit(code);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
866
  EXPORT_SYMBOL(complete_and_exit);
754fe8d29   Heiko Carstens   [CVE-2009-0029] S...
867
  /*
   * exit(2): terminate the calling task.  Only the low 8 bits of
   * @error_code are kept; they are placed in bits 8-15 of the exit code
   * passed to do_exit().
   */
  SYSCALL_DEFINE1(exit, int, error_code)
  {
  	do_exit((error_code&0xff)<<8);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
871
872
873
874
  /*
   * Take down every thread in the group.  This is called by fatal signals
   * as well as by sys_exit_group (below).
   */
9402c95f3   Joe Perches   treewide: remove ...
875
  void
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
876
877
  do_group_exit(int exit_code)
  {
bfc4b0890   Oleg Nesterov   signals: do_group...
878
  	struct signal_struct *sig = current->signal;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
879
  	BUG_ON(exit_code & 0x80); /* core dumps don't get here */
bfc4b0890   Oleg Nesterov   signals: do_group...
880
881
  	if (signal_group_exit(sig))
  		exit_code = sig->group_exit_code;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
882
  	else if (!thread_group_empty(current)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
883
  		struct sighand_struct *const sighand = current->sighand;
a0be55dee   Ionut Alexa   kernel/exit.c: fi...
884

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
885
  		spin_lock_irq(&sighand->siglock);
ed5d2cac1   Oleg Nesterov   exec: rework the ...
886
  		if (signal_group_exit(sig))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
887
888
889
  			/* Another thread got here before we took the lock.  */
  			exit_code = sig->group_exit_code;
  		else {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
890
  			sig->group_exit_code = exit_code;
ed5d2cac1   Oleg Nesterov   exec: rework the ...
891
  			sig->flags = SIGNAL_GROUP_EXIT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
892
893
894
  			zap_other_threads(current);
  		}
  		spin_unlock_irq(&sighand->siglock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
895
896
897
898
899
900
901
902
903
904
905
  	}
  
  	do_exit(exit_code);
  	/* NOTREACHED */
  }
  
  /*
   * this kills every thread in the thread group. Note that any externally
   * wait4()-ing process will get the correct exit code - even if this
   * thread is not the thread group leader.
   */
754fe8d29   Heiko Carstens   [CVE-2009-0029] S...
906
  SYSCALL_DEFINE1(exit_group, int, error_code)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
907
908
  {
  	do_group_exit((error_code & 0xff) << 8);
2ed7c03ec   Heiko Carstens   [CVE-2009-0029] C...
909
910
  	/* NOTREACHED */
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
911
  }
9e8ae01d1   Oleg Nesterov   introduce "struct...
912
913
  struct wait_opts {
  	enum pid_type		wo_type;
9e8ae01d1   Oleg Nesterov   introduce "struct...
914
  	int			wo_flags;
e1eb1ebcc   Richard Kennedy   mm: exit.c reorde...
915
  	struct pid		*wo_pid;
9e8ae01d1   Oleg Nesterov   introduce "struct...
916
917
918
919
  
  	struct siginfo __user	*wo_info;
  	int __user		*wo_stat;
  	struct rusage __user	*wo_rusage;
0b7570e77   Oleg Nesterov   do_wait() wakeup ...
920
  	wait_queue_t		child_wait;
9e8ae01d1   Oleg Nesterov   introduce "struct...
921
922
  	int			notask_error;
  };
989264f46   Oleg Nesterov   do_wait-wakeup-op...
923
924
  static inline
  struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
161550d74   Eric W. Biederman   pid: sys_wait... ...
925
  {
989264f46   Oleg Nesterov   do_wait-wakeup-op...
926
927
928
  	if (type != PIDTYPE_PID)
  		task = task->group_leader;
  	return task->pids[type].pid;
161550d74   Eric W. Biederman   pid: sys_wait... ...
929
  }
989264f46   Oleg Nesterov   do_wait-wakeup-op...
930
  static int eligible_pid(struct wait_opts *wo, struct task_struct *p)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
931
  {
5c01ba49e   Oleg Nesterov   do_wait-wakeup-op...
932
933
934
  	return	wo->wo_type == PIDTYPE_MAX ||
  		task_pid_type(p, wo->wo_type) == wo->wo_pid;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
935

bf959931d   Oleg Nesterov   wait/ptrace: assu...
936
937
  /*
   * eligible_child - decide whether @p is a candidate for this wait
   * @wo: wait options
   * @ptrace: true when @p is being considered as a tracee of the waiter
   * @p: candidate task
   *
   * Returns 1 when @p should be examined further, 0 when it must be skipped.
   */
  static int
  eligible_child(struct wait_opts *wo, bool ptrace, struct task_struct *p)
  {
  	if (!eligible_pid(wo, p))
  		return 0;

  	/*
  	 * Wait for all children (clone and not) if __WALL is set or
  	 * if it is traced by us.
  	 */
  	if (ptrace || (wo->wo_flags & __WALL))
  		return 1;

  	/*
  	 * Otherwise, wait for clone children *only* if __WCLONE is set;
  	 * otherwise, wait for non-clone children *only*.
  	 *
  	 * Note: a "clone" child here is one that reports to its parent
  	 * using a signal other than SIGCHLD, or a non-leader thread which
  	 * we can only see if it is traced by us.
  	 */
  	if ((p->exit_signal != SIGCHLD) ^ !!(wo->wo_flags & __WCLONE))
  		return 0;

  	return 1;
  }
9e8ae01d1   Oleg Nesterov   introduce "struct...
962
963
  /*
   * wait_noreap_copyout - copy wait results to user space without reaping
   * @wo: wait options holding the user-space destinations
   * @p: task being reported; its reference is dropped here
   * @pid: pid to report and to return on success
   * @uid: uid to report
   * @why: CLD_* code stored in si_code
   * @status: value stored in si_status
   *
   * Fills wo->wo_rusage and wo->wo_info when they are set.  Copying stops
   * at the first failure.  Returns @pid on success or the first error.
   */
  static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p,
  				pid_t pid, uid_t uid, int why, int status)
  {
  	struct siginfo __user *infop;
  	int retval = wo->wo_rusage
  		? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;

  	/* @p is not touched after this point. */
  	put_task_struct(p);
  	infop = wo->wo_info;
  	if (infop) {
  		if (!retval)
  			retval = put_user(SIGCHLD, &infop->si_signo);
  		if (!retval)
  			retval = put_user(0, &infop->si_errno);
  		if (!retval)
  			retval = put_user((short)why, &infop->si_code);
  		if (!retval)
  			retval = put_user(pid, &infop->si_pid);
  		if (!retval)
  			retval = put_user(uid, &infop->si_uid);
  		if (!retval)
  			retval = put_user(status, &infop->si_status);
  	}
  	if (!retval)
  		retval = pid;
  	return retval;
  }
  
  /*
   * Handle sys_wait4 work for one task in state EXIT_ZOMBIE.  We hold
   * read_lock(&tasklist_lock) on entry.  If we return zero, we still hold
   * the lock and this task is uninteresting.  If we return nonzero, we have
   * released the lock and the system call should return.
   */
9e8ae01d1   Oleg Nesterov   introduce "struct...
996
  static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
997
  {
f6507f83b   Oleg Nesterov   exit: wait: clean...
998
  	int state, retval, status;
6c5f3e7b4   Pavel Emelyanov   Pidns: make full ...
999
  	pid_t pid = task_pid_vnr(p);
43e13cc10   Oleg Nesterov   cred: remove task...
1000
  	uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p));
9e8ae01d1   Oleg Nesterov   introduce "struct...
1001
  	struct siginfo __user *infop;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1002

9e8ae01d1   Oleg Nesterov   introduce "struct...
1003
  	if (!likely(wo->wo_flags & WEXITED))
98abed020   Roland McGrath   do_wait reorganiz...
1004
  		return 0;
9e8ae01d1   Oleg Nesterov   introduce "struct...
1005
  	if (unlikely(wo->wo_flags & WNOWAIT)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1006
  		int exit_code = p->exit_code;
f3abd4f95   Thiago Farina   kernel/exit.c: fi...
1007
  		int why;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1008

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1009
1010
  		get_task_struct(p);
  		read_unlock(&tasklist_lock);
1029a2b52   Peter Zijlstra   sched, exit: Deal...
1011
  		sched_annotate_sleep();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1012
1013
1014
1015
1016
1017
1018
  		if ((exit_code & 0x7f) == 0) {
  			why = CLD_EXITED;
  			status = exit_code >> 8;
  		} else {
  			why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED;
  			status = exit_code & 0x7f;
  		}
9e8ae01d1   Oleg Nesterov   introduce "struct...
1019
  		return wait_noreap_copyout(wo, p, pid, uid, why, status);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1020
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1021
  	/*
abd50b39e   Oleg Nesterov   wait: introduce E...
1022
  	 * Move the task's state to DEAD/TRACE, only one thread can do this.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1023
  	 */
f6507f83b   Oleg Nesterov   exit: wait: clean...
1024
1025
  	state = (ptrace_reparented(p) && thread_group_leader(p)) ?
  		EXIT_TRACE : EXIT_DEAD;
abd50b39e   Oleg Nesterov   wait: introduce E...
1026
  	if (cmpxchg(&p->exit_state, EXIT_ZOMBIE, state) != EXIT_ZOMBIE)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1027
  		return 0;
986094dfe   Oleg Nesterov   exit: wait: drop ...
1028
1029
1030
1031
1032
  	/*
  	 * We own this thread, nobody else can reap it.
  	 */
  	read_unlock(&tasklist_lock);
  	sched_annotate_sleep();
f6507f83b   Oleg Nesterov   exit: wait: clean...
1033

befca9677   Oleg Nesterov   ptrace: wait_task...
1034
  	/*
f6507f83b   Oleg Nesterov   exit: wait: clean...
1035
  	 * Check thread_group_leader() to exclude the traced sub-threads.
befca9677   Oleg Nesterov   ptrace: wait_task...
1036
  	 */
f6507f83b   Oleg Nesterov   exit: wait: clean...
1037
  	if (state == EXIT_DEAD && thread_group_leader(p)) {
f953ccd00   Oleg Nesterov   exit: wait: don't...
1038
1039
  		struct signal_struct *sig = p->signal;
  		struct signal_struct *psig = current->signal;
1f10206cf   Jiri Pirko   getrusage: fill r...
1040
  		unsigned long maxrss;
0cf55e1ec   Hidetoshi Seto   sched, cputime: I...
1041
  		cputime_t tgutime, tgstime;
3795e1616   Jesper Juhl   [PATCH] Decrease ...
1042

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1043
1044
1045
1046
1047
1048
1049
1050
  		/*
  		 * The resource counters for the group leader are in its
  		 * own task_struct.  Those for dead threads in the group
  		 * are in its signal_struct, as are those for the child
  		 * processes it has previously reaped.  All these
  		 * accumulate in the parent's signal_struct c* fields.
  		 *
  		 * We don't bother to take a lock here to protect these
f953ccd00   Oleg Nesterov   exit: wait: don't...
1051
1052
1053
1054
1055
1056
1057
  		 * p->signal fields because the whole thread group is dead
  		 * and nobody can change them.
  		 *
  		 * psig->stats_lock also protects us from our sub-theads
  		 * which can reap other children at the same time. Until
  		 * we change k_getrusage()-like users to rely on this lock
  		 * we have to take ->siglock as well.
0cf55e1ec   Hidetoshi Seto   sched, cputime: I...
1058
  		 *
a0be55dee   Ionut Alexa   kernel/exit.c: fi...
1059
1060
1061
  		 * We use thread_group_cputime_adjusted() to get times for
  		 * the thread group, which consolidates times for all threads
  		 * in the group including the group leader.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1062
  		 */
e80d0a1ae   Frederic Weisbecker   cputime: Rename t...
1063
  		thread_group_cputime_adjusted(p, &tgutime, &tgstime);
f953ccd00   Oleg Nesterov   exit: wait: don't...
1064
  		spin_lock_irq(&current->sighand->siglock);
e78c34967   Rik van Riel   time, signal: Pro...
1065
  		write_seqlock(&psig->stats_lock);
648616343   Martin Schwidefsky   [S390] cputime: a...
1066
1067
  		psig->cutime += tgutime + sig->cutime;
  		psig->cstime += tgstime + sig->cstime;
6fac4829c   Frederic Weisbecker   cputime: Use acce...
1068
  		psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime;
3795e1616   Jesper Juhl   [PATCH] Decrease ...
1069
1070
1071
1072
1073
1074
1075
1076
  		psig->cmin_flt +=
  			p->min_flt + sig->min_flt + sig->cmin_flt;
  		psig->cmaj_flt +=
  			p->maj_flt + sig->maj_flt + sig->cmaj_flt;
  		psig->cnvcsw +=
  			p->nvcsw + sig->nvcsw + sig->cnvcsw;
  		psig->cnivcsw +=
  			p->nivcsw + sig->nivcsw + sig->cnivcsw;
6eaeeaba3   Eric Dumazet   getrusage(): fill...
1077
1078
1079
1080
1081
1082
  		psig->cinblock +=
  			task_io_get_inblock(p) +
  			sig->inblock + sig->cinblock;
  		psig->coublock +=
  			task_io_get_oublock(p) +
  			sig->oublock + sig->coublock;
1f10206cf   Jiri Pirko   getrusage: fill r...
1083
1084
1085
  		maxrss = max(sig->maxrss, sig->cmaxrss);
  		if (psig->cmaxrss < maxrss)
  			psig->cmaxrss = maxrss;
5995477ab   Andrea Righi   task IO accountin...
1086
1087
  		task_io_accounting_add(&psig->ioac, &p->ioac);
  		task_io_accounting_add(&psig->ioac, &sig->ioac);
e78c34967   Rik van Riel   time, signal: Pro...
1088
  		write_sequnlock(&psig->stats_lock);
f953ccd00   Oleg Nesterov   exit: wait: don't...
1089
  		spin_unlock_irq(&current->sighand->siglock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1090
  	}
9e8ae01d1   Oleg Nesterov   introduce "struct...
1091
1092
  	retval = wo->wo_rusage
  		? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1093
1094
  	status = (p->signal->flags & SIGNAL_GROUP_EXIT)
  		? p->signal->group_exit_code : p->exit_code;
9e8ae01d1   Oleg Nesterov   introduce "struct...
1095
1096
1097
1098
  	if (!retval && wo->wo_stat)
  		retval = put_user(status, wo->wo_stat);
  
  	infop = wo->wo_info;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
  	if (!retval && infop)
  		retval = put_user(SIGCHLD, &infop->si_signo);
  	if (!retval && infop)
  		retval = put_user(0, &infop->si_errno);
  	if (!retval && infop) {
  		int why;
  
  		if ((status & 0x7f) == 0) {
  			why = CLD_EXITED;
  			status >>= 8;
  		} else {
  			why = (status & 0x80) ? CLD_DUMPED : CLD_KILLED;
  			status &= 0x7f;
  		}
  		retval = put_user((short)why, &infop->si_code);
  		if (!retval)
  			retval = put_user(status, &infop->si_status);
  	}
  	if (!retval && infop)
3a515e4a6   Oleg Nesterov   wait_task_continu...
1118
  		retval = put_user(pid, &infop->si_pid);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1119
  	if (!retval && infop)
c69e8d9c0   David Howells   CRED: Use RCU to ...
1120
  		retval = put_user(uid, &infop->si_uid);
2f4e6e2a8   Oleg Nesterov   wait_task_zombie:...
1121
  	if (!retval)
3a515e4a6   Oleg Nesterov   wait_task_continu...
1122
  		retval = pid;
2f4e6e2a8   Oleg Nesterov   wait_task_zombie:...
1123

b43606905   Oleg Nesterov   wait: use EXIT_TR...
1124
  	if (state == EXIT_TRACE) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1125
  		write_lock_irq(&tasklist_lock);
2f4e6e2a8   Oleg Nesterov   wait_task_zombie:...
1126
1127
  		/* We dropped tasklist, ptracer could die and untrace */
  		ptrace_unlink(p);
b43606905   Oleg Nesterov   wait: use EXIT_TR...
1128
1129
1130
1131
1132
  
  		/* If parent wants a zombie, don't release it now */
  		state = EXIT_ZOMBIE;
  		if (do_notify_parent(p, p->exit_signal))
  			state = EXIT_DEAD;
abd50b39e   Oleg Nesterov   wait: introduce E...
1133
  		p->exit_state = state;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1134
1135
  		write_unlock_irq(&tasklist_lock);
  	}
abd50b39e   Oleg Nesterov   wait: introduce E...
1136
  	if (state == EXIT_DEAD)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1137
  		release_task(p);
2f4e6e2a8   Oleg Nesterov   wait_task_zombie:...
1138

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1139
1140
  	return retval;
  }
90bc8d8b1   Oleg Nesterov   do_wait: fix wait...
1141
1142
1143
  /*
   * task_stopped_code - locate the stop code of @p, if it is reportable
   * @p: task to inspect
   * @ptrace: true when the caller is waiting as a ptracer
   *
   * With @ptrace set, returns &p->exit_code when @p is traced and does not
   * have JOBCTL_LISTENING set.  Without it, returns
   * &p->signal->group_exit_code when SIGNAL_STOP_STOPPED is set.  Returns
   * NULL in every other case.
   */
  static int *task_stopped_code(struct task_struct *p, bool ptrace)
  {
  	if (ptrace) {
  		if (task_is_traced(p) && !(p->jobctl & JOBCTL_LISTENING))
  			return &p->exit_code;
  	} else {
  		if (p->signal->flags & SIGNAL_STOP_STOPPED)
  			return &p->signal->group_exit_code;
  	}
  	return NULL;
  }
19e274630   Tejun Heo   job control: reor...
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
  /**
   * wait_task_stopped - Wait for %TASK_STOPPED or %TASK_TRACED
   * @wo: wait options
   * @ptrace: is the wait for ptrace
   * @p: task to wait for
   *
   * Handle sys_wait4() work for %p in state %TASK_STOPPED or %TASK_TRACED.
   *
   * CONTEXT:
   * read_lock(&tasklist_lock), which is released if return value is
   * non-zero.  Also, grabs and releases @p->sighand->siglock.
   *
   * RETURNS:
   * 0 if wait condition didn't exist and search for other wait conditions
   * should continue.  Non-zero return, -errno on failure and @p's pid on
   * success, implies that tasklist_lock is released and wait condition
   * search should terminate.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1169
   */
9e8ae01d1   Oleg Nesterov   introduce "struct...
1170
1171
  static int wait_task_stopped(struct wait_opts *wo,
  				int ptrace, struct task_struct *p)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1172
  {
9e8ae01d1   Oleg Nesterov   introduce "struct...
1173
  	struct siginfo __user *infop;
90bc8d8b1   Oleg Nesterov   do_wait: fix wait...
1174
  	int retval, exit_code, *p_code, why;
ee7c82da8   Oleg Nesterov   wait_task_stopped...
1175
  	uid_t uid = 0; /* unneeded, required by compiler */
c89507835   Oleg Nesterov   wait_task_stopped...
1176
  	pid_t pid;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1177

47918025e   Oleg Nesterov   shift "ptrace imp...
1178
1179
1180
  	/*
  	 * Traditionally we see ptrace'd stopped tasks regardless of options.
  	 */
9e8ae01d1   Oleg Nesterov   introduce "struct...
1181
  	if (!ptrace && !(wo->wo_flags & WUNTRACED))
98abed020   Roland McGrath   do_wait reorganiz...
1182
  		return 0;
19e274630   Tejun Heo   job control: reor...
1183
1184
  	if (!task_stopped_code(p, ptrace))
  		return 0;
ee7c82da8   Oleg Nesterov   wait_task_stopped...
1185
1186
  	exit_code = 0;
  	spin_lock_irq(&p->sighand->siglock);
90bc8d8b1   Oleg Nesterov   do_wait: fix wait...
1187
1188
  	p_code = task_stopped_code(p, ptrace);
  	if (unlikely(!p_code))
ee7c82da8   Oleg Nesterov   wait_task_stopped...
1189
  		goto unlock_sig;
90bc8d8b1   Oleg Nesterov   do_wait: fix wait...
1190
  	exit_code = *p_code;
ee7c82da8   Oleg Nesterov   wait_task_stopped...
1191
1192
  	if (!exit_code)
  		goto unlock_sig;
9e8ae01d1   Oleg Nesterov   introduce "struct...
1193
  	if (!unlikely(wo->wo_flags & WNOWAIT))
90bc8d8b1   Oleg Nesterov   do_wait: fix wait...
1194
  		*p_code = 0;
ee7c82da8   Oleg Nesterov   wait_task_stopped...
1195

8ca937a66   Sasha Levin   cred: use correct...
1196
  	uid = from_kuid_munged(current_user_ns(), task_uid(p));
ee7c82da8   Oleg Nesterov   wait_task_stopped...
1197
1198
1199
  unlock_sig:
  	spin_unlock_irq(&p->sighand->siglock);
  	if (!exit_code)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
  		return 0;
  
  	/*
  	 * Now we are pretty sure this task is interesting.
  	 * Make sure it doesn't get reaped out from under us while we
  	 * give up the lock and then examine it below.  We don't want to
  	 * keep holding onto the tasklist_lock while we call getrusage and
  	 * possibly take page faults for user memory.
  	 */
  	get_task_struct(p);
6c5f3e7b4   Pavel Emelyanov   Pidns: make full ...
1210
  	pid = task_pid_vnr(p);
f470021ad   Roland McGrath   ptrace children r...
1211
  	why = ptrace ? CLD_TRAPPED : CLD_STOPPED;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1212
  	read_unlock(&tasklist_lock);
1029a2b52   Peter Zijlstra   sched, exit: Deal...
1213
  	sched_annotate_sleep();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1214

9e8ae01d1   Oleg Nesterov   introduce "struct...
1215
1216
1217
1218
1219
1220
1221
  	if (unlikely(wo->wo_flags & WNOWAIT))
  		return wait_noreap_copyout(wo, p, pid, uid, why, exit_code);
  
  	retval = wo->wo_rusage
  		? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
  	if (!retval && wo->wo_stat)
  		retval = put_user((exit_code << 8) | 0x7f, wo->wo_stat);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1222

9e8ae01d1   Oleg Nesterov   introduce "struct...
1223
  	infop = wo->wo_info;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1224
1225
1226
1227
1228
  	if (!retval && infop)
  		retval = put_user(SIGCHLD, &infop->si_signo);
  	if (!retval && infop)
  		retval = put_user(0, &infop->si_errno);
  	if (!retval && infop)
6efcae460   Roland McGrath   Fix waitid si_cod...
1229
  		retval = put_user((short)why, &infop->si_code);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1230
1231
1232
  	if (!retval && infop)
  		retval = put_user(exit_code, &infop->si_status);
  	if (!retval && infop)
c89507835   Oleg Nesterov   wait_task_stopped...
1233
  		retval = put_user(pid, &infop->si_pid);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1234
  	if (!retval && infop)
ee7c82da8   Oleg Nesterov   wait_task_stopped...
1235
  		retval = put_user(uid, &infop->si_uid);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1236
  	if (!retval)
c89507835   Oleg Nesterov   wait_task_stopped...
1237
  		retval = pid;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
  	put_task_struct(p);
  
  	BUG_ON(!retval);
  	return retval;
  }
  
  /*
   * Handle do_wait work for one task in a live, non-stopped state.
   * read_lock(&tasklist_lock) on entry.  If we return zero, we still hold
   * the lock and this task is uninteresting.  If we return nonzero, we have
   * released the lock and the system call should return.
   */
9e8ae01d1   Oleg Nesterov   introduce "struct...
1250
  static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1251
1252
1253
1254
  {
  	int retval;
  	pid_t pid;
  	uid_t uid;
9e8ae01d1   Oleg Nesterov   introduce "struct...
1255
  	if (!unlikely(wo->wo_flags & WCONTINUED))
98abed020   Roland McGrath   do_wait reorganiz...
1256
  		return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1257
1258
1259
1260
1261
1262
1263
1264
1265
  	if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))
  		return 0;
  
  	spin_lock_irq(&p->sighand->siglock);
  	/* Re-check with the lock held.  */
  	if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) {
  		spin_unlock_irq(&p->sighand->siglock);
  		return 0;
  	}
9e8ae01d1   Oleg Nesterov   introduce "struct...
1266
  	if (!unlikely(wo->wo_flags & WNOWAIT))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1267
  		p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
8ca937a66   Sasha Levin   cred: use correct...
1268
  	uid = from_kuid_munged(current_user_ns(), task_uid(p));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1269
  	spin_unlock_irq(&p->sighand->siglock);
6c5f3e7b4   Pavel Emelyanov   Pidns: make full ...
1270
  	pid = task_pid_vnr(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1271
1272
  	get_task_struct(p);
  	read_unlock(&tasklist_lock);
1029a2b52   Peter Zijlstra   sched, exit: Deal...
1273
  	sched_annotate_sleep();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1274

9e8ae01d1   Oleg Nesterov   introduce "struct...
1275
1276
1277
  	if (!wo->wo_info) {
  		retval = wo->wo_rusage
  			? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1278
  		put_task_struct(p);
9e8ae01d1   Oleg Nesterov   introduce "struct...
1279
1280
  		if (!retval && wo->wo_stat)
  			retval = put_user(0xffff, wo->wo_stat);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1281
  		if (!retval)
3a515e4a6   Oleg Nesterov   wait_task_continu...
1282
  			retval = pid;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1283
  	} else {
9e8ae01d1   Oleg Nesterov   introduce "struct...
1284
1285
  		retval = wait_noreap_copyout(wo, p, pid, uid,
  					     CLD_CONTINUED, SIGCONT);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1286
1287
1288
1289
1290
  		BUG_ON(retval == 0);
  	}
  
  	return retval;
  }
98abed020   Roland McGrath   do_wait reorganiz...
1291
1292
1293
  /*
   * Consider @p for a wait by @parent.
   *
9e8ae01d1   Oleg Nesterov   introduce "struct...
1294
   * -ECHILD should be in ->notask_error before the first call.
98abed020   Roland McGrath   do_wait reorganiz...
1295
1296
   * Returns nonzero for a final return, when we have unlocked tasklist_lock.
   * Returns zero if the search for a child should continue;
9e8ae01d1   Oleg Nesterov   introduce "struct...
1297
   * then ->notask_error is 0 if @p is an eligible child,
14dd0b814   Roland McGrath   do_wait: return s...
1298
   * or another error from security_task_wait(), or still -ECHILD.
98abed020   Roland McGrath   do_wait reorganiz...
1299
   */
b6e763f07   Oleg Nesterov   wait_consider_tas...
1300
1301
  static int wait_consider_task(struct wait_opts *wo, int ptrace,
  				struct task_struct *p)
98abed020   Roland McGrath   do_wait reorganiz...
1302
  {
3245d6aca   Oleg Nesterov   exit: fix race be...
1303
1304
1305
1306
1307
1308
  	/*
  	 * We can race with wait_task_zombie() from another thread.
  	 * Ensure that EXIT_ZOMBIE -> EXIT_DEAD/EXIT_TRACE transition
  	 * can't confuse the checks below.
  	 */
  	int exit_state = ACCESS_ONCE(p->exit_state);
b3ab03160   Oleg Nesterov   wait: completely ...
1309
  	int ret;
3245d6aca   Oleg Nesterov   exit: fix race be...
1310
  	if (unlikely(exit_state == EXIT_DEAD))
b3ab03160   Oleg Nesterov   wait: completely ...
1311
  		return 0;
bf959931d   Oleg Nesterov   wait/ptrace: assu...
1312
  	ret = eligible_child(wo, ptrace, p);
14dd0b814   Roland McGrath   do_wait: return s...
1313
  	if (!ret)
98abed020   Roland McGrath   do_wait reorganiz...
1314
  		return ret;
a2322e1d2   Oleg Nesterov   do_wait() wakeup ...
1315
  	ret = security_task_wait(p);
14dd0b814   Roland McGrath   do_wait: return s...
1316
1317
1318
1319
1320
1321
1322
1323
  	if (unlikely(ret < 0)) {
  		/*
  		 * If we have not yet seen any eligible child,
  		 * then let this error code replace -ECHILD.
  		 * A permission error will give the user a clue
  		 * to look for security policy problems, rather
  		 * than for mysterious wait bugs.
  		 */
9e8ae01d1   Oleg Nesterov   introduce "struct...
1324
1325
  		if (wo->notask_error)
  			wo->notask_error = ret;
78a3d9d56   Oleg Nesterov   do_wait: do take ...
1326
  		return 0;
14dd0b814   Roland McGrath   do_wait: return s...
1327
  	}
3245d6aca   Oleg Nesterov   exit: fix race be...
1328
  	if (unlikely(exit_state == EXIT_TRACE)) {
50b8d2574   Oleg Nesterov   ptrace: partially...
1329
  		/*
abd50b39e   Oleg Nesterov   wait: introduce E...
1330
1331
  		 * ptrace == 0 means we are the natural parent. In this case
  		 * we should clear notask_error, debugger will notify us.
50b8d2574   Oleg Nesterov   ptrace: partially...
1332
  		 */
abd50b39e   Oleg Nesterov   wait: introduce E...
1333
  		if (likely(!ptrace))
50b8d2574   Oleg Nesterov   ptrace: partially...
1334
  			wo->notask_error = 0;
823b018e5   Tejun Heo   job control: Smal...
1335
  		return 0;
50b8d2574   Oleg Nesterov   ptrace: partially...
1336
  	}
823b018e5   Tejun Heo   job control: Smal...
1337

377d75daf   Oleg Nesterov   wait: WSTOPPED|WC...
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
  	if (likely(!ptrace) && unlikely(p->ptrace)) {
  		/*
  		 * If it is traced by its real parent's group, just pretend
  		 * the caller is ptrace_do_wait() and reap this child if it
  		 * is zombie.
  		 *
  		 * This also hides group stop state from real parent; otherwise
  		 * a single stop can be reported twice as group and ptrace stop.
  		 * If a ptracer wants to distinguish these two events for its
  		 * own children it should create a separate process which takes
  		 * the role of real parent.
  		 */
  		if (!ptrace_reparented(p))
  			ptrace = 1;
  	}
45cb24a1d   Tejun Heo   job control: Allo...
1353
  	/* slay zombie? */
3245d6aca   Oleg Nesterov   exit: fix race be...
1354
  	if (exit_state == EXIT_ZOMBIE) {
9b84cca25   Tejun Heo   job control: Fix ...
1355
  		/* we don't reap group leaders with subthreads */
7c733eb3e   Oleg Nesterov   wait: WSTOPPED|WC...
1356
1357
1358
1359
1360
1361
1362
1363
1364
  		if (!delay_group_leader(p)) {
  			/*
  			 * A zombie ptracee is only visible to its ptracer.
  			 * Notification and reaping will be cascaded to the
  			 * real parent when the ptracer detaches.
  			 */
  			if (unlikely(ptrace) || likely(!p->ptrace))
  				return wait_task_zombie(wo, p);
  		}
98abed020   Roland McGrath   do_wait reorganiz...
1365

f470021ad   Roland McGrath   ptrace children r...
1366
  		/*
9b84cca25   Tejun Heo   job control: Fix ...
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
  		 * Allow access to stopped/continued state via zombie by
  		 * falling through.  Clearing of notask_error is complex.
  		 *
  		 * When !@ptrace:
  		 *
  		 * If WEXITED is set, notask_error should naturally be
  		 * cleared.  If not, subset of WSTOPPED|WCONTINUED is set,
  		 * so, if there are live subthreads, there are events to
  		 * wait for.  If all subthreads are dead, it's still safe
  		 * to clear - this function will be called again in finite
  		 * amount time once all the subthreads are released and
  		 * will then return without clearing.
  		 *
  		 * When @ptrace:
  		 *
  		 * Stopped state is per-task and thus can't change once the
  		 * target task dies.  Only continued and exited can happen.
  		 * Clear notask_error if WCONTINUED | WEXITED.
  		 */
  		if (likely(!ptrace) || (wo->wo_flags & (WCONTINUED | WEXITED)))
  			wo->notask_error = 0;
  	} else {
  		/*
  		 * @p is alive and it's gonna stop, continue or exit, so
  		 * there always is something to wait for.
f470021ad   Roland McGrath   ptrace children r...
1392
  		 */
9e8ae01d1   Oleg Nesterov   introduce "struct...
1393
  		wo->notask_error = 0;
f470021ad   Roland McGrath   ptrace children r...
1394
  	}
98abed020   Roland McGrath   do_wait reorganiz...
1395
  	/*
45cb24a1d   Tejun Heo   job control: Allo...
1396
1397
  	 * Wait for stopped.  Depending on @ptrace, different stopped state
  	 * is used and the two don't interact with each other.
98abed020   Roland McGrath   do_wait reorganiz...
1398
  	 */
19e274630   Tejun Heo   job control: reor...
1399
1400
1401
  	ret = wait_task_stopped(wo, ptrace, p);
  	if (ret)
  		return ret;
98abed020   Roland McGrath   do_wait reorganiz...
1402
1403
  
  	/*
45cb24a1d   Tejun Heo   job control: Allo...
1404
1405
1406
  	 * Wait for continued.  There's only one continued state and the
  	 * ptracer can consume it which can confuse the real parent.  Don't
  	 * use WCONTINUED from ptracer.  You don't need or want it.
98abed020   Roland McGrath   do_wait reorganiz...
1407
  	 */
9e8ae01d1   Oleg Nesterov   introduce "struct...
1408
  	return wait_task_continued(wo, p);
98abed020   Roland McGrath   do_wait reorganiz...
1409
1410
1411
1412
1413
  }
  
  /*
   * Do the work of do_wait() for one thread in the group, @tsk.
   *
9e8ae01d1   Oleg Nesterov   introduce "struct...
1414
   * -ECHILD should be in ->notask_error before the first call.
98abed020   Roland McGrath   do_wait reorganiz...
1415
1416
   * Returns nonzero for a final return, when we have unlocked tasklist_lock.
   * Returns zero if the search for a child should continue; then
9e8ae01d1   Oleg Nesterov   introduce "struct...
1417
   * ->notask_error is 0 if there were any eligible children,
14dd0b814   Roland McGrath   do_wait: return s...
1418
   * or another error from security_task_wait(), or still -ECHILD.
98abed020   Roland McGrath   do_wait reorganiz...
1419
   */
9e8ae01d1   Oleg Nesterov   introduce "struct...
1420
  static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)
98abed020   Roland McGrath   do_wait reorganiz...
1421
1422
1423
1424
  {
  	struct task_struct *p;
  
  	list_for_each_entry(p, &tsk->children, sibling) {
9cd80bbb0   Oleg Nesterov   do_wait() optimiz...
1425
  		int ret = wait_consider_task(wo, 0, p);
a0be55dee   Ionut Alexa   kernel/exit.c: fi...
1426

9cd80bbb0   Oleg Nesterov   do_wait() optimiz...
1427
1428
  		if (ret)
  			return ret;
98abed020   Roland McGrath   do_wait reorganiz...
1429
1430
1431
1432
  	}
  
  	return 0;
  }
9e8ae01d1   Oleg Nesterov   introduce "struct...
1433
  static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
98abed020   Roland McGrath   do_wait reorganiz...
1434
1435
  {
  	struct task_struct *p;
f470021ad   Roland McGrath   ptrace children r...
1436
  	list_for_each_entry(p, &tsk->ptraced, ptrace_entry) {
b6e763f07   Oleg Nesterov   wait_consider_tas...
1437
  		int ret = wait_consider_task(wo, 1, p);
a0be55dee   Ionut Alexa   kernel/exit.c: fi...
1438

f470021ad   Roland McGrath   ptrace children r...
1439
  		if (ret)
98abed020   Roland McGrath   do_wait reorganiz...
1440
  			return ret;
98abed020   Roland McGrath   do_wait reorganiz...
1441
1442
1443
1444
  	}
  
  	return 0;
  }
0b7570e77   Oleg Nesterov   do_wait() wakeup ...
1445
1446
1447
1448
1449
1450
  /*
   * Wake function installed on the wait-queue entry embedded in
   * struct wait_opts (->child_wait).  @key is treated as the task the
   * wakeup is about.
   *
   * Returns 0 without waking anybody when that task cannot match the wait:
   * eligible_pid() rejects it, or __WNOTHREAD is set and the waiter recorded
   * in wait->private is not the task's ->parent.  Otherwise the decision is
   * left to default_wake_function().
   */
  static int child_wait_callback(wait_queue_t *wait, unsigned mode,
  				int sync, void *key)
  {
  	struct wait_opts *wo = container_of(wait, struct wait_opts, child_wait);
  	struct task_struct *child = key;

  	if (!eligible_pid(wo, child) ||
  	    ((wo->wo_flags & __WNOTHREAD) && wait->private != child->parent))
  		return 0;

  	return default_wake_function(wait, mode, sync, key);
  }
a7f0765ed   Oleg Nesterov   ptrace: __ptrace_...
1457
1458
  void __wake_up_parent(struct task_struct *p, struct task_struct *parent)
  {
0b7570e77   Oleg Nesterov   do_wait() wakeup ...
1459
1460
  	__wake_up_sync_key(&parent->signal->wait_chldexit,
  				TASK_INTERRUPTIBLE, 1, p);
a7f0765ed   Oleg Nesterov   ptrace: __ptrace_...
1461
  }
9e8ae01d1   Oleg Nesterov   introduce "struct...
1462
  static long do_wait(struct wait_opts *wo)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1463
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1464
  	struct task_struct *tsk;
98abed020   Roland McGrath   do_wait reorganiz...
1465
  	int retval;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1466

9e8ae01d1   Oleg Nesterov   introduce "struct...
1467
  	trace_sched_process_wait(wo->wo_pid);
0a16b6075   Mathieu Desnoyers   tracing, sched: L...
1468

0b7570e77   Oleg Nesterov   do_wait() wakeup ...
1469
1470
1471
  	init_waitqueue_func_entry(&wo->child_wait, child_wait_callback);
  	wo->child_wait.private = current;
  	add_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1472
  repeat:
98abed020   Roland McGrath   do_wait reorganiz...
1473
  	/*
3da56d166   Frans Klaver   kernel: exit: fix...
1474
  	 * If there is nothing that can match our criteria, just get out.
9e8ae01d1   Oleg Nesterov   introduce "struct...
1475
1476
1477
  	 * We will clear ->notask_error to zero if we see any child that
  	 * might later match our criteria, even if we are not able to reap
  	 * it yet.
98abed020   Roland McGrath   do_wait reorganiz...
1478
  	 */
64a16caf5   Oleg Nesterov   do_wait: simplify...
1479
  	wo->notask_error = -ECHILD;
9e8ae01d1   Oleg Nesterov   introduce "struct...
1480
1481
  	if ((wo->wo_type < PIDTYPE_MAX) &&
  	   (!wo->wo_pid || hlist_empty(&wo->wo_pid->tasks[wo->wo_type])))
64a16caf5   Oleg Nesterov   do_wait: simplify...
1482
  		goto notask;
161550d74   Eric W. Biederman   pid: sys_wait... ...
1483

f95d39d10   Oleg Nesterov   do_wait: fix the ...
1484
  	set_current_state(TASK_INTERRUPTIBLE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1485
1486
1487
  	read_lock(&tasklist_lock);
  	tsk = current;
  	do {
64a16caf5   Oleg Nesterov   do_wait: simplify...
1488
1489
1490
  		retval = do_wait_thread(wo, tsk);
  		if (retval)
  			goto end;
9e8ae01d1   Oleg Nesterov   introduce "struct...
1491

64a16caf5   Oleg Nesterov   do_wait: simplify...
1492
1493
  		retval = ptrace_do_wait(wo, tsk);
  		if (retval)
98abed020   Roland McGrath   do_wait reorganiz...
1494
  			goto end;
98abed020   Roland McGrath   do_wait reorganiz...
1495

9e8ae01d1   Oleg Nesterov   introduce "struct...
1496
  		if (wo->wo_flags & __WNOTHREAD)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1497
  			break;
a3f6dfb72   Oleg Nesterov   do_wait: kill the...
1498
  	} while_each_thread(current, tsk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1499
  	read_unlock(&tasklist_lock);
f2cc3eb13   Oleg Nesterov   do_wait: fix secu...
1500

64a16caf5   Oleg Nesterov   do_wait: simplify...
1501
  notask:
9e8ae01d1   Oleg Nesterov   introduce "struct...
1502
1503
  	retval = wo->notask_error;
  	if (!retval && !(wo->wo_flags & WNOHANG)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1504
  		retval = -ERESTARTSYS;
98abed020   Roland McGrath   do_wait reorganiz...
1505
1506
1507
1508
  		if (!signal_pending(current)) {
  			schedule();
  			goto repeat;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1509
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1510
  end:
f95d39d10   Oleg Nesterov   do_wait: fix the ...
1511
  	__set_current_state(TASK_RUNNING);
0b7570e77   Oleg Nesterov   do_wait() wakeup ...
1512
  	remove_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1513
1514
  	return retval;
  }
17da2bd90   Heiko Carstens   [CVE-2009-0029] S...
1515
1516
  /*
   * waitid() system call.
   *
   * @which:   P_ALL, P_PID or P_PGID; anything else is -EINVAL.
   * @upid:    id to wait for; must be > 0 for P_PID and P_PGID.
   * @infop:   optional user siginfo to fill in.
   * @options: must contain at least one of WEXITED|WSTOPPED|WCONTINUED and
   *           nothing outside the accepted mask, else -EINVAL.
   * @ru:      optional user rusage buffer.
   *
   * Returns 0 when do_wait() reported a task (its positive result is not
   * passed on), otherwise do_wait()'s zero/negative result; in the zero
   * case with @infop set, the result of clearing the siginfo fields.
   */
  SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
  		infop, int, options, struct rusage __user *, ru)
  {
  	struct wait_opts wo;
  	struct pid *pid = NULL;
  	enum pid_type type;
  	long ret;

  	if ((options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED|
  			 __WNOTHREAD|__WCLONE|__WALL)) ||
  	    !(options & (WEXITED|WSTOPPED|WCONTINUED)))
  		return -EINVAL;

  	switch (which) {
  	case P_ALL:
  		type = PIDTYPE_MAX;
  		break;
  	case P_PID:
  	case P_PGID:
  		if (upid <= 0)
  			return -EINVAL;
  		type = (which == P_PID) ? PIDTYPE_PID : PIDTYPE_PGID;
  		break;
  	default:
  		return -EINVAL;
  	}

  	/* Takes a reference on the struct pid; dropped by put_pid() below. */
  	if (type < PIDTYPE_MAX)
  		pid = find_get_pid(upid);

  	wo.wo_type	= type;
  	wo.wo_pid	= pid;
  	wo.wo_flags	= options;
  	wo.wo_info	= infop;
  	wo.wo_stat	= NULL;
  	wo.wo_rusage	= ru;
  	ret = do_wait(&wo);

  	if (ret > 0) {
  		ret = 0;
  	} else if (!ret && infop) {
  		/*
  		 * WNOHANG return with nothing to report: zero every field
  		 * we would otherwise have set, so user space can easily
  		 * tell the difference.  Stop at the first failing store.
  		 */
  		ret = put_user(0, &infop->si_signo);
  		if (!ret)
  			ret = put_user(0, &infop->si_errno);
  		if (!ret)
  			ret = put_user(0, &infop->si_code);
  		if (!ret)
  			ret = put_user(0, &infop->si_pid);
  		if (!ret)
  			ret = put_user(0, &infop->si_uid);
  		if (!ret)
  			ret = put_user(0, &infop->si_status);
  	}

  	put_pid(pid);
  	return ret;
  }
754fe8d29   Heiko Carstens   [CVE-2009-0029] S...
1580
1581
  /*
   * wait4() system call.
   *
   * @upid selects what to wait for:
   *   -1   any child               (PIDTYPE_MAX, no struct pid)
   *   < -1 process group -@upid    (PIDTYPE_PGID)
   *   0    the caller's own process group
   *   > 0  the single process @upid (PIDTYPE_PID)
   *
   * @stat_addr: optional user int for the status word.
   * @options:   subset of WNOHANG|WUNTRACED|WCONTINUED|__WNOTHREAD|
   *             __WCLONE|__WALL, anything else is -EINVAL.  WEXITED is
   *             always added before calling do_wait().
   * @ru:        optional user rusage buffer.
   *
   * Returns the result of do_wait(), -EINVAL for bad @options, or -ESRCH
   * when @upid is INT_MIN (its negation is not representable, so no process
   * group can be named that way).
   */
  SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
  		int, options, struct rusage __user *, ru)
  {
  	struct wait_opts wo;
  	struct pid *pid = NULL;
  	enum pid_type type;
  	long ret;

  	if (options & ~(WNOHANG|WUNTRACED|WCONTINUED|
  			__WNOTHREAD|__WCLONE|__WALL))
  		return -EINVAL;

  	/*
  	 * -INT_MIN overflows a signed int, which is undefined behaviour.
  	 * Reject it here, before the "upid < 0" branch negates upid.
  	 */
  	if (upid == INT_MIN)
  		return -ESRCH;

  	if (upid == -1)
  		type = PIDTYPE_MAX;
  	else if (upid < 0) {
  		type = PIDTYPE_PGID;
  		pid = find_get_pid(-upid);
  	} else if (upid == 0) {
  		type = PIDTYPE_PGID;
  		pid = get_task_pid(current, PIDTYPE_PGID);
  	} else /* upid > 0 */ {
  		type = PIDTYPE_PID;
  		pid = find_get_pid(upid);
  	}

  	wo.wo_type	= type;
  	wo.wo_pid	= pid;
  	wo.wo_flags	= options | WEXITED;
  	wo.wo_info	= NULL;
  	wo.wo_stat	= stat_addr;
  	wo.wo_rusage	= ru;
  	ret = do_wait(&wo);
  	/* Drop the struct pid reference taken above. */
  	put_pid(pid);

  	return ret;
  }
  
  #ifdef __ARCH_WANT_SYS_WAITPID
  
  /*
   * sys_waitpid() remains for compatibility. waitpid() should be
   * implemented by calling sys_wait4() from libc.a.
   */
17da2bd90   Heiko Carstens   [CVE-2009-0029] S...
1622
  SYSCALL_DEFINE3(waitpid, pid_t, pid, int __user *, stat_addr, int, options)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1623
1624
1625
1626
1627
  {
  	return sys_wait4(pid, stat_addr, options, NULL);
  }
  
  #endif