Doug / smarc-fsl-linux-kernel

1

/*

1

/*

2

* linux/kernel/exit.c

2

* linux/kernel/exit.c

3

*

3

*

4

5

*/

5

*/

6

7

#include <linux/mm.h>

7

#include <linux/mm.h>

8

#include <linux/slab.h>

8

#include <linux/slab.h>

9

#include <linux/interrupt.h>

9

#include <linux/interrupt.h>

10

#include <linux/module.h>

10

#include <linux/module.h>

11

#include <linux/capability.h>

11

#include <linux/capability.h>

12

#include <linux/completion.h>

12

#include <linux/completion.h>

13

#include <linux/personality.h>

13

#include <linux/personality.h>

14

#include <linux/tty.h>

14

#include <linux/tty.h>

15

#include <linux/iocontext.h>

15

#include <linux/iocontext.h>

16

#include <linux/key.h>

16

#include <linux/key.h>

17

#include <linux/security.h>

17

#include <linux/security.h>

18

#include <linux/cpu.h>

18

#include <linux/cpu.h>

19

#include <linux/acct.h>

19

#include <linux/acct.h>

20

#include <linux/tsacct_kern.h>

20

#include <linux/tsacct_kern.h>

21

#include <linux/file.h>

21

#include <linux/file.h>

22

#include <linux/fdtable.h>

22

#include <linux/fdtable.h>

23

#include <linux/binfmts.h>

23

#include <linux/binfmts.h>

24

#include <linux/nsproxy.h>

24

#include <linux/nsproxy.h>

25

#include <linux/pid_namespace.h>

25

#include <linux/pid_namespace.h>

26

#include <linux/ptrace.h>

26

#include <linux/ptrace.h>

27

#include <linux/profile.h>

27

#include <linux/profile.h>

28

#include <linux/mount.h>

28

#include <linux/mount.h>

29

#include <linux/proc_fs.h>

29

#include <linux/proc_fs.h>

30

#include <linux/kthread.h>

30

#include <linux/kthread.h>

31

#include <linux/mempolicy.h>

31

#include <linux/mempolicy.h>

32

#include <linux/taskstats_kern.h>

32

#include <linux/taskstats_kern.h>

33

#include <linux/delayacct.h>

33

#include <linux/delayacct.h>

34

#include <linux/freezer.h>

34

#include <linux/freezer.h>

35

#include <linux/cgroup.h>

35

#include <linux/cgroup.h>

36

#include <linux/syscalls.h>

36

#include <linux/syscalls.h>

37

#include <linux/signal.h>

37

#include <linux/signal.h>

38

#include <linux/posix-timers.h>

38

#include <linux/posix-timers.h>

39

#include <linux/cn_proc.h>

39

#include <linux/cn_proc.h>

40

#include <linux/mutex.h>

40

#include <linux/mutex.h>

41

#include <linux/futex.h>

41

#include <linux/futex.h>

42

#include <linux/pipe_fs_i.h>

42

#include <linux/pipe_fs_i.h>

43

#include <linux/audit.h> /* for audit_free() */

43

#include <linux/audit.h> /* for audit_free() */

44

#include <linux/resource.h>

44

#include <linux/resource.h>

45

#include <linux/blkdev.h>

45

#include <linux/blkdev.h>

46

#include <linux/task_io_accounting_ops.h>

46

#include <linux/task_io_accounting_ops.h>

47

#include <linux/tracehook.h>

47

#include <linux/tracehook.h>

48

#include <linux/fs_struct.h>

48

#include <linux/fs_struct.h>

49

#include <linux/init_task.h>

49

#include <linux/init_task.h>

50

#include <linux/perf_event.h>

50

#include <linux/perf_event.h>

51

#include <trace/events/sched.h>

51

#include <trace/events/sched.h>

52

#include <linux/hw_breakpoint.h>

52

#include <linux/hw_breakpoint.h>

53

#include <linux/oom.h>

53

#include <linux/oom.h>

54

55

#include <asm/uaccess.h>

55

#include <asm/uaccess.h>

56

#include <asm/unistd.h>

56

#include <asm/unistd.h>

57

#include <asm/pgtable.h>

57

#include <asm/pgtable.h>

58

#include <asm/mmu_context.h>

58

#include <asm/mmu_context.h>

59

60

static void exit_mm(struct task_struct * tsk);

60

static void exit_mm(struct task_struct * tsk);

61

62

/*
 * Unlink @p from the pid hashes and the task lists.
 *
 * @p:          task being removed.
 * @group_dead: when true, @p additionally loses its PGID and SID links,
 *              its entry on the ->tasks list, its ->sibling link, and the
 *              per-CPU process_counts counter is decremented.
 *
 * The global nr_threads counter, the PIDTYPE_PID link and the
 * ->thread_group link are dropped unconditionally.
 *
 * NOTE(review): the only caller visible here, __exit_signal(), runs with
 * tasklist_lock write-locked; confirm the same holds for any new caller.
 */
static void __unhash_process(struct task_struct *p, bool group_dead)
{
	nr_threads--;
	detach_pid(p, PIDTYPE_PID);
	if (group_dead) {
		detach_pid(p, PIDTYPE_PGID);
		detach_pid(p, PIDTYPE_SID);

		list_del_rcu(&p->tasks);
		list_del_init(&p->sibling);
		__this_cpu_dec(process_counts);
	}
	list_del_rcu(&p->thread_group);
}

76

77

/*

77

/*

78

* This function expects the tasklist_lock write-locked.

78

* This function expects the tasklist_lock write-locked.

79

*/

79

*/

80

static void __exit_signal(struct task_struct *tsk)

80

static void __exit_signal(struct task_struct *tsk)

81

{

81

{

82

struct signal_struct *sig = tsk->signal;

82

struct signal_struct *sig = tsk->signal;

83

bool group_dead = thread_group_leader(tsk);

83

bool group_dead = thread_group_leader(tsk);

84

struct sighand_struct *sighand;

84

struct sighand_struct *sighand;

85

struct tty_struct *uninitialized_var(tty);

85

struct tty_struct *uninitialized_var(tty);

86

87

sighand = rcu_dereference_check(tsk->sighand,

87

sighand = rcu_dereference_check(tsk->sighand,

88

rcu_read_lock_held() ||

88

rcu_read_lock_held() ||

89

lockdep_tasklist_lock_is_held());

89

lockdep_tasklist_lock_is_held());

90

spin_lock(&sighand->siglock);

90

spin_lock(&sighand->siglock);

91

92

posix_cpu_timers_exit(tsk);

92

posix_cpu_timers_exit(tsk);

93

if (group_dead) {

93

if (group_dead) {

94

posix_cpu_timers_exit_group(tsk);

94

posix_cpu_timers_exit_group(tsk);

95

tty = sig->tty;

95

tty = sig->tty;

96

sig->tty = NULL;

96

sig->tty = NULL;

97

} else {

97

} else {

98

/*

98

/*

99

* This can only happen if the caller is de_thread().

99

* This can only happen if the caller is de_thread().

100

* FIXME: this is the temporary hack, we should teach

100

* FIXME: this is the temporary hack, we should teach

101

* posix-cpu-timers to handle this case correctly.

101

* posix-cpu-timers to handle this case correctly.

102

*/

102

*/

103

if (unlikely(has_group_leader_pid(tsk)))

103

if (unlikely(has_group_leader_pid(tsk)))

104

posix_cpu_timers_exit_group(tsk);

104

posix_cpu_timers_exit_group(tsk);

105

106

/*

106

/*

107

* If there is any task waiting for the group exit

107

* If there is any task waiting for the group exit

108

* then notify it:

108

* then notify it:

109

*/

109

*/

110

if (sig->notify_count > 0 && !--sig->notify_count)

110

if (sig->notify_count > 0 && !--sig->notify_count)

111

wake_up_process(sig->group_exit_task);

111

wake_up_process(sig->group_exit_task);

112

113

if (tsk == sig->curr_target)

113

if (tsk == sig->curr_target)

114

sig->curr_target = next_thread(tsk);

114

sig->curr_target = next_thread(tsk);

115

/*

115

/*

116

* Accumulate here the counters for all threads but the

116

* Accumulate here the counters for all threads but the

117

* group leader as they die, so they can be added into

117

* group leader as they die, so they can be added into

118

* the process-wide totals when those are taken.

118

* the process-wide totals when those are taken.

119

* The group leader stays around as a zombie as long

119

* The group leader stays around as a zombie as long

120

* as there are other threads. When it gets reaped,

120

* as there are other threads. When it gets reaped,

121

* the exit.c code will add its counts into these totals.

121

* the exit.c code will add its counts into these totals.

122

* We won't ever get here for the group leader, since it

122

* We won't ever get here for the group leader, since it

123

* will have been the last reference on the signal_struct.

123

* will have been the last reference on the signal_struct.

124

*/

124

*/

125

sig->utime = cputime_add(sig->utime, tsk->utime);

125

sig->utime = cputime_add(sig->utime, tsk->utime);

126

sig->stime = cputime_add(sig->stime, tsk->stime);

126

sig->stime = cputime_add(sig->stime, tsk->stime);

127

sig->gtime = cputime_add(sig->gtime, tsk->gtime);

127

sig->gtime = cputime_add(sig->gtime, tsk->gtime);

128

sig->min_flt += tsk->min_flt;

128

sig->min_flt += tsk->min_flt;

129

sig->maj_flt += tsk->maj_flt;

129

sig->maj_flt += tsk->maj_flt;

130

sig->nvcsw += tsk->nvcsw;

130

sig->nvcsw += tsk->nvcsw;

131

sig->nivcsw += tsk->nivcsw;

131

sig->nivcsw += tsk->nivcsw;

132

sig->inblock += task_io_get_inblock(tsk);

132

sig->inblock += task_io_get_inblock(tsk);

133

sig->oublock += task_io_get_oublock(tsk);

133

sig->oublock += task_io_get_oublock(tsk);

134

task_io_accounting_add(&sig->ioac, &tsk->ioac);

134

task_io_accounting_add(&sig->ioac, &tsk->ioac);

135

sig->sum_sched_runtime += tsk->se.sum_exec_runtime;

135

sig->sum_sched_runtime += tsk->se.sum_exec_runtime;

136

}

136

}

137

138

sig->nr_threads--;

138

sig->nr_threads--;

139

__unhash_process(tsk, group_dead);

139

__unhash_process(tsk, group_dead);

140

141

/*

141

/*

142

* Do this under ->siglock, we can race with another thread

142

* Do this under ->siglock, we can race with another thread

143

* doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals.

143

* doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals.

144

*/

144

*/

145

flush_sigqueue(&tsk->pending);

145

flush_sigqueue(&tsk->pending);

146

tsk->sighand = NULL;

146

tsk->sighand = NULL;

147

spin_unlock(&sighand->siglock);

147

spin_unlock(&sighand->siglock);

148

149

__cleanup_sighand(sighand);

149

__cleanup_sighand(sighand);

150

clear_tsk_thread_flag(tsk,TIF_SIGPENDING);

150

clear_tsk_thread_flag(tsk,TIF_SIGPENDING);

151

if (group_dead) {

151

if (group_dead) {

152

flush_sigqueue(&sig->shared_pending);

152

flush_sigqueue(&sig->shared_pending);

153

tty_kref_put(tty);

153

tty_kref_put(tty);

154

}

154

}

155

}

155

}

156

157

static void delayed_put_task_struct(struct rcu_head *rhp)

157

static void delayed_put_task_struct(struct rcu_head *rhp)

158

{

158

{

159

struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);

159

struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);

160

161

perf_event_delayed_put(tsk);

161

perf_event_delayed_put(tsk);

162

trace_sched_process_free(tsk);

162

trace_sched_process_free(tsk);

163

put_task_struct(tsk);

163

put_task_struct(tsk);

164

}

164

}

165

166

167

void release_task(struct task_struct * p)

167

void release_task(struct task_struct * p)

168

{

168

{

169

struct task_struct *leader;

169

struct task_struct *leader;

170

int zap_leader;

170

int zap_leader;

171

repeat:

171

repeat:

172

/* don't need to get the RCU readlock here - the process is dead and

172

/* don't need to get the RCU readlock here - the process is dead and

173

* can't be modifying its own credentials. But shut RCU-lockdep up */

173

* can't be modifying its own credentials. But shut RCU-lockdep up */

174

rcu_read_lock();

174

rcu_read_lock();

175

atomic_dec(&__task_cred(p)->user->processes);

175

atomic_dec(&__task_cred(p)->user->processes);

176

rcu_read_unlock();

176

rcu_read_unlock();

177

178

proc_flush_task(p);

178

proc_flush_task(p);

179

180

write_lock_irq(&tasklist_lock);

180

write_lock_irq(&tasklist_lock);

181

ptrace_release_task(p);

181

ptrace_release_task(p);

182

__exit_signal(p);

182

__exit_signal(p);

183

184

/*

184

/*

185

* If we are the last non-leader member of the thread

185

* If we are the last non-leader member of the thread

186

* group, and the leader is zombie, then notify the

186

* group, and the leader is zombie, then notify the

187

* group leader's parent process. (if it wants notification.)

187

* group leader's parent process. (if it wants notification.)

188

*/

188

*/

189

zap_leader = 0;

189

zap_leader = 0;

190

leader = p->group_leader;

190

leader = p->group_leader;

191

if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) {

191

if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) {

192

BUG_ON(task_detached(leader));

192

BUG_ON(task_detached(leader));

193

do_notify_parent(leader, leader->exit_signal);

193

do_notify_parent(leader, leader->exit_signal);

194

/*

194

/*

195

* If we were the last child thread and the leader has

195

* If we were the last child thread and the leader has

196

* exited already, and the leader's parent ignores SIGCHLD,

196

* exited already, and the leader's parent ignores SIGCHLD,

197

* then we are the one who should release the leader.

197

* then we are the one who should release the leader.

198

*

198

*

199

* do_notify_parent() will have marked it self-reaping in

199

* do_notify_parent() will have marked it self-reaping in

200

* that case.

200

* that case.

201

*/

201

*/

202

zap_leader = task_detached(leader);

202

zap_leader = task_detached(leader);

203

204

/*

204

/*

205

* This maintains the invariant that release_task()

205

* This maintains the invariant that release_task()

206

* only runs on a task in EXIT_DEAD, just for sanity.

206

* only runs on a task in EXIT_DEAD, just for sanity.

207

*/

207

*/

208

if (zap_leader)

208

if (zap_leader)

209

leader->exit_state = EXIT_DEAD;

209

leader->exit_state = EXIT_DEAD;

210

}

210

}

211

212

write_unlock_irq(&tasklist_lock);

212

write_unlock_irq(&tasklist_lock);

213

release_thread(p);

213

release_thread(p);

214

call_rcu(&p->rcu, delayed_put_task_struct);

214

call_rcu(&p->rcu, delayed_put_task_struct);

215

216

p = leader;

216

p = leader;

217

if (unlikely(zap_leader))

217

if (unlikely(zap_leader))

218

goto repeat;

218

goto repeat;

219

}

219

}

220

221

/*

221

/*

222

* This checks not only the pgrp, but falls back on the pid if no

222

* This checks not only the pgrp, but falls back on the pid if no

223

* satisfactory pgrp is found. I dunno - gdb doesn't work correctly

223

* satisfactory pgrp is found. I dunno - gdb doesn't work correctly

224

* without this...

224

* without this...

225

*

225

*

226

* The caller must hold rcu lock or the tasklist lock.

226

* The caller must hold rcu lock or the tasklist lock.

227

*/

227

*/

228

struct pid *session_of_pgrp(struct pid *pgrp)

228

struct pid *session_of_pgrp(struct pid *pgrp)

229

{

229

{

230

struct task_struct *p;

230

struct task_struct *p;

231

struct pid *sid = NULL;

231

struct pid *sid = NULL;

232

233

p = pid_task(pgrp, PIDTYPE_PGID);

233

p = pid_task(pgrp, PIDTYPE_PGID);

234

if (p == NULL)

234

if (p == NULL)

235

p = pid_task(pgrp, PIDTYPE_PID);

235

p = pid_task(pgrp, PIDTYPE_PID);

236

if (p != NULL)

236

if (p != NULL)

237

sid = task_session(p);

237

sid = task_session(p);

238

239

return sid;

239

return sid;

240

}

240

}

241

242

/*

242

/*

243

* Determine if a process group is "orphaned", according to the POSIX

243

* Determine if a process group is "orphaned", according to the POSIX

244

* definition in 2.2.2.52. Orphaned process groups are not to be affected

244

* definition in 2.2.2.52. Orphaned process groups are not to be affected

245

* by terminal-generated stop signals. Newly orphaned process groups are

245

* by terminal-generated stop signals. Newly orphaned process groups are

246

* to receive a SIGHUP and a SIGCONT.

246

* to receive a SIGHUP and a SIGCONT.

247

*

247

*

248

* "I ask you, have you ever known what it is to be an orphan?"

248

* "I ask you, have you ever known what it is to be an orphan?"

249

*/

249

*/

250

static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignored_task)

250

static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignored_task)

251

{

251

{

252

struct task_struct *p;

252

struct task_struct *p;

253

254

do_each_pid_task(pgrp, PIDTYPE_PGID, p) {

254

do_each_pid_task(pgrp, PIDTYPE_PGID, p) {

255

if ((p == ignored_task) ||

255

if ((p == ignored_task) ||

256

(p->exit_state && thread_group_empty(p)) ||

256

(p->exit_state && thread_group_empty(p)) ||

257

is_global_init(p->real_parent))

257

is_global_init(p->real_parent))

258

continue;

258

continue;

259

260

if (task_pgrp(p->real_parent) != pgrp &&

260

if (task_pgrp(p->real_parent) != pgrp &&

261

task_session(p->real_parent) == task_session(p))

261

task_session(p->real_parent) == task_session(p))

262

return 0;

262

return 0;

263

} while_each_pid_task(pgrp, PIDTYPE_PGID, p);

263

} while_each_pid_task(pgrp, PIDTYPE_PGID, p);

264

265

return 1;

265

return 1;

266

}

266

}

267

268

int is_current_pgrp_orphaned(void)

268

int is_current_pgrp_orphaned(void)

269

{

269

{

270

int retval;

270

int retval;

271

272

read_lock(&tasklist_lock);

272

read_lock(&tasklist_lock);

273

retval = will_become_orphaned_pgrp(task_pgrp(current), NULL);

273

retval = will_become_orphaned_pgrp(task_pgrp(current), NULL);

274

read_unlock(&tasklist_lock);

274

read_unlock(&tasklist_lock);

275

276

return retval;

276

return retval;

277

}

277

}

278

279

static int has_stopped_jobs(struct pid *pgrp)

279

static int has_stopped_jobs(struct pid *pgrp)

280

{

280

{

281

int retval = 0;

281

int retval = 0;

282

struct task_struct *p;

282

struct task_struct *p;

283

284

do_each_pid_task(pgrp, PIDTYPE_PGID, p) {

284

do_each_pid_task(pgrp, PIDTYPE_PGID, p) {

285

if (!task_is_stopped(p))

285

if (!task_is_stopped(p))

286

continue;

286

continue;

287

retval = 1;

287

retval = 1;

288

break;

288

break;

289

} while_each_pid_task(pgrp, PIDTYPE_PGID, p);

289

} while_each_pid_task(pgrp, PIDTYPE_PGID, p);

290

return retval;

290

return retval;

291

}

291

}

292

293

/*

293

/*

294

* Check to see if any process groups have become orphaned as

294

* Check to see if any process groups have become orphaned as

295

* a result of our exiting, and if they have any stopped jobs,

295

* a result of our exiting, and if they have any stopped jobs,

296

* send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)

296

* send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)

297

*/

297

*/

298

static void

298

static void

299

kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)

299

kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)

300

{

300

{

301

struct pid *pgrp = task_pgrp(tsk);

301

struct pid *pgrp = task_pgrp(tsk);

302

struct task_struct *ignored_task = tsk;

302

struct task_struct *ignored_task = tsk;

303

304

if (!parent)

304

if (!parent)

305

/* exit: our father is in a different pgrp than

305

/* exit: our father is in a different pgrp than

306

* we are and we were the only connection outside.

306

* we are and we were the only connection outside.

307

*/

307

*/

308

parent = tsk->real_parent;

308

parent = tsk->real_parent;

309

else

309

else

310

/* reparent: our child is in a different pgrp than

310

/* reparent: our child is in a different pgrp than

311

* we are, and it was the only connection outside.

311

* we are, and it was the only connection outside.

312

*/

312

*/

313

ignored_task = NULL;

313

ignored_task = NULL;

314

315

if (task_pgrp(parent) != pgrp &&

315

if (task_pgrp(parent) != pgrp &&

316

task_session(parent) == task_session(tsk) &&

316

task_session(parent) == task_session(tsk) &&

317

will_become_orphaned_pgrp(pgrp, ignored_task) &&

317

will_become_orphaned_pgrp(pgrp, ignored_task) &&

318

has_stopped_jobs(pgrp)) {

318

has_stopped_jobs(pgrp)) {

319

__kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp);

319

__kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp);

320

__kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp);

320

__kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp);

321

}

321

}

322

}

322

}

323

324

/**

324

/**

325

* reparent_to_kthreadd - Reparent the calling kernel thread to kthreadd

325

* reparent_to_kthreadd - Reparent the calling kernel thread to kthreadd

326

*

326

*

327

* If a kernel thread is launched as a result of a system call, or if

327

* If a kernel thread is launched as a result of a system call, or if

328

* it ever exits, it should generally reparent itself to kthreadd so it

328

* it ever exits, it should generally reparent itself to kthreadd so it

329

* isn't in the way of other processes and is correctly cleaned up on exit.

329

* isn't in the way of other processes and is correctly cleaned up on exit.

330

*

330

*

331

* The various task state such as scheduling policy and priority may have

331

* The various task state such as scheduling policy and priority may have

332

* been inherited from a user process, so we reset them to sane values here.

332

* been inherited from a user process, so we reset them to sane values here.

333

*

333

*

334

* NOTE that reparent_to_kthreadd() gives the caller full capabilities.

334

* NOTE that reparent_to_kthreadd() gives the caller full capabilities.

335

*/

335

*/

336

static void reparent_to_kthreadd(void)

336

static void reparent_to_kthreadd(void)

337

{

337

{

338

write_lock_irq(&tasklist_lock);

338

write_lock_irq(&tasklist_lock);

339

340

ptrace_unlink(current);

340

ptrace_unlink(current);

341

/* Reparent to init */

341

/* Reparent to init */

342

current->real_parent = current->parent = kthreadd_task;

342

current->real_parent = current->parent = kthreadd_task;

343

list_move_tail(&current->sibling, &current->real_parent->children);

343

list_move_tail(&current->sibling, &current->real_parent->children);

344

345

/* Set the exit signal to SIGCHLD so we signal init on exit */

345

/* Set the exit signal to SIGCHLD so we signal init on exit */

346

current->exit_signal = SIGCHLD;

346

current->exit_signal = SIGCHLD;

347

348

if (task_nice(current) < 0)

348

if (task_nice(current) < 0)

349

set_user_nice(current, 0);

349

set_user_nice(current, 0);

350

/* cpus_allowed? */

350

/* cpus_allowed? */

351

/* rt_priority? */

351

/* rt_priority? */

352

/* signals? */

352

/* signals? */

353

memcpy(current->signal->rlim, init_task.signal->rlim,

353

memcpy(current->signal->rlim, init_task.signal->rlim,

354

sizeof(current->signal->rlim));

354

sizeof(current->signal->rlim));

355

356

atomic_inc(&init_cred.usage);

356

atomic_inc(&init_cred.usage);

357

commit_creds(&init_cred);

357

commit_creds(&init_cred);

358

write_unlock_irq(&tasklist_lock);

358

write_unlock_irq(&tasklist_lock);

359

}

359

}

360

361

void __set_special_pids(struct pid *pid)

361

void __set_special_pids(struct pid *pid)

362

{

362

{

363

struct task_struct *curr = current->group_leader;

363

struct task_struct *curr = current->group_leader;

364

365

if (task_session(curr) != pid)

365

if (task_session(curr) != pid)

366

change_pid(curr, PIDTYPE_SID, pid);

366

change_pid(curr, PIDTYPE_SID, pid);

367

368

if (task_pgrp(curr) != pid)

368

if (task_pgrp(curr) != pid)

369

change_pid(curr, PIDTYPE_PGID, pid);

369

change_pid(curr, PIDTYPE_PGID, pid);

370

}

370

}

371

372

static void set_special_pids(struct pid *pid)

372

static void set_special_pids(struct pid *pid)

373

{

373

{

374

write_lock_irq(&tasklist_lock);

374

write_lock_irq(&tasklist_lock);

375

__set_special_pids(pid);

375

__set_special_pids(pid);

376

write_unlock_irq(&tasklist_lock);

376

write_unlock_irq(&tasklist_lock);

377

}

377

}

378

379

/*

379

/*

380

* Let kernel threads use this to say that they allow a certain signal.

380

* Let kernel threads use this to say that they allow a certain signal.

381

* Must not be used if kthread was cloned with CLONE_SIGHAND.

381

* Must not be used if kthread was cloned with CLONE_SIGHAND.

382

*/

382

*/

383

int allow_signal(int sig)

383

int allow_signal(int sig)

384

{

384

{

385

if (!valid_signal(sig) || sig < 1)

385

if (!valid_signal(sig) || sig < 1)

386

return -EINVAL;

386

return -EINVAL;

387

388

spin_lock_irq(&current->sighand->siglock);

388

spin_lock_irq(&current->sighand->siglock);

389

/* This is only needed for daemonize()'ed kthreads */

389

/* This is only needed for daemonize()'ed kthreads */

390

sigdelset(&current->blocked, sig);

390

sigdelset(&current->blocked, sig);

391

/*

391

/*

392

* Kernel threads handle their own signals. Let the signal code

392

* Kernel threads handle their own signals. Let the signal code

393

* know it'll be handled, so that they don't get converted to

393

* know it'll be handled, so that they don't get converted to

394

* SIGKILL or just silently dropped.

394

* SIGKILL or just silently dropped.

395

*/

395

*/

396

current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2;

396

current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2;

397

recalc_sigpending();

397

recalc_sigpending();

398

spin_unlock_irq(&current->sighand->siglock);

398

spin_unlock_irq(&current->sighand->siglock);

399

return 0;

399

return 0;

400

}

400

}

401

402

EXPORT_SYMBOL(allow_signal);

402

EXPORT_SYMBOL(allow_signal);

403

404

int disallow_signal(int sig)

404

int disallow_signal(int sig)

405

{

405

{

406

if (!valid_signal(sig) || sig < 1)

406

if (!valid_signal(sig) || sig < 1)

407

return -EINVAL;

407

return -EINVAL;

408

409

spin_lock_irq(&current->sighand->siglock);

409

spin_lock_irq(&current->sighand->siglock);

410

current->sighand->action[(sig)-1].sa.sa_handler = SIG_IGN;

410

current->sighand->action[(sig)-1].sa.sa_handler = SIG_IGN;

411

recalc_sigpending();

411

recalc_sigpending();

412

spin_unlock_irq(&current->sighand->siglock);

412

spin_unlock_irq(&current->sighand->siglock);

413

return 0;

413

return 0;

414

}

414

}

415

416

EXPORT_SYMBOL(disallow_signal);

416

EXPORT_SYMBOL(disallow_signal);

417

418

/*

418

/*

419

* Put all the gunge required to become a kernel thread without

419

* Put all the gunge required to become a kernel thread without

420

* attached user resources in one place where it belongs.

420

* attached user resources in one place where it belongs.

421

*/

421

*/

422

423

void daemonize(const char *name, ...)

423

void daemonize(const char *name, ...)

424

{

424

{

425

va_list args;

425

va_list args;

426

sigset_t blocked;

426

sigset_t blocked;

427

428

va_start(args, name);

428

va_start(args, name);

429

vsnprintf(current->comm, sizeof(current->comm), name, args);

429

vsnprintf(current->comm, sizeof(current->comm), name, args);

430

va_end(args);

430

va_end(args);

431

432

/*

432

/*

433

* If we were started as result of loading a module, close all of the

433

* If we were started as result of loading a module, close all of the

434

* user space pages. We don't need them, and if we didn't close them

434

* user space pages. We don't need them, and if we didn't close them

435

* they would be locked into memory.

435

* they would be locked into memory.

436

*/

436

*/

437

exit_mm(current);

437

exit_mm(current);

438

/*

438

/*

439

* We don't want to have TIF_FREEZE set if the system-wide hibernation

439

* We don't want to have TIF_FREEZE set if the system-wide hibernation

440

* or suspend transition begins right now.

440

* or suspend transition begins right now.

441

*/

441

*/

442

current->flags |= (PF_NOFREEZE | PF_KTHREAD);

442

current->flags |= (PF_NOFREEZE | PF_KTHREAD);

443

444

if (current->nsproxy != &init_nsproxy) {

444

if (current->nsproxy != &init_nsproxy) {

445

get_nsproxy(&init_nsproxy);

445

get_nsproxy(&init_nsproxy);

446

switch_task_namespaces(current, &init_nsproxy);

446

switch_task_namespaces(current, &init_nsproxy);

447

}

447

}

448

set_special_pids(&init_struct_pid);

448

set_special_pids(&init_struct_pid);

449

proc_clear_tty(current);

449

proc_clear_tty(current);

450

451

/* Block and flush all signals */

451

/* Block and flush all signals */

452

sigfillset(&blocked);

452

sigfillset(&blocked);

453

sigprocmask(SIG_BLOCK, &blocked, NULL);

453

sigprocmask(SIG_BLOCK, &blocked, NULL);

454

flush_signals(current);

454

flush_signals(current);

455

456

/* Become as one with the init task */

456

/* Become as one with the init task */

457

458

daemonize_fs_struct();

458

daemonize_fs_struct();

459

exit_files(current);

459

exit_files(current);

460

current->files = init_task.files;

460

current->files = init_task.files;

461

atomic_inc(&current->files->count);

461

atomic_inc(&current->files->count);

462

463

reparent_to_kthreadd();

463

reparent_to_kthreadd();

464

}

464

}

465

466

EXPORT_SYMBOL(daemonize);

466

EXPORT_SYMBOL(daemonize);

467

468

static void close_files(struct files_struct * files)

468

static void close_files(struct files_struct * files)

469

{

469

{

470

int i, j;

470

int i, j;

471

struct fdtable *fdt;

471

struct fdtable *fdt;

472

473

j = 0;

473

j = 0;

474

475

/*

475

/*

476

* It is safe to dereference the fd table without RCU or

476

* It is safe to dereference the fd table without RCU or

477

* ->file_lock because this is the last reference to the

477

* ->file_lock because this is the last reference to the

478

* files structure. But use RCU to shut RCU-lockdep up.

478

* files structure. But use RCU to shut RCU-lockdep up.

479

*/

479

*/

480

rcu_read_lock();

480

rcu_read_lock();

481

fdt = files_fdtable(files);

481

fdt = files_fdtable(files);

482

rcu_read_unlock();

482

rcu_read_unlock();

483

for (;;) {

483

for (;;) {

484

unsigned long set;

484

unsigned long set;

485

i = j * __NFDBITS;

485

i = j * __NFDBITS;

486

if (i >= fdt->max_fds)

486

if (i >= fdt->max_fds)

487

break;

487

break;

488

set = fdt->open_fds->fds_bits[j++];

488

set = fdt->open_fds->fds_bits[j++];

489

while (set) {

489

while (set) {

490

if (set & 1) {

490

if (set & 1) {

491

struct file * file = xchg(&fdt->fd[i], NULL);

491

struct file * file = xchg(&fdt->fd[i], NULL);

492

if (file) {

492

if (file) {

493

filp_close(file, files);

493

filp_close(file, files);

494

cond_resched();

494

cond_resched();

495

}

495

}

496

}

496

}

497

i++;

497

i++;

498

set >>= 1;

498

set >>= 1;

499

}

499

}

500

}

500

}

501

}

501

}

502

503

struct files_struct *get_files_struct(struct task_struct *task)

503

struct files_struct *get_files_struct(struct task_struct *task)

504

{

504

{

505

struct files_struct *files;

505

struct files_struct *files;

506

507

task_lock(task);

507

task_lock(task);

508

files = task->files;

508

files = task->files;

509

if (files)

509

if (files)

510

atomic_inc(&files->count);

510

atomic_inc(&files->count);

511

task_unlock(task);

511

task_unlock(task);

512

513

return files;

513

return files;

514

}

514

}

515

516

void put_files_struct(struct files_struct *files)

516

void put_files_struct(struct files_struct *files)

517

{

517

{

518

struct fdtable *fdt;

518

struct fdtable *fdt;

519

520

if (atomic_dec_and_test(&files->count)) {

520

if (atomic_dec_and_test(&files->count)) {

521

close_files(files);

521

close_files(files);

522

/*

522

/*

523

* Free the fd and fdset arrays if we expanded them.

523

* Free the fd and fdset arrays if we expanded them.

524

* If the fdtable was embedded, pass files for freeing

524

* If the fdtable was embedded, pass files for freeing

525

* at the end of the RCU grace period. Otherwise,

525

* at the end of the RCU grace period. Otherwise,

526

* you can free files immediately.

526

* you can free files immediately.

527

*/

527

*/

528

rcu_read_lock();

528

rcu_read_lock();

529

fdt = files_fdtable(files);

529

fdt = files_fdtable(files);

530

if (fdt != &files->fdtab)

530

if (fdt != &files->fdtab)

531

kmem_cache_free(files_cachep, files);

531

kmem_cache_free(files_cachep, files);

532

free_fdtable(fdt);

532

free_fdtable(fdt);

533

rcu_read_unlock();

533

rcu_read_unlock();

534

}

534

}

535

}

535

}

536

537

void reset_files_struct(struct files_struct *files)

537

void reset_files_struct(struct files_struct *files)

538

{

538

{

539

struct task_struct *tsk = current;

539

struct task_struct *tsk = current;

540

struct files_struct *old;

540

struct files_struct *old;

541

542

old = tsk->files;

542

old = tsk->files;

543

task_lock(tsk);

543

task_lock(tsk);

544

tsk->files = files;

544

tsk->files = files;

545

task_unlock(tsk);

545

task_unlock(tsk);

546

put_files_struct(old);

546

put_files_struct(old);

547

}

547

}

548

549

void exit_files(struct task_struct *tsk)

549

void exit_files(struct task_struct *tsk)

550

{

550

{

551

struct files_struct * files = tsk->files;

551

struct files_struct * files = tsk->files;

552

553

if (files) {

553

if (files) {

554

task_lock(tsk);

554

task_lock(tsk);

555

tsk->files = NULL;

555

tsk->files = NULL;

556

task_unlock(tsk);

556

task_unlock(tsk);

557

put_files_struct(files);

557

put_files_struct(files);

558

}

558

}

559

}

559

}

560

561

#ifdef CONFIG_MM_OWNER

561

#ifdef CONFIG_MM_OWNER

562

/*

562

/*

563

* Task p is exiting and it owned mm, lets find a new owner for it

563

* Task p is exiting and it owned mm, lets find a new owner for it

564

*/

564

*/

565

static inline int

565

static inline int

566

mm_need_new_owner(struct mm_struct *mm, struct task_struct *p)

566

mm_need_new_owner(struct mm_struct *mm, struct task_struct *p)

567

{

567

{

568

/*

568

/*

569

* If there are other users of the mm and the owner (us) is exiting

569

* If there are other users of the mm and the owner (us) is exiting

570

* we need to find a new owner to take on the responsibility.

570

* we need to find a new owner to take on the responsibility.

571

*/

571

*/

572

if (atomic_read(&mm->mm_users) <= 1)

572

if (atomic_read(&mm->mm_users) <= 1)

573

return 0;

573

return 0;

574

if (mm->owner != p)

574

if (mm->owner != p)

575

return 0;

575

return 0;

576

return 1;

576

return 1;

577

}

577

}

578

579

void mm_update_next_owner(struct mm_struct *mm)

579

void mm_update_next_owner(struct mm_struct *mm)

580

{

580

{

581

struct task_struct *c, *g, *p = current;

581

struct task_struct *c, *g, *p = current;

582

583

retry:

583

retry:

584

if (!mm_need_new_owner(mm, p))

584

if (!mm_need_new_owner(mm, p))

585

return;

585

return;

586

587

read_lock(&tasklist_lock);

587

read_lock(&tasklist_lock);

588

/*

588

/*

589

* Search in the children

589

* Search in the children

590

*/

590

*/

591

list_for_each_entry(c, &p->children, sibling) {

591

list_for_each_entry(c, &p->children, sibling) {

592

if (c->mm == mm)

592

if (c->mm == mm)

593

goto assign_new_owner;

593

goto assign_new_owner;

594

}

594

}

595

596

/*

596

/*

597

* Search in the siblings

597

* Search in the siblings

598

*/

598

*/

599

list_for_each_entry(c, &p->real_parent->children, sibling) {

599

list_for_each_entry(c, &p->real_parent->children, sibling) {

600

if (c->mm == mm)

600

if (c->mm == mm)

601

goto assign_new_owner;

601

goto assign_new_owner;

602

}

602

}

603

604

/*

604

/*

605

* Search through everything else. We should not get

605

* Search through everything else. We should not get

606

* here often

606

* here often

607

*/

607

*/

608

do_each_thread(g, c) {

608

do_each_thread(g, c) {

609

if (c->mm == mm)

609

if (c->mm == mm)

610

goto assign_new_owner;

610

goto assign_new_owner;

611

} while_each_thread(g, c);

611

} while_each_thread(g, c);

612

613

read_unlock(&tasklist_lock);

613

read_unlock(&tasklist_lock);

614

/*

614

/*

615

* We found no owner yet mm_users > 1: this implies that we are

615

* We found no owner yet mm_users > 1: this implies that we are

616

* most likely racing with swapoff (try_to_unuse()) or /proc or

616

* most likely racing with swapoff (try_to_unuse()) or /proc or

617

* ptrace or page migration (get_task_mm()). Mark owner as NULL.

617

* ptrace or page migration (get_task_mm()). Mark owner as NULL.

618

*/

618

*/

619

mm->owner = NULL;

619

mm->owner = NULL;

620

return;

620

return;

621

622

assign_new_owner:

622

assign_new_owner:

623

BUG_ON(c == p);

623

BUG_ON(c == p);

624

get_task_struct(c);

624

get_task_struct(c);

625

/*

625

/*

626

* The task_lock protects c->mm from changing.

626

* The task_lock protects c->mm from changing.

627

* We always want mm->owner->mm == mm

627

* We always want mm->owner->mm == mm

628

*/

628

*/

629

task_lock(c);

629

task_lock(c);

630

/*

630

/*

631

* Delay read_unlock() till we have the task_lock()

631

* Delay read_unlock() till we have the task_lock()

632

* to ensure that c does not slip away underneath us

632

* to ensure that c does not slip away underneath us

633

*/

633

*/

634

read_unlock(&tasklist_lock);

634

read_unlock(&tasklist_lock);

635

if (c->mm != mm) {

635

if (c->mm != mm) {

636

task_unlock(c);

636

task_unlock(c);

637

put_task_struct(c);

637

put_task_struct(c);

638

goto retry;

638

goto retry;

639

}

639

}

640

mm->owner = c;

640

mm->owner = c;

641

task_unlock(c);

641

task_unlock(c);

642

put_task_struct(c);

642

put_task_struct(c);

643

}

643

}

644

#endif /* CONFIG_MM_OWNER */

644

#endif /* CONFIG_MM_OWNER */

645

646

/*

646

/*

647

* Turn us into a lazy TLB process if we

647

* Turn us into a lazy TLB process if we

648

* aren't already..

648

* aren't already..

649

*/

649

*/

650

static void exit_mm(struct task_struct * tsk)

650

static void exit_mm(struct task_struct * tsk)

651

{

651

{

652

struct mm_struct *mm = tsk->mm;

652

struct mm_struct *mm = tsk->mm;

653

struct core_state *core_state;

653

struct core_state *core_state;

654

655

mm_release(tsk, mm);

655

mm_release(tsk, mm);

656

if (!mm)

656

if (!mm)

657

return;

657

return;

658

/*

658

/*

659

* Serialize with any possible pending coredump.

659

* Serialize with any possible pending coredump.

660

* We must hold mmap_sem around checking core_state

660

* We must hold mmap_sem around checking core_state

661

* and clearing tsk->mm. The core-inducing thread

661

* and clearing tsk->mm. The core-inducing thread

662

* will increment ->nr_threads for each thread in the

662

* will increment ->nr_threads for each thread in the

663

* group with ->mm != NULL.

663

* group with ->mm != NULL.

664

*/

664

*/

665

down_read(&mm->mmap_sem);

665

down_read(&mm->mmap_sem);

666

core_state = mm->core_state;

666

core_state = mm->core_state;

667

if (core_state) {

667

if (core_state) {

668

struct core_thread self;

668

struct core_thread self;

669

up_read(&mm->mmap_sem);

669

up_read(&mm->mmap_sem);

670

671

self.task = tsk;

671

self.task = tsk;

672

self.next = xchg(&core_state->dumper.next, &self);

672

self.next = xchg(&core_state->dumper.next, &self);

673

/*

673

/*

674

* Implies mb(), the result of xchg() must be visible

674

* Implies mb(), the result of xchg() must be visible

675

* to core_state->dumper.

675

* to core_state->dumper.

676

*/

676

*/

677

if (atomic_dec_and_test(&core_state->nr_threads))

677

if (atomic_dec_and_test(&core_state->nr_threads))

678

complete(&core_state->startup);

678

complete(&core_state->startup);

679

680

for (;;) {

680

for (;;) {

681

set_task_state(tsk, TASK_UNINTERRUPTIBLE);

681

set_task_state(tsk, TASK_UNINTERRUPTIBLE);

682

if (!self.task) /* see coredump_finish() */

682

if (!self.task) /* see coredump_finish() */

683

break;

683

break;

684

schedule();

684

schedule();

685

}

685

}

686

__set_task_state(tsk, TASK_RUNNING);

686

__set_task_state(tsk, TASK_RUNNING);

687

down_read(&mm->mmap_sem);

687

down_read(&mm->mmap_sem);

688

}

688

}

689

atomic_inc(&mm->mm_count);

689

atomic_inc(&mm->mm_count);

690

BUG_ON(mm != tsk->active_mm);

690

BUG_ON(mm != tsk->active_mm);

691

/* more a memory barrier than a real lock */

691

/* more a memory barrier than a real lock */

692

task_lock(tsk);

692

task_lock(tsk);

693

tsk->mm = NULL;

693

tsk->mm = NULL;

694

up_read(&mm->mmap_sem);

694

up_read(&mm->mmap_sem);

695

enter_lazy_tlb(mm, current);

695

enter_lazy_tlb(mm, current);

696

/* We don't want this task to be frozen prematurely */

696

/* We don't want this task to be frozen prematurely */

697

clear_freeze_flag(tsk);

697

clear_freeze_flag(tsk);

698

if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)

698

if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)

699

atomic_dec(&mm->oom_disable_count);

699

atomic_dec(&mm->oom_disable_count);

700

task_unlock(tsk);

700

task_unlock(tsk);

701

mm_update_next_owner(mm);

701

mm_update_next_owner(mm);

702

mmput(mm);

702

mmput(mm);

703

}

703

}

704

705

/*

705

/*

706

* When we die, we re-parent all our children.

706

* When we die, we re-parent all our children.

707

* Try to give them to another thread in our thread

707

* Try to give them to another thread in our thread

708

* group, and if no such member exists, give it to

708

* group, and if no such member exists, give it to

709

* the child reaper process (ie "init") in our pid

709

* the child reaper process (ie "init") in our pid

710

* space.

710

* space.

711

*/

711

*/

712

static struct task_struct *find_new_reaper(struct task_struct *father)

712

static struct task_struct *find_new_reaper(struct task_struct *father)

713

__releases(&tasklist_lock)

713

__releases(&tasklist_lock)

714

__acquires(&tasklist_lock)

714

__acquires(&tasklist_lock)

715

{

715

{

716

struct pid_namespace *pid_ns = task_active_pid_ns(father);

716

struct pid_namespace *pid_ns = task_active_pid_ns(father);

717

struct task_struct *thread;

717

struct task_struct *thread;

718

719

thread = father;

719

thread = father;

720

while_each_thread(father, thread) {

720

while_each_thread(father, thread) {

721

if (thread->flags & PF_EXITING)

721

if (thread->flags & PF_EXITING)

722

continue;

722

continue;

723

if (unlikely(pid_ns->child_reaper == father))

723

if (unlikely(pid_ns->child_reaper == father))

724

pid_ns->child_reaper = thread;

724

pid_ns->child_reaper = thread;

725

return thread;

725

return thread;

726

}

726

}

727

728

if (unlikely(pid_ns->child_reaper == father)) {

728

if (unlikely(pid_ns->child_reaper == father)) {

729

write_unlock_irq(&tasklist_lock);

729

write_unlock_irq(&tasklist_lock);

730

if (unlikely(pid_ns == &init_pid_ns))

730

if (unlikely(pid_ns == &init_pid_ns))

731

panic("Attempted to kill init!");

731

panic("Attempted to kill init!");

732

733

zap_pid_ns_processes(pid_ns);

733

zap_pid_ns_processes(pid_ns);

734

write_lock_irq(&tasklist_lock);

734

write_lock_irq(&tasklist_lock);

735

/*

735

/*

736

* We can not clear ->child_reaper or leave it alone.

736

* We can not clear ->child_reaper or leave it alone.

737

* There may by stealth EXIT_DEAD tasks on ->children,

737

* There may by stealth EXIT_DEAD tasks on ->children,

738

* forget_original_parent() must move them somewhere.

738

* forget_original_parent() must move them somewhere.

739

*/

739

*/

740

pid_ns->child_reaper = init_pid_ns.child_reaper;

740

pid_ns->child_reaper = init_pid_ns.child_reaper;

741

}

741

}

742

743

return pid_ns->child_reaper;

743

return pid_ns->child_reaper;

744

}

744

}

745

746

/*

746

/*

747

* Any that need to be release_task'd are put on the @dead list.

747

* Any that need to be release_task'd are put on the @dead list.

748

*/

748

*/

749

static void reparent_leader(struct task_struct *father, struct task_struct *p,

749

static void reparent_leader(struct task_struct *father, struct task_struct *p,

750

struct list_head *dead)

750

struct list_head *dead)

751

{

751

{

752

list_move_tail(&p->sibling, &p->real_parent->children);

752

list_move_tail(&p->sibling, &p->real_parent->children);

753

754

if (task_detached(p))

754

if (task_detached(p))

755

return;

755

return;

756

/*

756

/*

757

* If this is a threaded reparent there is no need to

757

* If this is a threaded reparent there is no need to

758

* notify anyone anything has happened.

758

* notify anyone anything has happened.

759

*/

759

*/

760

if (same_thread_group(p->real_parent, father))

760

if (same_thread_group(p->real_parent, father))

761

return;

761

return;

762

763

/* We don't want people slaying init. */

763

/* We don't want people slaying init. */

764

p->exit_signal = SIGCHLD;

764

p->exit_signal = SIGCHLD;

765

766

/* If it has exited notify the new parent about this child's death. */

766

/* If it has exited notify the new parent about this child's death. */

767

if (!p->ptrace &&

767

if (!p->ptrace &&

768

p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) {

768

p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) {

769

do_notify_parent(p, p->exit_signal);

769

do_notify_parent(p, p->exit_signal);

770

if (task_detached(p)) {

770

if (task_detached(p)) {

771

p->exit_state = EXIT_DEAD;

771

p->exit_state = EXIT_DEAD;

772

list_move_tail(&p->sibling, dead);

772

list_move_tail(&p->sibling, dead);

773

}

773

}

774

}

774

}

775

776

kill_orphaned_pgrp(p, father);

776

kill_orphaned_pgrp(p, father);

777

}

777

}

778

779

static void forget_original_parent(struct task_struct *father)

779

static void forget_original_parent(struct task_struct *father)

780

{

780

{

781

struct task_struct *p, *n, *reaper;

781

struct task_struct *p, *n, *reaper;

782

LIST_HEAD(dead_children);

782

LIST_HEAD(dead_children);

783

784

write_lock_irq(&tasklist_lock);

784

write_lock_irq(&tasklist_lock);

785

/*

785

/*

786

* Note that exit_ptrace() and find_new_reaper() might

786

* Note that exit_ptrace() and find_new_reaper() might

787

* drop tasklist_lock and reacquire it.

787

* drop tasklist_lock and reacquire it.

788

*/

788

*/

789

exit_ptrace(father);

789

exit_ptrace(father);

790

reaper = find_new_reaper(father);

790

reaper = find_new_reaper(father);

791

792

list_for_each_entry_safe(p, n, &father->children, sibling) {

792

list_for_each_entry_safe(p, n, &father->children, sibling) {

793

struct task_struct *t = p;

793

struct task_struct *t = p;

794

do {

794

do {

795

t->real_parent = reaper;

795

t->real_parent = reaper;

796

if (t->parent == father) {

796

if (t->parent == father) {

797

BUG_ON(t->ptrace);

797

BUG_ON(t->ptrace);

798

t->parent = t->real_parent;

798

t->parent = t->real_parent;

799

}

799

}

800

if (t->pdeath_signal)

800

if (t->pdeath_signal)

801

group_send_sig_info(t->pdeath_signal,

801

group_send_sig_info(t->pdeath_signal,

802

SEND_SIG_NOINFO, t);

802

SEND_SIG_NOINFO, t);

803

} while_each_thread(p, t);

803

} while_each_thread(p, t);

804

reparent_leader(father, p, &dead_children);

804

reparent_leader(father, p, &dead_children);

805

}

805

}

806

write_unlock_irq(&tasklist_lock);

806

write_unlock_irq(&tasklist_lock);

807

808

BUG_ON(!list_empty(&father->children));

808

BUG_ON(!list_empty(&father->children));

809

810

list_for_each_entry_safe(p, n, &dead_children, sibling) {

810

list_for_each_entry_safe(p, n, &dead_children, sibling) {

811

list_del_init(&p->sibling);

811

list_del_init(&p->sibling);

812

release_task(p);

812

release_task(p);

813

}

813

}

814

}

814

}

815

816

/*

816

/*

817

* Send signals to all our closest relatives so that they know

817

* Send signals to all our closest relatives so that they know

818

* to properly mourn us..

818

* to properly mourn us..

819

*/

819

*/

820

static void exit_notify(struct task_struct *tsk, int group_dead)

820

static void exit_notify(struct task_struct *tsk, int group_dead)

821

{

821

{

822

int signal;

823

bool autoreap;

822

bool autoreap;

824

void *cookie;

825

823

826

/*

824

/*

827

* This does two things:

825

* This does two things:

828

*

826

*

829

* A. Make init inherit all the child processes

827

* A. Make init inherit all the child processes

830

* B. Check to see if any process groups have become orphaned

828

* B. Check to see if any process groups have become orphaned

831

* as a result of our exiting, and if they have any stopped

829

* as a result of our exiting, and if they have any stopped

832

* jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)

830

* jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)

833

*/

831

*/

834

forget_original_parent(tsk);

832

forget_original_parent(tsk);

835

exit_task_namespaces(tsk);

833

exit_task_namespaces(tsk);

836

834

837

write_lock_irq(&tasklist_lock);

835

write_lock_irq(&tasklist_lock);

838

if (group_dead)

836

if (group_dead)

839

kill_orphaned_pgrp(tsk->group_leader, NULL);

837

kill_orphaned_pgrp(tsk->group_leader, NULL);

840

838

841

/* Let father know we died

839

/* Let father know we died

842

*

840

*

843

* Thread signals are configurable, but you aren't going to use

841

* Thread signals are configurable, but you aren't going to use

844

* that to send signals to arbitrary processes.

842

* that to send signals to arbitrary processes.

845

* That stops right now.

843

* That stops right now.

846

*

844

*

847

* If the parent exec id doesn't match the exec id we saved

845

* If the parent exec id doesn't match the exec id we saved

848

* when we started then we know the parent has changed security

846

* when we started then we know the parent has changed security

849

* domain.

847

* domain.

850

*

848

*

851

* If our self_exec id doesn't match our parent_exec_id then

849

* If our self_exec id doesn't match our parent_exec_id then

852

* we have changed execution domain as these two values started

850

* we have changed execution domain as these two values started

853

* the same after a fork.

851

* the same after a fork.

854

*/

852

*/

855

if (tsk->exit_signal != SIGCHLD && !task_detached(tsk) &&

853

if (thread_group_leader(tsk) && tsk->exit_signal != SIGCHLD &&

856

(tsk->parent_exec_id != tsk->real_parent->self_exec_id ||

854

(tsk->parent_exec_id != tsk->real_parent->self_exec_id ||

857

tsk->self_exec_id != tsk->parent_exec_id))

855

tsk->self_exec_id != tsk->parent_exec_id))

858

tsk->exit_signal = SIGCHLD;

856

tsk->exit_signal = SIGCHLD;

859

857

860

signal = tracehook_notify_death(tsk, &cookie, group_dead);

858

if (unlikely(tsk->ptrace)) {

861

if (signal >= 0)

859

int sig = thread_group_leader(tsk) &&

862

autoreap = do_notify_parent(tsk, signal);

860

thread_group_empty(tsk) &&

863

else

861

!ptrace_reparented(tsk) ?

864

autoreap = (signal == DEATH_REAP);

862

tsk->exit_signal : SIGCHLD;

863

autoreap = do_notify_parent(tsk, sig);

864

} else if (thread_group_leader(tsk)) {

865

autoreap = thread_group_empty(tsk) &&

866

do_notify_parent(tsk, tsk->exit_signal);

867

} else {

868

autoreap = true;

869

}

865

870

866

tsk->exit_state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE;

871

tsk->exit_state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE;

867

872

868

/* mt-exec, de_thread() is waiting for group leader */

873

/* mt-exec, de_thread() is waiting for group leader */

869

if (unlikely(tsk->signal->notify_count < 0))

874

if (unlikely(tsk->signal->notify_count < 0))

870

wake_up_process(tsk->signal->group_exit_task);

875

wake_up_process(tsk->signal->group_exit_task);

871

write_unlock_irq(&tasklist_lock);

876

write_unlock_irq(&tasklist_lock);

872

877

873

/* If the process is dead, release it - nobody will wait for it */

878

/* If the process is dead, release it - nobody will wait for it */

874

if (autoreap)

879

if (autoreap)

875

release_task(tsk);

880

release_task(tsk);

876

}

881

}

877

882

878

#ifdef CONFIG_DEBUG_STACK_USAGE

883

#ifdef CONFIG_DEBUG_STACK_USAGE

879

static void check_stack_usage(void)

884

static void check_stack_usage(void)

880

{

885

{

881

static DEFINE_SPINLOCK(low_water_lock);

886

static DEFINE_SPINLOCK(low_water_lock);

882

static int lowest_to_date = THREAD_SIZE;

887

static int lowest_to_date = THREAD_SIZE;

883

unsigned long free;

888

unsigned long free;

884

889

885

free = stack_not_used(current);

890

free = stack_not_used(current);

886

891

887

if (free >= lowest_to_date)

892

if (free >= lowest_to_date)

888

return;

893

return;

889

894

890

spin_lock(&low_water_lock);

895

spin_lock(&low_water_lock);

891

if (free < lowest_to_date) {

896

if (free < lowest_to_date) {

892

printk(KERN_WARNING "%s used greatest stack depth: %lu bytes "

897

printk(KERN_WARNING "%s used greatest stack depth: %lu bytes "

893

"left\n",

898

"left\n",

894

current->comm, free);

899

current->comm, free);

895

lowest_to_date = free;

900

lowest_to_date = free;

896

}

901

}

897

spin_unlock(&low_water_lock);

902

spin_unlock(&low_water_lock);

898

}

903

}

899

#else

904

#else

900

static inline void check_stack_usage(void) {}

905

static inline void check_stack_usage(void) {}

901

#endif

906

#endif

902

907

903

NORET_TYPE void do_exit(long code)

908

NORET_TYPE void do_exit(long code)

904

{

909

{

905

struct task_struct *tsk = current;

910

struct task_struct *tsk = current;

906

int group_dead;

911

int group_dead;

907

912

908

profile_task_exit(tsk);

913

profile_task_exit(tsk);

909

914

910

WARN_ON(atomic_read(&tsk->fs_excl));

915

WARN_ON(atomic_read(&tsk->fs_excl));

911

WARN_ON(blk_needs_flush_plug(tsk));

916

WARN_ON(blk_needs_flush_plug(tsk));

912

917

913

if (unlikely(in_interrupt()))

918

if (unlikely(in_interrupt()))

914

panic("Aiee, killing interrupt handler!");

919

panic("Aiee, killing interrupt handler!");

915

if (unlikely(!tsk->pid))

920

if (unlikely(!tsk->pid))

916

panic("Attempted to kill the idle task!");

921

panic("Attempted to kill the idle task!");

917

922

918

/*

923

/*

919

* If do_exit is called because this processes oopsed, it's possible

924

* If do_exit is called because this processes oopsed, it's possible

920

* that get_fs() was left as KERNEL_DS, so reset it to USER_DS before

925

* that get_fs() was left as KERNEL_DS, so reset it to USER_DS before

921

* continuing. Amongst other possible reasons, this is to prevent

926

* continuing. Amongst other possible reasons, this is to prevent

922

* mm_release()->clear_child_tid() from writing to a user-controlled

927

* mm_release()->clear_child_tid() from writing to a user-controlled

923

* kernel address.

928

* kernel address.

924

*/

929

*/

925

set_fs(USER_DS);

930

set_fs(USER_DS);

926

931

927

ptrace_event(PTRACE_EVENT_EXIT, code);

932

ptrace_event(PTRACE_EVENT_EXIT, code);

928

933

929

validate_creds_for_do_exit(tsk);

934

validate_creds_for_do_exit(tsk);

930

935

931

/*

936

/*

932

* We're taking recursive faults here in do_exit. Safest is to just

937

* We're taking recursive faults here in do_exit. Safest is to just

933

* leave this task alone and wait for reboot.

938

* leave this task alone and wait for reboot.

934

*/

939

*/

935

if (unlikely(tsk->flags & PF_EXITING)) {

940

if (unlikely(tsk->flags & PF_EXITING)) {

936

printk(KERN_ALERT

941

printk(KERN_ALERT

937

"Fixing recursive fault but reboot is needed!\n");

942

"Fixing recursive fault but reboot is needed!\n");

938

/*

943

/*

939

* We can do this unlocked here. The futex code uses

944

* We can do this unlocked here. The futex code uses

940

* this flag just to verify whether the pi state

945

* this flag just to verify whether the pi state

941

* cleanup has been done or not. In the worst case it

946

* cleanup has been done or not. In the worst case it

942

* loops once more. We pretend that the cleanup was

947

* loops once more. We pretend that the cleanup was

943

* done as there is no way to return. Either the

948

* done as there is no way to return. Either the

944

* OWNER_DIED bit is set by now or we push the blocked

949

* OWNER_DIED bit is set by now or we push the blocked

945

* task into the wait for ever nirwana as well.

950

* task into the wait for ever nirwana as well.

946

*/

951

*/

947

tsk->flags |= PF_EXITPIDONE;

952

tsk->flags |= PF_EXITPIDONE;

948

set_current_state(TASK_UNINTERRUPTIBLE);

953

set_current_state(TASK_UNINTERRUPTIBLE);

949

schedule();

954

schedule();

950

}

955

}

951

956

952

exit_irq_thread();

957

exit_irq_thread();

953

958

954

exit_signals(tsk); /* sets PF_EXITING */

959

exit_signals(tsk); /* sets PF_EXITING */

955

/*

960

/*

956

* tsk->flags are checked in the futex code to protect against

961

* tsk->flags are checked in the futex code to protect against

957

* an exiting task cleaning up the robust pi futexes.

962

* an exiting task cleaning up the robust pi futexes.

958

*/

963

*/

959

smp_mb();

964

smp_mb();

960

raw_spin_unlock_wait(&tsk->pi_lock);

965

raw_spin_unlock_wait(&tsk->pi_lock);

961

966

962

if (unlikely(in_atomic()))

967

if (unlikely(in_atomic()))

963

printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",

968

printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",

964

current->comm, task_pid_nr(current),

969

current->comm, task_pid_nr(current),

965

preempt_count());

970

preempt_count());

966

971

967

acct_update_integrals(tsk);

972

acct_update_integrals(tsk);

968

/* sync mm's RSS info before statistics gathering */

973

/* sync mm's RSS info before statistics gathering */

969

if (tsk->mm)

974

if (tsk->mm)

970

sync_mm_rss(tsk, tsk->mm);

975

sync_mm_rss(tsk, tsk->mm);

971

group_dead = atomic_dec_and_test(&tsk->signal->live);

976

group_dead = atomic_dec_and_test(&tsk->signal->live);

972

if (group_dead) {

977

if (group_dead) {

973

hrtimer_cancel(&tsk->signal->real_timer);

978

hrtimer_cancel(&tsk->signal->real_timer);

974

exit_itimers(tsk->signal);

979

exit_itimers(tsk->signal);

975

if (tsk->mm)

980

if (tsk->mm)

976

setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm);

981

setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm);

977

}

982

}

978

acct_collect(code, group_dead);

983

acct_collect(code, group_dead);

979

if (group_dead)

984

if (group_dead)

980

tty_audit_exit();

985

tty_audit_exit();

981

if (unlikely(tsk->audit_context))

986

if (unlikely(tsk->audit_context))

982

audit_free(tsk);

987

audit_free(tsk);

983

988

984

tsk->exit_code = code;

989

tsk->exit_code = code;

985

taskstats_exit(tsk, group_dead);

990

taskstats_exit(tsk, group_dead);

986

991

987

exit_mm(tsk);

992

exit_mm(tsk);

988

993

989

if (group_dead)

994

if (group_dead)

990

acct_process();

995

acct_process();

991

trace_sched_process_exit(tsk);

996

trace_sched_process_exit(tsk);

992

997

993

exit_sem(tsk);

998

exit_sem(tsk);

994

exit_files(tsk);

999

exit_files(tsk);

995

exit_fs(tsk);

1000

exit_fs(tsk);

996

check_stack_usage();

1001

check_stack_usage();

997

exit_thread();

1002

exit_thread();

998

1003

999

/*

1004

/*

1000

* Flush inherited counters to the parent - before the parent

1005

* Flush inherited counters to the parent - before the parent

1001

* gets woken up by child-exit notifications.

1006

* gets woken up by child-exit notifications.

1002

*

1007

*

1003

* because of cgroup mode, must be called before cgroup_exit()

1008

* because of cgroup mode, must be called before cgroup_exit()

1004

*/

1009

*/

1005

perf_event_exit_task(tsk);

1010

perf_event_exit_task(tsk);

1006

1011

1007

cgroup_exit(tsk, 1);

1012

cgroup_exit(tsk, 1);

1008

1013

1009

if (group_dead)

1014

if (group_dead)

1010

disassociate_ctty(1);

1015

disassociate_ctty(1);

1011

1016

1012

module_put(task_thread_info(tsk)->exec_domain->module);

1017

module_put(task_thread_info(tsk)->exec_domain->module);

1013

1018

1014

proc_exit_connector(tsk);

1019

proc_exit_connector(tsk);

1015

1020

1016

/*

1021

/*

1017

* FIXME: do that only when needed, using sched_exit tracepoint

1022

* FIXME: do that only when needed, using sched_exit tracepoint

1018

*/

1023

*/

1019

ptrace_put_breakpoints(tsk);

1024

ptrace_put_breakpoints(tsk);

1020

1025

1021

exit_notify(tsk, group_dead);

1026

exit_notify(tsk, group_dead);

1022

#ifdef CONFIG_NUMA

1027

#ifdef CONFIG_NUMA

1023

task_lock(tsk);

1028

task_lock(tsk);

1024

mpol_put(tsk->mempolicy);

1029

mpol_put(tsk->mempolicy);

1025

tsk->mempolicy = NULL;

1030

tsk->mempolicy = NULL;

1026

task_unlock(tsk);

1031

task_unlock(tsk);

1027

#endif

1032

#endif

1028

#ifdef CONFIG_FUTEX

1033

#ifdef CONFIG_FUTEX

1029

if (unlikely(current->pi_state_cache))

1034

if (unlikely(current->pi_state_cache))

1030

kfree(current->pi_state_cache);

1035

kfree(current->pi_state_cache);

1031

#endif

1036

#endif

1032

/*

1037

/*

1033

* Make sure we are holding no locks:

1038

* Make sure we are holding no locks:

1034

*/

1039

*/

1035

debug_check_no_locks_held(tsk);

1040

debug_check_no_locks_held(tsk);

1036

/*

1041

/*

1037

* We can do this unlocked here. The futex code uses this flag

1042

* We can do this unlocked here. The futex code uses this flag

1038

* just to verify whether the pi state cleanup has been done

1043

* just to verify whether the pi state cleanup has been done

1039

* or not. In the worst case it loops once more.

1044

* or not. In the worst case it loops once more.

1040

*/

1045

*/

1041

tsk->flags |= PF_EXITPIDONE;

1046

tsk->flags |= PF_EXITPIDONE;

1042

1047

1043

if (tsk->io_context)

1048

if (tsk->io_context)

1044

exit_io_context(tsk);

1049

exit_io_context(tsk);

1045

1050

1046

if (tsk->splice_pipe)

1051

if (tsk->splice_pipe)

1047

__free_pipe_info(tsk->splice_pipe);

1052

__free_pipe_info(tsk->splice_pipe);

1048

1053

1049

validate_creds_for_do_exit(tsk);

1054

validate_creds_for_do_exit(tsk);

1050

1055

1051

preempt_disable();

1056

preempt_disable();

1052

exit_rcu();

1057

exit_rcu();

1053

/* causes final put_task_struct in finish_task_switch(). */

1058

/* causes final put_task_struct in finish_task_switch(). */

1054

tsk->state = TASK_DEAD;

1059

tsk->state = TASK_DEAD;

1055

schedule();

1060

schedule();

1056

BUG();

1061

BUG();

1057

/* Avoid "noreturn function does return". */

1062

/* Avoid "noreturn function does return". */

1058

for (;;)

1063

for (;;)

1059

cpu_relax(); /* For when BUG is null */

1064

cpu_relax(); /* For when BUG is null */

1060

}

1065

}

1061

1066

1062

EXPORT_SYMBOL_GPL(do_exit);

1067

EXPORT_SYMBOL_GPL(do_exit);

1063

1068

1064

NORET_TYPE void complete_and_exit(struct completion *comp, long code)

1069

NORET_TYPE void complete_and_exit(struct completion *comp, long code)

1065

{

1070

{

1066

if (comp)

1071

if (comp)

1067

complete(comp);

1072

complete(comp);

1068

1073

1069

do_exit(code);

1074

do_exit(code);

1070

}

1075

}

1071

1076

1072

EXPORT_SYMBOL(complete_and_exit);

1077

EXPORT_SYMBOL(complete_and_exit);

1073

1078

1074

SYSCALL_DEFINE1(exit, int, error_code)

1079

SYSCALL_DEFINE1(exit, int, error_code)

1075

{

1080

{

1076

do_exit((error_code&0xff)<<8);

1081

do_exit((error_code&0xff)<<8);

1077

}

1082

}

1078

1083

1079

/*

1084

/*

1080

* Take down every thread in the group. This is called by fatal signals

1085

* Take down every thread in the group. This is called by fatal signals

1081

* as well as by sys_exit_group (below).

1086

* as well as by sys_exit_group (below).

1082

*/

1087

*/

1083

NORET_TYPE void

1088

NORET_TYPE void

1084

do_group_exit(int exit_code)

1089

do_group_exit(int exit_code)

1085

{

1090

{

1086

struct signal_struct *sig = current->signal;

1091

struct signal_struct *sig = current->signal;

1087

1092

1088

BUG_ON(exit_code & 0x80); /* core dumps don't get here */

1093

BUG_ON(exit_code & 0x80); /* core dumps don't get here */

1089

1094

1090

if (signal_group_exit(sig))

1095

if (signal_group_exit(sig))

1091

exit_code = sig->group_exit_code;

1096

exit_code = sig->group_exit_code;

1092

else if (!thread_group_empty(current)) {

1097

else if (!thread_group_empty(current)) {

1093

struct sighand_struct *const sighand = current->sighand;

1098

struct sighand_struct *const sighand = current->sighand;

1094

spin_lock_irq(&sighand->siglock);

1099

spin_lock_irq(&sighand->siglock);

1095

if (signal_group_exit(sig))

1100

if (signal_group_exit(sig))

1096

/* Another thread got here before we took the lock. */

1101

/* Another thread got here before we took the lock. */

1097

exit_code = sig->group_exit_code;

1102

exit_code = sig->group_exit_code;

1098

else {

1103

else {

1099

sig->group_exit_code = exit_code;

1104

sig->group_exit_code = exit_code;

1100

sig->flags = SIGNAL_GROUP_EXIT;

1105

sig->flags = SIGNAL_GROUP_EXIT;

1101

zap_other_threads(current);

1106

zap_other_threads(current);

1102

}

1107

}

1103

spin_unlock_irq(&sighand->siglock);

1108

spin_unlock_irq(&sighand->siglock);

1104

}

1109

}

1105

1110

1106

do_exit(exit_code);

1111

do_exit(exit_code);

1107

/* NOTREACHED */

1112

/* NOTREACHED */

1108

}

1113

}

1109

1114

1110

/*

1115

/*

1111

* this kills every thread in the thread group. Note that any externally

1116

* this kills every thread in the thread group. Note that any externally

1112

* wait4()-ing process will get the correct exit code - even if this

1117

* wait4()-ing process will get the correct exit code - even if this

1113

* thread is not the thread group leader.

1118

* thread is not the thread group leader.

1114

*/

1119

*/

1115

SYSCALL_DEFINE1(exit_group, int, error_code)

1120

SYSCALL_DEFINE1(exit_group, int, error_code)

1116

{

1121

{

1117

do_group_exit((error_code & 0xff) << 8);

1122

do_group_exit((error_code & 0xff) << 8);

1118

/* NOTREACHED */

1123

/* NOTREACHED */

1119

return 0;

1124

return 0;

1120

}

1125

}

1121

1126

1122

struct wait_opts {

1127

struct wait_opts {

1123

enum pid_type wo_type;

1128

enum pid_type wo_type;

1124

int wo_flags;

1129

int wo_flags;

1125

struct pid *wo_pid;

1130

struct pid *wo_pid;

1126

1131

1127

struct siginfo __user *wo_info;

1132

struct siginfo __user *wo_info;

1128

int __user *wo_stat;

1133

int __user *wo_stat;

1129

struct rusage __user *wo_rusage;

1134

struct rusage __user *wo_rusage;

1130

1135

1131

wait_queue_t child_wait;

1136

wait_queue_t child_wait;

1132

int notask_error;

1137

int notask_error;

1133

};

1138

};

1134

1139

1135

static inline

1140

static inline

1136

struct pid *task_pid_type(struct task_struct *task, enum pid_type type)

1141

struct pid *task_pid_type(struct task_struct *task, enum pid_type type)

1137

{

1142

{

1138

if (type != PIDTYPE_PID)

1143

if (type != PIDTYPE_PID)

1139

task = task->group_leader;

1144

task = task->group_leader;

1140

return task->pids[type].pid;

1145

return task->pids[type].pid;

1141

}

1146

}

1142

1147

1143

static int eligible_pid(struct wait_opts *wo, struct task_struct *p)

1148

static int eligible_pid(struct wait_opts *wo, struct task_struct *p)

1144

{

1149

{

1145

return wo->wo_type == PIDTYPE_MAX ||

1150

return wo->wo_type == PIDTYPE_MAX ||

1146

task_pid_type(p, wo->wo_type) == wo->wo_pid;

1151

task_pid_type(p, wo->wo_type) == wo->wo_pid;

1147

}

1152

}

1148

1153

1149

static int eligible_child(struct wait_opts *wo, struct task_struct *p)

1154

static int eligible_child(struct wait_opts *wo, struct task_struct *p)

1150

{

1155

{

1151

if (!eligible_pid(wo, p))

1156

if (!eligible_pid(wo, p))

1152

return 0;

1157

return 0;

1153

/* Wait for all children (clone and not) if __WALL is set;

1158

/* Wait for all children (clone and not) if __WALL is set;

1154

* otherwise, wait for clone children *only* if __WCLONE is

1159

* otherwise, wait for clone children *only* if __WCLONE is

1155

* set; otherwise, wait for non-clone children *only*. (Note:

1160

* set; otherwise, wait for non-clone children *only*. (Note:

1156

* A "clone" child here is one that reports to its parent

1161

* A "clone" child here is one that reports to its parent

1157

* using a signal other than SIGCHLD.) */

1162

* using a signal other than SIGCHLD.) */

1158

if (((p->exit_signal != SIGCHLD) ^ !!(wo->wo_flags & __WCLONE))

1163

if (((p->exit_signal != SIGCHLD) ^ !!(wo->wo_flags & __WCLONE))

1159

&& !(wo->wo_flags & __WALL))

1164

&& !(wo->wo_flags & __WALL))

1160

return 0;

1165

return 0;

1161

1166

1162

return 1;

1167

return 1;

1163

}

1168

}

1164

1169

1165

static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p,

1170

static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p,

1166

pid_t pid, uid_t uid, int why, int status)

1171

pid_t pid, uid_t uid, int why, int status)

1167

{

1172

{

1168

struct siginfo __user *infop;

1173

struct siginfo __user *infop;

1169

int retval = wo->wo_rusage

1174

int retval = wo->wo_rusage

1170

? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;

1175

? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;

1171

1176

1172

put_task_struct(p);

1177

put_task_struct(p);

1173

infop = wo->wo_info;

1178

infop = wo->wo_info;

1174

if (infop) {

1179

if (infop) {

1175

if (!retval)

1180

if (!retval)

1176

retval = put_user(SIGCHLD, &infop->si_signo);

1181

retval = put_user(SIGCHLD, &infop->si_signo);

1177

if (!retval)

1182

if (!retval)

1178

retval = put_user(0, &infop->si_errno);

1183

retval = put_user(0, &infop->si_errno);

1179

if (!retval)

1184

if (!retval)

1180

retval = put_user((short)why, &infop->si_code);

1185

retval = put_user((short)why, &infop->si_code);

1181

if (!retval)

1186

if (!retval)

1182

retval = put_user(pid, &infop->si_pid);

1187

retval = put_user(pid, &infop->si_pid);

1183

if (!retval)

1188

if (!retval)

1184

retval = put_user(uid, &infop->si_uid);

1189

retval = put_user(uid, &infop->si_uid);

1185

if (!retval)

1190

if (!retval)

1186

retval = put_user(status, &infop->si_status);

1191

retval = put_user(status, &infop->si_status);

1187

}

1192

}

1188

if (!retval)

1193

if (!retval)

1189

retval = pid;

1194

retval = pid;

1190

return retval;

1195

return retval;

1191

}

1196

}

1192

1197

1193

/*

1198

/*

1194

* Handle sys_wait4 work for one task in state EXIT_ZOMBIE. We hold

1199

* Handle sys_wait4 work for one task in state EXIT_ZOMBIE. We hold

1195

* read_lock(&tasklist_lock) on entry. If we return zero, we still hold

1200

* read_lock(&tasklist_lock) on entry. If we return zero, we still hold

1196

* the lock and this task is uninteresting. If we return nonzero, we have

1201

* the lock and this task is uninteresting. If we return nonzero, we have

1197

* released the lock and the system call should return.

1202

* released the lock and the system call should return.

1198

*/

1203

*/

1199

static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)

1204

static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)

1200

{

1205

{

1201

unsigned long state;

1206

unsigned long state;

1202

int retval, status, traced;

1207

int retval, status, traced;

1203

pid_t pid = task_pid_vnr(p);

1208

pid_t pid = task_pid_vnr(p);

1204

uid_t uid = __task_cred(p)->uid;

1209

uid_t uid = __task_cred(p)->uid;

1205

struct siginfo __user *infop;

1210

struct siginfo __user *infop;

1206

1211

1207

if (!likely(wo->wo_flags & WEXITED))

1212

if (!likely(wo->wo_flags & WEXITED))

1208

return 0;

1213

return 0;

1209

1214

1210

if (unlikely(wo->wo_flags & WNOWAIT)) {

1215

if (unlikely(wo->wo_flags & WNOWAIT)) {

1211

int exit_code = p->exit_code;

1216

int exit_code = p->exit_code;

1212

int why;

1217

int why;

1213

1218

1214

get_task_struct(p);

1219

get_task_struct(p);

1215

read_unlock(&tasklist_lock);

1220

read_unlock(&tasklist_lock);

1216

if ((exit_code & 0x7f) == 0) {

1221

if ((exit_code & 0x7f) == 0) {

1217

why = CLD_EXITED;

1222

why = CLD_EXITED;

1218

status = exit_code >> 8;

1223

status = exit_code >> 8;

1219

} else {

1224

} else {

1220

why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED;

1225

why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED;

1221

status = exit_code & 0x7f;

1226

status = exit_code & 0x7f;

1222

}

1227

}

1223

return wait_noreap_copyout(wo, p, pid, uid, why, status);

1228

return wait_noreap_copyout(wo, p, pid, uid, why, status);

1224

}

1229

}

1225

1230

1226

/*

1231

/*

1227

* Try to move the task's state to DEAD

1232

* Try to move the task's state to DEAD

1228

* only one thread is allowed to do this:

1233

* only one thread is allowed to do this:

1229

*/

1234

*/

1230

state = xchg(&p->exit_state, EXIT_DEAD);

1235

state = xchg(&p->exit_state, EXIT_DEAD);

1231

if (state != EXIT_ZOMBIE) {

1236

if (state != EXIT_ZOMBIE) {

1232

BUG_ON(state != EXIT_DEAD);

1237

BUG_ON(state != EXIT_DEAD);

1233

return 0;

1238

return 0;

1234

}

1239

}

1235

1240

1236

traced = ptrace_reparented(p);

1241

traced = ptrace_reparented(p);

1237

/*

1242

/*

1238

* It can be ptraced but not reparented, check

1243

* It can be ptraced but not reparented, check

1239

* !task_detached() to filter out sub-threads.

1244

* !task_detached() to filter out sub-threads.

1240

*/

1245

*/

1241

if (likely(!traced) && likely(!task_detached(p))) {

1246

if (likely(!traced) && likely(!task_detached(p))) {

1242

struct signal_struct *psig;

1247

struct signal_struct *psig;

1243

struct signal_struct *sig;

1248

struct signal_struct *sig;

1244

unsigned long maxrss;

1249

unsigned long maxrss;

1245

cputime_t tgutime, tgstime;

1250

cputime_t tgutime, tgstime;

1246

1251

1247

/*

1252

/*

1248

* The resource counters for the group leader are in its

1253

* The resource counters for the group leader are in its

1249

* own task_struct. Those for dead threads in the group

1254

* own task_struct. Those for dead threads in the group

1250

* are in its signal_struct, as are those for the child

1255

* are in its signal_struct, as are those for the child

1251

* processes it has previously reaped. All these

1256

* processes it has previously reaped. All these

1252

* accumulate in the parent's signal_struct c* fields.

1257

* accumulate in the parent's signal_struct c* fields.

1253

*

1258

*

1254

* We don't bother to take a lock here to protect these

1259

* We don't bother to take a lock here to protect these

1255

* p->signal fields, because they are only touched by

1260

* p->signal fields, because they are only touched by

1256

* __exit_signal, which runs with tasklist_lock

1261

* __exit_signal, which runs with tasklist_lock

1257

* write-locked anyway, and so is excluded here. We do

1262

* write-locked anyway, and so is excluded here. We do

1258

* need to protect the access to parent->signal fields,

1263

* need to protect the access to parent->signal fields,

1259

* as other threads in the parent group can be right

1264

* as other threads in the parent group can be right

1260

* here reaping other children at the same time.

1265

* here reaping other children at the same time.

1261

*

1266

*

1262

* We use thread_group_times() to get times for the thread

1267

* We use thread_group_times() to get times for the thread

1263

* group, which consolidates times for all threads in the

1268

* group, which consolidates times for all threads in the

1264

* group including the group leader.

1269

* group including the group leader.

1265

*/

1270

*/

1266

thread_group_times(p, &tgutime, &tgstime);

1271

thread_group_times(p, &tgutime, &tgstime);

1267

spin_lock_irq(&p->real_parent->sighand->siglock);

1272

spin_lock_irq(&p->real_parent->sighand->siglock);

1268

psig = p->real_parent->signal;

1273

psig = p->real_parent->signal;

1269

sig = p->signal;

1274

sig = p->signal;

1270

psig->cutime =

1275

psig->cutime =

1271

cputime_add(psig->cutime,

1276

cputime_add(psig->cutime,

1272

cputime_add(tgutime,

1277

cputime_add(tgutime,

1273

sig->cutime));

1278

sig->cutime));

1274

psig->cstime =

1279

psig->cstime =

1275

cputime_add(psig->cstime,

1280

cputime_add(psig->cstime,

1276

cputime_add(tgstime,

1281

cputime_add(tgstime,

1277

sig->cstime));

1282

sig->cstime));

1278

psig->cgtime =

1283

psig->cgtime =

1279

cputime_add(psig->cgtime,

1284

cputime_add(psig->cgtime,

1280

cputime_add(p->gtime,

1285

cputime_add(p->gtime,

1281

cputime_add(sig->gtime,

1286

cputime_add(sig->gtime,

1282

sig->cgtime)));

1287

sig->cgtime)));

1283

psig->cmin_flt +=

1288

psig->cmin_flt +=

1284

p->min_flt + sig->min_flt + sig->cmin_flt;

1289

p->min_flt + sig->min_flt + sig->cmin_flt;

1285

psig->cmaj_flt +=

1290

psig->cmaj_flt +=

1286

p->maj_flt + sig->maj_flt + sig->cmaj_flt;

1291

p->maj_flt + sig->maj_flt + sig->cmaj_flt;

1287

psig->cnvcsw +=

1292

psig->cnvcsw +=

1288

p->nvcsw + sig->nvcsw + sig->cnvcsw;

1293

p->nvcsw + sig->nvcsw + sig->cnvcsw;

1289

psig->cnivcsw +=

1294

psig->cnivcsw +=

1290

p->nivcsw + sig->nivcsw + sig->cnivcsw;

1295

p->nivcsw + sig->nivcsw + sig->cnivcsw;

1291

psig->cinblock +=

1296

psig->cinblock +=

1292

task_io_get_inblock(p) +

1297

task_io_get_inblock(p) +

1293

sig->inblock + sig->cinblock;

1298

sig->inblock + sig->cinblock;

1294

psig->coublock +=

1299

psig->coublock +=

1295

task_io_get_oublock(p) +

1300

task_io_get_oublock(p) +

1296

sig->oublock + sig->coublock;

1301

sig->oublock + sig->coublock;

1297

maxrss = max(sig->maxrss, sig->cmaxrss);

1302

maxrss = max(sig->maxrss, sig->cmaxrss);

1298

if (psig->cmaxrss < maxrss)

1303

if (psig->cmaxrss < maxrss)

1299

psig->cmaxrss = maxrss;

1304

psig->cmaxrss = maxrss;

1300

task_io_accounting_add(&psig->ioac, &p->ioac);

1305

task_io_accounting_add(&psig->ioac, &p->ioac);

1301

task_io_accounting_add(&psig->ioac, &sig->ioac);

1306

task_io_accounting_add(&psig->ioac, &sig->ioac);

1302

spin_unlock_irq(&p->real_parent->sighand->siglock);

1307

spin_unlock_irq(&p->real_parent->sighand->siglock);

1303

}

1308

}

1304

1309

1305

/*

1310

/*

1306

* Now we are sure this task is interesting, and no other

1311

* Now we are sure this task is interesting, and no other

1307

* thread can reap it because we set its state to EXIT_DEAD.

1312

* thread can reap it because we set its state to EXIT_DEAD.

1308

*/

1313

*/

1309

read_unlock(&tasklist_lock);

1314

read_unlock(&tasklist_lock);

1310

1315

1311

retval = wo->wo_rusage

1316

retval = wo->wo_rusage

1312

? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;

1317

? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;

1313

status = (p->signal->flags & SIGNAL_GROUP_EXIT)

1318

status = (p->signal->flags & SIGNAL_GROUP_EXIT)

1314

? p->signal->group_exit_code : p->exit_code;

1319

? p->signal->group_exit_code : p->exit_code;

1315

if (!retval && wo->wo_stat)

1320

if (!retval && wo->wo_stat)

1316

retval = put_user(status, wo->wo_stat);

1321

retval = put_user(status, wo->wo_stat);

1317

1322

1318

infop = wo->wo_info;

1323

infop = wo->wo_info;

1319

if (!retval && infop)

1324

if (!retval && infop)

1320

retval = put_user(SIGCHLD, &infop->si_signo);

1325

retval = put_user(SIGCHLD, &infop->si_signo);

1321

if (!retval && infop)

1326

if (!retval && infop)

1322

retval = put_user(0, &infop->si_errno);

1327

retval = put_user(0, &infop->si_errno);

1323

if (!retval && infop) {

1328

if (!retval && infop) {

1324

int why;

1329

int why;

1325

1330

1326

if ((status & 0x7f) == 0) {

1331

if ((status & 0x7f) == 0) {

1327

why = CLD_EXITED;

1332

why = CLD_EXITED;

1328

status >>= 8;

1333

status >>= 8;

1329

} else {

1334

} else {

1330

why = (status & 0x80) ? CLD_DUMPED : CLD_KILLED;

1335

why = (status & 0x80) ? CLD_DUMPED : CLD_KILLED;

1331

status &= 0x7f;

1336

status &= 0x7f;

1332

}

1337

}

1333

retval = put_user((short)why, &infop->si_code);

1338

retval = put_user((short)why, &infop->si_code);

1334

if (!retval)

1339

if (!retval)

1335

retval = put_user(status, &infop->si_status);

1340

retval = put_user(status, &infop->si_status);

1336

}

1341

}

1337

if (!retval && infop)

1342

if (!retval && infop)

1338

retval = put_user(pid, &infop->si_pid);

1343

retval = put_user(pid, &infop->si_pid);

1339

if (!retval && infop)

1344

if (!retval && infop)

1340

retval = put_user(uid, &infop->si_uid);

1345

retval = put_user(uid, &infop->si_uid);

1341

if (!retval)

1346

if (!retval)

1342

retval = pid;

1347

retval = pid;

1343

1348

1344

if (traced) {

1349

if (traced) {

1345

write_lock_irq(&tasklist_lock);

1350

write_lock_irq(&tasklist_lock);

1346

/* We dropped tasklist, ptracer could die and untrace */

1351

/* We dropped tasklist, ptracer could die and untrace */

1347

ptrace_unlink(p);

1352

ptrace_unlink(p);

1348

/*

1353

/*

1349

* If this is not a detached task, notify the parent.

1354

* If this is not a detached task, notify the parent.

1350

* If it's still not detached after that, don't release

1355

* If it's still not detached after that, don't release

1351

* it now.

1356

* it now.

1352

*/

1357

*/

1353

if (!task_detached(p)) {

1358

if (!task_detached(p)) {

1354

do_notify_parent(p, p->exit_signal);

1359

do_notify_parent(p, p->exit_signal);

1355

if (!task_detached(p)) {

1360

if (!task_detached(p)) {

1356

p->exit_state = EXIT_ZOMBIE;

1361

p->exit_state = EXIT_ZOMBIE;

1357

p = NULL;

1362

p = NULL;

1358

}

1363

}

1359

}

1364

}

1360

write_unlock_irq(&tasklist_lock);

1365

write_unlock_irq(&tasklist_lock);

1361

}

1366

}

1362

if (p != NULL)

1367

if (p != NULL)

1363

release_task(p);

1368

release_task(p);

1364

1369

1365

return retval;

1370

return retval;

1366

}

1371

}

1367

1372

1368

/*
 * Return a pointer to the stop code of @p that a waiter should report,
 * or NULL if @p has nothing to report.
 *
 * For a ptrace wait the code is p->exit_code, available only while @p is
 * stopped or traced and JOBCTL_LISTENING is clear.  Otherwise it is the
 * group_exit_code of @p's signal struct, available only while
 * SIGNAL_STOP_STOPPED is set.
 */
static int *task_stopped_code(struct task_struct *p, bool ptrace)
{
	if (!ptrace) {
		if (p->signal->flags & SIGNAL_STOP_STOPPED)
			return &p->signal->group_exit_code;
		return NULL;
	}

	if (!task_is_stopped_or_traced(p))
		return NULL;
	if (p->jobctl & JOBCTL_LISTENING)
		return NULL;

	return &p->exit_code;
}

1380

1385

1381

/**

1386

/**

1382

* wait_task_stopped - Wait for %TASK_STOPPED or %TASK_TRACED

1387

* wait_task_stopped - Wait for %TASK_STOPPED or %TASK_TRACED

1383

* @wo: wait options

1388

* @wo: wait options

1384

* @ptrace: is the wait for ptrace

1389

* @ptrace: is the wait for ptrace

1385

* @p: task to wait for

1390

* @p: task to wait for

1386

*

1391

*

1387

* Handle sys_wait4() work for %p in state %TASK_STOPPED or %TASK_TRACED.

1392

* Handle sys_wait4() work for %p in state %TASK_STOPPED or %TASK_TRACED.

1388

*

1393

*

1389

* CONTEXT:

1394

* CONTEXT:

1390

* read_lock(&tasklist_lock), which is released if return value is

1395

* read_lock(&tasklist_lock), which is released if return value is

1391

* non-zero. Also, grabs and releases @p->sighand->siglock.

1396

* non-zero. Also, grabs and releases @p->sighand->siglock.

1392

*

1397

*

1393

* RETURNS:

1398

* RETURNS:

1394

* 0 if wait condition didn't exist and search for other wait conditions

1399

* 0 if wait condition didn't exist and search for other wait conditions

1395

* should continue. Non-zero return, -errno on failure and @p's pid on

1400

* should continue. Non-zero return, -errno on failure and @p's pid on

1396

* success, implies that tasklist_lock is released and wait condition

1401

* success, implies that tasklist_lock is released and wait condition

1397

* search should terminate.

1402

* search should terminate.

1398

*/

1403

*/

1399

static int wait_task_stopped(struct wait_opts *wo,

1404

static int wait_task_stopped(struct wait_opts *wo,

1400

int ptrace, struct task_struct *p)

1405

int ptrace, struct task_struct *p)

1401

{

1406

{

1402

struct siginfo __user *infop;

1407

struct siginfo __user *infop;

1403

int retval, exit_code, *p_code, why;

1408

int retval, exit_code, *p_code, why;

1404

uid_t uid = 0; /* unneeded, required by compiler */

1409

uid_t uid = 0; /* unneeded, required by compiler */

1405

pid_t pid;

1410

pid_t pid;

1406

1411

1407

/*

1412

/*

1408

* Traditionally we see ptrace'd stopped tasks regardless of options.

1413

* Traditionally we see ptrace'd stopped tasks regardless of options.

1409

*/

1414

*/

1410

if (!ptrace && !(wo->wo_flags & WUNTRACED))

1415

if (!ptrace && !(wo->wo_flags & WUNTRACED))

1411

return 0;

1416

return 0;

1412

1417

1413

if (!task_stopped_code(p, ptrace))

1418

if (!task_stopped_code(p, ptrace))

1414

return 0;

1419

return 0;

1415

1420

1416

exit_code = 0;

1421

exit_code = 0;

1417

spin_lock_irq(&p->sighand->siglock);

1422

spin_lock_irq(&p->sighand->siglock);

1418

1423

1419

p_code = task_stopped_code(p, ptrace);

1424

p_code = task_stopped_code(p, ptrace);

1420

if (unlikely(!p_code))

1425

if (unlikely(!p_code))

1421

goto unlock_sig;

1426

goto unlock_sig;

1422

1427

1423

exit_code = *p_code;

1428

exit_code = *p_code;

1424

if (!exit_code)

1429

if (!exit_code)

1425

goto unlock_sig;

1430

goto unlock_sig;

1426

1431

1427

if (!unlikely(wo->wo_flags & WNOWAIT))

1432

if (!unlikely(wo->wo_flags & WNOWAIT))

1428

*p_code = 0;

1433

*p_code = 0;

1429

1434

1430

uid = task_uid(p);

1435

uid = task_uid(p);

1431

unlock_sig:

1436

unlock_sig:

1432

spin_unlock_irq(&p->sighand->siglock);

1437

spin_unlock_irq(&p->sighand->siglock);

1433

if (!exit_code)

1438

if (!exit_code)

1434

return 0;

1439

return 0;

1435

1440

1436

/*

1441

/*

1437

* Now we are pretty sure this task is interesting.

1442

* Now we are pretty sure this task is interesting.

1438

* Make sure it doesn't get reaped out from under us while we

1443

* Make sure it doesn't get reaped out from under us while we

1439

* give up the lock and then examine it below. We don't want to

1444

* give up the lock and then examine it below. We don't want to

1440

* keep holding onto the tasklist_lock while we call getrusage and

1445

* keep holding onto the tasklist_lock while we call getrusage and

1441

* possibly take page faults for user memory.

1446

* possibly take page faults for user memory.

1442

*/

1447

*/

1443

get_task_struct(p);

1448

get_task_struct(p);

1444

pid = task_pid_vnr(p);

1449

pid = task_pid_vnr(p);

1445

why = ptrace ? CLD_TRAPPED : CLD_STOPPED;

1450

why = ptrace ? CLD_TRAPPED : CLD_STOPPED;

1446

read_unlock(&tasklist_lock);

1451

read_unlock(&tasklist_lock);

1447

1452

1448

if (unlikely(wo->wo_flags & WNOWAIT))

1453

if (unlikely(wo->wo_flags & WNOWAIT))

1449

return wait_noreap_copyout(wo, p, pid, uid, why, exit_code);

1454

return wait_noreap_copyout(wo, p, pid, uid, why, exit_code);

1450

1455

1451

retval = wo->wo_rusage

1456

retval = wo->wo_rusage

1452

? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;

1457

? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;

1453

if (!retval && wo->wo_stat)

1458

if (!retval && wo->wo_stat)

1454

retval = put_user((exit_code << 8) | 0x7f, wo->wo_stat);

1459

retval = put_user((exit_code << 8) | 0x7f, wo->wo_stat);

1455

1460

1456

infop = wo->wo_info;

1461

infop = wo->wo_info;

1457

if (!retval && infop)

1462

if (!retval && infop)

1458

retval = put_user(SIGCHLD, &infop->si_signo);

1463

retval = put_user(SIGCHLD, &infop->si_signo);

1459

if (!retval && infop)

1464

if (!retval && infop)

1460

retval = put_user(0, &infop->si_errno);

1465

retval = put_user(0, &infop->si_errno);

1461

if (!retval && infop)

1466

if (!retval && infop)

1462

retval = put_user((short)why, &infop->si_code);

1467

retval = put_user((short)why, &infop->si_code);

1463

if (!retval && infop)

1468

if (!retval && infop)

1464

retval = put_user(exit_code, &infop->si_status);

1469

retval = put_user(exit_code, &infop->si_status);

1465

if (!retval && infop)

1470

if (!retval && infop)

1466

retval = put_user(pid, &infop->si_pid);

1471

retval = put_user(pid, &infop->si_pid);

1467

if (!retval && infop)

1472

if (!retval && infop)

1468

retval = put_user(uid, &infop->si_uid);

1473

retval = put_user(uid, &infop->si_uid);

1469

if (!retval)

1474

if (!retval)

1470

retval = pid;

1475

retval = pid;

1471

put_task_struct(p);

1476

put_task_struct(p);

1472

1477

1473

BUG_ON(!retval);

1478

BUG_ON(!retval);

1474

return retval;

1479

return retval;

1475

}

1480

}

1476

1481

1477

/*

1482

/*

1478

* Handle do_wait work for one task in a live, non-stopped state.

1483

* Handle do_wait work for one task in a live, non-stopped state.

1479

* read_lock(&tasklist_lock) on entry. If we return zero, we still hold

1484

* read_lock(&tasklist_lock) on entry. If we return zero, we still hold

1480

* the lock and this task is uninteresting. If we return nonzero, we have

1485

* the lock and this task is uninteresting. If we return nonzero, we have

1481

* released the lock and the system call should return.

1486

* released the lock and the system call should return.

1482

*/

1487

*/

1483

static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)

1488

static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)

1484

{

1489

{

1485

int retval;

1490

int retval;

1486

pid_t pid;

1491

pid_t pid;

1487

uid_t uid;

1492

uid_t uid;

1488

1493

1489

if (!unlikely(wo->wo_flags & WCONTINUED))

1494

if (!unlikely(wo->wo_flags & WCONTINUED))

1490

return 0;

1495

return 0;

1491

1496

1492

if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))

1497

if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))

1493

return 0;

1498

return 0;

1494

1499

1495

spin_lock_irq(&p->sighand->siglock);

1500

spin_lock_irq(&p->sighand->siglock);

1496

/* Re-check with the lock held. */

1501

/* Re-check with the lock held. */

1497

if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) {

1502

if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) {

1498

spin_unlock_irq(&p->sighand->siglock);

1503

spin_unlock_irq(&p->sighand->siglock);

1499

return 0;

1504

return 0;

1500

}

1505

}

1501

if (!unlikely(wo->wo_flags & WNOWAIT))

1506

if (!unlikely(wo->wo_flags & WNOWAIT))

1502

p->signal->flags &= ~SIGNAL_STOP_CONTINUED;

1507

p->signal->flags &= ~SIGNAL_STOP_CONTINUED;

1503

uid = task_uid(p);

1508

uid = task_uid(p);

1504

spin_unlock_irq(&p->sighand->siglock);

1509

spin_unlock_irq(&p->sighand->siglock);

1505

1510

1506

pid = task_pid_vnr(p);

1511

pid = task_pid_vnr(p);

1507

get_task_struct(p);

1512

get_task_struct(p);

1508

read_unlock(&tasklist_lock);

1513

read_unlock(&tasklist_lock);

1509

1514

1510

if (!wo->wo_info) {

1515

if (!wo->wo_info) {

1511

retval = wo->wo_rusage

1516

retval = wo->wo_rusage

1512

? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;

1517

? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;

1513

put_task_struct(p);

1518

put_task_struct(p);

1514

if (!retval && wo->wo_stat)

1519

if (!retval && wo->wo_stat)

1515

retval = put_user(0xffff, wo->wo_stat);

1520

retval = put_user(0xffff, wo->wo_stat);

1516

if (!retval)

1521

if (!retval)

1517

retval = pid;

1522

retval = pid;

1518

} else {

1523

} else {

1519

retval = wait_noreap_copyout(wo, p, pid, uid,

1524

retval = wait_noreap_copyout(wo, p, pid, uid,

1520

CLD_CONTINUED, SIGCONT);

1525

CLD_CONTINUED, SIGCONT);

1521

BUG_ON(retval == 0);

1526

BUG_ON(retval == 0);

1522

}

1527

}

1523

1528

1524

return retval;

1529

return retval;

1525

}

1530

}

1526

1531

1527

/*

1532

/*

1528

* Consider @p for a wait by @parent.

1533

* Consider @p for a wait by @parent.

1529

*

1534

*

1530

* -ECHILD should be in ->notask_error before the first call.

1535

* -ECHILD should be in ->notask_error before the first call.

1531

* Returns nonzero for a final return, when we have unlocked tasklist_lock.

1536

* Returns nonzero for a final return, when we have unlocked tasklist_lock.

1532

* Returns zero if the search for a child should continue;

1537

* Returns zero if the search for a child should continue;

1533

* then ->notask_error is 0 if @p is an eligible child,

1538

* then ->notask_error is 0 if @p is an eligible child,

1534

* or another error from security_task_wait(), or still -ECHILD.

1539

* or another error from security_task_wait(), or still -ECHILD.

1535

*/

1540

*/

1536

static int wait_consider_task(struct wait_opts *wo, int ptrace,

1541

static int wait_consider_task(struct wait_opts *wo, int ptrace,

1537

struct task_struct *p)

1542

struct task_struct *p)

1538

{

1543

{

1539

int ret = eligible_child(wo, p);

1544

int ret = eligible_child(wo, p);

1540

if (!ret)

1545

if (!ret)

1541

return ret;

1546

return ret;

1542

1547

1543

ret = security_task_wait(p);

1548

ret = security_task_wait(p);

1544

if (unlikely(ret < 0)) {

1549

if (unlikely(ret < 0)) {

1545

/*

1550

/*

1546

* If we have not yet seen any eligible child,

1551

* If we have not yet seen any eligible child,

1547

* then let this error code replace -ECHILD.

1552

* then let this error code replace -ECHILD.

1548

* A permission error will give the user a clue

1553

* A permission error will give the user a clue

1549

* to look for security policy problems, rather

1554

* to look for security policy problems, rather

1550

* than for mysterious wait bugs.

1555

* than for mysterious wait bugs.

1551

*/

1556

*/

1552

if (wo->notask_error)

1557

if (wo->notask_error)

1553

wo->notask_error = ret;

1558

wo->notask_error = ret;

1554

return 0;

1559

return 0;

1555

}

1560

}

1556

1561

1557

/* dead body doesn't have much to contribute */

1562

/* dead body doesn't have much to contribute */

1558

if (p->exit_state == EXIT_DEAD)

1563

if (p->exit_state == EXIT_DEAD)

1559

return 0;

1564

return 0;

1560

1565

1561

/* slay zombie? */

1566

/* slay zombie? */

1562

if (p->exit_state == EXIT_ZOMBIE) {

1567

if (p->exit_state == EXIT_ZOMBIE) {

1563

/*

1568

/*

1564

* A zombie ptracee is only visible to its ptracer.

1569

* A zombie ptracee is only visible to its ptracer.

1565

* Notification and reaping will be cascaded to the real

1570

* Notification and reaping will be cascaded to the real

1566

* parent when the ptracer detaches.

1571

* parent when the ptracer detaches.

1567

*/

1572

*/

1568

if (likely(!ptrace) && unlikely(p->ptrace)) {

1573

if (likely(!ptrace) && unlikely(p->ptrace)) {

1569

/* it will become visible, clear notask_error */

1574

/* it will become visible, clear notask_error */

1570

wo->notask_error = 0;

1575

wo->notask_error = 0;

1571

return 0;

1576

return 0;

1572

}

1577

}

1573

1578

1574

/* we don't reap group leaders with subthreads */

1579

/* we don't reap group leaders with subthreads */

1575

if (!delay_group_leader(p))

1580

if (!delay_group_leader(p))

1576

return wait_task_zombie(wo, p);

1581

return wait_task_zombie(wo, p);

1577

1582

1578

/*

1583

/*

1579

* Allow access to stopped/continued state via zombie by

1584

* Allow access to stopped/continued state via zombie by

1580

* falling through. Clearing of notask_error is complex.

1585

* falling through. Clearing of notask_error is complex.

1581

*

1586

*

1582

* When !@ptrace:

1587

* When !@ptrace:

1583

*

1588

*

1584

* If WEXITED is set, notask_error should naturally be

1589

* If WEXITED is set, notask_error should naturally be

1585

* cleared. If not, subset of WSTOPPED|WCONTINUED is set,

1590

* cleared. If not, subset of WSTOPPED|WCONTINUED is set,

1586

* so, if there are live subthreads, there are events to

1591

* so, if there are live subthreads, there are events to

1587

* wait for. If all subthreads are dead, it's still safe

1592

* wait for. If all subthreads are dead, it's still safe

1588

* to clear - this function will be called again in finite

1593

* to clear - this function will be called again in finite

1589

* amount time once all the subthreads are released and

1594

* amount time once all the subthreads are released and

1590

* will then return without clearing.

1595

* will then return without clearing.

1591

*

1596

*

1592

* When @ptrace:

1597

* When @ptrace:

1593

*

1598

*

1594

* Stopped state is per-task and thus can't change once the

1599

* Stopped state is per-task and thus can't change once the

1595

* target task dies. Only continued and exited can happen.

1600

* target task dies. Only continued and exited can happen.

1596

* Clear notask_error if WCONTINUED | WEXITED.

1601

* Clear notask_error if WCONTINUED | WEXITED.

1597

*/

1602

*/

1598

if (likely(!ptrace) || (wo->wo_flags & (WCONTINUED | WEXITED)))

1603

if (likely(!ptrace) || (wo->wo_flags & (WCONTINUED | WEXITED)))

1599

wo->notask_error = 0;

1604

wo->notask_error = 0;

1600

} else {

1605

} else {

1601

/*

1606

/*

1602

* If @p is ptraced by a task in its real parent's group,

1607

* If @p is ptraced by a task in its real parent's group,

1603

* hide group stop/continued state when looking at @p as

1608

* hide group stop/continued state when looking at @p as

1604

* the real parent; otherwise, a single stop can be

1609

* the real parent; otherwise, a single stop can be

1605

* reported twice as group and ptrace stops.

1610

* reported twice as group and ptrace stops.

1606

*

1611

*

1607

* If a ptracer wants to distinguish the two events for its

1612

* If a ptracer wants to distinguish the two events for its

1608

* own children, it should create a separate process which

1613

* own children, it should create a separate process which

1609

* takes the role of real parent.

1614

* takes the role of real parent.

1610

*/

1615

*/

1611

if (likely(!ptrace) && p->ptrace &&

1616

if (likely(!ptrace) && p->ptrace &&

1612

same_thread_group(p->parent, p->real_parent))

1617

same_thread_group(p->parent, p->real_parent))

1613

return 0;

1618

return 0;

1614

1619

1615

/*

1620

/*

1616

* @p is alive and it's gonna stop, continue or exit, so

1621

* @p is alive and it's gonna stop, continue or exit, so

1617

* there always is something to wait for.

1622

* there always is something to wait for.

1618

*/

1623

*/

1619

wo->notask_error = 0;

1624

wo->notask_error = 0;

1620

}

1625

}

1621

1626

1622

/*

1627

/*

1623

* Wait for stopped. Depending on @ptrace, different stopped state

1628

* Wait for stopped. Depending on @ptrace, different stopped state

1624

* is used and the two don't interact with each other.

1629

* is used and the two don't interact with each other.

1625

*/

1630

*/

1626

ret = wait_task_stopped(wo, ptrace, p);

1631

ret = wait_task_stopped(wo, ptrace, p);

1627

if (ret)

1632

if (ret)

1628

return ret;

1633

return ret;

1629

1634

1630

/*

1635

/*

1631

* Wait for continued. There's only one continued state and the

1636

* Wait for continued. There's only one continued state and the

1632

* ptracer can consume it which can confuse the real parent. Don't

1637

* ptracer can consume it which can confuse the real parent. Don't

1633

* use WCONTINUED from ptracer. You don't need or want it.

1638

* use WCONTINUED from ptracer. You don't need or want it.

1634

*/

1639

*/

1635

return wait_task_continued(wo, p);

1640

return wait_task_continued(wo, p);

1636

}

1641

}

1637

1642

1638

/*

1643

/*

1639

* Do the work of do_wait() for one thread in the group, @tsk.

1644

* Do the work of do_wait() for one thread in the group, @tsk.

1640

*

1645

*

1641

* -ECHILD should be in ->notask_error before the first call.

1646

* -ECHILD should be in ->notask_error before the first call.

1642

* Returns nonzero for a final return, when we have unlocked tasklist_lock.

1647

* Returns nonzero for a final return, when we have unlocked tasklist_lock.

1643

* Returns zero if the search for a child should continue; then

1648

* Returns zero if the search for a child should continue; then

1644

* ->notask_error is 0 if there were any eligible children,

1649

* ->notask_error is 0 if there were any eligible children,

1645

* or another error from security_task_wait(), or still -ECHILD.

1650

* or another error from security_task_wait(), or still -ECHILD.

1646

*/

1651

*/

1647

static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)

1652

static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)

1648

{

1653

{

1649

struct task_struct *p;

1654

struct task_struct *p;

1650

1655

1651

list_for_each_entry(p, &tsk->children, sibling) {

1656

list_for_each_entry(p, &tsk->children, sibling) {

1652

int ret = wait_consider_task(wo, 0, p);

1657

int ret = wait_consider_task(wo, 0, p);

1653

if (ret)

1658

if (ret)

1654

return ret;

1659

return ret;

1655

}

1660

}

1656

1661

1657

return 0;

1662

return 0;

1658

}

1663

}

1659

1664

1660

static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)

1665

static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)

1661

{

1666

{

1662

struct task_struct *p;

1667

struct task_struct *p;

1663

1668

1664

list_for_each_entry(p, &tsk->ptraced, ptrace_entry) {

1669

list_for_each_entry(p, &tsk->ptraced, ptrace_entry) {

1665

int ret = wait_consider_task(wo, 1, p);

1670

int ret = wait_consider_task(wo, 1, p);

1666

if (ret)

1671

if (ret)

1667

return ret;

1672

return ret;

1668

}

1673

}

1669

1674

1670

return 0;

1675

return 0;

1671

}

1676

}

1672

1677

1673

static int child_wait_callback(wait_queue_t *wait, unsigned mode,

1678

static int child_wait_callback(wait_queue_t *wait, unsigned mode,

1674

int sync, void *key)

1679

int sync, void *key)

1675

{

1680

{

1676

struct wait_opts *wo = container_of(wait, struct wait_opts,

1681

struct wait_opts *wo = container_of(wait, struct wait_opts,

1677

child_wait);

1682

child_wait);

1678

struct task_struct *p = key;

1683

struct task_struct *p = key;

1679

1684

1680

if (!eligible_pid(wo, p))

1685

if (!eligible_pid(wo, p))

1681

return 0;

1686

return 0;

1682

1687

1683

if ((wo->wo_flags & __WNOTHREAD) && wait->private != p->parent)

1688

if ((wo->wo_flags & __WNOTHREAD) && wait->private != p->parent)

1684

return 0;

1689

return 0;

1685

1690

1686

return default_wake_function(wait, mode, sync, key);

1691

return default_wake_function(wait, mode, sync, key);

1687

}

1692

}

1688

1693

1689

void __wake_up_parent(struct task_struct *p, struct task_struct *parent)

1694

void __wake_up_parent(struct task_struct *p, struct task_struct *parent)

1690

{

1695

{

1691

__wake_up_sync_key(&parent->signal->wait_chldexit,

1696

__wake_up_sync_key(&parent->signal->wait_chldexit,

1692

TASK_INTERRUPTIBLE, 1, p);

1697

TASK_INTERRUPTIBLE, 1, p);

1693

}

1698

}

1694

1699

1695

static long do_wait(struct wait_opts *wo)

1700

static long do_wait(struct wait_opts *wo)

1696

{

1701

{

1697

struct task_struct *tsk;

1702

struct task_struct *tsk;

1698

int retval;

1703

int retval;

1699

1704

1700

trace_sched_process_wait(wo->wo_pid);

1705

trace_sched_process_wait(wo->wo_pid);

1701

1706

1702

init_waitqueue_func_entry(&wo->child_wait, child_wait_callback);

1707

init_waitqueue_func_entry(&wo->child_wait, child_wait_callback);

1703

wo->child_wait.private = current;

1708

wo->child_wait.private = current;

1704

add_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);

1709

add_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);

1705

repeat:

1710

repeat:

1706

/*

1711

/*

1707

* If there is nothing that can match our critiera just get out.

1712

* If there is nothing that can match our critiera just get out.

1708

* We will clear ->notask_error to zero if we see any child that

1713

* We will clear ->notask_error to zero if we see any child that

1709

* might later match our criteria, even if we are not able to reap

1714

* might later match our criteria, even if we are not able to reap

1710

* it yet.

1715

* it yet.

1711

*/

1716

*/

1712

wo->notask_error = -ECHILD;

1717

wo->notask_error = -ECHILD;

1713

if ((wo->wo_type < PIDTYPE_MAX) &&

1718

if ((wo->wo_type < PIDTYPE_MAX) &&

1714

(!wo->wo_pid || hlist_empty(&wo->wo_pid->tasks[wo->wo_type])))

1719

(!wo->wo_pid || hlist_empty(&wo->wo_pid->tasks[wo->wo_type])))

1715

goto notask;

1720

goto notask;

1716

1721

1717

set_current_state(TASK_INTERRUPTIBLE);

1722

set_current_state(TASK_INTERRUPTIBLE);

1718

read_lock(&tasklist_lock);

1723

read_lock(&tasklist_lock);

1719

tsk = current;

1724

tsk = current;

1720

do {

1725

do {

1721

retval = do_wait_thread(wo, tsk);

1726

retval = do_wait_thread(wo, tsk);

1722

if (retval)

1727

if (retval)

1723

goto end;

1728

goto end;

1724

1729

1725

retval = ptrace_do_wait(wo, tsk);

1730

retval = ptrace_do_wait(wo, tsk);

1726

if (retval)

1731

if (retval)

1727

goto end;

1732

goto end;

1728

1733

1729

if (wo->wo_flags & __WNOTHREAD)

1734

if (wo->wo_flags & __WNOTHREAD)

1730

break;

1735

break;

1731

} while_each_thread(current, tsk);

1736

} while_each_thread(current, tsk);

1732

read_unlock(&tasklist_lock);

1737

read_unlock(&tasklist_lock);

1733

1738

1734

notask:

1739

notask:

1735

retval = wo->notask_error;

1740

retval = wo->notask_error;

1736

if (!retval && !(wo->wo_flags & WNOHANG)) {

1741

if (!retval && !(wo->wo_flags & WNOHANG)) {

1737

retval = -ERESTARTSYS;

1742

retval = -ERESTARTSYS;

1738

if (!signal_pending(current)) {

1743

if (!signal_pending(current)) {

1739

schedule();

1744

schedule();

1740

goto repeat;

1745

goto repeat;

1741

}

1746

}

1742

}

1747

}

1743

end:

1748

end:

1744

__set_current_state(TASK_RUNNING);

1749

__set_current_state(TASK_RUNNING);

1745

remove_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);

1750

remove_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);

1746

return retval;

1751

return retval;

1747

}

1752

}

1748

1753

1749

SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,

1754

SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,

1750

infop, int, options, struct rusage __user *, ru)

1755

infop, int, options, struct rusage __user *, ru)

1751

{

1756

{

1752

struct wait_opts wo;

1757

struct wait_opts wo;

1753

struct pid *pid = NULL;

1758

struct pid *pid = NULL;

1754

enum pid_type type;

1759

enum pid_type type;

1755

long ret;

1760

long ret;

1756

1761

1757

if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED))

1762

if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED))

1758

return -EINVAL;

1763

return -EINVAL;

1759

if (!(options & (WEXITED|WSTOPPED|WCONTINUED)))

1764

if (!(options & (WEXITED|WSTOPPED|WCONTINUED)))

1760

return -EINVAL;

1765

return -EINVAL;

1761

1766

1762

switch (which) {

1767

switch (which) {

1763

case P_ALL:

1768

case P_ALL:

1764

type = PIDTYPE_MAX;

1769

type = PIDTYPE_MAX;

1765

break;

1770

break;

1766

case P_PID:

1771

case P_PID:

1767

type = PIDTYPE_PID;

1772

type = PIDTYPE_PID;

1768

if (upid <= 0)

1773

if (upid <= 0)

1769

return -EINVAL;

1774

return -EINVAL;

1770

break;

1775

break;

1771

case P_PGID:

1776

case P_PGID:

1772

type = PIDTYPE_PGID;

1777

type = PIDTYPE_PGID;

1773

if (upid <= 0)

1778

if (upid <= 0)

1774

return -EINVAL;

1779

return -EINVAL;

1775

break;

1780

break;

1776

default:

1781

default:

1777

return -EINVAL;

1782

return -EINVAL;

1778

}

1783

}

1779

1784

1780

if (type < PIDTYPE_MAX)

1785

if (type < PIDTYPE_MAX)

1781

pid = find_get_pid(upid);

1786

pid = find_get_pid(upid);

1782

1787

1783

wo.wo_type = type;

1788

wo.wo_type = type;

1784

wo.wo_pid = pid;

1789

wo.wo_pid = pid;

1785

wo.wo_flags = options;

1790

wo.wo_flags = options;

1786

wo.wo_info = infop;

1791

wo.wo_info = infop;

1787

wo.wo_stat = NULL;

1792

wo.wo_stat = NULL;

1788

wo.wo_rusage = ru;

1793

wo.wo_rusage = ru;

1789

ret = do_wait(&wo);

1794

ret = do_wait(&wo);

1790

1795

1791

if (ret > 0) {

1796

if (ret > 0) {

1792

ret = 0;

1797

ret = 0;

1793

} else if (infop) {

1798

} else if (infop) {

1794

/*

1799

/*

1795

* For a WNOHANG return, clear out all the fields

1800

* For a WNOHANG return, clear out all the fields

1796

* we would set so the user can easily tell the

1801

* we would set so the user can easily tell the

1797

* difference.

1802

* difference.

1798

*/

1803

*/

1799

if (!ret)

1804

if (!ret)

1800

ret = put_user(0, &infop->si_signo);

1805

ret = put_user(0, &infop->si_signo);

1801

if (!ret)

1806

if (!ret)

1802

ret = put_user(0, &infop->si_errno);

1807

ret = put_user(0, &infop->si_errno);

1803

if (!ret)

1808

if (!ret)

1804

ret = put_user(0, &infop->si_code);

1809

ret = put_user(0, &infop->si_code);

1805

if (!ret)

1810

if (!ret)

1806

ret = put_user(0, &infop->si_pid);

1811

ret = put_user(0, &infop->si_pid);

1807

if (!ret)

1812

if (!ret)

1808

ret = put_user(0, &infop->si_uid);

1813

ret = put_user(0, &infop->si_uid);

1809

if (!ret)

1814

if (!ret)

1810

ret = put_user(0, &infop->si_status);

1815

ret = put_user(0, &infop->si_status);

1811

}

1816

}

1812

1817

1813

put_pid(pid);

1818

put_pid(pid);

1814

1819

1815

/* avoid REGPARM breakage on x86: */

1820

/* avoid REGPARM breakage on x86: */

1816

asmlinkage_protect(5, ret, which, upid, infop, options, ru);

1821

asmlinkage_protect(5, ret, which, upid, infop, options, ru);

1817

return ret;

1822

return ret;

1818

}

1823

}

1819

1824

1820

SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,

1825

SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,

1821

int, options, struct rusage __user *, ru)

1826

int, options, struct rusage __user *, ru)

1822

{

1827

{

1823

struct wait_opts wo;

1828

struct wait_opts wo;

1824

struct pid *pid = NULL;

1829

struct pid *pid = NULL;

1825

enum pid_type type;

1830

enum pid_type type;

1826

long ret;

1831

long ret;

1827

1832

1828

if (options & ~(WNOHANG|WUNTRACED|WCONTINUED|

1833

if (options & ~(WNOHANG|WUNTRACED|WCONTINUED|

1829

__WNOTHREAD|__WCLONE|__WALL))

1834

__WNOTHREAD|__WCLONE|__WALL))

1830

return -EINVAL;

1835

return -EINVAL;

1831

1836

1832

if (upid == -1)

1837

if (upid == -1)

1833

type = PIDTYPE_MAX;

1838

type = PIDTYPE_MAX;

1834

else if (upid < 0) {

1839

else if (upid < 0) {

1835

type = PIDTYPE_PGID;

1840

type = PIDTYPE_PGID;

1836

pid = find_get_pid(-upid);

1841

pid = find_get_pid(-upid);

1837

} else if (upid == 0) {

1842

} else if (upid == 0) {

1838

type = PIDTYPE_PGID;

1843

type = PIDTYPE_PGID;

1839

pid = get_task_pid(current, PIDTYPE_PGID);

1844

pid = get_task_pid(current, PIDTYPE_PGID);

1840

} else /* upid > 0 */ {

1845

} else /* upid > 0 */ {

1841

type = PIDTYPE_PID;

1846

type = PIDTYPE_PID;

1842

pid = find_get_pid(upid);

1847

pid = find_get_pid(upid);

1843

}

1848

}

1844

1849

1845

wo.wo_type = type;

1850

wo.wo_type = type;

1846

wo.wo_pid = pid;

1851

wo.wo_pid = pid;

1847

wo.wo_flags = options | WEXITED;

1852

wo.wo_flags = options | WEXITED;

1848

wo.wo_info = NULL;

1853

wo.wo_info = NULL;

1849

wo.wo_stat = stat_addr;

1854

wo.wo_stat = stat_addr;

1850

wo.wo_rusage = ru;

1855

wo.wo_rusage = ru;

1851

ret = do_wait(&wo);

1856

ret = do_wait(&wo);

1852

put_pid(pid);

1857

put_pid(pid);

1853

1858

1854

/* avoid REGPARM breakage on x86: */

1859

/* avoid REGPARM breakage on x86: */

1855

asmlinkage_protect(4, ret, upid, stat_addr, options, ru);

1860

asmlinkage_protect(4, ret, upid, stat_addr, options, ru);

1856

return ret;

1861

return ret;

1857

}

1862

}

1858

1863

1859

#ifdef __ARCH_WANT_SYS_WAITPID

1864

#ifdef __ARCH_WANT_SYS_WAITPID

1860

1865

1861

/*

1866

/*

1862

* sys_waitpid() remains for compatibility. waitpid() should be

1867

* sys_waitpid() remains for compatibility. waitpid() should be

1863

* implemented by calling sys_wait4() from libc.a.

1868

* implemented by calling sys_wait4() from libc.a.

1864

*/

1869

*/

1865

SYSCALL_DEFINE3(waitpid, pid_t, pid, int __user *, stat_addr, int, options)

1870

SYSCALL_DEFINE3(waitpid, pid_t, pid, int __user *, stat_addr, int, options)

1866

{

1871

{

1867

return sys_wait4(pid, stat_addr, options, NULL);

1872

return sys_wait4(pid, stat_addr, options, NULL);

1868

}

1873

}

1869

1874

GITLAB

kill tracehook_notify_death()

 /*
  * Tracing hooks
  *
  * Copyright (C) 2008-2009 Red Hat, Inc.  All rights reserved.
  *
  * This copyrighted material is made available to anyone wishing to use,
  * modify, copy, or redistribute it subject to the terms and conditions
  * of the GNU General Public License v.2.
  *
  * This file defines hook entry points called by core code where
  * user tracing/debugging support might need to do something.  These
  * entry points are called tracehook_*().  Each hook declared below
  * has a detailed kerneldoc comment giving the context (locking et
  * al) from which it is called, and the meaning of its return value.
  *
  * Each function here typically has only one call site, so it is ok
  * to have some nontrivial tracehook_*() inlines.  In all cases, the
  * fast path when no tracing is enabled should be very short.
  *
  * The purpose of this file and the tracehook_* layer is to consolidate
  * the interface that the kernel core and arch code uses to enable any
  * user debugging or tracing facility (such as ptrace).  The interfaces
  * here are carefully documented so that maintainers of core and arch
  * code do not need to think about the implementation details of the
  * tracing facilities.  Likewise, maintainers of the tracing code do not
  * need to understand all the calling core or arch code in detail, just
  * documented circumstances of each call, such as locking conditions.
  *
  * If the calling core code changes so that locking is different, then
  * it is ok to change the interface documented here.  The maintainer of
  * core code changing should notify the maintainers of the tracing code
  * that they need to work out the change.
  *
  * Some tracehook_*() inlines take arguments that the current tracing
  * implementations might not necessarily use.  These function signatures
  * are chosen to pass in all the information that is on hand in the
  * caller and might conceivably be relevant to a tracer, so that the
  * core code won't have to be updated when tracing adds more features.
  * If a call site changes so that some of those parameters are no longer
  * already on hand without extra work, then the tracehook_* interface
  * can change so there is no make-work burden on the core code.  The
  * maintainer of core code changing should notify the maintainers of the
  * tracing code that they need to work out the change.
  */
 #ifndef _LINUX_TRACEHOOK_H
 #define _LINUX_TRACEHOOK_H	1
 #include <linux/sched.h>
 #include <linux/ptrace.h>
 #include <linux/security.h>
 struct linux_binprm;
 /*
  * Shared ptrace stop for the syscall-entry and syscall-exit reports; both
  * reports look identical to the tracer.
  */
 static inline void ptrace_report_syscall(struct pt_regs *regs)
 {
 	int flags = current->ptrace;
 	int trap = SIGTRAP;
 	/* Nothing to report unless the current task is being ptraced. */
 	if (!(flags & PT_PTRACED))
 		return;
 	/* With PT_TRACESYSGOOD set, bit 7 is added to the reported trap. */
 	if (flags & PT_TRACESYSGOOD)
 		trap |= 0x80;
 	ptrace_notify(trap);
 	/*
 	 * This isn't the same as continuing with a signal, but it will do
 	 * for normal use.  strace only continues with a signal if the
 	 * stopping signal is not SIGTRAP.  -brl
 	 */
 	if (current->exit_code) {
 		send_sig(current->exit_code, current, 1);
 		current->exit_code = 0;
 	}
 }
 /**
  * tracehook_report_syscall_entry - task is about to attempt a system call
  * @regs:		user register state of current task
  *
  * Invoked when %TIF_SYSCALL_TRACE is set and the current task has just
  * entered the kernel for a system call.  The complete user register
  * state is available; modifying @regs can change the system call number
  * and the arguments that will be tried.  Blocking here is safe and keeps
  * the system call from starting.
  *
  * Returns zero normally, or nonzero if the calling arch code should abort
  * the system call.  An abort must prevent normal entry so that no system
  * call is made.  If the current task ever returns to user mode after an
  * abort, its register state is unspecified, but should be something
  * harmless like an %ENOSYS error return.  Enough information should be
  * preserved for syscall_rollback() to work (see asm-generic/syscall.h).
  * This implementation only reports to ptrace and always returns zero.
  *
  * Called without locks, just after entering kernel mode.
  */
 static inline __must_check int tracehook_report_syscall_entry(
 	struct pt_regs *regs)
 {
 	ptrace_report_syscall(regs);
 	return 0;
 }
 /**
  * tracehook_report_syscall_exit - task has just finished a system call
  * @regs:		user register state of current task
  * @step:		nonzero if simulating single-step or block-step
  *
  * Invoked when %TIF_SYSCALL_TRACE is set and the current task has just
  * finished an attempted system call.  The complete user register state
  * is available.  Blocking here is safe and keeps signals from being
  * processed.
  *
  * A nonzero @step means this report also stands in for the normal trap
  * that would follow the system call instruction because
  * user_enable_block_step() or user_enable_single_step() was used; in
  * that case %TIF_SYSCALL_TRACE might not be set, and a SIGTRAP is forced
  * on the current task instead of the ordinary ptrace report.
  *
  * Called without locks, just before checking for pending signals.
  */
 static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step)
 {
 	siginfo_t info;
 	/* Ordinary case: plain syscall-exit report to the tracer. */
 	if (!step) {
 		ptrace_report_syscall(regs);
 		return;
 	}
 	/* Stepping: deliver the single-step trap in place of the report. */
 	user_single_step_siginfo(current, regs, &info);
 	force_sig_info(SIGTRAP, &info, current);
 }
 /**
  * tracehook_signal_handler - signal handler setup is complete
  * @sig:		number of signal being delivered
  * @info:		siginfo_t of signal being delivered
  * @ka:			sigaction setting that chose the handler
  * @regs:		user register state
  * @stepping:		nonzero if debugger single-step or block-step in use
  *
  * The arch code calls this once a signal handler has been set up.  The
  * register and stack state describe the user handler that is about to
  * run, and the signal mask changes have already been applied.  The only
  * action taken here is a SIGTRAP ptrace notification when @stepping is
  * nonzero; the other arguments are currently unused.
  *
  * Called without locks, shortly before returning to user mode
  * (or handling more signals).
  */
 static inline void tracehook_signal_handler(int sig, siginfo_t *info,
 					    const struct k_sigaction *ka,
 					    struct pt_regs *regs, int stepping)
 {
 	if (!stepping)
 		return;
 	ptrace_notify(SIGTRAP);
 }
-#define DEATH_REAP			-1
-#define DEATH_DELAYED_GROUP_LEADER	-2
-/**
- * tracehook_notify_death - task is dead, ready to notify parent
- * @task:		@current task now exiting
- * @death_cookie:	value to pass to tracehook_report_death()
- * @group_dead:		nonzero if this was the last thread in the group to die
- *
- * A return value >= 0 means call do_notify_parent() with that signal
- * number.  Negative return value can be %DEATH_REAP to self-reap right
- * now, or %DEATH_DELAYED_GROUP_LEADER to a zombie without notifying our
- * parent.  Note that a return value of 0 means a do_notify_parent() call
- * that sends no signal, but still wakes up a parent blocked in wait*().
- *
- * Called with write_lock_irq(&tasklist_lock) held.
- */
-static inline int tracehook_notify_death(struct task_struct *task,
-					 void **death_cookie, int group_dead)
-{
-	if (task_detached(task))
-		return task->ptrace ? SIGCHLD : DEATH_REAP;
-	/*
-	 * If something other than our normal parent is ptracing us, then
-	 * send it a SIGCHLD instead of honoring exit_signal.  exit_signal
-	 * only has special meaning to our real parent.
-	 */
-	if (thread_group_empty(task) && !ptrace_reparented(task))
-		return task->exit_signal;
-	return task->ptrace ? SIGCHLD : DEATH_DELAYED_GROUP_LEADER;
-}
 #ifdef TIF_NOTIFY_RESUME
 /**
  * set_notify_resume - cause tracehook_notify_resume() to be called
  * @task:		task that will call tracehook_notify_resume()
  *
  * Arranges for @task to call tracehook_notify_resume() before it next
  * returns to user mode.  A task already running in user mode will enter
  * the kernel and make the call soon.  A blocked task is not woken.
  * The task is only kicked when %TIF_NOTIFY_RESUME was not already set.
  */
 static inline void set_notify_resume(struct task_struct *task)
 {
 	/* Flag was already pending: a notification is on its way. */
 	if (test_and_set_tsk_thread_flag(task, TIF_NOTIFY_RESUME))
 		return;
 	kick_process(task);
 }
 /**
  * tracehook_notify_resume - report when about to return to user mode
  * @regs:		user-mode registers of @current task
  *
  * Invoked after %TIF_NOTIFY_RESUME has been set, when the task is about
  * to return to user mode; the user state in @regs can be inspected or
  * adjusted at this point.  The arch caller clears %TIF_NOTIFY_RESUME
  * before making the call, so if the flag is set again asynchronously
  * this hook runs once more before the return to user mode.
  *
  * Called without locks.
  */
 static inline void tracehook_notify_resume(struct pt_regs *regs)
 {
 	/* Intentionally empty: no resume-time work is hooked in here. */
 }
 #endif	/* TIF_NOTIFY_RESUME */
 #endif	/* <linux/tracehook.h> */

 /*
  *  linux/kernel/exit.c
  *
  *  Copyright (C) 1991, 1992  Linus Torvalds
  */
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/capability.h>
 #include <linux/completion.h>
 #include <linux/personality.h>
 #include <linux/tty.h>
 #include <linux/iocontext.h>
 #include <linux/key.h>
 #include <linux/security.h>
 #include <linux/cpu.h>
 #include <linux/acct.h>
 #include <linux/tsacct_kern.h>
 #include <linux/file.h>
 #include <linux/fdtable.h>
 #include <linux/binfmts.h>
 #include <linux/nsproxy.h>
 #include <linux/pid_namespace.h>
 #include <linux/ptrace.h>
 #include <linux/profile.h>
 #include <linux/mount.h>
 #include <linux/proc_fs.h>
 #include <linux/kthread.h>
 #include <linux/mempolicy.h>
 #include <linux/taskstats_kern.h>
 #include <linux/delayacct.h>
 #include <linux/freezer.h>
 #include <linux/cgroup.h>
 #include <linux/syscalls.h>
 #include <linux/signal.h>
 #include <linux/posix-timers.h>
 #include <linux/cn_proc.h>
 #include <linux/mutex.h>
 #include <linux/futex.h>
 #include <linux/pipe_fs_i.h>
 #include <linux/audit.h> /* for audit_free() */
 #include <linux/resource.h>
 #include <linux/blkdev.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/tracehook.h>
 #include <linux/fs_struct.h>
 #include <linux/init_task.h>
 #include <linux/perf_event.h>
 #include <trace/events/sched.h>
 #include <linux/hw_breakpoint.h>
 #include <linux/oom.h>
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/pgtable.h>
 #include <asm/mmu_context.h>
 static void exit_mm(struct task_struct * tsk);
 /*
  * Unlink @p from the pid hashes and the task lists.
  *
  * Every call drops the global thread count, detaches the PIDTYPE_PID link
  * and removes @p from its thread_group list.  When @group_dead is true the
  * PGID and SID links, the global ->tasks list entry, the ->sibling entry
  * and the per-cpu process count are released as well.
  */
 static void __unhash_process(struct task_struct *p, bool group_dead)
 {
 	nr_threads--;
 	detach_pid(p, PIDTYPE_PID);
 	if (group_dead) {
 		/* Extra teardown done only when the caller reports the group dead. */
 		detach_pid(p, PIDTYPE_PGID);
 		detach_pid(p, PIDTYPE_SID);
 		list_del_rcu(&p->tasks);
 		list_del_init(&p->sibling);
 		__this_cpu_dec(process_counts);
 	}
 	list_del_rcu(&p->thread_group);
 }
 /*
  * Detach @tsk from its signal state and unhash it.
  *
  * Under ->siglock this tears down the posix cpu timers, either takes over
  * the tty reference (group leader) or folds the thread's accounting
  * counters into the shared signal_struct (any other thread), decrements
  * sig->nr_threads, unhashes the task and flushes its private pending
  * signals.  After the lock is dropped the sighand is released and, for the
  * group leader, the shared pending queue is flushed and the tty reference
  * is put.
  *
  * This function expects the tasklist_lock write-locked.
  */
 static void __exit_signal(struct task_struct *tsk)
 {
 	struct signal_struct *sig = tsk->signal;
 	/* "group dead" here simply means @tsk is the thread group leader */
 	bool group_dead = thread_group_leader(tsk);
 	struct sighand_struct *sighand;
 	/* only assigned and used on the group_dead paths below */
 	struct tty_struct *uninitialized_var(tty);
 	sighand = rcu_dereference_check(tsk->sighand,
 					rcu_read_lock_held() ||
 					lockdep_tasklist_lock_is_held());
 	spin_lock(&sighand->siglock);
 	posix_cpu_timers_exit(tsk);
 	if (group_dead) {
 		posix_cpu_timers_exit_group(tsk);
 		/* take the tty pointer now, drop the reference after unlock */
 		tty = sig->tty;
 		sig->tty = NULL;
 	} else {
 		/*
 		 * This can only happen if the caller is de_thread().
 		 * FIXME: this is the temporary hack, we should teach
 		 * posix-cpu-timers to handle this case correctly.
 		 */
 		if (unlikely(has_group_leader_pid(tsk)))
 			posix_cpu_timers_exit_group(tsk);
 		/*
 		 * If there is any task waiting for the group exit
 		 * then notify it:
 		 */
 		if (sig->notify_count > 0 && !--sig->notify_count)
 			wake_up_process(sig->group_exit_task);
 		/* do not leave curr_target pointing at the exiting thread */
 		if (tsk == sig->curr_target)
 			sig->curr_target = next_thread(tsk);
 		/*
 		 * Accumulate here the counters for all threads but the
 		 * group leader as they die, so they can be added into
 		 * the process-wide totals when those are taken.
 		 * The group leader stays around as a zombie as long
 		 * as there are other threads.  When it gets reaped,
 		 * the exit.c code will add its counts into these totals.
 		 * We won't ever get here for the group leader, since it
 		 * will have been the last reference on the signal_struct.
 		 */
 		sig->utime = cputime_add(sig->utime, tsk->utime);
 		sig->stime = cputime_add(sig->stime, tsk->stime);
 		sig->gtime = cputime_add(sig->gtime, tsk->gtime);
 		sig->min_flt += tsk->min_flt;
 		sig->maj_flt += tsk->maj_flt;
 		sig->nvcsw += tsk->nvcsw;
 		sig->nivcsw += tsk->nivcsw;
 		sig->inblock += task_io_get_inblock(tsk);
 		sig->oublock += task_io_get_oublock(tsk);
 		task_io_accounting_add(&sig->ioac, &tsk->ioac);
 		sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
 	}
 	sig->nr_threads--;
 	__unhash_process(tsk, group_dead);
 	/*
 	 * Do this under ->siglock, we can race with another thread
 	 * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals.
 	 */
 	flush_sigqueue(&tsk->pending);
 	tsk->sighand = NULL;
 	spin_unlock(&sighand->siglock);
 	/* the local sighand pointer is still valid; tsk->sighand is gone */
 	__cleanup_sighand(sighand);
 	clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
 	if (group_dead) {
 		flush_sigqueue(&sig->shared_pending);
 		tty_kref_put(tty);
 	}
 }
 /*
  * RCU callback queued by release_task(): recovers the task from its
  * embedded rcu_head, runs the perf and tracepoint hooks, then drops a
  * task_struct reference.
  */
 static void delayed_put_task_struct(struct rcu_head *rhp)
 {
 	struct task_struct *task = container_of(rhp, struct task_struct, rcu);
 	perf_event_delayed_put(task);
 	trace_sched_process_free(task);
 	put_task_struct(task);
 }
 /*
  * Release the dead task @p: drop its per-user process count, flush its
  * /proc entries, detach it from ptrace and from its signal state under
  * tasklist_lock, and queue the final task_struct put via RCU.
  *
  * If @p was the last non-leader thread and the zombie leader ends up
  * detached after its parent is notified, the function loops once more
  * (via "repeat") to release the leader too.
  */
 void release_task(struct task_struct * p)
 {
 	struct task_struct *leader;
 	int zap_leader;
 repeat:
 	/* don't need to get the RCU readlock here - the process is dead and
 	 * can't be modifying its own credentials. But shut RCU-lockdep up */
 	rcu_read_lock();
 	atomic_dec(&__task_cred(p)->user->processes);
 	rcu_read_unlock();
 	proc_flush_task(p);
 	write_lock_irq(&tasklist_lock);
 	ptrace_release_task(p);
 	__exit_signal(p);
 	/*
 	 * If we are the last non-leader member of the thread
 	 * group, and the leader is zombie, then notify the
 	 * group leader's parent process. (if it wants notification.)
 	 */
 	zap_leader = 0;
 	leader = p->group_leader;
 	if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) {
 		BUG_ON(task_detached(leader));
 		do_notify_parent(leader, leader->exit_signal);
 		/*
 		 * If we were the last child thread and the leader has
 		 * exited already, and the leader's parent ignores SIGCHLD,
 		 * then we are the one who should release the leader.
 		 *
 		 * do_notify_parent() will have marked it self-reaping in
 		 * that case.
 		 */
 		zap_leader = task_detached(leader);
 		/*
 		 * This maintains the invariant that release_task()
 		 * only runs on a task in EXIT_DEAD, just for sanity.
 		 */
 		if (zap_leader)
 			leader->exit_state = EXIT_DEAD;
 	}
 	write_unlock_irq(&tasklist_lock);
 	release_thread(p);
 	/* the final put_task_struct() happens in the RCU callback */
 	call_rcu(&p->rcu, delayed_put_task_struct);
 	/* switch to the leader; only acted upon when zap_leader is set */
 	p = leader;
 	if (unlikely(zap_leader))
 		goto repeat;
 }
 /*
  * Look up the session of process group @pgrp.  The lookup first tries a
  * task that has @pgrp as its process group and, failing that, falls back
  * to the task whose pid is @pgrp.  I dunno - gdb doesn't work correctly
  * without this fallback...
  *
  * Returns the session pid of the task found, or NULL if there is none.
  *
  * The caller must hold rcu lock or the tasklist lock.
  */
 struct pid *session_of_pgrp(struct pid *pgrp)
 {
 	struct task_struct *task = pid_task(pgrp, PIDTYPE_PGID);
 	if (!task)
 		task = pid_task(pgrp, PIDTYPE_PID);
 	if (!task)
 		return NULL;
 	return task_session(task);
 }
 /*
  * Decide whether process group @pgrp is "orphaned" in the sense of the
  * POSIX definition in 2.2.2.52.  Orphaned process groups are not to be
  * affected by terminal-generated stop signals.  Newly orphaned process
  * groups are to receive a SIGHUP and a SIGCONT.
  *
  * Members equal to @ignored_task, members that have exited with an empty
  * thread group, and members whose real parent is the global init are not
  * considered.  Returns 0 as soon as a remaining member has a real parent
  * in a different process group of the same session, 1 otherwise.
  *
  * "I ask you, have you ever known what it is to be an orphan?"
  */
 static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignored_task)
 {
 	struct task_struct *member;
 	do_each_pid_task(pgrp, PIDTYPE_PGID, member) {
 		struct task_struct *parent;
 		if (member == ignored_task)
 			continue;
 		if (member->exit_state && thread_group_empty(member))
 			continue;
 		parent = member->real_parent;
 		if (is_global_init(parent))
 			continue;
 		/* A parent outside the group but inside the session anchors it. */
 		if (task_pgrp(parent) != pgrp &&
 		    task_session(parent) == task_session(member))
 			return 0;
 	} while_each_pid_task(pgrp, PIDTYPE_PGID, member);
 	return 1;
 }
 /*
  * Report whether the process group of the current task is orphaned.
  * Takes tasklist_lock for reading around the check; returns the result
  * of will_become_orphaned_pgrp() with no task ignored.
  */
 int is_current_pgrp_orphaned(void)
 {
 	int orphaned;
 	read_lock(&tasklist_lock);
 	orphaned = will_become_orphaned_pgrp(task_pgrp(current), NULL);
 	read_unlock(&tasklist_lock);
 	return orphaned;
 }
 /*
  * Returns 1 if at least one task in process group @pgrp is stopped,
  * 0 otherwise.  The scan ends at the first stopped task.
  */
 static int has_stopped_jobs(struct pid *pgrp)
 {
 	struct task_struct *member;
 	do_each_pid_task(pgrp, PIDTYPE_PGID, member) {
 		if (task_is_stopped(member))
 			return 1;
 	} while_each_pid_task(pgrp, PIDTYPE_PGID, member);
 	return 0;
 }
 /*
  * Check whether the process group of @tsk has become orphaned, and if it
  * has any stopped jobs, send the group a SIGHUP and then a SIGCONT.
  * (POSIX 3.2.2.2)
  *
  * With @parent == NULL this is the exit case: @tsk itself is leaving and
  * is evaluated against its real parent, so @tsk is ignored in the orphan
  * check.  With a non-NULL @parent this is the reparent case: @tsk is a
  * child being given away by @parent and nothing is ignored.
  */
 static void
 kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)
 {
 	struct pid *pgrp = task_pgrp(tsk);
 	struct task_struct *ignored_task;
 	if (parent) {
 		/* reparent: our child is in a different pgrp than
 		 * we are, and it was the only connection outside.
 		 */
 		ignored_task = NULL;
 	} else {
 		/* exit: our father is in a different pgrp than
 		 * we are and we were the only connection outside.
 		 */
 		ignored_task = tsk;
 		parent = tsk->real_parent;
 	}
 	/* Only a parent outside the group but inside the session matters. */
 	if (task_pgrp(parent) == pgrp)
 		return;
 	if (task_session(parent) != task_session(tsk))
 		return;
 	if (!will_become_orphaned_pgrp(pgrp, ignored_task))
 		return;
 	if (!has_stopped_jobs(pgrp))
 		return;
 	__kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp);
 	__kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp);
 }
 /**
  * reparent_to_kthreadd - Reparent the calling kernel thread to kthreadd
  *
  * If a kernel thread is launched as a result of a system call, or if
  * it ever exits, it should generally reparent itself to kthreadd so it
  * isn't in the way of other processes and is correctly cleaned up on exit.
  *
  * The various task state such as scheduling policy and priority may have
  * been inherited from a user process, so we reset them to sane values here.
  *
  * NOTE that reparent_to_kthreadd() gives the caller full capabilities.
  */
 static void reparent_to_kthreadd(void)
 {
 	write_lock_irq(&tasklist_lock);
 	ptrace_unlink(current);
 	/* Reparent to init */
 	current->real_parent = current->parent = kthreadd_task;
 	list_move_tail(&current->sibling, &current->real_parent->children);
 	/* Set the exit signal to SIGCHLD so we signal init on exit */
 	current->exit_signal = SIGCHLD;
 	if (task_nice(current) < 0)
 		set_user_nice(current, 0);
 	/* cpus_allowed? */
 	/* rt_priority? */
 	/* signals? */
 	memcpy(current->signal->rlim, init_task.signal->rlim,
 	       sizeof(current->signal->rlim));
 	atomic_inc(&init_cred.usage);
 	commit_creds(&init_cred);
 	write_unlock_irq(&tasklist_lock);
 }
 void __set_special_pids(struct pid *pid)
 {
 	struct task_struct *curr = current->group_leader;
 	if (task_session(curr) != pid)
 		change_pid(curr, PIDTYPE_SID, pid);
 	if (task_pgrp(curr) != pid)
 		change_pid(curr, PIDTYPE_PGID, pid);
 }
 /*
  * Locked wrapper: runs __set_special_pids(@pid) with tasklist_lock held
  * for writing and interrupts disabled.
  */
 static void set_special_pids(struct pid *pid)
 {
 	write_lock_irq(&tasklist_lock);
 	__set_special_pids(pid);
 	write_unlock_irq(&tasklist_lock);
 }
 /*
  * Kernel threads call this to declare that they accept signal @sig.
  * Must not be used if kthread was cloned with CLONE_SIGHAND.
  *
  * Returns 0 on success, or -EINVAL if @sig is not a valid signal number.
  */
 int allow_signal(int sig)
 {
 	if (sig < 1 || !valid_signal(sig))
 		return -EINVAL;
 	spin_lock_irq(&current->sighand->siglock);
 	/* Unblocking is only needed for daemonize()'ed kthreads */
 	sigdelset(&current->blocked, sig);
 	/*
 	 * Kernel threads handle their own signals.  Installing this
 	 * placeholder handler value tells the signal code the signal will
 	 * be handled, so that it is neither converted to SIGKILL nor
 	 * silently dropped.
 	 */
 	current->sighand->action[sig - 1].sa.sa_handler = (void __user *)2;
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
 	return 0;
 }
 EXPORT_SYMBOL(allow_signal);
 /*
  * Counterpart of allow_signal(): sets the current task's handler for
  * @sig to SIG_IGN under ->siglock and recalculates the pending state.
  *
  * Returns 0 on success, or -EINVAL if @sig is not a valid signal number.
  */
 int disallow_signal(int sig)
 {
 	if (sig < 1 || !valid_signal(sig))
 		return -EINVAL;
 	spin_lock_irq(&current->sighand->siglock);
 	current->sighand->action[sig - 1].sa.sa_handler = SIG_IGN;
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
 	return 0;
 }
 EXPORT_SYMBOL(disallow_signal);
 /*
  *	Put all the gunge required to become a kernel thread without
  *	attached user resources in one place where it belongs.
  *
  *	@name and the variadic arguments form a printf-style format that
  *	becomes the task's comm (truncated to sizeof(current->comm)).
  *	The calling task then drops its mm, is marked PF_NOFREEZE and
  *	PF_KTHREAD, switches to the init namespaces, session and process
  *	group, loses its tty, blocks and flushes all signals, adopts
  *	init_task's files and is reparented to kthreadd.
  */
 void daemonize(const char *name, ...)
 {
 	va_list args;
 	sigset_t blocked;
 	/* Format the new command name directly into current->comm. */
 	va_start(args, name);
 	vsnprintf(current->comm, sizeof(current->comm), name, args);
 	va_end(args);
 	/*
 	 * If we were started as result of loading a module, close all of the
 	 * user space pages.  We don't need them, and if we didn't close them
 	 * they would be locked into memory.
 	 */
 	exit_mm(current);
 	/*
 	 * We don't want to have TIF_FREEZE set if the system-wide hibernation
 	 * or suspend transition begins right now.
 	 */
 	current->flags |= (PF_NOFREEZE | PF_KTHREAD);
 	/* Take a reference on init_nsproxy before switching over to it. */
 	if (current->nsproxy != &init_nsproxy) {
 		get_nsproxy(&init_nsproxy);
 		switch_task_namespaces(current, &init_nsproxy);
 	}
 	set_special_pids(&init_struct_pid);
 	proc_clear_tty(current);
 	/* Block and flush all signals */
 	sigfillset(&blocked);
 	sigprocmask(SIG_BLOCK, &blocked, NULL);
 	flush_signals(current);
 	/* Become as one with the init task */
 	daemonize_fs_struct();
 	exit_files(current);
 	/* Share init_task's files_struct, accounting for the new user. */
 	current->files = init_task.files;
 	atomic_inc(&current->files->count);
 	reparent_to_kthreadd();
 }
 EXPORT_SYMBOL(daemonize);
 /*
  * Close every file still open in @files.  The open_fds bitmap is walked
  * one word at a time; each set bit has its fd slot atomically swapped to
  * NULL and, if a file was there, the file is closed.
  */
 static void close_files(struct files_struct * files)
 {
 	struct fdtable *fdt;
 	int fd, word;
 	/*
 	 * It is safe to dereference the fd table without RCU or
 	 * ->file_lock because this is the last reference to the
 	 * files structure.  But use RCU to shut RCU-lockdep up.
 	 */
 	rcu_read_lock();
 	fdt = files_fdtable(files);
 	rcu_read_unlock();
 	for (word = 0; ; word++) {
 		unsigned long bits;
 		/* First descriptor number covered by this bitmap word. */
 		fd = word * __NFDBITS;
 		if (fd >= fdt->max_fds)
 			break;
 		bits = fdt->open_fds->fds_bits[word];
 		for (; bits; bits >>= 1, fd++) {
 			struct file *victim;
 			if (!(bits & 1))
 				continue;
 			victim = xchg(&fdt->fd[fd], NULL);
 			if (!victim)
 				continue;
 			filp_close(victim, files);
 			/* Closing may take a while; give others a chance to run. */
 			cond_resched();
 		}
 	}
 }
 struct files_struct *get_files_struct(struct task_struct *task)
 {
 	struct files_struct *files;
 	task_lock(task);
 	files = task->files;
 	if (files)
 		atomic_inc(&files->count);
 	task_unlock(task);
 	return files;
 }
 /*
  * Drop one reference to @files.  When the last reference goes away all
  * remaining files are closed and the fd table and @files are freed.
  */
 void put_files_struct(struct files_struct *files)
 {
 	struct fdtable *table;
 	/* Other users remain: nothing to tear down yet. */
 	if (!atomic_dec_and_test(&files->count))
 		return;
 	close_files(files);
 	/*
 	 * Free the fd and fdset arrays if we expanded them.
 	 * If the fdtable was embedded, pass files for freeing
 	 * at the end of the RCU grace period. Otherwise,
 	 * you can free files immediately.
 	 */
 	rcu_read_lock();
 	table = files_fdtable(files);
 	if (table != &files->fdtab)
 		kmem_cache_free(files_cachep, files);
 	free_fdtable(table);
 	rcu_read_unlock();
 }
 /*
  * Install @files as the current task's files_struct (the switch itself
  * is done under task_lock()) and drop the reference to the previous one.
  */
 void reset_files_struct(struct files_struct *files)
 {
 	struct task_struct *self = current;
 	struct files_struct *previous = self->files;
 	task_lock(self);
 	self->files = files;
 	task_unlock(self);
 	put_files_struct(previous);
 }
 /*
  * Detach @tsk from its files_struct, if it has one: the pointer is
  * cleared under task_lock() and the reference is then dropped.
  */
 void exit_files(struct task_struct *tsk)
 {
 	struct files_struct *table = tsk->files;
 	if (!table)
 		return;
 	task_lock(tsk);
 	tsk->files = NULL;
 	task_unlock(tsk);
 	put_files_struct(table);
 }
 #ifdef CONFIG_MM_OWNER
 /*
  * Task p is exiting and it may own mm; decide whether a new owner has to
  * be found.  Returns 1 only if the mm has other users (mm_users > 1) and
  * p is its current owner, 0 otherwise.
  */
 static inline int
 mm_need_new_owner(struct mm_struct *mm, struct task_struct *p)
 {
 	/*
 	 * A hand-over is needed only when somebody else still uses the mm
 	 * and the exiting task (us) is the one recorded as owner.
 	 */
 	return atomic_read(&mm->mm_users) > 1 && mm->owner == p;
 }
 /*
  * Pick a new owner for @mm when the current task is its owner and is
  * going away.  Candidates are searched in this order: the current task's
  * children, its siblings, then every thread in the system.  The first
  * task found that uses @mm becomes the owner; if none is found the owner
  * is set to NULL.  The whole search is restarted if the chosen task
  * stopped using @mm before it could be locked.
  */
 void mm_update_next_owner(struct mm_struct *mm)
 {
 	struct task_struct *c, *g, *p = current;
 retry:
 	if (!mm_need_new_owner(mm, p))
 		return;
 	read_lock(&tasklist_lock);
 	/*
 	 * Search in the children
 	 */
 	list_for_each_entry(c, &p->children, sibling) {
 		if (c->mm == mm)
 			goto assign_new_owner;
 	}
 	/*
 	 * Search in the siblings
 	 */
 	list_for_each_entry(c, &p->real_parent->children, sibling) {
 		if (c->mm == mm)
 			goto assign_new_owner;
 	}
 	/*
 	 * Search through everything else. We should not get
 	 * here often
 	 */
 	do_each_thread(g, c) {
 		if (c->mm == mm)
 			goto assign_new_owner;
 	} while_each_thread(g, c);
 	read_unlock(&tasklist_lock);
 	/*
 	 * We found no owner yet mm_users > 1: this implies that we are
 	 * most likely racing with swapoff (try_to_unuse()) or /proc or
 	 * ptrace or page migration (get_task_mm()).  Mark owner as NULL.
 	 */
 	mm->owner = NULL;
 	return;
 assign_new_owner:
 	/* reached with tasklist_lock still read-held and c the candidate */
 	BUG_ON(c == p);
 	get_task_struct(c);
 	/*
 	 * The task_lock protects c->mm from changing.
 	 * We always want mm->owner->mm == mm
 	 */
 	task_lock(c);
 	/*
 	 * Delay read_unlock() till we have the task_lock()
 	 * to ensure that c does not slip away underneath us
 	 */
 	read_unlock(&tasklist_lock);
 	/* re-check under task_lock: c may have switched mm in the meantime */
 	if (c->mm != mm) {
 		task_unlock(c);
 		put_task_struct(c);
 		goto retry;
 	}
 	mm->owner = c;
 	task_unlock(c);
 	put_task_struct(c);
 }
 #endif /* CONFIG_MM_OWNER */
 /*
  * Detach @tsk from its user address space: turn us into a lazy TLB
  * process if we aren't already.
  *
  * mm_release() is always called; nothing else happens if the task has no
  * mm.  Otherwise, if a core dump is in progress on the mm, the task first
  * registers itself with the dumper and sleeps until released.  It then
  * clears tsk->mm under task_lock(), enters lazy TLB mode, hands mm
  * ownership on if needed and drops its mm_users reference.
  */
 static void exit_mm(struct task_struct * tsk)
 {
 	struct mm_struct *mm = tsk->mm;
 	struct core_state *core_state;
 	mm_release(tsk, mm);
 	if (!mm)
 		return;
 	/*
 	 * Serialize with any possible pending coredump.
 	 * We must hold mmap_sem around checking core_state
 	 * and clearing tsk->mm.  The core-inducing thread
 	 * will increment ->nr_threads for each thread in the
 	 * group with ->mm != NULL.
 	 */
 	down_read(&mm->mmap_sem);
 	core_state = mm->core_state;
 	if (core_state) {
 		struct core_thread self;
 		up_read(&mm->mmap_sem);
 		/* push ourselves onto the dumper's list of waiting threads */
 		self.task = tsk;
 		self.next = xchg(&core_state->dumper.next, &self);
 		/*
 		 * Implies mb(), the result of xchg() must be visible
 		 * to core_state->dumper.
 		 */
 		if (atomic_dec_and_test(&core_state->nr_threads))
 			complete(&core_state->startup);
 		/* sleep until self.task is cleared on our behalf */
 		for (;;) {
 			set_task_state(tsk, TASK_UNINTERRUPTIBLE);
 			if (!self.task) /* see coredump_finish() */
 				break;
 			schedule();
 		}
 		__set_task_state(tsk, TASK_RUNNING);
 		down_read(&mm->mmap_sem);
 	}
 	/* hold an mm_count reference while the mm stays our active_mm */
 	atomic_inc(&mm->mm_count);
 	BUG_ON(mm != tsk->active_mm);
 	/* more a memory barrier than a real lock */
 	task_lock(tsk);
 	tsk->mm = NULL;
 	up_read(&mm->mmap_sem);
 	enter_lazy_tlb(mm, current);
 	/* We don't want this task to be frozen prematurely */
 	clear_freeze_flag(tsk);
 	if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
 		atomic_dec(&mm->oom_disable_count);
 	task_unlock(tsk);
 	mm_update_next_owner(mm);
 	mmput(mm);
 }
 /*
  * When we die, we re-parent all our children.
  * Try to give them to another thread in our thread
  * group, and if no such member exists, give it to
  * the child reaper process (ie "init") in our pid
  * space.
  *
  * Returns the task that should adopt @father's children.  Entered and
  * left with tasklist_lock write-held, but the lock is dropped and
  * re-taken when @father is the child reaper of its pid namespace and no
  * other live thread can take over.
  */
 static struct task_struct *find_new_reaper(struct task_struct *father)
 	__releases(&tasklist_lock)
 	__acquires(&tasklist_lock)
 {
 	struct pid_namespace *pid_ns = task_active_pid_ns(father);
 	struct task_struct *thread;
 	thread = father;
 	/* prefer the first thread of the group that is not itself exiting */
 	while_each_thread(father, thread) {
 		if (thread->flags & PF_EXITING)
 			continue;
 		if (unlikely(pid_ns->child_reaper == father))
 			pid_ns->child_reaper = thread;
 		return thread;
 	}
 	if (unlikely(pid_ns->child_reaper == father)) {
 		write_unlock_irq(&tasklist_lock);
 		if (unlikely(pid_ns == &init_pid_ns))
 			panic("Attempted to kill init!");
 		zap_pid_ns_processes(pid_ns);
 		write_lock_irq(&tasklist_lock);
 		/*
 		 * We can not clear ->child_reaper or leave it alone.
 		 * There may be stealth EXIT_DEAD tasks on ->children,
 		 * forget_original_parent() must move them somewhere.
 		 */
 		pid_ns->child_reaper = init_pid_ns.child_reaper;
 	}
 	return pid_ns->child_reaper;
 }
 /*
 * Any that need to be release_task'd are put on the @dead list.
  */
 static void reparent_leader(struct task_struct *father, struct task_struct *p,
 				struct list_head *dead)
 {
 	list_move_tail(&p->sibling, &p->real_parent->children);
 	if (task_detached(p))
 		return;
 	/*
 	 * If this is a threaded reparent there is no need to
 	 * notify anyone anything has happened.
 	 */
 	if (same_thread_group(p->real_parent, father))
 		return;
 	/* We don't want people slaying init.  */
 	p->exit_signal = SIGCHLD;
 	/* If it has exited notify the new parent about this child's death. */
 	if (!p->ptrace &&
 	    p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) {
 		do_notify_parent(p, p->exit_signal);
 		if (task_detached(p)) {
 			p->exit_state = EXIT_DEAD;
 			list_move_tail(&p->sibling, dead);
 		}
 	}
 	kill_orphaned_pgrp(p, father);
 }
 /*
  * Hand all children of the exiting task @father over to a new reaper.
  *
  * Under tasklist_lock every thread of every child gets its real_parent
  * (and its parent, when that was @father) redirected to the reaper and is
  * sent its pdeath_signal, if one is set.  Children that end up EXIT_DEAD
  * during reparenting are collected on a private list and released after
  * the lock has been dropped.
  */
 static void forget_original_parent(struct task_struct *father)
 {
 	struct task_struct *p, *n, *reaper;
 	LIST_HEAD(dead_children);
 	write_lock_irq(&tasklist_lock);
 	/*
 	 * Note that exit_ptrace() and find_new_reaper() might
 	 * drop tasklist_lock and reacquire it.
 	 */
 	exit_ptrace(father);
 	reaper = find_new_reaper(father);
 	list_for_each_entry_safe(p, n, &father->children, sibling) {
 		struct task_struct *t = p;
 		/* walk every thread in the child's thread group */
 		do {
 			t->real_parent = reaper;
 			if (t->parent == father) {
 				BUG_ON(t->ptrace);
 				t->parent = t->real_parent;
 			}
 			if (t->pdeath_signal)
 				group_send_sig_info(t->pdeath_signal,
 						    SEND_SIG_NOINFO, t);
 		} while_each_thread(p, t);
 		reparent_leader(father, p, &dead_children);
 	}
 	write_unlock_irq(&tasklist_lock);
 	BUG_ON(!list_empty(&father->children));
 	/* release_task() is called only after tasklist_lock was dropped */
 	list_for_each_entry_safe(p, n, &dead_children, sibling) {
 		list_del_init(&p->sibling);
 		release_task(p);
 	}
 }
 /*
  * Send signals to all our closest relatives so that they know
  * to properly mourn us..
  */
 static void exit_notify(struct task_struct *tsk, int group_dead)
 {
-	int signal;
 	bool autoreap;
-	void *cookie;
 	/*
 	 * This does two things:
 	 *
   	 * A.  Make init inherit all the child processes
 	 * B.  Check to see if any process groups have become orphaned
 	 *	as a result of our exiting, and if they have any stopped
 	 *	jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
 	 */
 	forget_original_parent(tsk);
 	exit_task_namespaces(tsk);
 	write_lock_irq(&tasklist_lock);
 	if (group_dead)
 		kill_orphaned_pgrp(tsk->group_leader, NULL);
 	/* Let father know we died
 	 *
 	 * Thread signals are configurable, but you aren't going to use
 	 * that to send signals to arbitrary processes.
 	 * That stops right now.
 	 *
 	 * If the parent exec id doesn't match the exec id we saved
 	 * when we started then we know the parent has changed security
 	 * domain.
 	 *
 	 * If our self_exec id doesn't match our parent_exec_id then
 	 * we have changed execution domain as these two values started
 	 * the same after a fork.
 	 */
-	if (tsk->exit_signal != SIGCHLD && !task_detached(tsk) &&
+	if (thread_group_leader(tsk) && tsk->exit_signal != SIGCHLD &&
 	    (tsk->parent_exec_id != tsk->real_parent->self_exec_id ||
 	     tsk->self_exec_id != tsk->parent_exec_id))
 		tsk->exit_signal = SIGCHLD;
-	signal = tracehook_notify_death(tsk, &cookie, group_dead);
+	if (unlikely(tsk->ptrace)) {
-	if (signal >= 0)
+		int sig = thread_group_leader(tsk) &&
-		autoreap = do_notify_parent(tsk, signal);
+				thread_group_empty(tsk) &&
-	else
+				!ptrace_reparented(tsk) ?
-		autoreap = (signal == DEATH_REAP);
+			tsk->exit_signal : SIGCHLD;
+		autoreap = do_notify_parent(tsk, sig);
+	} else if (thread_group_leader(tsk)) {
+		autoreap = thread_group_empty(tsk) &&
+			do_notify_parent(tsk, tsk->exit_signal);
+	} else {
+		autoreap = true;
+	}
 	tsk->exit_state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE;
 	/* mt-exec, de_thread() is waiting for group leader */
 	if (unlikely(tsk->signal->notify_count < 0))
 		wake_up_process(tsk->signal->group_exit_task);
 	write_unlock_irq(&tasklist_lock);
 	/* If the process is dead, release it - nobody will wait for it */
 	if (autoreap)
 		release_task(tsk);
 }
 #ifdef CONFIG_DEBUG_STACK_USAGE
 /*
  * Remember the smallest amount of unused stack observed so far and log
  * a warning every time the current task sets a new low.
  */
 static void check_stack_usage(void)
 {
 	static DEFINE_SPINLOCK(low_water_lock);
 	static int lowest_to_date = THREAD_SIZE;
 	unsigned long unused = stack_not_used(current);
 	/* Unlocked first look; only a possible new low takes the lock. */
 	if (unused < lowest_to_date) {
 		spin_lock(&low_water_lock);
 		/* Look again now that updates are serialised. */
 		if (unused < lowest_to_date) {
 			printk(KERN_WARNING "%s used greatest stack depth: %lu bytes "
 					"left\n",
 					current->comm, unused);
 			lowest_to_date = unused;
 		}
 		spin_unlock(&low_water_lock);
 	}
 }
 #else
 static inline void check_stack_usage(void) {}
 #endif
 /*
  * do_exit - tear down the current task; does not return.
  * @code: exit status, stored in tsk->exit_code.
  *
  * Releases the task's resources step by step (mm, semaphores, files,
  * fs, cgroup, ...), informs relatives through exit_notify(), and
  * finally sets the task state to TASK_DEAD and calls schedule().
  */
 NORET_TYPE void do_exit(long code)
 {
 	struct task_struct *tsk = current;
 	int group_dead;
 	profile_task_exit(tsk);
 	WARN_ON(atomic_read(&tsk->fs_excl));
 	WARN_ON(blk_needs_flush_plug(tsk));
 	/* Exiting from interrupt context or as the idle task (pid 0) is fatal. */
 	if (unlikely(in_interrupt()))
 		panic("Aiee, killing interrupt handler!");
 	if (unlikely(!tsk->pid))
 		panic("Attempted to kill the idle task!");
 	/*
 	 * If do_exit is called because this processes oopsed, it's possible
 	 * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before
 	 * continuing. Amongst other possible reasons, this is to prevent
 	 * mm_release()->clear_child_tid() from writing to a user-controlled
 	 * kernel address.
 	 */
 	set_fs(USER_DS);
 	ptrace_event(PTRACE_EVENT_EXIT, code);
 	validate_creds_for_do_exit(tsk);
 	/*
 	 * We're taking recursive faults here in do_exit. Safest is to just
 	 * leave this task alone and wait for reboot.
 	 */
 	if (unlikely(tsk->flags & PF_EXITING)) {
 		printk(KERN_ALERT
 			"Fixing recursive fault but reboot is needed!\n");
 		/*
 		 * We can do this unlocked here. The futex code uses
 		 * this flag just to verify whether the pi state
 		 * cleanup has been done or not. In the worst case it
 		 * loops once more. We pretend that the cleanup was
 		 * done as there is no way to return. Either the
 		 * OWNER_DIED bit is set by now or we push the blocked
 		 * task into the wait for ever nirwana as well.
 		 */
 		tsk->flags |= PF_EXITPIDONE;
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		schedule();
 	}
 	exit_irq_thread();
 	exit_signals(tsk);  /* sets PF_EXITING */
 	/*
 	 * tsk->flags are checked in the futex code to protect against
 	 * an exiting task cleaning up the robust pi futexes.
 	 */
 	smp_mb();
 	raw_spin_unlock_wait(&tsk->pi_lock);
 	if (unlikely(in_atomic()))
 		printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
 				current->comm, task_pid_nr(current),
 				preempt_count());
 	acct_update_integrals(tsk);
 	/* sync mm's RSS info before statistics gathering */
 	if (tsk->mm)
 		sync_mm_rss(tsk, tsk->mm);
 	/*
 	 * group_dead is set when this decrement takes signal->live to
 	 * zero; the group-wide cleanups below are keyed off it.
 	 */
 	group_dead = atomic_dec_and_test(&tsk->signal->live);
 	if (group_dead) {
 		hrtimer_cancel(&tsk->signal->real_timer);
 		exit_itimers(tsk->signal);
 		if (tsk->mm)
 			setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm);
 	}
 	acct_collect(code, group_dead);
 	if (group_dead)
 		tty_audit_exit();
 	if (unlikely(tsk->audit_context))
 		audit_free(tsk);
 	tsk->exit_code = code;
 	taskstats_exit(tsk, group_dead);
 	exit_mm(tsk);
 	if (group_dead)
 		acct_process();
 	trace_sched_process_exit(tsk);
 	exit_sem(tsk);
 	exit_files(tsk);
 	exit_fs(tsk);
 	check_stack_usage();
 	exit_thread();
 	/*
 	 * Flush inherited counters to the parent - before the parent
 	 * gets woken up by child-exit notifications.
 	 *
 	 * because of cgroup mode, must be called before cgroup_exit()
 	 */
 	perf_event_exit_task(tsk);
 	cgroup_exit(tsk, 1);
 	if (group_dead)
 		disassociate_ctty(1);
 	module_put(task_thread_info(tsk)->exec_domain->module);
 	proc_exit_connector(tsk);
 	/*
 	 * FIXME: do that only when needed, using sched_exit tracepoint
 	 */
 	ptrace_put_breakpoints(tsk);
 	/* Sets tsk->exit_state and may release the task if nobody will wait. */
 	exit_notify(tsk, group_dead);
 #ifdef CONFIG_NUMA
 	task_lock(tsk);
 	mpol_put(tsk->mempolicy);
 	tsk->mempolicy = NULL;
 	task_unlock(tsk);
 #endif
 #ifdef CONFIG_FUTEX
 	if (unlikely(current->pi_state_cache))
 		kfree(current->pi_state_cache);
 #endif
 	/*
 	 * Make sure we are holding no locks:
 	 */
 	debug_check_no_locks_held(tsk);
 	/*
 	 * We can do this unlocked here. The futex code uses this flag
 	 * just to verify whether the pi state cleanup has been done
 	 * or not. In the worst case it loops once more.
 	 */
 	tsk->flags |= PF_EXITPIDONE;
 	if (tsk->io_context)
 		exit_io_context(tsk);
 	if (tsk->splice_pipe)
 		__free_pipe_info(tsk->splice_pipe);
 	validate_creds_for_do_exit(tsk);
 	preempt_disable();
 	exit_rcu();
 	/* causes final put_task_struct in finish_task_switch(). */
 	tsk->state = TASK_DEAD;
 	schedule();
 	/* schedule() must not come back for a TASK_DEAD task. */
 	BUG();
 	/* Avoid "noreturn function does return".  */
 	for (;;)
 		cpu_relax();	/* For when BUG is null */
 }
 EXPORT_SYMBOL_GPL(do_exit);
 NORET_TYPE void complete_and_exit(struct completion *comp, long code)
 {
 	if (comp)
 		complete(comp);
 	do_exit(code);
 }
 EXPORT_SYMBOL(complete_and_exit);
 /*
  * exit(2): end the calling task.  Only the low eight bits of
  * @error_code are kept; they are placed in bits 8..15 of the exit code.
  */
 SYSCALL_DEFINE1(exit, int, error_code)
 {
 	int status = error_code & 0xff;
 	do_exit(status << 8);
 }
 /*
  * Bring down the whole thread group.  Fatal signals end up here, and so
  * does sys_exit_group (below).  If a group exit is already under way,
  * its recorded exit code replaces @exit_code.
  */
 NORET_TYPE void
 do_group_exit(int exit_code)
 {
 	struct signal_struct *sig = current->signal;
 	BUG_ON(exit_code & 0x80); /* core dumps don't get here */
 	if (signal_group_exit(sig)) {
 		exit_code = sig->group_exit_code;
 	} else if (!thread_group_empty(current)) {
 		struct sighand_struct *const sh = current->sighand;
 		spin_lock_irq(&sh->siglock);
 		if (!signal_group_exit(sig)) {
 			/* We are first: record the code and kill the others. */
 			sig->group_exit_code = exit_code;
 			sig->flags = SIGNAL_GROUP_EXIT;
 			zap_other_threads(current);
 		} else {
 			/* Another thread got here before we took the lock.  */
 			exit_code = sig->group_exit_code;
 		}
 		spin_unlock_irq(&sh->siglock);
 	}
 	do_exit(exit_code);
 	/* NOTREACHED */
 }
 /*
  * exit_group(2): kill every thread in the caller's thread group.  An
  * external wait4()-ing process gets the correct exit code even when the
  * calling thread is not the thread group leader.
  */
 SYSCALL_DEFINE1(exit_group, int, error_code)
 {
 	int status = (error_code & 0xff) << 8;
 	do_group_exit(status);
 	/* NOTREACHED */
 	return 0;
 }
 /*
  * Arguments and scratch state for one do_wait() call, filled in by the
  * wait4()/waitid() entry points.
  */
 struct wait_opts {
 	/* Kind of pid being matched; PIDTYPE_MAX matches any (eligible_pid). */
 	enum pid_type		wo_type;
 	/* W* option bits (WEXITED, WNOHANG, __WALL, ...). */
 	int			wo_flags;
 	/* Pid a candidate must match when wo_type is not PIDTYPE_MAX. */
 	struct pid		*wo_pid;
 	/* User siginfo to fill in, or NULL. */
 	struct siginfo __user	*wo_info;
 	/* User int receiving the wait status, or NULL. */
 	int __user		*wo_stat;
 	/* User rusage filled through getrusage(), or NULL. */
 	struct rusage __user	*wo_rusage;
 	/* Entry do_wait() queues on current->signal->wait_chldexit. */
 	wait_queue_t		child_wait;
 	/* Result when no task was reported; do_wait() starts it at -ECHILD. */
 	int			notask_error;
 };
 /*
  * Return the struct pid of kind @type for @task.  Every kind other than
  * PIDTYPE_PID is looked up on the thread group leader.
  */
 static inline
 struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
 {
 	struct task_struct *owner =
 		(type == PIDTYPE_PID) ? task : task->group_leader;
 	return owner->pids[type].pid;
 }
 /*
  * Does @p match the pid selection in @wo?  PIDTYPE_MAX accepts every
  * task; otherwise @p's pid of the requested kind must equal wo_pid.
  */
 static int eligible_pid(struct wait_opts *wo, struct task_struct *p)
 {
 	if (wo->wo_type == PIDTYPE_MAX)
 		return 1;
 	return task_pid_type(p, wo->wo_type) == wo->wo_pid;
 }
 /*
  * Decide whether @p is a child this wait may report: it has to pass the
  * pid filter and the __WALL/__WCLONE filter.  Returns 1 if so, else 0.
  */
 static int eligible_child(struct wait_opts *wo, struct task_struct *p)
 {
 	int is_clone, want_clone;
 	if (!eligible_pid(wo, p))
 		return 0;
 	/* __WALL accepts clone and non-clone children alike. */
 	if (wo->wo_flags & __WALL)
 		return 1;
 	/*
 	 * Without __WALL, wait for clone children *only* if __WCLONE is
 	 * set, and for non-clone children *only* otherwise.  (A "clone"
 	 * child is one that reports to its parent using a signal other
 	 * than SIGCHLD.)
 	 */
 	is_clone = p->exit_signal != SIGCHLD;
 	want_clone = !!(wo->wo_flags & __WCLONE);
 	return is_clone == want_clone;
 }
 /*
  * Copy a wait result out to user space without reaping @p, and drop the
  * task reference the caller took.  Fills wo_rusage and, when wo_info is
  * set, the siginfo fields.  Returns @pid on success or the first error
  * encountered.
  */
 static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p,
 				pid_t pid, uid_t uid, int why, int status)
 {
 	struct siginfo __user *uinfo;
 	int err = 0;
 	if (wo->wo_rusage)
 		err = getrusage(p, RUSAGE_BOTH, wo->wo_rusage);
 	put_task_struct(p);
 	uinfo = wo->wo_info;
 	if (uinfo && !err) {
 		/* Stop at the first field that cannot be written. */
 		err = put_user(SIGCHLD, &uinfo->si_signo);
 		if (!err)
 			err = put_user(0, &uinfo->si_errno);
 		if (!err)
 			err = put_user((short)why, &uinfo->si_code);
 		if (!err)
 			err = put_user(pid, &uinfo->si_pid);
 		if (!err)
 			err = put_user(uid, &uinfo->si_uid);
 		if (!err)
 			err = put_user(status, &uinfo->si_status);
 	}
 	return err ? err : pid;
 }
 /*
  * Handle sys_wait4 work for one task in state EXIT_ZOMBIE.  We hold
  * read_lock(&tasklist_lock) on entry.  If we return zero, we still hold
  * the lock and this task is uninteresting.  If we return nonzero, we have
  * released the lock and the system call should return.
  *
  * @wo: wait options; wo_stat, wo_info and wo_rusage receive the result.
  * @p:  the zombie task under consideration.
  *
  * A nonzero return is @p's pid on success or a negative error from the
  * copy-out to user space.
  */
 static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 {
 	unsigned long state;
 	int retval, status, traced;
 	pid_t pid = task_pid_vnr(p);
 	uid_t uid = __task_cred(p)->uid;
 	struct siginfo __user *infop;
 	if (!likely(wo->wo_flags & WEXITED))
 		return 0;
 	/* WNOWAIT: report the exit status but leave the zombie in place. */
 	if (unlikely(wo->wo_flags & WNOWAIT)) {
 		int exit_code = p->exit_code;
 		int why;
 		get_task_struct(p);
 		read_unlock(&tasklist_lock);
 		/* Low 7 bits zero: normal exit, status is in the next byte. */
 		if ((exit_code & 0x7f) == 0) {
 			why = CLD_EXITED;
 			status = exit_code >> 8;
 		} else {
 			why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED;
 			status = exit_code & 0x7f;
 		}
 		return wait_noreap_copyout(wo, p, pid, uid, why, status);
 	}
 	/*
 	 * Try to move the task's state to DEAD
 	 * only one thread is allowed to do this:
 	 */
 	state = xchg(&p->exit_state, EXIT_DEAD);
 	if (state != EXIT_ZOMBIE) {
 		BUG_ON(state != EXIT_DEAD);
 		return 0;
 	}
 	traced = ptrace_reparented(p);
 	/*
 	 * It can be ptraced but not reparented, check
 	 * !task_detached() to filter out sub-threads.
 	 */
 	if (likely(!traced) && likely(!task_detached(p))) {
 		struct signal_struct *psig;
 		struct signal_struct *sig;
 		unsigned long maxrss;
 		cputime_t tgutime, tgstime;
 		/*
 		 * The resource counters for the group leader are in its
 		 * own task_struct.  Those for dead threads in the group
 		 * are in its signal_struct, as are those for the child
 		 * processes it has previously reaped.  All these
 		 * accumulate in the parent's signal_struct c* fields.
 		 *
 		 * We don't bother to take a lock here to protect these
 		 * p->signal fields, because they are only touched by
 		 * __exit_signal, which runs with tasklist_lock
 		 * write-locked anyway, and so is excluded here.  We do
 		 * need to protect the access to parent->signal fields,
 		 * as other threads in the parent group can be right
 		 * here reaping other children at the same time.
 		 *
 		 * We use thread_group_times() to get times for the thread
 		 * group, which consolidates times for all threads in the
 		 * group including the group leader.
 		 */
 		thread_group_times(p, &tgutime, &tgstime);
 		spin_lock_irq(&p->real_parent->sighand->siglock);
 		psig = p->real_parent->signal;
 		sig = p->signal;
 		psig->cutime =
 			cputime_add(psig->cutime,
 			cputime_add(tgutime,
 				    sig->cutime));
 		psig->cstime =
 			cputime_add(psig->cstime,
 			cputime_add(tgstime,
 				    sig->cstime));
 		psig->cgtime =
 			cputime_add(psig->cgtime,
 			cputime_add(p->gtime,
 			cputime_add(sig->gtime,
 				    sig->cgtime)));
 		psig->cmin_flt +=
 			p->min_flt + sig->min_flt + sig->cmin_flt;
 		psig->cmaj_flt +=
 			p->maj_flt + sig->maj_flt + sig->cmaj_flt;
 		psig->cnvcsw +=
 			p->nvcsw + sig->nvcsw + sig->cnvcsw;
 		psig->cnivcsw +=
 			p->nivcsw + sig->nivcsw + sig->cnivcsw;
 		psig->cinblock +=
 			task_io_get_inblock(p) +
 			sig->inblock + sig->cinblock;
 		psig->coublock +=
 			task_io_get_oublock(p) +
 			sig->oublock + sig->coublock;
 		/* cmaxrss is a high-water mark, so it is raised rather than summed. */
 		maxrss = max(sig->maxrss, sig->cmaxrss);
 		if (psig->cmaxrss < maxrss)
 			psig->cmaxrss = maxrss;
 		task_io_accounting_add(&psig->ioac, &p->ioac);
 		task_io_accounting_add(&psig->ioac, &sig->ioac);
 		spin_unlock_irq(&p->real_parent->sighand->siglock);
 	}
 	/*
 	 * Now we are sure this task is interesting, and no other
 	 * thread can reap it because we set its state to EXIT_DEAD.
 	 */
 	read_unlock(&tasklist_lock);
 	retval = wo->wo_rusage
 		? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
 	/* A group exit reports the group-wide code, not the task's own. */
 	status = (p->signal->flags & SIGNAL_GROUP_EXIT)
 		? p->signal->group_exit_code : p->exit_code;
 	if (!retval && wo->wo_stat)
 		retval = put_user(status, wo->wo_stat);
 	/* Fill the siginfo field by field, stopping at the first failure. */
 	infop = wo->wo_info;
 	if (!retval && infop)
 		retval = put_user(SIGCHLD, &infop->si_signo);
 	if (!retval && infop)
 		retval = put_user(0, &infop->si_errno);
 	if (!retval && infop) {
 		int why;
 		if ((status & 0x7f) == 0) {
 			why = CLD_EXITED;
 			status >>= 8;
 		} else {
 			why = (status & 0x80) ? CLD_DUMPED : CLD_KILLED;
 			status &= 0x7f;
 		}
 		retval = put_user((short)why, &infop->si_code);
 		if (!retval)
 			retval = put_user(status, &infop->si_status);
 	}
 	if (!retval && infop)
 		retval = put_user(pid, &infop->si_pid);
 	if (!retval && infop)
 		retval = put_user(uid, &infop->si_uid);
 	if (!retval)
 		retval = pid;
 	if (traced) {
 		write_lock_irq(&tasklist_lock);
 		/* We dropped tasklist, ptracer could die and untrace */
 		ptrace_unlink(p);
 		/*
 		 * If this is not a detached task, notify the parent.
 		 * If it's still not detached after that, don't release
 		 * it now.
 		 */
 		if (!task_detached(p)) {
 			do_notify_parent(p, p->exit_signal);
 			if (!task_detached(p)) {
 				/* Back to zombie; p = NULL skips release_task() below. */
 				p->exit_state = EXIT_ZOMBIE;
 				p = NULL;
 			}
 		}
 		write_unlock_irq(&tasklist_lock);
 	}
 	if (p != NULL)
 		release_task(p);
 	return retval;
 }
 /*
  * Return a pointer to the stop code to report for @p, or NULL when @p
  * is not in the relevant stopped state.  A ptrace wait uses the task's
  * own exit_code; a regular wait uses the group-wide group_exit_code.
  */
 static int *task_stopped_code(struct task_struct *p, bool ptrace)
 {
 	if (!ptrace)
 		return (p->signal->flags & SIGNAL_STOP_STOPPED) ?
 			&p->signal->group_exit_code : NULL;
 	if (!task_is_stopped_or_traced(p) || (p->jobctl & JOBCTL_LISTENING))
 		return NULL;
 	return &p->exit_code;
 }
 /**
  * wait_task_stopped - Wait for %TASK_STOPPED or %TASK_TRACED
  * @wo: wait options
  * @ptrace: is the wait for ptrace
  * @p: task to wait for
  *
  * Handle sys_wait4() work for %p in state %TASK_STOPPED or %TASK_TRACED.
  *
  * CONTEXT:
  * read_lock(&tasklist_lock), which is released if return value is
  * non-zero.  Also, grabs and releases @p->sighand->siglock.
  *
  * RETURNS:
  * 0 if wait condition didn't exist and search for other wait conditions
  * should continue.  Non-zero return, -errno on failure and @p's pid on
  * success, implies that tasklist_lock is released and wait condition
  * search should terminate.
  */
 static int wait_task_stopped(struct wait_opts *wo,
 				int ptrace, struct task_struct *p)
 {
 	struct siginfo __user *infop;
 	int retval, exit_code, *p_code, why;
 	uid_t uid = 0; /* unneeded, required by compiler */
 	pid_t pid;
 	/*
 	 * Traditionally we see ptrace'd stopped tasks regardless of options.
 	 */
 	if (!ptrace && !(wo->wo_flags & WUNTRACED))
 		return 0;
 	/* Unlocked pre-check; it is repeated under siglock below. */
 	if (!task_stopped_code(p, ptrace))
 		return 0;
 	exit_code = 0;
 	spin_lock_irq(&p->sighand->siglock);
 	p_code = task_stopped_code(p, ptrace);
 	if (unlikely(!p_code))
 		goto unlock_sig;
 	exit_code = *p_code;
 	if (!exit_code)
 		goto unlock_sig;
 	/* Consume the stop code unless the caller asked for WNOWAIT. */
 	if (!unlikely(wo->wo_flags & WNOWAIT))
 		*p_code = 0;
 	uid = task_uid(p);
 unlock_sig:
 	spin_unlock_irq(&p->sighand->siglock);
 	/* exit_code is still zero when there was nothing to report. */
 	if (!exit_code)
 		return 0;
 	/*
 	 * Now we are pretty sure this task is interesting.
 	 * Make sure it doesn't get reaped out from under us while we
 	 * give up the lock and then examine it below.  We don't want to
 	 * keep holding onto the tasklist_lock while we call getrusage and
 	 * possibly take page faults for user memory.
 	 */
 	get_task_struct(p);
 	pid = task_pid_vnr(p);
 	why = ptrace ? CLD_TRAPPED : CLD_STOPPED;
 	read_unlock(&tasklist_lock);
 	/* wait_noreap_copyout() drops the task reference taken above. */
 	if (unlikely(wo->wo_flags & WNOWAIT))
 		return wait_noreap_copyout(wo, p, pid, uid, why, exit_code);
 	retval = wo->wo_rusage
 		? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
 	/* Status word: stop code in bits 8 and up, 0x7f in the low byte. */
 	if (!retval && wo->wo_stat)
 		retval = put_user((exit_code << 8) | 0x7f, wo->wo_stat);
 	infop = wo->wo_info;
 	if (!retval && infop)
 		retval = put_user(SIGCHLD, &infop->si_signo);
 	if (!retval && infop)
 		retval = put_user(0, &infop->si_errno);
 	if (!retval && infop)
 		retval = put_user((short)why, &infop->si_code);
 	if (!retval && infop)
 		retval = put_user(exit_code, &infop->si_status);
 	if (!retval && infop)
 		retval = put_user(pid, &infop->si_pid);
 	if (!retval && infop)
 		retval = put_user(uid, &infop->si_uid);
 	if (!retval)
 		retval = pid;
 	put_task_struct(p);
 	/* A zero return here would claim tasklist_lock is still held. */
 	BUG_ON(!retval);
 	return retval;
 }
 /*
  * Report a continued child to a waiter that passed WCONTINUED.  Entered
  * with read_lock(&tasklist_lock) held.  A zero return means this task
  * is uninteresting and the lock is still held; a nonzero return means
  * the lock has been released and the system call should return.
  */
 static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
 {
 	pid_t child_pid;
 	uid_t child_uid;
 	int err;
 	if (!unlikely(wo->wo_flags & WCONTINUED) ||
 	    !(p->signal->flags & SIGNAL_STOP_CONTINUED))
 		return 0;
 	spin_lock_irq(&p->sighand->siglock);
 	/* Check again now that siglock is held.  */
 	if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) {
 		spin_unlock_irq(&p->sighand->siglock);
 		return 0;
 	}
 	/* WNOWAIT leaves the continued flag set for a later wait. */
 	if (!unlikely(wo->wo_flags & WNOWAIT))
 		p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
 	child_uid = task_uid(p);
 	spin_unlock_irq(&p->sighand->siglock);
 	child_pid = task_pid_vnr(p);
 	get_task_struct(p);
 	read_unlock(&tasklist_lock);
 	if (wo->wo_info) {
 		/* siginfo requested: the copy-out helper also drops the ref. */
 		err = wait_noreap_copyout(wo, p, child_pid, child_uid,
 					  CLD_CONTINUED, SIGCONT);
 		BUG_ON(err == 0);
 		return err;
 	}
 	err = wo->wo_rusage
 		? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
 	put_task_struct(p);
 	if (!err && wo->wo_stat)
 		err = put_user(0xffff, wo->wo_stat);
 	return err ? err : child_pid;
 }
 /*
  * Consider @p for a wait by @parent.
  *
  * @wo:     wait options; ->notask_error is updated as described below.
  * @ptrace: nonzero when @p is being looked at as a ptracee (the
  *          ptrace_do_wait() walk), zero for the ->children walk.
  * @p:      candidate task.
  *
  * -ECHILD should be in ->notask_error before the first call.
  * Returns nonzero for a final return, when we have unlocked tasklist_lock.
  * Returns zero if the search for a child should continue;
  * then ->notask_error is 0 if @p is an eligible child,
  * or another error from security_task_wait(), or still -ECHILD.
  */
 static int wait_consider_task(struct wait_opts *wo, int ptrace,
 				struct task_struct *p)
 {
 	int ret = eligible_child(wo, p);
 	if (!ret)
 		return ret;
 	ret = security_task_wait(p);
 	if (unlikely(ret < 0)) {
 		/*
 		 * If we have not yet seen any eligible child,
 		 * then let this error code replace -ECHILD.
 		 * A permission error will give the user a clue
 		 * to look for security policy problems, rather
 		 * than for mysterious wait bugs.
 		 */
 		if (wo->notask_error)
 			wo->notask_error = ret;
 		return 0;
 	}
 	/* dead body doesn't have much to contribute */
 	if (p->exit_state == EXIT_DEAD)
 		return 0;
 	/* slay zombie? */
 	if (p->exit_state == EXIT_ZOMBIE) {
 		/*
 		 * A zombie ptracee is only visible to its ptracer.
 		 * Notification and reaping will be cascaded to the real
 		 * parent when the ptracer detaches.
 		 */
 		if (likely(!ptrace) && unlikely(p->ptrace)) {
 			/* it will become visible, clear notask_error */
 			wo->notask_error = 0;
 			return 0;
 		}
 		/* we don't reap group leaders with subthreads */
 		if (!delay_group_leader(p))
 			return wait_task_zombie(wo, p);
 		/*
 		 * Allow access to stopped/continued state via zombie by
 		 * falling through.  Clearing of notask_error is complex.
 		 *
 		 * When !@ptrace:
 		 *
 		 * If WEXITED is set, notask_error should naturally be
 		 * cleared.  If not, subset of WSTOPPED|WCONTINUED is set,
 		 * so, if there are live subthreads, there are events to
 		 * wait for.  If all subthreads are dead, it's still safe
 		 * to clear - this function will be called again in finite
 		 * amount time once all the subthreads are released and
 		 * will then return without clearing.
 		 *
 		 * When @ptrace:
 		 *
 		 * Stopped state is per-task and thus can't change once the
 		 * target task dies.  Only continued and exited can happen.
 		 * Clear notask_error if WCONTINUED | WEXITED.
 		 */
 		if (likely(!ptrace) || (wo->wo_flags & (WCONTINUED | WEXITED)))
 			wo->notask_error = 0;
 	} else {
 		/*
 		 * If @p is ptraced by a task in its real parent's group,
 		 * hide group stop/continued state when looking at @p as
 		 * the real parent; otherwise, a single stop can be
 		 * reported twice as group and ptrace stops.
 		 *
 		 * If a ptracer wants to distinguish the two events for its
 		 * own children, it should create a separate process which
 		 * takes the role of real parent.
 		 */
 		if (likely(!ptrace) && p->ptrace &&
 		    same_thread_group(p->parent, p->real_parent))
 			return 0;
 		/*
 		 * @p is alive and it's gonna stop, continue or exit, so
 		 * there always is something to wait for.
 		 */
 		wo->notask_error = 0;
 	}
 	/*
 	 * Wait for stopped.  Depending on @ptrace, different stopped state
 	 * is used and the two don't interact with each other.
 	 */
 	ret = wait_task_stopped(wo, ptrace, p);
 	if (ret)
 		return ret;
 	/*
 	 * Wait for continued.  There's only one continued state and the
 	 * ptracer can consume it which can confuse the real parent.  Don't
 	 * use WCONTINUED from ptracer.  You don't need or want it.
 	 */
 	return wait_task_continued(wo, p);
 }
 /*
  * Run the do_wait() checks over the children of one thread, @tsk.
  *
  * ->notask_error must hold -ECHILD before the first call.  A nonzero
  * return is final and means tasklist_lock has been dropped.  A zero
  * return means keep searching; ->notask_error is then 0 if any child
  * was eligible, or an error from security_task_wait(), or still -ECHILD.
  */
 static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)
 {
 	struct task_struct *child;
 	int ret = 0;
 	list_for_each_entry(child, &tsk->children, sibling) {
 		ret = wait_consider_task(wo, 0, child);
 		if (ret)
 			break;
 	}
 	return ret;
 }
 /*
  * Same walk as do_wait_thread(), but over the tasks on @tsk's ->ptraced
  * list, each considered with the ptrace flag set.
  */
 static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
 {
 	struct task_struct *tracee;
 	int ret = 0;
 	list_for_each_entry(tracee, &tsk->ptraced, ptrace_entry) {
 		ret = wait_consider_task(wo, 1, tracee);
 		if (ret)
 			break;
 	}
 	return ret;
 }
 /*
  * Wake function for the wait_chldexit entry queued by do_wait().  @key
  * is the task the wakeup is about.  The waiter is woken only if that
  * task passes the pid filter and, under __WNOTHREAD, its ->parent is
  * the task stored in wait->private.
  */
 static int child_wait_callback(wait_queue_t *wait, unsigned mode,
 				int sync, void *key)
 {
 	struct wait_opts *wo = container_of(wait, struct wait_opts,
 						child_wait);
 	struct task_struct *child = key;
 	bool own_children_only = wo->wo_flags & __WNOTHREAD;
 	if (!eligible_pid(wo, child) ||
 	    (own_children_only && wait->private != child->parent))
 		return 0;
 	return default_wake_function(wait, mode, sync, key);
 }
 void __wake_up_parent(struct task_struct *p, struct task_struct *parent)
 {
 	__wake_up_sync_key(&parent->signal->wait_chldexit,
 				TASK_INTERRUPTIBLE, 1, p);
 }
 /*
  * Common back end of the wait4()/waitid() system calls.
  *
  * Scans the children and ptracees of the current task (and of the other
  * threads in its group unless __WNOTHREAD is set) for a reportable
  * event, sleeping on current->signal->wait_chldexit between scans.
  *
  * Returns whatever the per-task handler returned (a pid or a negative
  * error), ->notask_error when nothing could be reported, or
  * -ERESTARTSYS when a signal is pending instead of sleeping.
  */
 static long do_wait(struct wait_opts *wo)
 {
 	struct task_struct *tsk;
 	int retval;
 	trace_sched_process_wait(wo->wo_pid);
 	init_waitqueue_func_entry(&wo->child_wait, child_wait_callback);
 	wo->child_wait.private = current;
 	add_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);
 repeat:
 	/*
 	 * If there is nothing that can match our criteria just get out.
 	 * We will clear ->notask_error to zero if we see any child that
 	 * might later match our criteria, even if we are not able to reap
 	 * it yet.
 	 */
 	wo->notask_error = -ECHILD;
 	if ((wo->wo_type < PIDTYPE_MAX) &&
 	   (!wo->wo_pid || hlist_empty(&wo->wo_pid->tasks[wo->wo_type])))
 		goto notask;
 	/* The task state is set before the scan starts. */
 	set_current_state(TASK_INTERRUPTIBLE);
 	read_lock(&tasklist_lock);
 	tsk = current;
 	do {
 		/* A nonzero result means tasklist_lock was already dropped. */
 		retval = do_wait_thread(wo, tsk);
 		if (retval)
 			goto end;
 		retval = ptrace_do_wait(wo, tsk);
 		if (retval)
 			goto end;
 		if (wo->wo_flags & __WNOTHREAD)
 			break;
 	} while_each_thread(current, tsk);
 	read_unlock(&tasklist_lock);
 notask:
 	retval = wo->notask_error;
 	/* Something may still show up: sleep and rescan unless WNOHANG. */
 	if (!retval && !(wo->wo_flags & WNOHANG)) {
 		retval = -ERESTARTSYS;
 		if (!signal_pending(current)) {
 			schedule();
 			goto repeat;
 		}
 	}
 end:
 	__set_current_state(TASK_RUNNING);
 	remove_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);
 	return retval;
 }
 /*
  * waitid(2): wait for a state change in the children selected by
  * @which/@upid and describe it in @infop.
  *
  * @options may only contain WNOHANG, WNOWAIT, WEXITED, WSTOPPED and
  * WCONTINUED, and must contain at least one of the last three;
  * otherwise -EINVAL is returned.  P_PID and P_PGID need @upid > 0.
  * Returns 0 on success or a negative error.
  */
 SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
 		infop, int, options, struct rusage __user *, ru)
 {
 	struct wait_opts wo;
 	struct pid *pid = NULL;
 	enum pid_type type;
 	long ret;
 	if ((options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED)) ||
 	    !(options & (WEXITED|WSTOPPED|WCONTINUED)))
 		return -EINVAL;
 	switch (which) {
 	case P_ALL:
 		type = PIDTYPE_MAX;
 		break;
 	case P_PID:
 	case P_PGID:
 		if (upid <= 0)
 			return -EINVAL;
 		type = (which == P_PID) ? PIDTYPE_PID : PIDTYPE_PGID;
 		break;
 	default:
 		return -EINVAL;
 	}
 	if (type < PIDTYPE_MAX)
 		pid = find_get_pid(upid);
 	wo.wo_type	= type;
 	wo.wo_pid	= pid;
 	wo.wo_flags	= options;
 	wo.wo_info	= infop;
 	wo.wo_stat	= NULL;
 	wo.wo_rusage	= ru;
 	ret = do_wait(&wo);
 	if (ret > 0) {
 		/* do_wait() returned a pid; waitid() reports plain success. */
 		ret = 0;
 	} else if (infop && !ret) {
 		/*
 		 * For a WNOHANG return, clear out all the fields
 		 * we would set so the user can easily tell the
 		 * difference.
 		 */
 		ret = put_user(0, &infop->si_signo);
 		if (!ret)
 			ret = put_user(0, &infop->si_errno);
 		if (!ret)
 			ret = put_user(0, &infop->si_code);
 		if (!ret)
 			ret = put_user(0, &infop->si_pid);
 		if (!ret)
 			ret = put_user(0, &infop->si_uid);
 		if (!ret)
 			ret = put_user(0, &infop->si_status);
 	}
 	put_pid(pid);
 	/* avoid REGPARM breakage on x86: */
 	asmlinkage_protect(5, ret, which, upid, infop, options, ru);
 	return ret;
 }
 /*
  * wait4(2): wait for a state change in a child selected by @upid.
  *
  * @upid:      -1 waits for any child; any other negative value waits
  *             for the process group -@upid; 0 waits for the caller's
  *             own process group; a positive value waits for that pid.
  * @stat_addr: optional user pointer receiving the wait status.
  * @options:   may contain WNOHANG, WUNTRACED, WCONTINUED, __WNOTHREAD,
  *             __WCLONE and __WALL; any other bit yields -EINVAL.
  * @ru:        optional user pointer receiving resource usage.
  *
  * WEXITED is always added to the flags handed to do_wait().  Returns
  * the result of do_wait(), or -ESRCH when @upid is INT_MIN.
  */
 SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
 		int, options, struct rusage __user *, ru)
 {
 	struct wait_opts wo;
 	struct pid *pid = NULL;
 	enum pid_type type;
 	long ret;
 	if (options & ~(WNOHANG|WUNTRACED|WCONTINUED|
 			__WNOTHREAD|__WCLONE|__WALL))
 		return -EINVAL;
 	/*
 	 * -INT_MIN cannot be represented in an int, so negating it below
 	 * would be undefined behaviour.  No process group can have that
 	 * id, so report "no such process" up front.
 	 */
 	if (upid == INT_MIN)
 		return -ESRCH;
 	if (upid == -1)
 		type = PIDTYPE_MAX;
 	else if (upid < 0) {
 		type = PIDTYPE_PGID;
 		pid = find_get_pid(-upid);
 	} else if (upid == 0) {
 		type = PIDTYPE_PGID;
 		pid = get_task_pid(current, PIDTYPE_PGID);
 	} else /* upid > 0 */ {
 		type = PIDTYPE_PID;
 		pid = find_get_pid(upid);
 	}
 	wo.wo_type	= type;
 	wo.wo_pid	= pid;
 	wo.wo_flags	= options | WEXITED;
 	wo.wo_info	= NULL;
 	wo.wo_stat	= stat_addr;
 	wo.wo_rusage	= ru;
 	ret = do_wait(&wo);
 	put_pid(pid);
 	/* avoid REGPARM breakage on x86: */
 	asmlinkage_protect(4, ret, upid, stat_addr, options, ru);
 	return ret;
 }
 #ifdef __ARCH_WANT_SYS_WAITPID
 /*
  * sys_waitpid() is kept for compatibility only.  waitpid() should be
  * implemented in libc.a on top of sys_wait4(); this entry point just
  * forwards to it without a rusage buffer.
  */
 SYSCALL_DEFINE3(waitpid, pid_t, pid, int __user *, stat_addr, int, options)
 {
 	struct rusage __user *no_rusage = NULL;
 	return sys_wait4(pid, stat_addr, options, no_rusage);
 }