Doug / smarc-fsl-linux-kernel

1

/*
 * linux/kernel/exit.c
 *
 */

6

7

#include <linux/mm.h>

7

#include <linux/mm.h>

8

#include <linux/slab.h>

8

#include <linux/slab.h>

9

#include <linux/interrupt.h>

9

#include <linux/interrupt.h>

10

#include <linux/module.h>

10

#include <linux/module.h>

11

#include <linux/capability.h>

11

#include <linux/capability.h>

12

#include <linux/completion.h>

12

#include <linux/completion.h>

13

#include <linux/personality.h>

13

#include <linux/personality.h>

14

#include <linux/tty.h>

14

#include <linux/tty.h>

15

#include <linux/iocontext.h>

15

#include <linux/iocontext.h>

16

#include <linux/key.h>

16

#include <linux/key.h>

17

#include <linux/security.h>

17

#include <linux/security.h>

18

#include <linux/cpu.h>

18

#include <linux/cpu.h>

19

#include <linux/acct.h>

19

#include <linux/acct.h>

20

#include <linux/tsacct_kern.h>

20

#include <linux/tsacct_kern.h>

21

#include <linux/file.h>

21

#include <linux/file.h>

22

#include <linux/fdtable.h>

22

#include <linux/fdtable.h>

23

#include <linux/binfmts.h>

23

#include <linux/binfmts.h>

24

#include <linux/nsproxy.h>

24

#include <linux/nsproxy.h>

25

#include <linux/pid_namespace.h>

25

#include <linux/pid_namespace.h>

26

#include <linux/ptrace.h>

26

#include <linux/ptrace.h>

27

#include <linux/profile.h>

27

#include <linux/profile.h>

28

#include <linux/mount.h>

28

#include <linux/mount.h>

29

#include <linux/proc_fs.h>

29

#include <linux/proc_fs.h>

30

#include <linux/kthread.h>

30

#include <linux/kthread.h>

31

#include <linux/mempolicy.h>

31

#include <linux/mempolicy.h>

32

#include <linux/taskstats_kern.h>

32

#include <linux/taskstats_kern.h>

33

#include <linux/delayacct.h>

33

#include <linux/delayacct.h>

34

#include <linux/freezer.h>

34

#include <linux/freezer.h>

35

#include <linux/cgroup.h>

35

#include <linux/cgroup.h>

36

#include <linux/syscalls.h>

36

#include <linux/syscalls.h>

37

#include <linux/signal.h>

37

#include <linux/signal.h>

38

#include <linux/posix-timers.h>

38

#include <linux/posix-timers.h>

39

#include <linux/cn_proc.h>

39

#include <linux/cn_proc.h>

40

#include <linux/mutex.h>

40

#include <linux/mutex.h>

41

#include <linux/futex.h>

41

#include <linux/futex.h>

42

#include <linux/pipe_fs_i.h>

42

#include <linux/pipe_fs_i.h>

43

#include <linux/audit.h> /* for audit_free() */

43

#include <linux/audit.h> /* for audit_free() */

44

#include <linux/resource.h>

44

#include <linux/resource.h>

45

#include <linux/blkdev.h>

45

#include <linux/blkdev.h>

46

#include <linux/task_io_accounting_ops.h>

46

#include <linux/task_io_accounting_ops.h>

47

#include <linux/tracehook.h>

47

#include <linux/tracehook.h>

48

#include <linux/fs_struct.h>

48

#include <linux/fs_struct.h>

49

#include <linux/init_task.h>

49

#include <linux/init_task.h>

50

#include <linux/perf_event.h>

50

#include <linux/perf_event.h>

51

#include <trace/events/sched.h>

51

#include <trace/events/sched.h>

52

#include <linux/hw_breakpoint.h>

52

#include <linux/hw_breakpoint.h>

53

#include <linux/oom.h>

53

#include <linux/oom.h>

54

55

#include <asm/uaccess.h>

55

#include <asm/uaccess.h>

56

#include <asm/unistd.h>

56

#include <asm/unistd.h>

57

#include <asm/pgtable.h>

57

#include <asm/pgtable.h>

58

#include <asm/mmu_context.h>

58

#include <asm/mmu_context.h>

59

60

static void exit_mm(struct task_struct * tsk);

60

static void exit_mm(struct task_struct * tsk);

61

62

static void __unhash_process(struct task_struct *p, bool group_dead)

62

static void __unhash_process(struct task_struct *p, bool group_dead)

63

{

63

{

64

nr_threads--;

64

nr_threads--;

65

detach_pid(p, PIDTYPE_PID);

65

detach_pid(p, PIDTYPE_PID);

66

if (group_dead) {

66

if (group_dead) {

67

detach_pid(p, PIDTYPE_PGID);

67

detach_pid(p, PIDTYPE_PGID);

68

detach_pid(p, PIDTYPE_SID);

68

detach_pid(p, PIDTYPE_SID);

69

70

list_del_rcu(&p->tasks);

70

list_del_rcu(&p->tasks);

71

list_del_init(&p->sibling);

71

list_del_init(&p->sibling);

72

__this_cpu_dec(process_counts);

72

__this_cpu_dec(process_counts);

73

}

73

}

74

list_del_rcu(&p->thread_group);

74

list_del_rcu(&p->thread_group);

75

}

75

}

76

77

/*

77

/*

78

* This function expects the tasklist_lock write-locked.

78

* This function expects the tasklist_lock write-locked.

79

*/

79

*/

80

static void __exit_signal(struct task_struct *tsk)

80

static void __exit_signal(struct task_struct *tsk)

81

{

81

{

82

struct signal_struct *sig = tsk->signal;

82

struct signal_struct *sig = tsk->signal;

83

bool group_dead = thread_group_leader(tsk);

83

bool group_dead = thread_group_leader(tsk);

84

struct sighand_struct *sighand;

84

struct sighand_struct *sighand;

85

struct tty_struct *uninitialized_var(tty);

85

struct tty_struct *uninitialized_var(tty);

86

87

sighand = rcu_dereference_check(tsk->sighand,

87

sighand = rcu_dereference_check(tsk->sighand,

88

rcu_read_lock_held() ||

88

rcu_read_lock_held() ||

89

lockdep_tasklist_lock_is_held());

89

lockdep_tasklist_lock_is_held());

90

spin_lock(&sighand->siglock);

90

spin_lock(&sighand->siglock);

91

92

posix_cpu_timers_exit(tsk);

92

posix_cpu_timers_exit(tsk);

93

if (group_dead) {

93

if (group_dead) {

94

posix_cpu_timers_exit_group(tsk);

94

posix_cpu_timers_exit_group(tsk);

95

tty = sig->tty;

95

tty = sig->tty;

96

sig->tty = NULL;

96

sig->tty = NULL;

97

} else {

97

} else {

98

/*

98

/*

99

* This can only happen if the caller is de_thread().

99

* This can only happen if the caller is de_thread().

100

* FIXME: this is the temporary hack, we should teach

100

* FIXME: this is the temporary hack, we should teach

101

* posix-cpu-timers to handle this case correctly.

101

* posix-cpu-timers to handle this case correctly.

102

*/

102

*/

103

if (unlikely(has_group_leader_pid(tsk)))

103

if (unlikely(has_group_leader_pid(tsk)))

104

posix_cpu_timers_exit_group(tsk);

104

posix_cpu_timers_exit_group(tsk);

105

106

/*

106

/*

107

* If there is any task waiting for the group exit

107

* If there is any task waiting for the group exit

108

* then notify it:

108

* then notify it:

109

*/

109

*/

110

if (sig->notify_count > 0 && !--sig->notify_count)

110

if (sig->notify_count > 0 && !--sig->notify_count)

111

wake_up_process(sig->group_exit_task);

111

wake_up_process(sig->group_exit_task);

112

113

if (tsk == sig->curr_target)

113

if (tsk == sig->curr_target)

114

sig->curr_target = next_thread(tsk);

114

sig->curr_target = next_thread(tsk);

115

/*

115

/*

116

* Accumulate here the counters for all threads but the

116

* Accumulate here the counters for all threads but the

117

* group leader as they die, so they can be added into

117

* group leader as they die, so they can be added into

118

* the process-wide totals when those are taken.

118

* the process-wide totals when those are taken.

119

* The group leader stays around as a zombie as long

119

* The group leader stays around as a zombie as long

120

* as there are other threads. When it gets reaped,

120

* as there are other threads. When it gets reaped,

121

* the exit.c code will add its counts into these totals.

121

* the exit.c code will add its counts into these totals.

122

* We won't ever get here for the group leader, since it

122

* We won't ever get here for the group leader, since it

123

* will have been the last reference on the signal_struct.

123

* will have been the last reference on the signal_struct.

124

*/

124

*/

125

sig->utime = cputime_add(sig->utime, tsk->utime);

125

sig->utime = cputime_add(sig->utime, tsk->utime);

126

sig->stime = cputime_add(sig->stime, tsk->stime);

126

sig->stime = cputime_add(sig->stime, tsk->stime);

127

sig->gtime = cputime_add(sig->gtime, tsk->gtime);

127

sig->gtime = cputime_add(sig->gtime, tsk->gtime);

128

sig->min_flt += tsk->min_flt;

128

sig->min_flt += tsk->min_flt;

129

sig->maj_flt += tsk->maj_flt;

129

sig->maj_flt += tsk->maj_flt;

130

sig->nvcsw += tsk->nvcsw;

130

sig->nvcsw += tsk->nvcsw;

131

sig->nivcsw += tsk->nivcsw;

131

sig->nivcsw += tsk->nivcsw;

132

sig->inblock += task_io_get_inblock(tsk);

132

sig->inblock += task_io_get_inblock(tsk);

133

sig->oublock += task_io_get_oublock(tsk);

133

sig->oublock += task_io_get_oublock(tsk);

134

task_io_accounting_add(&sig->ioac, &tsk->ioac);

134

task_io_accounting_add(&sig->ioac, &tsk->ioac);

135

sig->sum_sched_runtime += tsk->se.sum_exec_runtime;

135

sig->sum_sched_runtime += tsk->se.sum_exec_runtime;

136

}

136

}

137

138

sig->nr_threads--;

138

sig->nr_threads--;

139

__unhash_process(tsk, group_dead);

139

__unhash_process(tsk, group_dead);

140

141

/*

141

/*

142

* Do this under ->siglock, we can race with another thread

142

* Do this under ->siglock, we can race with another thread

143

* doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals.

143

* doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals.

144

*/

144

*/

145

flush_sigqueue(&tsk->pending);

145

flush_sigqueue(&tsk->pending);

146

tsk->sighand = NULL;

146

tsk->sighand = NULL;

147

spin_unlock(&sighand->siglock);

147

spin_unlock(&sighand->siglock);

148

149

__cleanup_sighand(sighand);

149

__cleanup_sighand(sighand);

150

clear_tsk_thread_flag(tsk,TIF_SIGPENDING);

150

clear_tsk_thread_flag(tsk,TIF_SIGPENDING);

151

if (group_dead) {

151

if (group_dead) {

152

flush_sigqueue(&sig->shared_pending);

152

flush_sigqueue(&sig->shared_pending);

153

tty_kref_put(tty);

153

tty_kref_put(tty);

154

}

154

}

155

}

155

}

156

157

static void delayed_put_task_struct(struct rcu_head *rhp)

157

static void delayed_put_task_struct(struct rcu_head *rhp)

158

{

158

{

159

struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);

159

struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);

160

161

perf_event_delayed_put(tsk);

161

perf_event_delayed_put(tsk);

162

trace_sched_process_free(tsk);

162

trace_sched_process_free(tsk);

163

put_task_struct(tsk);

163

put_task_struct(tsk);

164

}

164

}

165

166

167

void release_task(struct task_struct * p)

167

void release_task(struct task_struct * p)

168

{

168

{

169

struct task_struct *leader;

169

struct task_struct *leader;

170

int zap_leader;

170

int zap_leader;

171

repeat:

171

repeat:

172

/* don't need to get the RCU readlock here - the process is dead and

172

/* don't need to get the RCU readlock here - the process is dead and

173

* can't be modifying its own credentials. But shut RCU-lockdep up */

173

* can't be modifying its own credentials. But shut RCU-lockdep up */

174

rcu_read_lock();

174

rcu_read_lock();

175

atomic_dec(&__task_cred(p)->user->processes);

175

atomic_dec(&__task_cred(p)->user->processes);

176

rcu_read_unlock();

176

rcu_read_unlock();

177

178

proc_flush_task(p);

178

proc_flush_task(p);

179

180

write_lock_irq(&tasklist_lock);

180

write_lock_irq(&tasklist_lock);

181

ptrace_release_task(p);

181

ptrace_release_task(p);

182

__exit_signal(p);

182

__exit_signal(p);

183

184

/*

184

/*

185

* If we are the last non-leader member of the thread

185

* If we are the last non-leader member of the thread

186

* group, and the leader is zombie, then notify the

186

* group, and the leader is zombie, then notify the

187

* group leader's parent process. (if it wants notification.)

187

* group leader's parent process. (if it wants notification.)

188

*/

188

*/

189

zap_leader = 0;

189

zap_leader = 0;

190

leader = p->group_leader;

190

leader = p->group_leader;

191

if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) {

191

if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) {

192

/*

192

/*

193

* If we were the last child thread and the leader has

193

* If we were the last child thread and the leader has

194

* exited already, and the leader's parent ignores SIGCHLD,

194

* exited already, and the leader's parent ignores SIGCHLD,

195

* then we are the one who should release the leader.

195

* then we are the one who should release the leader.

196

*/

196

*/

197

zap_leader = do_notify_parent(leader, leader->exit_signal);

197

zap_leader = do_notify_parent(leader, leader->exit_signal);

198

if (zap_leader)

198

if (zap_leader)

199

leader->exit_state = EXIT_DEAD;

199

leader->exit_state = EXIT_DEAD;

200

}

200

}

201

202

write_unlock_irq(&tasklist_lock);

202

write_unlock_irq(&tasklist_lock);

203

release_thread(p);

203

release_thread(p);

204

call_rcu(&p->rcu, delayed_put_task_struct);

204

call_rcu(&p->rcu, delayed_put_task_struct);

205

206

p = leader;

206

p = leader;

207

if (unlikely(zap_leader))

207

if (unlikely(zap_leader))

208

goto repeat;

208

goto repeat;

209

}

209

}

210

211

/*

211

/*

212

* This checks not only the pgrp, but falls back on the pid if no

212

* This checks not only the pgrp, but falls back on the pid if no

213

* satisfactory pgrp is found. I dunno - gdb doesn't work correctly

213

* satisfactory pgrp is found. I dunno - gdb doesn't work correctly

214

* without this...

214

* without this...

215

*

215

*

216

* The caller must hold rcu lock or the tasklist lock.

216

* The caller must hold rcu lock or the tasklist lock.

217

*/

217

*/

218

struct pid *session_of_pgrp(struct pid *pgrp)

218

struct pid *session_of_pgrp(struct pid *pgrp)

219

{

219

{

220

struct task_struct *p;

220

struct task_struct *p;

221

struct pid *sid = NULL;

221

struct pid *sid = NULL;

222

223

p = pid_task(pgrp, PIDTYPE_PGID);

223

p = pid_task(pgrp, PIDTYPE_PGID);

224

if (p == NULL)

224

if (p == NULL)

225

p = pid_task(pgrp, PIDTYPE_PID);

225

p = pid_task(pgrp, PIDTYPE_PID);

226

if (p != NULL)

226

if (p != NULL)

227

sid = task_session(p);

227

sid = task_session(p);

228

229

return sid;

229

return sid;

230

}

230

}

231

232

/*

232

/*

233

* Determine if a process group is "orphaned", according to the POSIX

233

* Determine if a process group is "orphaned", according to the POSIX

234

* definition in 2.2.2.52. Orphaned process groups are not to be affected

234

* definition in 2.2.2.52. Orphaned process groups are not to be affected

235

* by terminal-generated stop signals. Newly orphaned process groups are

235

* by terminal-generated stop signals. Newly orphaned process groups are

236

* to receive a SIGHUP and a SIGCONT.

236

* to receive a SIGHUP and a SIGCONT.

237

*

237

*

238

* "I ask you, have you ever known what it is to be an orphan?"

238

* "I ask you, have you ever known what it is to be an orphan?"

239

*/

239

*/

240

static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignored_task)

240

static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignored_task)

241

{

241

{

242

struct task_struct *p;

242

struct task_struct *p;

243

244

do_each_pid_task(pgrp, PIDTYPE_PGID, p) {

244

do_each_pid_task(pgrp, PIDTYPE_PGID, p) {

245

if ((p == ignored_task) ||

245

if ((p == ignored_task) ||

246

(p->exit_state && thread_group_empty(p)) ||

246

(p->exit_state && thread_group_empty(p)) ||

247

is_global_init(p->real_parent))

247

is_global_init(p->real_parent))

248

continue;

248

continue;

249

250

if (task_pgrp(p->real_parent) != pgrp &&

250

if (task_pgrp(p->real_parent) != pgrp &&

251

task_session(p->real_parent) == task_session(p))

251

task_session(p->real_parent) == task_session(p))

252

return 0;

252

return 0;

253

} while_each_pid_task(pgrp, PIDTYPE_PGID, p);

253

} while_each_pid_task(pgrp, PIDTYPE_PGID, p);

254

255

return 1;

255

return 1;

256

}

256

}

257

258

int is_current_pgrp_orphaned(void)

258

int is_current_pgrp_orphaned(void)

259

{

259

{

260

int retval;

260

int retval;

261

262

read_lock(&tasklist_lock);

262

read_lock(&tasklist_lock);

263

retval = will_become_orphaned_pgrp(task_pgrp(current), NULL);

263

retval = will_become_orphaned_pgrp(task_pgrp(current), NULL);

264

read_unlock(&tasklist_lock);

264

read_unlock(&tasklist_lock);

265

266

return retval;

266

return retval;

267

}

267

}

268

269

static int has_stopped_jobs(struct pid *pgrp)

269

static int has_stopped_jobs(struct pid *pgrp)

270

{

270

{

271

int retval = 0;

271

int retval = 0;

272

struct task_struct *p;

272

struct task_struct *p;

273

274

do_each_pid_task(pgrp, PIDTYPE_PGID, p) {

274

do_each_pid_task(pgrp, PIDTYPE_PGID, p) {

275

if (!task_is_stopped(p))

275

if (!task_is_stopped(p))

276

continue;

276

continue;

277

retval = 1;

277

retval = 1;

278

break;

278

break;

279

} while_each_pid_task(pgrp, PIDTYPE_PGID, p);

279

} while_each_pid_task(pgrp, PIDTYPE_PGID, p);

280

return retval;

280

return retval;

281

}

281

}

282

283

/*

283

/*

284

* Check to see if any process groups have become orphaned as

284

* Check to see if any process groups have become orphaned as

285

* a result of our exiting, and if they have any stopped jobs,

285

* a result of our exiting, and if they have any stopped jobs,

286

* send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)

286

* send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)

287

*/

287

*/

288

static void

288

static void

289

kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)

289

kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)

290

{

290

{

291

struct pid *pgrp = task_pgrp(tsk);

291

struct pid *pgrp = task_pgrp(tsk);

292

struct task_struct *ignored_task = tsk;

292

struct task_struct *ignored_task = tsk;

293

294

if (!parent)

294

if (!parent)

295

/* exit: our father is in a different pgrp than

295

/* exit: our father is in a different pgrp than

296

* we are and we were the only connection outside.

296

* we are and we were the only connection outside.

297

*/

297

*/

298

parent = tsk->real_parent;

298

parent = tsk->real_parent;

299

else

299

else

300

/* reparent: our child is in a different pgrp than

300

/* reparent: our child is in a different pgrp than

301

* we are, and it was the only connection outside.

301

* we are, and it was the only connection outside.

302

*/

302

*/

303

ignored_task = NULL;

303

ignored_task = NULL;

304

305

if (task_pgrp(parent) != pgrp &&

305

if (task_pgrp(parent) != pgrp &&

306

task_session(parent) == task_session(tsk) &&

306

task_session(parent) == task_session(tsk) &&

307

will_become_orphaned_pgrp(pgrp, ignored_task) &&

307

will_become_orphaned_pgrp(pgrp, ignored_task) &&

308

has_stopped_jobs(pgrp)) {

308

has_stopped_jobs(pgrp)) {

309

__kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp);

309

__kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp);

310

__kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp);

310

__kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp);

311

}

311

}

312

}

312

}

313

314

/**

314

/**

315

* reparent_to_kthreadd - Reparent the calling kernel thread to kthreadd

315

* reparent_to_kthreadd - Reparent the calling kernel thread to kthreadd

316

*

316

*

317

* If a kernel thread is launched as a result of a system call, or if

317

* If a kernel thread is launched as a result of a system call, or if

318

* it ever exits, it should generally reparent itself to kthreadd so it

318

* it ever exits, it should generally reparent itself to kthreadd so it

319

* isn't in the way of other processes and is correctly cleaned up on exit.

319

* isn't in the way of other processes and is correctly cleaned up on exit.

320

*

320

*

321

* The various task state such as scheduling policy and priority may have

321

* The various task state such as scheduling policy and priority may have

322

* been inherited from a user process, so we reset them to sane values here.

322

* been inherited from a user process, so we reset them to sane values here.

323

*

323

*

324

* NOTE that reparent_to_kthreadd() gives the caller full capabilities.

324

* NOTE that reparent_to_kthreadd() gives the caller full capabilities.

325

*/

325

*/

326

static void reparent_to_kthreadd(void)

326

static void reparent_to_kthreadd(void)

327

{

327

{

328

write_lock_irq(&tasklist_lock);

328

write_lock_irq(&tasklist_lock);

329

330

ptrace_unlink(current);

330

ptrace_unlink(current);

331

/* Reparent to init */

331

/* Reparent to init */

332

current->real_parent = current->parent = kthreadd_task;

332

current->real_parent = current->parent = kthreadd_task;

333

list_move_tail(&current->sibling, &current->real_parent->children);

333

list_move_tail(&current->sibling, &current->real_parent->children);

334

335

/* Set the exit signal to SIGCHLD so we signal init on exit */

335

/* Set the exit signal to SIGCHLD so we signal init on exit */

336

current->exit_signal = SIGCHLD;

336

current->exit_signal = SIGCHLD;

337

338

if (task_nice(current) < 0)

338

if (task_nice(current) < 0)

339

set_user_nice(current, 0);

339

set_user_nice(current, 0);

340

/* cpus_allowed? */

340

/* cpus_allowed? */

341

/* rt_priority? */

341

/* rt_priority? */

342

/* signals? */

342

/* signals? */

343

memcpy(current->signal->rlim, init_task.signal->rlim,

343

memcpy(current->signal->rlim, init_task.signal->rlim,

344

sizeof(current->signal->rlim));

344

sizeof(current->signal->rlim));

345

346

atomic_inc(&init_cred.usage);

346

atomic_inc(&init_cred.usage);

347

commit_creds(&init_cred);

347

commit_creds(&init_cred);

348

write_unlock_irq(&tasklist_lock);

348

write_unlock_irq(&tasklist_lock);

349

}

349

}

350

351

void __set_special_pids(struct pid *pid)

351

void __set_special_pids(struct pid *pid)

352

{

352

{

353

struct task_struct *curr = current->group_leader;

353

struct task_struct *curr = current->group_leader;

354

355

if (task_session(curr) != pid)

355

if (task_session(curr) != pid)

356

change_pid(curr, PIDTYPE_SID, pid);

356

change_pid(curr, PIDTYPE_SID, pid);

357

358

if (task_pgrp(curr) != pid)

358

if (task_pgrp(curr) != pid)

359

change_pid(curr, PIDTYPE_PGID, pid);

359

change_pid(curr, PIDTYPE_PGID, pid);

360

}

360

}

361

362

static void set_special_pids(struct pid *pid)

362

static void set_special_pids(struct pid *pid)

363

{

363

{

364

write_lock_irq(&tasklist_lock);

364

write_lock_irq(&tasklist_lock);

365

__set_special_pids(pid);

365

__set_special_pids(pid);

366

write_unlock_irq(&tasklist_lock);

366

write_unlock_irq(&tasklist_lock);

367

}

367

}

368

369

/*

369

/*

370

* Let kernel threads use this to say that they allow a certain signal.

370

* Let kernel threads use this to say that they allow a certain signal.

371

* Must not be used if kthread was cloned with CLONE_SIGHAND.

371

* Must not be used if kthread was cloned with CLONE_SIGHAND.

372

*/

372

*/

373

int allow_signal(int sig)

373

int allow_signal(int sig)

374

{

374

{

375

if (!valid_signal(sig) || sig < 1)

375

if (!valid_signal(sig) || sig < 1)

376

return -EINVAL;

376

return -EINVAL;

377

378

spin_lock_irq(&current->sighand->siglock);

378

spin_lock_irq(&current->sighand->siglock);

379

/* This is only needed for daemonize()'ed kthreads */

379

/* This is only needed for daemonize()'ed kthreads */

380

sigdelset(&current->blocked, sig);

380

sigdelset(&current->blocked, sig);

381

/*

381

/*

382

* Kernel threads handle their own signals. Let the signal code

382

* Kernel threads handle their own signals. Let the signal code

383

* know it'll be handled, so that they don't get converted to

383

* know it'll be handled, so that they don't get converted to

384

* SIGKILL or just silently dropped.

384

* SIGKILL or just silently dropped.

385

*/

385

*/

386

current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2;

386

current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2;

387

recalc_sigpending();

387

recalc_sigpending();

388

spin_unlock_irq(&current->sighand->siglock);

388

spin_unlock_irq(&current->sighand->siglock);

389

return 0;

389

return 0;

390

}

390

}

391

392

EXPORT_SYMBOL(allow_signal);

392

EXPORT_SYMBOL(allow_signal);

393

394

int disallow_signal(int sig)

394

int disallow_signal(int sig)

395

{

395

{

396

if (!valid_signal(sig) || sig < 1)

396

if (!valid_signal(sig) || sig < 1)

397

return -EINVAL;

397

return -EINVAL;

398

399

spin_lock_irq(&current->sighand->siglock);

399

spin_lock_irq(&current->sighand->siglock);

400

current->sighand->action[(sig)-1].sa.sa_handler = SIG_IGN;

400

current->sighand->action[(sig)-1].sa.sa_handler = SIG_IGN;

401

recalc_sigpending();

401

recalc_sigpending();

402

spin_unlock_irq(&current->sighand->siglock);

402

spin_unlock_irq(&current->sighand->siglock);

403

return 0;

403

return 0;

404

}

404

}

405

406

EXPORT_SYMBOL(disallow_signal);

406

EXPORT_SYMBOL(disallow_signal);

407

408

/*

408

/*

409

* Put all the gunge required to become a kernel thread without

409

* Put all the gunge required to become a kernel thread without

410

* attached user resources in one place where it belongs.

410

* attached user resources in one place where it belongs.

411

*/

411

*/

412

413

void daemonize(const char *name, ...)

413

void daemonize(const char *name, ...)

414

{

414

{

415

va_list args;

415

va_list args;

416

sigset_t blocked;

416

sigset_t blocked;

417

418

va_start(args, name);

418

va_start(args, name);

419

vsnprintf(current->comm, sizeof(current->comm), name, args);

419

vsnprintf(current->comm, sizeof(current->comm), name, args);

420

va_end(args);

420

va_end(args);

421

422

/*

422

/*

423

* If we were started as result of loading a module, close all of the

423

* If we were started as result of loading a module, close all of the

424

* user space pages. We don't need them, and if we didn't close them

424

* user space pages. We don't need them, and if we didn't close them

425

* they would be locked into memory.

425

* they would be locked into memory.

426

*/

426

*/

427

exit_mm(current);

427

exit_mm(current);

428

/*

428

/*

429

* We don't want to have TIF_FREEZE set if the system-wide hibernation

429

* We don't want to have TIF_FREEZE set if the system-wide hibernation

430

* or suspend transition begins right now.

430

* or suspend transition begins right now.

431

*/

431

*/

432

current->flags |= (PF_NOFREEZE | PF_KTHREAD);

432

current->flags |= (PF_NOFREEZE | PF_KTHREAD);

433

434

if (current->nsproxy != &init_nsproxy) {

434

if (current->nsproxy != &init_nsproxy) {

435

get_nsproxy(&init_nsproxy);

435

get_nsproxy(&init_nsproxy);

436

switch_task_namespaces(current, &init_nsproxy);

436

switch_task_namespaces(current, &init_nsproxy);

437

}

437

}

438

set_special_pids(&init_struct_pid);

438

set_special_pids(&init_struct_pid);

439

proc_clear_tty(current);

439

proc_clear_tty(current);

440

441

/* Block and flush all signals */

441

/* Block and flush all signals */

442

sigfillset(&blocked);

442

sigfillset(&blocked);

443

sigprocmask(SIG_BLOCK, &blocked, NULL);

443

sigprocmask(SIG_BLOCK, &blocked, NULL);

444

flush_signals(current);

444

flush_signals(current);

445

446

/* Become as one with the init task */

446

/* Become as one with the init task */

447

448

daemonize_fs_struct();

448

daemonize_fs_struct();

449

exit_files(current);

449

exit_files(current);

450

current->files = init_task.files;

450

current->files = init_task.files;

451

atomic_inc(&current->files->count);

451

atomic_inc(&current->files->count);

452

453

reparent_to_kthreadd();

453

reparent_to_kthreadd();

454

}

454

}

455

456

EXPORT_SYMBOL(daemonize);

456

EXPORT_SYMBOL(daemonize);

457

458

static void close_files(struct files_struct * files)

458

static void close_files(struct files_struct * files)

459

{

459

{

460

int i, j;

460

int i, j;

461

struct fdtable *fdt;

461

struct fdtable *fdt;

462

463

j = 0;

463

j = 0;

464

465

/*

465

/*

466

* It is safe to dereference the fd table without RCU or

466

* It is safe to dereference the fd table without RCU or

467

* ->file_lock because this is the last reference to the

467

* ->file_lock because this is the last reference to the

468

* files structure. But use RCU to shut RCU-lockdep up.

468

* files structure. But use RCU to shut RCU-lockdep up.

469

*/

469

*/

470

rcu_read_lock();

470

rcu_read_lock();

471

fdt = files_fdtable(files);

471

fdt = files_fdtable(files);

472

rcu_read_unlock();

472

rcu_read_unlock();

473

for (;;) {

473

for (;;) {

474

unsigned long set;

474

unsigned long set;

475

i = j * __NFDBITS;

475

i = j * __NFDBITS;

476

if (i >= fdt->max_fds)

476

if (i >= fdt->max_fds)

477

break;

477

break;

478

set = fdt->open_fds->fds_bits[j++];

478

set = fdt->open_fds->fds_bits[j++];

479

while (set) {

479

while (set) {

480

if (set & 1) {

480

if (set & 1) {

481

struct file * file = xchg(&fdt->fd[i], NULL);

481

struct file * file = xchg(&fdt->fd[i], NULL);

482

if (file) {

482

if (file) {

483

filp_close(file, files);

483

filp_close(file, files);

484

cond_resched();

484

cond_resched();

485

}

485

}

486

}

486

}

487

i++;

487

i++;

488

set >>= 1;

488

set >>= 1;

489

}

489

}

490

}

490

}

491

}

491

}

492

493

struct files_struct *get_files_struct(struct task_struct *task)

493

struct files_struct *get_files_struct(struct task_struct *task)

494

{

494

{

495

struct files_struct *files;

495

struct files_struct *files;

496

497

task_lock(task);

497

task_lock(task);

498

files = task->files;

498

files = task->files;

499

if (files)

499

if (files)

500

atomic_inc(&files->count);

500

atomic_inc(&files->count);

501

task_unlock(task);

501

task_unlock(task);

502

503

return files;

503

return files;

504

}

504

}

505

506

void put_files_struct(struct files_struct *files)

506

void put_files_struct(struct files_struct *files)

507

{

507

{

508

struct fdtable *fdt;

508

struct fdtable *fdt;

509

510

if (atomic_dec_and_test(&files->count)) {

510

if (atomic_dec_and_test(&files->count)) {

511

close_files(files);

511

close_files(files);

512

/*

512

/*

513

* Free the fd and fdset arrays if we expanded them.

513

* Free the fd and fdset arrays if we expanded them.

514

* If the fdtable was embedded, pass files for freeing

514

* If the fdtable was embedded, pass files for freeing

515

* at the end of the RCU grace period. Otherwise,

515

* at the end of the RCU grace period. Otherwise,

516

* you can free files immediately.

516

* you can free files immediately.

517

*/

517

*/

518

rcu_read_lock();

518

rcu_read_lock();

519

fdt = files_fdtable(files);

519

fdt = files_fdtable(files);

520

if (fdt != &files->fdtab)

520

if (fdt != &files->fdtab)

521

kmem_cache_free(files_cachep, files);

521

kmem_cache_free(files_cachep, files);

522

free_fdtable(fdt);

522

free_fdtable(fdt);

523

rcu_read_unlock();

523

rcu_read_unlock();

524

}

524

}

525

}

525

}

526

527

void reset_files_struct(struct files_struct *files)

527

void reset_files_struct(struct files_struct *files)

528

{

528

{

529

struct task_struct *tsk = current;

529

struct task_struct *tsk = current;

530

struct files_struct *old;

530

struct files_struct *old;

531

532

old = tsk->files;

532

old = tsk->files;

533

task_lock(tsk);

533

task_lock(tsk);

534

tsk->files = files;

534

tsk->files = files;

535

task_unlock(tsk);

535

task_unlock(tsk);

536

put_files_struct(old);

536

put_files_struct(old);

537

}

537

}

538

539

void exit_files(struct task_struct *tsk)

539

void exit_files(struct task_struct *tsk)

540

{

540

{

541

struct files_struct * files = tsk->files;

541

struct files_struct * files = tsk->files;

542

543

if (files) {

543

if (files) {

544

task_lock(tsk);

544

task_lock(tsk);

545

tsk->files = NULL;

545

tsk->files = NULL;

546

task_unlock(tsk);

546

task_unlock(tsk);

547

put_files_struct(files);

547

put_files_struct(files);

548

}

548

}

549

}

549

}

550

551

#ifdef CONFIG_MM_OWNER

551

#ifdef CONFIG_MM_OWNER

552

/*

552

/*

553

* Task p is exiting and it owned mm, lets find a new owner for it

553

* Task p is exiting and it owned mm, lets find a new owner for it

554

*/

554

*/

555

static inline int

555

static inline int

556

mm_need_new_owner(struct mm_struct *mm, struct task_struct *p)

556

mm_need_new_owner(struct mm_struct *mm, struct task_struct *p)

557

{

557

{

558

/*

558

/*

559

* If there are other users of the mm and the owner (us) is exiting

559

* If there are other users of the mm and the owner (us) is exiting

560

* we need to find a new owner to take on the responsibility.

560

* we need to find a new owner to take on the responsibility.

561

*/

561

*/

562

if (atomic_read(&mm->mm_users) <= 1)

562

if (atomic_read(&mm->mm_users) <= 1)

563

return 0;

563

return 0;

564

if (mm->owner != p)

564

if (mm->owner != p)

565

return 0;

565

return 0;

566

return 1;

566

return 1;

567

}

567

}

568

569

void mm_update_next_owner(struct mm_struct *mm)

569

void mm_update_next_owner(struct mm_struct *mm)

570

{

570

{

571

struct task_struct *c, *g, *p = current;

571

struct task_struct *c, *g, *p = current;

572

573

retry:

573

retry:

574

if (!mm_need_new_owner(mm, p))

574

if (!mm_need_new_owner(mm, p))

575

return;

575

return;

576

577

read_lock(&tasklist_lock);

577

read_lock(&tasklist_lock);

578

/*

578

/*

579

* Search in the children

579

* Search in the children

580

*/

580

*/

581

list_for_each_entry(c, &p->children, sibling) {

581

list_for_each_entry(c, &p->children, sibling) {

582

if (c->mm == mm)

582

if (c->mm == mm)

583

goto assign_new_owner;

583

goto assign_new_owner;

584

}

584

}

585

586

/*

586

/*

587

* Search in the siblings

587

* Search in the siblings

588

*/

588

*/

589

list_for_each_entry(c, &p->real_parent->children, sibling) {

589

list_for_each_entry(c, &p->real_parent->children, sibling) {

590

if (c->mm == mm)

590

if (c->mm == mm)

591

goto assign_new_owner;

591

goto assign_new_owner;

592

}

592

}

593

594

/*

594

/*

595

* Search through everything else. We should not get

595

* Search through everything else. We should not get

596

* here often

596

* here often

597

*/

597

*/

598

do_each_thread(g, c) {

598

do_each_thread(g, c) {

599

if (c->mm == mm)

599

if (c->mm == mm)

600

goto assign_new_owner;

600

goto assign_new_owner;

601

} while_each_thread(g, c);

601

} while_each_thread(g, c);

602

603

read_unlock(&tasklist_lock);

603

read_unlock(&tasklist_lock);

604

/*

604

/*

605

* We found no owner yet mm_users > 1: this implies that we are

605

* We found no owner yet mm_users > 1: this implies that we are

606

* most likely racing with swapoff (try_to_unuse()) or /proc or

606

* most likely racing with swapoff (try_to_unuse()) or /proc or

607

* ptrace or page migration (get_task_mm()). Mark owner as NULL.

607

* ptrace or page migration (get_task_mm()). Mark owner as NULL.

608

*/

608

*/

609

mm->owner = NULL;

609

mm->owner = NULL;

610

return;

610

return;

611

612

assign_new_owner:

612

assign_new_owner:

613

BUG_ON(c == p);

613

BUG_ON(c == p);

614

get_task_struct(c);

614

get_task_struct(c);

615

/*

615

/*

616

* The task_lock protects c->mm from changing.

616

* The task_lock protects c->mm from changing.

617

* We always want mm->owner->mm == mm

617

* We always want mm->owner->mm == mm

618

*/

618

*/

619

task_lock(c);

619

task_lock(c);

620

/*

620

/*

621

* Delay read_unlock() till we have the task_lock()

621

* Delay read_unlock() till we have the task_lock()

622

* to ensure that c does not slip away underneath us

622

* to ensure that c does not slip away underneath us

623

*/

623

*/

624

read_unlock(&tasklist_lock);

624

read_unlock(&tasklist_lock);

625

if (c->mm != mm) {

625

if (c->mm != mm) {

626

task_unlock(c);

626

task_unlock(c);

627

put_task_struct(c);

627

put_task_struct(c);

628

goto retry;

628

goto retry;

629

}

629

}

630

mm->owner = c;

630

mm->owner = c;

631

task_unlock(c);

631

task_unlock(c);

632

put_task_struct(c);

632

put_task_struct(c);

633

}

633

}

634

#endif /* CONFIG_MM_OWNER */

634

#endif /* CONFIG_MM_OWNER */

635

636

/*

636

/*

637

* Turn us into a lazy TLB process if we

637

* Turn us into a lazy TLB process if we

638

* aren't already..

638

* aren't already..

639

*/

639

*/

640

static void exit_mm(struct task_struct * tsk)

640

static void exit_mm(struct task_struct * tsk)

641

{

641

{

642

struct mm_struct *mm = tsk->mm;

642

struct mm_struct *mm = tsk->mm;

643

struct core_state *core_state;

643

struct core_state *core_state;

644

645

mm_release(tsk, mm);

645

mm_release(tsk, mm);

646

if (!mm)

646

if (!mm)

647

return;

647

return;

648

/*

648

/*

649

* Serialize with any possible pending coredump.

649

* Serialize with any possible pending coredump.

650

* We must hold mmap_sem around checking core_state

650

* We must hold mmap_sem around checking core_state

651

* and clearing tsk->mm. The core-inducing thread

651

* and clearing tsk->mm. The core-inducing thread

652

* will increment ->nr_threads for each thread in the

652

* will increment ->nr_threads for each thread in the

653

* group with ->mm != NULL.

653

* group with ->mm != NULL.

654

*/

654

*/

655

down_read(&mm->mmap_sem);

655

down_read(&mm->mmap_sem);

656

core_state = mm->core_state;

656

core_state = mm->core_state;

657

if (core_state) {

657

if (core_state) {

658

struct core_thread self;

658

struct core_thread self;

659

up_read(&mm->mmap_sem);

659

up_read(&mm->mmap_sem);

660

661

self.task = tsk;

661

self.task = tsk;

662

self.next = xchg(&core_state->dumper.next, &self);

662

self.next = xchg(&core_state->dumper.next, &self);

663

/*

663

/*

664

* Implies mb(), the result of xchg() must be visible

664

* Implies mb(), the result of xchg() must be visible

665

* to core_state->dumper.

665

* to core_state->dumper.

666

*/

666

*/

667

if (atomic_dec_and_test(&core_state->nr_threads))

667

if (atomic_dec_and_test(&core_state->nr_threads))

668

complete(&core_state->startup);

668

complete(&core_state->startup);

669

670

for (;;) {

670

for (;;) {

671

set_task_state(tsk, TASK_UNINTERRUPTIBLE);

671

set_task_state(tsk, TASK_UNINTERRUPTIBLE);

672

if (!self.task) /* see coredump_finish() */

672

if (!self.task) /* see coredump_finish() */

673

break;

673

break;

674

schedule();

674

schedule();

675

}

675

}

676

__set_task_state(tsk, TASK_RUNNING);

676

__set_task_state(tsk, TASK_RUNNING);

677

down_read(&mm->mmap_sem);

677

down_read(&mm->mmap_sem);

678

}

678

}

679

atomic_inc(&mm->mm_count);

679

atomic_inc(&mm->mm_count);

680

BUG_ON(mm != tsk->active_mm);

680

BUG_ON(mm != tsk->active_mm);

681

/* more a memory barrier than a real lock */

681

/* more a memory barrier than a real lock */

682

task_lock(tsk);

682

task_lock(tsk);

683

tsk->mm = NULL;

683

tsk->mm = NULL;

684

up_read(&mm->mmap_sem);

684

up_read(&mm->mmap_sem);

685

enter_lazy_tlb(mm, current);

685

enter_lazy_tlb(mm, current);

686

/* We don't want this task to be frozen prematurely */

686

/* We don't want this task to be frozen prematurely */

687

clear_freeze_flag(tsk);

687

clear_freeze_flag(tsk);

688

if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)

688

if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)

689

atomic_dec(&mm->oom_disable_count);

689

atomic_dec(&mm->oom_disable_count);

690

task_unlock(tsk);

690

task_unlock(tsk);

691

mm_update_next_owner(mm);

691

mm_update_next_owner(mm);

692

mmput(mm);

692

mmput(mm);

693

}

693

}

694

695

/*

695

/*

696

* When we die, we re-parent all our children.

696

* When we die, we re-parent all our children.

697

* Try to give them to another thread in our thread

697

* Try to give them to another thread in our thread

698

* group, and if no such member exists, give it to

698

* group, and if no such member exists, give it to

699

* the child reaper process (ie "init") in our pid

699

* the child reaper process (ie "init") in our pid

700

* space.

700

* space.

701

*/

701

*/

702

static struct task_struct *find_new_reaper(struct task_struct *father)

702

static struct task_struct *find_new_reaper(struct task_struct *father)

703

__releases(&tasklist_lock)

703

__releases(&tasklist_lock)

704

__acquires(&tasklist_lock)

704

__acquires(&tasklist_lock)

705

{

705

{

706

struct pid_namespace *pid_ns = task_active_pid_ns(father);

706

struct pid_namespace *pid_ns = task_active_pid_ns(father);

707

struct task_struct *thread;

707

struct task_struct *thread;

708

709

thread = father;

709

thread = father;

710

while_each_thread(father, thread) {

710

while_each_thread(father, thread) {

711

if (thread->flags & PF_EXITING)

711

if (thread->flags & PF_EXITING)

712

continue;

712

continue;

713

if (unlikely(pid_ns->child_reaper == father))

713

if (unlikely(pid_ns->child_reaper == father))

714

pid_ns->child_reaper = thread;

714

pid_ns->child_reaper = thread;

715

return thread;

715

return thread;

716

}

716

}

717

718

if (unlikely(pid_ns->child_reaper == father)) {

718

if (unlikely(pid_ns->child_reaper == father)) {

719

write_unlock_irq(&tasklist_lock);

719

write_unlock_irq(&tasklist_lock);

720

if (unlikely(pid_ns == &init_pid_ns))

720

if (unlikely(pid_ns == &init_pid_ns))

721

panic("Attempted to kill init!");

721

panic("Attempted to kill init!");

722

723

zap_pid_ns_processes(pid_ns);

723

zap_pid_ns_processes(pid_ns);

724

write_lock_irq(&tasklist_lock);

724

write_lock_irq(&tasklist_lock);

725

/*

725

/*

726

* We can not clear ->child_reaper or leave it alone.

726

* We can not clear ->child_reaper or leave it alone.

727

* There may by stealth EXIT_DEAD tasks on ->children,

727

* There may by stealth EXIT_DEAD tasks on ->children,

728

* forget_original_parent() must move them somewhere.

728

* forget_original_parent() must move them somewhere.

729

*/

729

*/

730

pid_ns->child_reaper = init_pid_ns.child_reaper;

730

pid_ns->child_reaper = init_pid_ns.child_reaper;

731

}

731

}

732

733

return pid_ns->child_reaper;

733

return pid_ns->child_reaper;

734

}

734

}

735

736

/*

736

/*

737

* Any that need to be release_task'd are put on the @dead list.

737

* Any that need to be release_task'd are put on the @dead list.

738

*/

738

*/

739

static void reparent_leader(struct task_struct *father, struct task_struct *p,

739

static void reparent_leader(struct task_struct *father, struct task_struct *p,

740

struct list_head *dead)

740

struct list_head *dead)

741

{

741

{

742

list_move_tail(&p->sibling, &p->real_parent->children);

742

list_move_tail(&p->sibling, &p->real_parent->children);

743

744

if (p->exit_state == EXIT_DEAD)

744

if (p->exit_state == EXIT_DEAD)

745

return;

745

return;

746

/*

746

/*

747

* If this is a threaded reparent there is no need to

747

* If this is a threaded reparent there is no need to

748

* notify anyone anything has happened.

748

* notify anyone anything has happened.

749

*/

749

*/

750

if (same_thread_group(p->real_parent, father))

750

if (same_thread_group(p->real_parent, father))

751

return;

751

return;

752

753

/* We don't want people slaying init. */

753

/* We don't want people slaying init. */

754

p->exit_signal = SIGCHLD;

754

p->exit_signal = SIGCHLD;

755

756

/* If it has exited notify the new parent about this child's death. */

756

/* If it has exited notify the new parent about this child's death. */

757

if (!p->ptrace &&

757

if (!p->ptrace &&

758

p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) {

758

p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) {

759

if (do_notify_parent(p, p->exit_signal)) {

759

if (do_notify_parent(p, p->exit_signal)) {

760

p->exit_state = EXIT_DEAD;

760

p->exit_state = EXIT_DEAD;

761

list_move_tail(&p->sibling, dead);

761

list_move_tail(&p->sibling, dead);

762

}

762

}

763

}

763

}

764

765

kill_orphaned_pgrp(p, father);

765

kill_orphaned_pgrp(p, father);

766

}

766

}

767

768

static void forget_original_parent(struct task_struct *father)

768

static void forget_original_parent(struct task_struct *father)

769

{

769

{

770

struct task_struct *p, *n, *reaper;

770

struct task_struct *p, *n, *reaper;

771

LIST_HEAD(dead_children);

771

LIST_HEAD(dead_children);

772

773

write_lock_irq(&tasklist_lock);

773

write_lock_irq(&tasklist_lock);

774

/*

774

/*

775

* Note that exit_ptrace() and find_new_reaper() might

775

* Note that exit_ptrace() and find_new_reaper() might

776

* drop tasklist_lock and reacquire it.

776

* drop tasklist_lock and reacquire it.

777

*/

777

*/

778

exit_ptrace(father);

778

exit_ptrace(father);

779

reaper = find_new_reaper(father);

779

reaper = find_new_reaper(father);

780

781

list_for_each_entry_safe(p, n, &father->children, sibling) {

781

list_for_each_entry_safe(p, n, &father->children, sibling) {

782

struct task_struct *t = p;

782

struct task_struct *t = p;

783

do {

783

do {

784

t->real_parent = reaper;

784

t->real_parent = reaper;

785

if (t->parent == father) {

785

if (t->parent == father) {

786

BUG_ON(t->ptrace);

786

BUG_ON(t->ptrace);

787

t->parent = t->real_parent;

787

t->parent = t->real_parent;

788

}

788

}

789

if (t->pdeath_signal)

789

if (t->pdeath_signal)

790

group_send_sig_info(t->pdeath_signal,

790

group_send_sig_info(t->pdeath_signal,

791

SEND_SIG_NOINFO, t);

791

SEND_SIG_NOINFO, t);

792

} while_each_thread(p, t);

792

} while_each_thread(p, t);

793

reparent_leader(father, p, &dead_children);

793

reparent_leader(father, p, &dead_children);

794

}

794

}

795

write_unlock_irq(&tasklist_lock);

795

write_unlock_irq(&tasklist_lock);

796

797

BUG_ON(!list_empty(&father->children));

797

BUG_ON(!list_empty(&father->children));

798

799

list_for_each_entry_safe(p, n, &dead_children, sibling) {

799

list_for_each_entry_safe(p, n, &dead_children, sibling) {

800

list_del_init(&p->sibling);

800

list_del_init(&p->sibling);

801

release_task(p);

801

release_task(p);

802

}

802

}

803

}

803

}

804

805

/*

805

/*

806

* Send signals to all our closest relatives so that they know

806

* Send signals to all our closest relatives so that they know

807

* to properly mourn us..

807

* to properly mourn us..

808

*/

808

*/

809

static void exit_notify(struct task_struct *tsk, int group_dead)

809

static void exit_notify(struct task_struct *tsk, int group_dead)

810

{

810

{

811

bool autoreap;

811

bool autoreap;

812

813

/*

813

/*

814

* This does two things:

814

* This does two things:

815

*

815

*

816

* A. Make init inherit all the child processes

816

* A. Make init inherit all the child processes

817

* B. Check to see if any process groups have become orphaned

817

* B. Check to see if any process groups have become orphaned

818

* as a result of our exiting, and if they have any stopped

818

* as a result of our exiting, and if they have any stopped

819

* jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)

819

* jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)

820

*/

820

*/

821

forget_original_parent(tsk);

821

forget_original_parent(tsk);

822

exit_task_namespaces(tsk);

822

exit_task_namespaces(tsk);

823

824

write_lock_irq(&tasklist_lock);

824

write_lock_irq(&tasklist_lock);

825

if (group_dead)

825

if (group_dead)

826

kill_orphaned_pgrp(tsk->group_leader, NULL);

826

kill_orphaned_pgrp(tsk->group_leader, NULL);

827

828

/* Let father know we died

828

/* Let father know we died

829

*

829

*

830

* Thread signals are configurable, but you aren't going to use

830

* Thread signals are configurable, but you aren't going to use

831

* that to send signals to arbitrary processes.

831

* that to send signals to arbitrary processes.

832

* That stops right now.

832

* That stops right now.

833

*

833

*

834

* If the parent exec id doesn't match the exec id we saved

834

* If the parent exec id doesn't match the exec id we saved

835

* when we started then we know the parent has changed security

835

* when we started then we know the parent has changed security

836

* domain.

836

* domain.

837

*

837

*

838

* If our self_exec id doesn't match our parent_exec_id then

838

* If our self_exec id doesn't match our parent_exec_id then

839

* we have changed execution domain as these two values started

839

* we have changed execution domain as these two values started

840

* the same after a fork.

840

* the same after a fork.

841

*/

841

*/

842

if (thread_group_leader(tsk) && tsk->exit_signal != SIGCHLD &&

842

if (thread_group_leader(tsk) && tsk->exit_signal != SIGCHLD &&

843

(tsk->parent_exec_id != tsk->real_parent->self_exec_id ||

843

(tsk->parent_exec_id != tsk->real_parent->self_exec_id ||

844

tsk->self_exec_id != tsk->parent_exec_id))

844

tsk->self_exec_id != tsk->parent_exec_id))

845

tsk->exit_signal = SIGCHLD;

845

tsk->exit_signal = SIGCHLD;

846

847

if (unlikely(tsk->ptrace)) {

847

if (unlikely(tsk->ptrace)) {

848

int sig = thread_group_leader(tsk) &&

848

int sig = thread_group_leader(tsk) &&

849

thread_group_empty(tsk) &&

849

thread_group_empty(tsk) &&

850

!ptrace_reparented(tsk) ?

850

!ptrace_reparented(tsk) ?

851

tsk->exit_signal : SIGCHLD;

851

tsk->exit_signal : SIGCHLD;

852

autoreap = do_notify_parent(tsk, sig);

852

autoreap = do_notify_parent(tsk, sig);

853

} else if (thread_group_leader(tsk)) {

853

} else if (thread_group_leader(tsk)) {

854

autoreap = thread_group_empty(tsk) &&

854

autoreap = thread_group_empty(tsk) &&

855

do_notify_parent(tsk, tsk->exit_signal);

855

do_notify_parent(tsk, tsk->exit_signal);

856

} else {

856

} else {

857

autoreap = true;

857

autoreap = true;

858

}

858

}

859

860

tsk->exit_state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE;

860

tsk->exit_state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE;

861

862

/* mt-exec, de_thread() is waiting for group leader */

862

/* mt-exec, de_thread() is waiting for group leader */

863

if (unlikely(tsk->signal->notify_count < 0))

863

if (unlikely(tsk->signal->notify_count < 0))

864

wake_up_process(tsk->signal->group_exit_task);

864

wake_up_process(tsk->signal->group_exit_task);

865

write_unlock_irq(&tasklist_lock);

865

write_unlock_irq(&tasklist_lock);

866

867

/* If the process is dead, release it - nobody will wait for it */

867

/* If the process is dead, release it - nobody will wait for it */

868

if (autoreap)

868

if (autoreap)

869

release_task(tsk);

869

release_task(tsk);

870

}

870

}

871

872

#ifdef CONFIG_DEBUG_STACK_USAGE
/*
 * Log a warning whenever the current task leaves less unused stack than
 * any task checked before it, and remember the new low-water mark.
 */
static void check_stack_usage(void)
{
	static DEFINE_SPINLOCK(low_water_lock);
	static int lowest_to_date = THREAD_SIZE;
	unsigned long unused = stack_not_used(current);

	/* Unlocked fast path: not a new record, nothing to report. */
	if (unused >= lowest_to_date)
		return;

	/* Re-check under the lock before updating the record. */
	spin_lock(&low_water_lock);
	if (unused < lowest_to_date) {
		printk(KERN_WARNING
		       "%s used greatest stack depth: %lu bytes left\n",
		       current->comm, unused);
		lowest_to_date = unused;
	}
	spin_unlock(&low_water_lock);
}
#else
/* Stack usage debugging disabled: the check compiles away. */
static inline void check_stack_usage(void) {}
#endif

896

897

NORET_TYPE void do_exit(long code)

897

NORET_TYPE void do_exit(long code)

898

{

898

{

899

struct task_struct *tsk = current;

899

struct task_struct *tsk = current;

900

int group_dead;

900

int group_dead;

901

902

profile_task_exit(tsk);

902

profile_task_exit(tsk);

903

904

WARN_ON(atomic_read(&tsk->fs_excl));

904

WARN_ON(atomic_read(&tsk->fs_excl));

905

WARN_ON(blk_needs_flush_plug(tsk));

905

WARN_ON(blk_needs_flush_plug(tsk));

906

907

if (unlikely(in_interrupt()))

907

if (unlikely(in_interrupt()))

908

panic("Aiee, killing interrupt handler!");

908

panic("Aiee, killing interrupt handler!");

909

if (unlikely(!tsk->pid))

909

if (unlikely(!tsk->pid))

910

panic("Attempted to kill the idle task!");

910

panic("Attempted to kill the idle task!");

911

912

/*

912

/*

913

* If do_exit is called because this processes oopsed, it's possible

913

* If do_exit is called because this processes oopsed, it's possible

914

* that get_fs() was left as KERNEL_DS, so reset it to USER_DS before

914

* that get_fs() was left as KERNEL_DS, so reset it to USER_DS before

915

* continuing. Amongst other possible reasons, this is to prevent

915

* continuing. Amongst other possible reasons, this is to prevent

916

* mm_release()->clear_child_tid() from writing to a user-controlled

916

* mm_release()->clear_child_tid() from writing to a user-controlled

917

* kernel address.

917

* kernel address.

918

*/

918

*/

919

set_fs(USER_DS);

919

set_fs(USER_DS);

920

921

ptrace_event(PTRACE_EVENT_EXIT, code);

921

ptrace_event(PTRACE_EVENT_EXIT, code);

922

923

validate_creds_for_do_exit(tsk);

923

validate_creds_for_do_exit(tsk);

924

925

/*

925

/*

926

* We're taking recursive faults here in do_exit. Safest is to just

926

* We're taking recursive faults here in do_exit. Safest is to just

927

* leave this task alone and wait for reboot.

927

* leave this task alone and wait for reboot.

928

*/

928

*/

929

if (unlikely(tsk->flags & PF_EXITING)) {

929

if (unlikely(tsk->flags & PF_EXITING)) {

930

printk(KERN_ALERT

930

printk(KERN_ALERT

931

"Fixing recursive fault but reboot is needed!\n");

931

"Fixing recursive fault but reboot is needed!\n");

932

/*

932

/*

933

* We can do this unlocked here. The futex code uses

933

* We can do this unlocked here. The futex code uses

934

* this flag just to verify whether the pi state

934

* this flag just to verify whether the pi state

935

* cleanup has been done or not. In the worst case it

935

* cleanup has been done or not. In the worst case it

936

* loops once more. We pretend that the cleanup was

936

* loops once more. We pretend that the cleanup was

937

* done as there is no way to return. Either the

937

* done as there is no way to return. Either the

938

* OWNER_DIED bit is set by now or we push the blocked

938

* OWNER_DIED bit is set by now or we push the blocked

939

* task into the wait for ever nirwana as well.

939

* task into the wait for ever nirwana as well.

940

*/

940

*/

941

tsk->flags |= PF_EXITPIDONE;

941

tsk->flags |= PF_EXITPIDONE;

942

set_current_state(TASK_UNINTERRUPTIBLE);

942

set_current_state(TASK_UNINTERRUPTIBLE);

943

schedule();

943

schedule();

944

}

944

}

945

946

exit_irq_thread();

946

exit_irq_thread();

947

948

exit_signals(tsk); /* sets PF_EXITING */

948

exit_signals(tsk); /* sets PF_EXITING */

949

/*

949

/*

950

* tsk->flags are checked in the futex code to protect against

950

* tsk->flags are checked in the futex code to protect against

951

* an exiting task cleaning up the robust pi futexes.

951

* an exiting task cleaning up the robust pi futexes.

952

*/

952

*/

953

smp_mb();

953

smp_mb();

954

raw_spin_unlock_wait(&tsk->pi_lock);

954

raw_spin_unlock_wait(&tsk->pi_lock);

955

956

if (unlikely(in_atomic()))

956

if (unlikely(in_atomic()))

957

printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",

957

printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",

958

current->comm, task_pid_nr(current),

958

current->comm, task_pid_nr(current),

959

preempt_count());

959

preempt_count());

960

961

acct_update_integrals(tsk);

961

acct_update_integrals(tsk);

962

/* sync mm's RSS info before statistics gathering */

962

/* sync mm's RSS info before statistics gathering */

963

if (tsk->mm)

963

if (tsk->mm)

964

sync_mm_rss(tsk, tsk->mm);

964

sync_mm_rss(tsk, tsk->mm);

965

group_dead = atomic_dec_and_test(&tsk->signal->live);

965

group_dead = atomic_dec_and_test(&tsk->signal->live);

966

if (group_dead) {

966

if (group_dead) {

967

hrtimer_cancel(&tsk->signal->real_timer);

967

hrtimer_cancel(&tsk->signal->real_timer);

968

exit_itimers(tsk->signal);

968

exit_itimers(tsk->signal);

969

if (tsk->mm)

969

if (tsk->mm)

970

setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm);

970

setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm);

971

}

971

}

972

acct_collect(code, group_dead);

972

acct_collect(code, group_dead);

973

if (group_dead)

973

if (group_dead)

974

tty_audit_exit();

974

tty_audit_exit();

975

if (unlikely(tsk->audit_context))

975

if (unlikely(tsk->audit_context))

976

audit_free(tsk);

976

audit_free(tsk);

977

978

tsk->exit_code = code;

978

tsk->exit_code = code;

979

taskstats_exit(tsk, group_dead);

979

taskstats_exit(tsk, group_dead);

980

981

exit_mm(tsk);

981

exit_mm(tsk);

982

983

if (group_dead)

983

if (group_dead)

984

acct_process();

984

acct_process();

985

trace_sched_process_exit(tsk);

985

trace_sched_process_exit(tsk);

986

987

exit_sem(tsk);

987

exit_sem(tsk);

988

exit_files(tsk);

988

exit_files(tsk);

989

exit_fs(tsk);

989

exit_fs(tsk);

990

check_stack_usage();

990

check_stack_usage();

991

exit_thread();

991

exit_thread();

992

993

/*

993

/*

994

* Flush inherited counters to the parent - before the parent

994

* Flush inherited counters to the parent - before the parent

995

* gets woken up by child-exit notifications.

995

* gets woken up by child-exit notifications.

996

*

996

*

997

* because of cgroup mode, must be called before cgroup_exit()

997

* because of cgroup mode, must be called before cgroup_exit()

998

*/

998

*/

999

perf_event_exit_task(tsk);

999

perf_event_exit_task(tsk);

1000

1001

cgroup_exit(tsk, 1);

1001

cgroup_exit(tsk, 1);

1002

1003

if (group_dead)

1003

if (group_dead)

1004

disassociate_ctty(1);

1004

disassociate_ctty(1);

1005

1006

module_put(task_thread_info(tsk)->exec_domain->module);

1006

module_put(task_thread_info(tsk)->exec_domain->module);

1007

1008

proc_exit_connector(tsk);

1008

proc_exit_connector(tsk);

1009

1010

/*

1010

/*

1011

* FIXME: do that only when needed, using sched_exit tracepoint

1011

* FIXME: do that only when needed, using sched_exit tracepoint

1012

*/

1012

*/

1013

ptrace_put_breakpoints(tsk);

1013

ptrace_put_breakpoints(tsk);

1014

1015

exit_notify(tsk, group_dead);

1015

exit_notify(tsk, group_dead);

1016

#ifdef CONFIG_NUMA

1016

#ifdef CONFIG_NUMA

1017

task_lock(tsk);

1017

task_lock(tsk);

1018

mpol_put(tsk->mempolicy);

1018

mpol_put(tsk->mempolicy);

1019

tsk->mempolicy = NULL;

1019

tsk->mempolicy = NULL;

1020

task_unlock(tsk);

1020

task_unlock(tsk);

1021

#endif

1021

#endif

1022

#ifdef CONFIG_FUTEX

1022

#ifdef CONFIG_FUTEX

1023

if (unlikely(current->pi_state_cache))

1023

if (unlikely(current->pi_state_cache))

1024

kfree(current->pi_state_cache);

1024

kfree(current->pi_state_cache);

1025

#endif

1025

#endif

1026

/*

1026

/*

1027

* Make sure we are holding no locks:

1027

* Make sure we are holding no locks:

1028

*/

1028

*/

1029

debug_check_no_locks_held(tsk);

1029

debug_check_no_locks_held(tsk);

1030

/*

1030

/*

1031

* We can do this unlocked here. The futex code uses this flag

1031

* We can do this unlocked here. The futex code uses this flag

1032

* just to verify whether the pi state cleanup has been done

1032

* just to verify whether the pi state cleanup has been done

1033

* or not. In the worst case it loops once more.

1033

* or not. In the worst case it loops once more.

1034

*/

1034

*/

1035

tsk->flags |= PF_EXITPIDONE;

1035

tsk->flags |= PF_EXITPIDONE;

1036

1037

if (tsk->io_context)

1037

if (tsk->io_context)

1038

exit_io_context(tsk);

1038

exit_io_context(tsk);

1039

1040

if (tsk->splice_pipe)

1040

if (tsk->splice_pipe)

1041

__free_pipe_info(tsk->splice_pipe);

1041

__free_pipe_info(tsk->splice_pipe);

1042

1043

validate_creds_for_do_exit(tsk);

1043

validate_creds_for_do_exit(tsk);

1044

1045

preempt_disable();

1045

preempt_disable();

1046

exit_rcu();

1046

exit_rcu();

1047

/* causes final put_task_struct in finish_task_switch(). */

1047

/* causes final put_task_struct in finish_task_switch(). */

1048

tsk->state = TASK_DEAD;

1048

tsk->state = TASK_DEAD;

1049

schedule();

1049

schedule();

1050

BUG();

1050

BUG();

1051

/* Avoid "noreturn function does return". */

1051

/* Avoid "noreturn function does return". */

1052

for (;;)

1052

for (;;)

1053

cpu_relax(); /* For when BUG is null */

1053

cpu_relax(); /* For when BUG is null */

1054

}

1054

}

1055

1056

EXPORT_SYMBOL_GPL(do_exit);

1056

EXPORT_SYMBOL_GPL(do_exit);

1057

1058

NORET_TYPE void complete_and_exit(struct completion *comp, long code)

1058

NORET_TYPE void complete_and_exit(struct completion *comp, long code)

1059

{

1059

{

1060

if (comp)

1060

if (comp)

1061

complete(comp);

1061

complete(comp);

1062

1063

do_exit(code);

1063

do_exit(code);

1064

}

1064

}

1065

1066

EXPORT_SYMBOL(complete_and_exit);

1066

EXPORT_SYMBOL(complete_and_exit);

1067

1068

/*
 * exit system call: keep the low eight bits of @error_code, shift them
 * into bits 8-15 and pass the result to do_exit().
 */
SYSCALL_DEFINE1(exit, int, error_code)
{
	do_exit((error_code & 0xff) << 8);
}

1072

1073

/*

1073

/*

1074

* Take down every thread in the group. This is called by fatal signals

1074

* Take down every thread in the group. This is called by fatal signals

1075

* as well as by sys_exit_group (below).

1075

* as well as by sys_exit_group (below).

1076

*/

1076

*/

1077

NORET_TYPE void

1077

NORET_TYPE void

1078

do_group_exit(int exit_code)

1078

do_group_exit(int exit_code)

1079

{

1079

{

1080

struct signal_struct *sig = current->signal;

1080

struct signal_struct *sig = current->signal;

1081

1082

BUG_ON(exit_code & 0x80); /* core dumps don't get here */

1082

BUG_ON(exit_code & 0x80); /* core dumps don't get here */

1083

1084

if (signal_group_exit(sig))

1084

if (signal_group_exit(sig))

1085

exit_code = sig->group_exit_code;

1085

exit_code = sig->group_exit_code;

1086

else if (!thread_group_empty(current)) {

1086

else if (!thread_group_empty(current)) {

1087

struct sighand_struct *const sighand = current->sighand;

1087

struct sighand_struct *const sighand = current->sighand;

1088

spin_lock_irq(&sighand->siglock);

1088

spin_lock_irq(&sighand->siglock);

1089

if (signal_group_exit(sig))

1089

if (signal_group_exit(sig))

1090

/* Another thread got here before we took the lock. */

1090

/* Another thread got here before we took the lock. */

1091

exit_code = sig->group_exit_code;

1091

exit_code = sig->group_exit_code;

1092

else {

1092

else {

1093

sig->group_exit_code = exit_code;

1093

sig->group_exit_code = exit_code;

1094

sig->flags = SIGNAL_GROUP_EXIT;

1094

sig->flags = SIGNAL_GROUP_EXIT;

1095

zap_other_threads(current);

1095

zap_other_threads(current);

1096

}

1096

}

1097

spin_unlock_irq(&sighand->siglock);

1097

spin_unlock_irq(&sighand->siglock);

1098

}

1098

}

1099

1100

do_exit(exit_code);

1100

do_exit(exit_code);

1101

/* NOTREACHED */

1101

/* NOTREACHED */

1102

}

1102

}

1103

1104

/*

1104

/*

1105

* this kills every thread in the thread group. Note that any externally

1105

* this kills every thread in the thread group. Note that any externally

1106

* wait4()-ing process will get the correct exit code - even if this

1106

* wait4()-ing process will get the correct exit code - even if this

1107

* thread is not the thread group leader.

1107

* thread is not the thread group leader.

1108

*/

1108

*/

1109

SYSCALL_DEFINE1(exit_group, int, error_code)

1109

SYSCALL_DEFINE1(exit_group, int, error_code)

1110

{

1110

{

1111

do_group_exit((error_code & 0xff) << 8);

1111

do_group_exit((error_code & 0xff) << 8);

1112

/* NOTREACHED */

1112

/* NOTREACHED */

1113

return 0;

1113

return 0;

1114

}

1114

}

1115

1116

struct wait_opts {

1116

struct wait_opts {

1117

enum pid_type wo_type;

1117

enum pid_type wo_type;

1118

int wo_flags;

1118

int wo_flags;

1119

struct pid *wo_pid;

1119

struct pid *wo_pid;

1120

1121

struct siginfo __user *wo_info;

1121

struct siginfo __user *wo_info;

1122

int __user *wo_stat;

1122

int __user *wo_stat;

1123

struct rusage __user *wo_rusage;

1123

struct rusage __user *wo_rusage;

1124

1125

wait_queue_t child_wait;

1125

wait_queue_t child_wait;

1126

int notask_error;

1126

int notask_error;

1127

};

1127

};

1128

1129

static inline

1129

static inline

1130

struct pid *task_pid_type(struct task_struct *task, enum pid_type type)

1130

struct pid *task_pid_type(struct task_struct *task, enum pid_type type)

1131

{

1131

{

1132

if (type != PIDTYPE_PID)

1132

if (type != PIDTYPE_PID)

1133

task = task->group_leader;

1133

task = task->group_leader;

1134

return task->pids[type].pid;

1134

return task->pids[type].pid;

1135

}

1135

}

1136

1137

static int eligible_pid(struct wait_opts *wo, struct task_struct *p)

1137

static int eligible_pid(struct wait_opts *wo, struct task_struct *p)

1138

{

1138

{

1139

return wo->wo_type == PIDTYPE_MAX ||

1139

return wo->wo_type == PIDTYPE_MAX ||

1140

task_pid_type(p, wo->wo_type) == wo->wo_pid;

1140

task_pid_type(p, wo->wo_type) == wo->wo_pid;

1141

}

1141

}

1142

1143

static int eligible_child(struct wait_opts *wo, struct task_struct *p)

1143

static int eligible_child(struct wait_opts *wo, struct task_struct *p)

1144

{

1144

{

1145

if (!eligible_pid(wo, p))

1145

if (!eligible_pid(wo, p))

1146

return 0;

1146

return 0;

1147

/* Wait for all children (clone and not) if __WALL is set;

1147

/* Wait for all children (clone and not) if __WALL is set;

1148

* otherwise, wait for clone children *only* if __WCLONE is

1148

* otherwise, wait for clone children *only* if __WCLONE is

1149

* set; otherwise, wait for non-clone children *only*. (Note:

1149

* set; otherwise, wait for non-clone children *only*. (Note:

1150

* A "clone" child here is one that reports to its parent

1150

* A "clone" child here is one that reports to its parent

1151

* using a signal other than SIGCHLD.) */

1151

* using a signal other than SIGCHLD.) */

1152

if (((p->exit_signal != SIGCHLD) ^ !!(wo->wo_flags & __WCLONE))

1152

if (((p->exit_signal != SIGCHLD) ^ !!(wo->wo_flags & __WCLONE))

1153

&& !(wo->wo_flags & __WALL))

1153

&& !(wo->wo_flags & __WALL))

1154

return 0;

1154

return 0;

1155

1156

return 1;

1156

return 1;

1157

}

1157

}

1158

1159

static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p,

1159

static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p,

1160

pid_t pid, uid_t uid, int why, int status)

1160

pid_t pid, uid_t uid, int why, int status)

1161

{

1161

{

1162

struct siginfo __user *infop;

1162

struct siginfo __user *infop;

1163

int retval = wo->wo_rusage

1163

int retval = wo->wo_rusage

1164

? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;

1164

? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;

1165

1166

put_task_struct(p);

1166

put_task_struct(p);

1167

infop = wo->wo_info;

1167

infop = wo->wo_info;

1168

if (infop) {

1168

if (infop) {

1169

if (!retval)

1169

if (!retval)

1170

retval = put_user(SIGCHLD, &infop->si_signo);

1170

retval = put_user(SIGCHLD, &infop->si_signo);

1171

if (!retval)

1171

if (!retval)

1172

retval = put_user(0, &infop->si_errno);

1172

retval = put_user(0, &infop->si_errno);

1173

if (!retval)

1173

if (!retval)

1174

retval = put_user((short)why, &infop->si_code);

1174

retval = put_user((short)why, &infop->si_code);

1175

if (!retval)

1175

if (!retval)

1176

retval = put_user(pid, &infop->si_pid);

1176

retval = put_user(pid, &infop->si_pid);

1177

if (!retval)

1177

if (!retval)

1178

retval = put_user(uid, &infop->si_uid);

1178

retval = put_user(uid, &infop->si_uid);

1179

if (!retval)

1179

if (!retval)

1180

retval = put_user(status, &infop->si_status);

1180

retval = put_user(status, &infop->si_status);

1181

}

1181

}

1182

if (!retval)

1182

if (!retval)

1183

retval = pid;

1183

retval = pid;

1184

return retval;

1184

return retval;

1185

}

1185

}

1186

1187

/*

1187

/*

1188

* Handle sys_wait4 work for one task in state EXIT_ZOMBIE. We hold

1188

* Handle sys_wait4 work for one task in state EXIT_ZOMBIE. We hold

1189

* read_lock(&tasklist_lock) on entry. If we return zero, we still hold

1189

* read_lock(&tasklist_lock) on entry. If we return zero, we still hold

1190

* the lock and this task is uninteresting. If we return nonzero, we have

1190

* the lock and this task is uninteresting. If we return nonzero, we have

1191

* released the lock and the system call should return.

1191

* released the lock and the system call should return.

1192

*/

1192

*/

1193

static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)

1193

static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)

1194

{

1194

{

1195

unsigned long state;

1195

unsigned long state;

1196

int retval, status, traced;

1196

int retval, status, traced;

1197

pid_t pid = task_pid_vnr(p);

1197

pid_t pid = task_pid_vnr(p);

1198

uid_t uid = __task_cred(p)->uid;

1198

uid_t uid = __task_cred(p)->uid;

1199

struct siginfo __user *infop;

1199

struct siginfo __user *infop;

1200

1201

if (!likely(wo->wo_flags & WEXITED))

1201

if (!likely(wo->wo_flags & WEXITED))

1202

return 0;

1202

return 0;

1203

1204

if (unlikely(wo->wo_flags & WNOWAIT)) {

1204

if (unlikely(wo->wo_flags & WNOWAIT)) {

1205

int exit_code = p->exit_code;

1205

int exit_code = p->exit_code;

1206

int why;

1206

int why;

1207

1208

get_task_struct(p);

1208

get_task_struct(p);

1209

read_unlock(&tasklist_lock);

1209

read_unlock(&tasklist_lock);

1210

if ((exit_code & 0x7f) == 0) {

1210

if ((exit_code & 0x7f) == 0) {

1211

why = CLD_EXITED;

1211

why = CLD_EXITED;

1212

status = exit_code >> 8;

1212

status = exit_code >> 8;

1213

} else {

1213

} else {

1214

why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED;

1214

why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED;

1215

status = exit_code & 0x7f;

1215

status = exit_code & 0x7f;

1216

}

1216

}

1217

return wait_noreap_copyout(wo, p, pid, uid, why, status);

1217

return wait_noreap_copyout(wo, p, pid, uid, why, status);

1218

}

1218

}

1219

1220

/*

1220

/*

1221

* Try to move the task's state to DEAD

1221

* Try to move the task's state to DEAD

1222

* only one thread is allowed to do this:

1222

* only one thread is allowed to do this:

1223

*/

1223

*/

1224

state = xchg(&p->exit_state, EXIT_DEAD);

1224

state = xchg(&p->exit_state, EXIT_DEAD);

1225

if (state != EXIT_ZOMBIE) {

1225

if (state != EXIT_ZOMBIE) {

1226

BUG_ON(state != EXIT_DEAD);

1226

BUG_ON(state != EXIT_DEAD);

1227

return 0;

1227

return 0;

1228

}

1228

}

1229

1230

traced = ptrace_reparented(p);

1230

traced = ptrace_reparented(p);

1231

/*

1231

/*

1232

* It can be ptraced but not reparented, check

1232

* It can be ptraced but not reparented, check

1233

* thread_group_leader() to filter out sub-threads.

1233

* thread_group_leader() to filter out sub-threads.

1234

*/

1234

*/

1235

if (likely(!traced) && thread_group_leader(p)) {

1235

if (likely(!traced) && thread_group_leader(p)) {

1236

struct signal_struct *psig;

1236

struct signal_struct *psig;

1237

struct signal_struct *sig;

1237

struct signal_struct *sig;

1238

unsigned long maxrss;

1238

unsigned long maxrss;

1239

cputime_t tgutime, tgstime;

1239

cputime_t tgutime, tgstime;

1240

1241

/*

1241

/*

1242

* The resource counters for the group leader are in its

1242

* The resource counters for the group leader are in its

1243

* own task_struct. Those for dead threads in the group

1243

* own task_struct. Those for dead threads in the group

1244

* are in its signal_struct, as are those for the child

1244

* are in its signal_struct, as are those for the child

1245

* processes it has previously reaped. All these

1245

* processes it has previously reaped. All these

1246

* accumulate in the parent's signal_struct c* fields.

1246

* accumulate in the parent's signal_struct c* fields.

1247

*

1247

*

1248

* We don't bother to take a lock here to protect these

1248

* We don't bother to take a lock here to protect these

1249

* p->signal fields, because they are only touched by

1249

* p->signal fields, because they are only touched by

1250

* __exit_signal, which runs with tasklist_lock

1250

* __exit_signal, which runs with tasklist_lock

1251

* write-locked anyway, and so is excluded here. We do

1251

* write-locked anyway, and so is excluded here. We do

1252

* need to protect the access to parent->signal fields,

1252

* need to protect the access to parent->signal fields,

1253

* as other threads in the parent group can be right

1253

* as other threads in the parent group can be right

1254

* here reaping other children at the same time.

1254

* here reaping other children at the same time.

1255

*

1255

*

1256

* We use thread_group_times() to get times for the thread

1256

* We use thread_group_times() to get times for the thread

1257

* group, which consolidates times for all threads in the

1257

* group, which consolidates times for all threads in the

1258

* group including the group leader.

1258

* group including the group leader.

1259

*/

1259

*/

1260

thread_group_times(p, &tgutime, &tgstime);

1260

thread_group_times(p, &tgutime, &tgstime);

1261

spin_lock_irq(&p->real_parent->sighand->siglock);

1261

spin_lock_irq(&p->real_parent->sighand->siglock);

1262

psig = p->real_parent->signal;

1262

psig = p->real_parent->signal;

1263

sig = p->signal;

1263

sig = p->signal;

1264

psig->cutime =

1264

psig->cutime =

1265

cputime_add(psig->cutime,

1265

cputime_add(psig->cutime,

1266

cputime_add(tgutime,

1266

cputime_add(tgutime,

1267

sig->cutime));

1267

sig->cutime));

1268

psig->cstime =

1268

psig->cstime =

1269

cputime_add(psig->cstime,

1269

cputime_add(psig->cstime,

1270

cputime_add(tgstime,

1270

cputime_add(tgstime,

1271

sig->cstime));

1271

sig->cstime));

1272

psig->cgtime =

1272

psig->cgtime =

1273

cputime_add(psig->cgtime,

1273

cputime_add(psig->cgtime,

1274

cputime_add(p->gtime,

1274

cputime_add(p->gtime,

1275

cputime_add(sig->gtime,

1275

cputime_add(sig->gtime,

1276

sig->cgtime)));

1276

sig->cgtime)));

1277

psig->cmin_flt +=

1277

psig->cmin_flt +=

1278

p->min_flt + sig->min_flt + sig->cmin_flt;

1278

p->min_flt + sig->min_flt + sig->cmin_flt;

1279

psig->cmaj_flt +=

1279

psig->cmaj_flt +=

1280

p->maj_flt + sig->maj_flt + sig->cmaj_flt;

1280

p->maj_flt + sig->maj_flt + sig->cmaj_flt;

1281

psig->cnvcsw +=

1281

psig->cnvcsw +=

1282

p->nvcsw + sig->nvcsw + sig->cnvcsw;

1282

p->nvcsw + sig->nvcsw + sig->cnvcsw;

1283

psig->cnivcsw +=

1283

psig->cnivcsw +=

1284

p->nivcsw + sig->nivcsw + sig->cnivcsw;

1284

p->nivcsw + sig->nivcsw + sig->cnivcsw;

1285

psig->cinblock +=

1285

psig->cinblock +=

1286

task_io_get_inblock(p) +

1286

task_io_get_inblock(p) +

1287

sig->inblock + sig->cinblock;

1287

sig->inblock + sig->cinblock;

1288

psig->coublock +=

1288

psig->coublock +=

1289

task_io_get_oublock(p) +

1289

task_io_get_oublock(p) +

1290

sig->oublock + sig->coublock;

1290

sig->oublock + sig->coublock;

1291

maxrss = max(sig->maxrss, sig->cmaxrss);

1291

maxrss = max(sig->maxrss, sig->cmaxrss);

1292

if (psig->cmaxrss < maxrss)

1292

if (psig->cmaxrss < maxrss)

1293

psig->cmaxrss = maxrss;

1293

psig->cmaxrss = maxrss;

1294

task_io_accounting_add(&psig->ioac, &p->ioac);

1294

task_io_accounting_add(&psig->ioac, &p->ioac);

1295

task_io_accounting_add(&psig->ioac, &sig->ioac);

1295

task_io_accounting_add(&psig->ioac, &sig->ioac);

1296

spin_unlock_irq(&p->real_parent->sighand->siglock);

1296

spin_unlock_irq(&p->real_parent->sighand->siglock);

1297

}

1297

}

1298

1299

/*

1299

/*

1300

* Now we are sure this task is interesting, and no other

1300

* Now we are sure this task is interesting, and no other

1301

* thread can reap it because we set its state to EXIT_DEAD.

1301

* thread can reap it because we set its state to EXIT_DEAD.

1302

*/

1302

*/

1303

read_unlock(&tasklist_lock);

1303

read_unlock(&tasklist_lock);

1304

1305

retval = wo->wo_rusage

1305

retval = wo->wo_rusage

1306

? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;

1306

? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;

1307

status = (p->signal->flags & SIGNAL_GROUP_EXIT)

1307

status = (p->signal->flags & SIGNAL_GROUP_EXIT)

1308

? p->signal->group_exit_code : p->exit_code;

1308

? p->signal->group_exit_code : p->exit_code;

1309

if (!retval && wo->wo_stat)

1309

if (!retval && wo->wo_stat)

1310

retval = put_user(status, wo->wo_stat);

1310

retval = put_user(status, wo->wo_stat);

1311

1312

infop = wo->wo_info;

1312

infop = wo->wo_info;

1313

if (!retval && infop)

1313

if (!retval && infop)

1314

retval = put_user(SIGCHLD, &infop->si_signo);

1314

retval = put_user(SIGCHLD, &infop->si_signo);

1315

if (!retval && infop)

1315

if (!retval && infop)

1316

retval = put_user(0, &infop->si_errno);

1316

retval = put_user(0, &infop->si_errno);

1317

if (!retval && infop) {

1317

if (!retval && infop) {

1318

int why;

1318

int why;

1319

1320

if ((status & 0x7f) == 0) {

1320

if ((status & 0x7f) == 0) {

1321

why = CLD_EXITED;

1321

why = CLD_EXITED;

1322

status >>= 8;

1322

status >>= 8;

1323

} else {

1323

} else {

1324

why = (status & 0x80) ? CLD_DUMPED : CLD_KILLED;

1324

why = (status & 0x80) ? CLD_DUMPED : CLD_KILLED;

1325

status &= 0x7f;

1325

status &= 0x7f;

1326

}

1326

}

1327

retval = put_user((short)why, &infop->si_code);

1327

retval = put_user((short)why, &infop->si_code);

1328

if (!retval)

1328

if (!retval)

1329

retval = put_user(status, &infop->si_status);

1329

retval = put_user(status, &infop->si_status);

1330

}

1330

}

1331

if (!retval && infop)

1331

if (!retval && infop)

1332

retval = put_user(pid, &infop->si_pid);

1332

retval = put_user(pid, &infop->si_pid);

1333

if (!retval && infop)

1333

if (!retval && infop)

1334

retval = put_user(uid, &infop->si_uid);

1334

retval = put_user(uid, &infop->si_uid);

1335

if (!retval)

1335

if (!retval)

1336

retval = pid;

1336

retval = pid;

1337

1338

if (traced) {

1338

if (traced) {

1339

write_lock_irq(&tasklist_lock);

1339

write_lock_irq(&tasklist_lock);

1340

/* We dropped tasklist, ptracer could die and untrace */

1340

/* We dropped tasklist, ptracer could die and untrace */

1341

ptrace_unlink(p);

1341

ptrace_unlink(p);

1342

/*

1342

/*

1343

* If this is not a sub-thread, notify the parent.

1343

* If this is not a sub-thread, notify the parent.

1344

* If parent wants a zombie, don't release it now.

1344

* If parent wants a zombie, don't release it now.

1345

*/

1345

*/

1346

if (thread_group_leader(p) &&

1346

if (thread_group_leader(p) &&

1347

!do_notify_parent(p, p->exit_signal)) {

1347

!do_notify_parent(p, p->exit_signal)) {

1348

p->exit_state = EXIT_ZOMBIE;

1348

p->exit_state = EXIT_ZOMBIE;

1349

p = NULL;

1349

p = NULL;

1350

}

1350

}

1351

write_unlock_irq(&tasklist_lock);

1351

write_unlock_irq(&tasklist_lock);

1352

}

1352

}

1353

if (p != NULL)

1353

if (p != NULL)

1354

release_task(p);

1354

release_task(p);

1355

1356

return retval;

1356

return retval;

1357

}

1357

}

1358

1359

/*
 * Return a pointer to the stop code that a wait on @p should report, or
 * NULL when @p is not in a reportable stopped state.
 *
 * For a ptrace wait this is &p->exit_code, provided the task is stopped
 * or traced and JOBCTL_LISTENING is not set.  Otherwise it is the
 * group-wide &p->signal->group_exit_code, provided the group has
 * SIGNAL_STOP_STOPPED set.
 */
static int *task_stopped_code(struct task_struct *p, bool ptrace)
{
	if (!ptrace) {
		if (p->signal->flags & SIGNAL_STOP_STOPPED)
			return &p->signal->group_exit_code;
		return NULL;
	}

	if (!task_is_stopped_or_traced(p))
		return NULL;
	if (p->jobctl & JOBCTL_LISTENING)
		return NULL;

	return &p->exit_code;
}

1371

1372

/**

1372

/**

1373

* wait_task_stopped - Wait for %TASK_STOPPED or %TASK_TRACED

1373

* wait_task_stopped - Wait for %TASK_STOPPED or %TASK_TRACED

1374

* @wo: wait options

1374

* @wo: wait options

1375

* @ptrace: is the wait for ptrace

1375

* @ptrace: is the wait for ptrace

1376

* @p: task to wait for

1376

* @p: task to wait for

1377

*

1377

*

1378

* Handle sys_wait4() work for %p in state %TASK_STOPPED or %TASK_TRACED.

1378

* Handle sys_wait4() work for %p in state %TASK_STOPPED or %TASK_TRACED.

1379

*

1379

*

1380

* CONTEXT:

1380

* CONTEXT:

1381

* read_lock(&tasklist_lock), which is released if return value is

1381

* read_lock(&tasklist_lock), which is released if return value is

1382

* non-zero. Also, grabs and releases @p->sighand->siglock.

1382

* non-zero. Also, grabs and releases @p->sighand->siglock.

1383

*

1383

*

1384

* RETURNS:

1384

* RETURNS:

1385

* 0 if wait condition didn't exist and search for other wait conditions

1385

* 0 if wait condition didn't exist and search for other wait conditions

1386

* should continue. Non-zero return, -errno on failure and @p's pid on

1386

* should continue. Non-zero return, -errno on failure and @p's pid on

1387

* success, implies that tasklist_lock is released and wait condition

1387

* success, implies that tasklist_lock is released and wait condition

1388

* search should terminate.

1388

* search should terminate.

1389

*/

1389

*/

1390

static int wait_task_stopped(struct wait_opts *wo,

1390

static int wait_task_stopped(struct wait_opts *wo,

1391

int ptrace, struct task_struct *p)

1391

int ptrace, struct task_struct *p)

1392

{

1392

{

1393

struct siginfo __user *infop;

1393

struct siginfo __user *infop;

1394

int retval, exit_code, *p_code, why;

1394

int retval, exit_code, *p_code, why;

1395

uid_t uid = 0; /* unneeded, required by compiler */

1395

uid_t uid = 0; /* unneeded, required by compiler */

1396

pid_t pid;

1396

pid_t pid;

1397

1398

/*

1398

/*

1399

* Traditionally we see ptrace'd stopped tasks regardless of options.

1399

* Traditionally we see ptrace'd stopped tasks regardless of options.

1400

*/

1400

*/

1401

if (!ptrace && !(wo->wo_flags & WUNTRACED))

1401

if (!ptrace && !(wo->wo_flags & WUNTRACED))

1402

return 0;

1402

return 0;

1403

1404

if (!task_stopped_code(p, ptrace))

1404

if (!task_stopped_code(p, ptrace))

1405

return 0;

1405

return 0;

1406

1407

exit_code = 0;

1407

exit_code = 0;

1408

spin_lock_irq(&p->sighand->siglock);

1408

spin_lock_irq(&p->sighand->siglock);

1409

1410

p_code = task_stopped_code(p, ptrace);

1410

p_code = task_stopped_code(p, ptrace);

1411

if (unlikely(!p_code))

1411

if (unlikely(!p_code))

1412

goto unlock_sig;

1412

goto unlock_sig;

1413

1414

exit_code = *p_code;

1414

exit_code = *p_code;

1415

if (!exit_code)

1415

if (!exit_code)

1416

goto unlock_sig;

1416

goto unlock_sig;

1417

1418

if (!unlikely(wo->wo_flags & WNOWAIT))

1418

if (!unlikely(wo->wo_flags & WNOWAIT))

1419

*p_code = 0;

1419

*p_code = 0;

1420

1421

uid = task_uid(p);

1421

uid = task_uid(p);

1422

unlock_sig:

1422

unlock_sig:

1423

spin_unlock_irq(&p->sighand->siglock);

1423

spin_unlock_irq(&p->sighand->siglock);

1424

if (!exit_code)

1424

if (!exit_code)

1425

return 0;

1425

return 0;

1426

1427

/*

1427

/*

1428

* Now we are pretty sure this task is interesting.

1428

* Now we are pretty sure this task is interesting.

1429

* Make sure it doesn't get reaped out from under us while we

1429

* Make sure it doesn't get reaped out from under us while we

1430

* give up the lock and then examine it below. We don't want to

1430

* give up the lock and then examine it below. We don't want to

1431

* keep holding onto the tasklist_lock while we call getrusage and

1431

* keep holding onto the tasklist_lock while we call getrusage and

1432

* possibly take page faults for user memory.

1432

* possibly take page faults for user memory.

1433

*/

1433

*/

1434

get_task_struct(p);

1434

get_task_struct(p);

1435

pid = task_pid_vnr(p);

1435

pid = task_pid_vnr(p);

1436

why = ptrace ? CLD_TRAPPED : CLD_STOPPED;

1436

why = ptrace ? CLD_TRAPPED : CLD_STOPPED;

1437

read_unlock(&tasklist_lock);

1437

read_unlock(&tasklist_lock);

1438

1439

if (unlikely(wo->wo_flags & WNOWAIT))

1439

if (unlikely(wo->wo_flags & WNOWAIT))

1440

return wait_noreap_copyout(wo, p, pid, uid, why, exit_code);

1440

return wait_noreap_copyout(wo, p, pid, uid, why, exit_code);

1441

1442

retval = wo->wo_rusage

1442

retval = wo->wo_rusage

1443

? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;

1443

? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;

1444

if (!retval && wo->wo_stat)

1444

if (!retval && wo->wo_stat)

1445

retval = put_user((exit_code << 8) | 0x7f, wo->wo_stat);

1445

retval = put_user((exit_code << 8) | 0x7f, wo->wo_stat);

1446

1447

infop = wo->wo_info;

1447

infop = wo->wo_info;

1448

if (!retval && infop)

1448

if (!retval && infop)

1449

retval = put_user(SIGCHLD, &infop->si_signo);

1449

retval = put_user(SIGCHLD, &infop->si_signo);

1450

if (!retval && infop)

1450

if (!retval && infop)

1451

retval = put_user(0, &infop->si_errno);

1451

retval = put_user(0, &infop->si_errno);

1452

if (!retval && infop)

1452

if (!retval && infop)

1453

retval = put_user((short)why, &infop->si_code);

1453

retval = put_user((short)why, &infop->si_code);

1454

if (!retval && infop)

1454

if (!retval && infop)

1455

retval = put_user(exit_code, &infop->si_status);

1455

retval = put_user(exit_code, &infop->si_status);

1456

if (!retval && infop)

1456

if (!retval && infop)

1457

retval = put_user(pid, &infop->si_pid);

1457

retval = put_user(pid, &infop->si_pid);

1458

if (!retval && infop)

1458

if (!retval && infop)

1459

retval = put_user(uid, &infop->si_uid);

1459

retval = put_user(uid, &infop->si_uid);

1460

if (!retval)

1460

if (!retval)

1461

retval = pid;

1461

retval = pid;

1462

put_task_struct(p);

1462

put_task_struct(p);

1463

1464

BUG_ON(!retval);

1464

BUG_ON(!retval);

1465

return retval;

1465

return retval;

1466

}

1466

}

1467

1468

/*

1468

/*

1469

* Handle do_wait work for one task in a live, non-stopped state.

1469

* Handle do_wait work for one task in a live, non-stopped state.

1470

* read_lock(&tasklist_lock) on entry. If we return zero, we still hold

1470

* read_lock(&tasklist_lock) on entry. If we return zero, we still hold

1471

* the lock and this task is uninteresting. If we return nonzero, we have

1471

* the lock and this task is uninteresting. If we return nonzero, we have

1472

* released the lock and the system call should return.

1472

* released the lock and the system call should return.

1473

*/

1473

*/

1474

static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)

1474

static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)

1475

{

1475

{

1476

int retval;

1476

int retval;

1477

pid_t pid;

1477

pid_t pid;

1478

uid_t uid;

1478

uid_t uid;

1479

1480

if (!unlikely(wo->wo_flags & WCONTINUED))

1480

if (!unlikely(wo->wo_flags & WCONTINUED))

1481

return 0;

1481

return 0;

1482

1483

if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))

1483

if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))

1484

return 0;

1484

return 0;

1485

1486

spin_lock_irq(&p->sighand->siglock);

1486

spin_lock_irq(&p->sighand->siglock);

1487

/* Re-check with the lock held. */

1487

/* Re-check with the lock held. */

1488

if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) {

1488

if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) {

1489

spin_unlock_irq(&p->sighand->siglock);

1489

spin_unlock_irq(&p->sighand->siglock);

1490

return 0;

1490

return 0;

1491

}

1491

}

1492

if (!unlikely(wo->wo_flags & WNOWAIT))

1492

if (!unlikely(wo->wo_flags & WNOWAIT))

1493

p->signal->flags &= ~SIGNAL_STOP_CONTINUED;

1493

p->signal->flags &= ~SIGNAL_STOP_CONTINUED;

1494

uid = task_uid(p);

1494

uid = task_uid(p);

1495

spin_unlock_irq(&p->sighand->siglock);

1495

spin_unlock_irq(&p->sighand->siglock);

1496

1497

pid = task_pid_vnr(p);

1497

pid = task_pid_vnr(p);

1498

get_task_struct(p);

1498

get_task_struct(p);

1499

read_unlock(&tasklist_lock);

1499

read_unlock(&tasklist_lock);

1500

1501

if (!wo->wo_info) {

1501

if (!wo->wo_info) {

1502

retval = wo->wo_rusage

1502

retval = wo->wo_rusage

1503

? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;

1503

? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;

1504

put_task_struct(p);

1504

put_task_struct(p);

1505

if (!retval && wo->wo_stat)

1505

if (!retval && wo->wo_stat)

1506

retval = put_user(0xffff, wo->wo_stat);

1506

retval = put_user(0xffff, wo->wo_stat);

1507

if (!retval)

1507

if (!retval)

1508

retval = pid;

1508

retval = pid;

1509

} else {

1509

} else {

1510

retval = wait_noreap_copyout(wo, p, pid, uid,

1510

retval = wait_noreap_copyout(wo, p, pid, uid,

1511

CLD_CONTINUED, SIGCONT);

1511

CLD_CONTINUED, SIGCONT);

1512

BUG_ON(retval == 0);

1512

BUG_ON(retval == 0);

1513

}

1513

}

1514

1515

return retval;

1515

return retval;

1516

}

1516

}

1517

1518

/*

1518

/*

1519

* Consider @p for a wait by @parent.

1519

* Consider @p for a wait by @parent.

1520

*

1520

*

1521

* -ECHILD should be in ->notask_error before the first call.

1521

* -ECHILD should be in ->notask_error before the first call.

1522

* Returns nonzero for a final return, when we have unlocked tasklist_lock.

1522

* Returns nonzero for a final return, when we have unlocked tasklist_lock.

1523

* Returns zero if the search for a child should continue;

1523

* Returns zero if the search for a child should continue;

1524

* then ->notask_error is 0 if @p is an eligible child,

1524

* then ->notask_error is 0 if @p is an eligible child,

1525

* or another error from security_task_wait(), or still -ECHILD.

1525

* or another error from security_task_wait(), or still -ECHILD.

1526

*/

1526

*/

1527

static int wait_consider_task(struct wait_opts *wo, int ptrace,

1527

static int wait_consider_task(struct wait_opts *wo, int ptrace,

1528

struct task_struct *p)

1528

struct task_struct *p)

1529

{

1529

{

1530

int ret = eligible_child(wo, p);

1530

int ret = eligible_child(wo, p);

1531

if (!ret)

1531

if (!ret)

1532

return ret;

1532

return ret;

1533

1534

ret = security_task_wait(p);

1534

ret = security_task_wait(p);

1535

if (unlikely(ret < 0)) {

1535

if (unlikely(ret < 0)) {

1536

/*

1536

/*

1537

* If we have not yet seen any eligible child,

1537

* If we have not yet seen any eligible child,

1538

* then let this error code replace -ECHILD.

1538

* then let this error code replace -ECHILD.

1539

* A permission error will give the user a clue

1539

* A permission error will give the user a clue

1540

* to look for security policy problems, rather

1540

* to look for security policy problems, rather

1541

* than for mysterious wait bugs.

1541

* than for mysterious wait bugs.

1542

*/

1542

*/

1543

if (wo->notask_error)

1543

if (wo->notask_error)

1544

wo->notask_error = ret;

1544

wo->notask_error = ret;

1545

return 0;

1545

return 0;

1546

}

1546

}

1547

1548

/* dead body doesn't have much to contribute */

1548

/* dead body doesn't have much to contribute */

1549

if (p->exit_state == EXIT_DEAD)

1549

if (p->exit_state == EXIT_DEAD)

1550

return 0;

1550

return 0;

1551

1552

/* slay zombie? */

1552

/* slay zombie? */

1553

if (p->exit_state == EXIT_ZOMBIE) {

1553

if (p->exit_state == EXIT_ZOMBIE) {

1554

/*

1554

/*

1555

* A zombie ptracee is only visible to its ptracer.

1555

* A zombie ptracee is only visible to its ptracer.

1556

* Notification and reaping will be cascaded to the real

1556

* Notification and reaping will be cascaded to the real

1557

* parent when the ptracer detaches.

1557

* parent when the ptracer detaches.

1558

*/

1558

*/

1559

if (likely(!ptrace) && unlikely(p->ptrace)) {

1559

if (likely(!ptrace) && unlikely(p->ptrace)) {

1560

/* it will become visible, clear notask_error */

1560

/* it will become visible, clear notask_error */

1561

wo->notask_error = 0;

1561

wo->notask_error = 0;

1562

return 0;

1562

return 0;

1563

}

1563

}

1564

1565

/* we don't reap group leaders with subthreads */

1565

/* we don't reap group leaders with subthreads */

1566

if (!delay_group_leader(p))

1566

if (!delay_group_leader(p))

1567

return wait_task_zombie(wo, p);

1567

return wait_task_zombie(wo, p);

1568

1569

/*

1569

/*

1570

* Allow access to stopped/continued state via zombie by

1570

* Allow access to stopped/continued state via zombie by

1571

* falling through. Clearing of notask_error is complex.

1571

* falling through. Clearing of notask_error is complex.

1572

*

1572

*

1573

* When !@ptrace:

1573

* When !@ptrace:

1574

*

1574

*

1575

* If WEXITED is set, notask_error should naturally be

1575

* If WEXITED is set, notask_error should naturally be

1576

* cleared. If not, subset of WSTOPPED|WCONTINUED is set,

1576

* cleared. If not, subset of WSTOPPED|WCONTINUED is set,

1577

* so, if there are live subthreads, there are events to

1577

* so, if there are live subthreads, there are events to

1578

* wait for. If all subthreads are dead, it's still safe

1578

* wait for. If all subthreads are dead, it's still safe

1579

* to clear - this function will be called again in finite

1579

* to clear - this function will be called again in finite

1580

* amount time once all the subthreads are released and

1580

* amount time once all the subthreads are released and

1581

* will then return without clearing.

1581

* will then return without clearing.

1582

*

1582

*

1583

* When @ptrace:

1583

* When @ptrace:

1584

*

1584

*

1585

* Stopped state is per-task and thus can't change once the

1585

* Stopped state is per-task and thus can't change once the

1586

* target task dies. Only continued and exited can happen.

1586

* target task dies. Only continued and exited can happen.

1587

* Clear notask_error if WCONTINUED | WEXITED.

1587

* Clear notask_error if WCONTINUED | WEXITED.

1588

*/

1588

*/

1589

if (likely(!ptrace) || (wo->wo_flags & (WCONTINUED | WEXITED)))

1589

if (likely(!ptrace) || (wo->wo_flags & (WCONTINUED | WEXITED)))

1590

wo->notask_error = 0;

1590

wo->notask_error = 0;

1591

} else {

1591

} else {

1592

/*

1592

/*

1593

* If @p is ptraced by a task in its real parent's group,

1593

* If @p is ptraced by a task in its real parent's group,

1594

* hide group stop/continued state when looking at @p as

1594

* hide group stop/continued state when looking at @p as

1595

* the real parent; otherwise, a single stop can be

1595

* the real parent; otherwise, a single stop can be

1596

* reported twice as group and ptrace stops.

1596

* reported twice as group and ptrace stops.

1597

*

1597

*

1598

* If a ptracer wants to distinguish the two events for its

1598

* If a ptracer wants to distinguish the two events for its

1599

* own children, it should create a separate process which

1599

* own children, it should create a separate process which

1600

* takes the role of real parent.

1600

* takes the role of real parent.

1601

*/

1601

*/

1602

if (likely(!ptrace) && p->ptrace &&

1602

if (likely(!ptrace) && p->ptrace && !ptrace_reparented(p))

1603

same_thread_group(p->parent, p->real_parent))

1604

return 0;

1603

return 0;

1605

1604

1606

/*

1605

/*

1607

* @p is alive and it's gonna stop, continue or exit, so

1606

* @p is alive and it's gonna stop, continue or exit, so

1608

* there always is something to wait for.

1607

* there always is something to wait for.

1609

*/

1608

*/

1610

wo->notask_error = 0;

1609

wo->notask_error = 0;

1611

}

1610

}

1612

1611

1613

/*

1612

/*

1614

* Wait for stopped. Depending on @ptrace, different stopped state

1613

* Wait for stopped. Depending on @ptrace, different stopped state

1615

* is used and the two don't interact with each other.

1614

* is used and the two don't interact with each other.

1616

*/

1615

*/

1617

ret = wait_task_stopped(wo, ptrace, p);

1616

ret = wait_task_stopped(wo, ptrace, p);

1618

if (ret)

1617

if (ret)

1619

return ret;

1618

return ret;

1620

1619

1621

/*

1620

/*

1622

* Wait for continued. There's only one continued state and the

1621

* Wait for continued. There's only one continued state and the

1623

* ptracer can consume it which can confuse the real parent. Don't

1622

* ptracer can consume it which can confuse the real parent. Don't

1624

* use WCONTINUED from ptracer. You don't need or want it.

1623

* use WCONTINUED from ptracer. You don't need or want it.

1625

*/

1624

*/

1626

return wait_task_continued(wo, p);

1625

return wait_task_continued(wo, p);

1627

}

1626

}

1628

1627

1629

/*

1628

/*

1630

* Do the work of do_wait() for one thread in the group, @tsk.

1629

* Do the work of do_wait() for one thread in the group, @tsk.

1631

*

1630

*

1632

* -ECHILD should be in ->notask_error before the first call.

1631

* -ECHILD should be in ->notask_error before the first call.

1633

* Returns nonzero for a final return, when we have unlocked tasklist_lock.

1632

* Returns nonzero for a final return, when we have unlocked tasklist_lock.

1634

* Returns zero if the search for a child should continue; then

1633

* Returns zero if the search for a child should continue; then

1635

* ->notask_error is 0 if there were any eligible children,

1634

* ->notask_error is 0 if there were any eligible children,

1636

* or another error from security_task_wait(), or still -ECHILD.

1635

* or another error from security_task_wait(), or still -ECHILD.

1637

*/

1636

*/

1638

static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)

1637

static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)

1639

{

1638

{

1640

struct task_struct *p;

1639

struct task_struct *p;

1641

1640

1642

list_for_each_entry(p, &tsk->children, sibling) {

1641

list_for_each_entry(p, &tsk->children, sibling) {

1643

int ret = wait_consider_task(wo, 0, p);

1642

int ret = wait_consider_task(wo, 0, p);

1644

if (ret)

1643

if (ret)

1645

return ret;

1644

return ret;

1646

}

1645

}

1647

1646

1648

return 0;

1647

return 0;

1649

}

1648

}

1650

1649

1651

static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)

1650

static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)

1652

{

1651

{

1653

struct task_struct *p;

1652

struct task_struct *p;

1654

1653

1655

list_for_each_entry(p, &tsk->ptraced, ptrace_entry) {

1654

list_for_each_entry(p, &tsk->ptraced, ptrace_entry) {

1656

int ret = wait_consider_task(wo, 1, p);

1655

int ret = wait_consider_task(wo, 1, p);

1657

if (ret)

1656

if (ret)

1658

return ret;

1657

return ret;

1659

}

1658

}

1660

1659

1661

return 0;

1660

return 0;

1662

}

1661

}

1663

1662

1664

/*
 * Wake function installed by do_wait() on its ->child_wait entry.
 *
 * @key is the task passed by the waker (see __wake_up_parent()).  The
 * wake-up is filtered out (0 is returned) when that task does not pass
 * eligible_pid(), or when __WNOTHREAD is set and the waiting task stored
 * in wait->private is not the task's ->parent.  Otherwise the wake-up is
 * forwarded to default_wake_function().
 */
static int child_wait_callback(wait_queue_t *wait, unsigned mode,
				int sync, void *key)
{
	struct task_struct *child = key;
	struct wait_opts *opts;

	opts = container_of(wait, struct wait_opts, child_wait);

	if (!eligible_pid(opts, child) ||
	    ((opts->wo_flags & __WNOTHREAD) && wait->private != child->parent))
		return 0;

	return default_wake_function(wait, mode, sync, key);
}

1679

1678

1680

void __wake_up_parent(struct task_struct *p, struct task_struct *parent)

1679

void __wake_up_parent(struct task_struct *p, struct task_struct *parent)

1681

{

1680

{

1682

__wake_up_sync_key(&parent->signal->wait_chldexit,

1681

__wake_up_sync_key(&parent->signal->wait_chldexit,

1683

TASK_INTERRUPTIBLE, 1, p);

1682

TASK_INTERRUPTIBLE, 1, p);

1684

}

1683

}

1685

1684

1686

static long do_wait(struct wait_opts *wo)

1685

static long do_wait(struct wait_opts *wo)

1687

{

1686

{

1688

struct task_struct *tsk;

1687

struct task_struct *tsk;

1689

int retval;

1688

int retval;

1690

1689

1691

trace_sched_process_wait(wo->wo_pid);

1690

trace_sched_process_wait(wo->wo_pid);

1692

1691

1693

init_waitqueue_func_entry(&wo->child_wait, child_wait_callback);

1692

init_waitqueue_func_entry(&wo->child_wait, child_wait_callback);

1694

wo->child_wait.private = current;

1693

wo->child_wait.private = current;

1695

add_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);

1694

add_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);

1696

repeat:

1695

repeat:

1697

/*

1696

/*

1698

* If there is nothing that can match our critiera just get out.

1697

* If there is nothing that can match our critiera just get out.

1699

* We will clear ->notask_error to zero if we see any child that

1698

* We will clear ->notask_error to zero if we see any child that

1700

* might later match our criteria, even if we are not able to reap

1699

* might later match our criteria, even if we are not able to reap

1701

* it yet.

1700

* it yet.

1702

*/

1701

*/

1703

wo->notask_error = -ECHILD;

1702

wo->notask_error = -ECHILD;

1704

if ((wo->wo_type < PIDTYPE_MAX) &&

1703

if ((wo->wo_type < PIDTYPE_MAX) &&

1705

(!wo->wo_pid || hlist_empty(&wo->wo_pid->tasks[wo->wo_type])))

1704

(!wo->wo_pid || hlist_empty(&wo->wo_pid->tasks[wo->wo_type])))

1706

goto notask;

1705

goto notask;

1707

1706

1708

set_current_state(TASK_INTERRUPTIBLE);

1707

set_current_state(TASK_INTERRUPTIBLE);

1709

read_lock(&tasklist_lock);

1708

read_lock(&tasklist_lock);

1710

tsk = current;

1709

tsk = current;

1711

do {

1710

do {

1712

retval = do_wait_thread(wo, tsk);

1711

retval = do_wait_thread(wo, tsk);

1713

if (retval)

1712

if (retval)

1714

goto end;

1713

goto end;

1715

1714

1716

retval = ptrace_do_wait(wo, tsk);

1715

retval = ptrace_do_wait(wo, tsk);

1717

if (retval)

1716

if (retval)

1718

goto end;

1717

goto end;

1719

1718

1720

if (wo->wo_flags & __WNOTHREAD)

1719

if (wo->wo_flags & __WNOTHREAD)

1721

break;

1720

break;

1722

} while_each_thread(current, tsk);

1721

} while_each_thread(current, tsk);

1723

read_unlock(&tasklist_lock);

1722

read_unlock(&tasklist_lock);

1724

1723

1725

notask:

1724

notask:

1726

retval = wo->notask_error;

1725

retval = wo->notask_error;

1727

if (!retval && !(wo->wo_flags & WNOHANG)) {

1726

if (!retval && !(wo->wo_flags & WNOHANG)) {

1728

retval = -ERESTARTSYS;

1727

retval = -ERESTARTSYS;

1729

if (!signal_pending(current)) {

1728

if (!signal_pending(current)) {

1730

schedule();

1729

schedule();

1731

goto repeat;

1730

goto repeat;

1732

}

1731

}

1733

}

1732

}

1734

end:

1733

end:

1735

__set_current_state(TASK_RUNNING);

1734

__set_current_state(TASK_RUNNING);

1736

remove_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);

1735

remove_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);

1737

return retval;

1736

return retval;

1738

}

1737

}

1739

1738

1740

/*
 * waitid(2): wait for a state change selected by @which / @upid.
 *
 * @options must contain at least one of WEXITED, WSTOPPED, WCONTINUED and
 * nothing outside WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED; @which
 * must be P_ALL, P_PID or P_PGID, and @upid must be positive for the
 * latter two.  Anything else yields -EINVAL.
 *
 * A positive result from do_wait() is reported to the caller as 0.  When
 * do_wait() returns 0 and @infop is non-NULL, the siginfo fields are
 * zeroed in user space; a put_user() failure becomes the return value.
 */
SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
		infop, int, options, struct rusage __user *, ru)
{
	struct wait_opts wo;
	struct pid *pid = NULL;
	enum pid_type type;
	long ret;

	if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED))
		return -EINVAL;
	if (!(options & (WEXITED|WSTOPPED|WCONTINUED)))
		return -EINVAL;

	switch (which) {
	case P_ALL:
		type = PIDTYPE_MAX;
		break;
	case P_PID:
	case P_PGID:
		/* Both selectors need a strictly positive id. */
		if (upid <= 0)
			return -EINVAL;
		type = (which == P_PID) ? PIDTYPE_PID : PIDTYPE_PGID;
		break;
	default:
		return -EINVAL;
	}

	if (type < PIDTYPE_MAX)
		pid = find_get_pid(upid);

	wo.wo_type	= type;
	wo.wo_pid	= pid;
	wo.wo_flags	= options;
	wo.wo_info	= infop;
	wo.wo_stat	= NULL;
	wo.wo_rusage	= ru;
	ret = do_wait(&wo);

	if (ret > 0) {
		ret = 0;
	} else if (!ret && infop) {
		/*
		 * For a WNOHANG return, clear out all the fields
		 * we would set so the user can easily tell the
		 * difference.  Stop at the first failing put_user().
		 */
		ret = put_user(0, &infop->si_signo);
		if (!ret)
			ret = put_user(0, &infop->si_errno);
		if (!ret)
			ret = put_user(0, &infop->si_code);
		if (!ret)
			ret = put_user(0, &infop->si_pid);
		if (!ret)
			ret = put_user(0, &infop->si_uid);
		if (!ret)
			ret = put_user(0, &infop->si_status);
	}

	put_pid(pid);

	/* avoid REGPARM breakage on x86: */
	asmlinkage_protect(5, ret, which, upid, infop, options, ru);
	return ret;
}

1810

1809

1811

/*
 * wait4(2): wait for a child selected by @upid.
 *
 *   upid  >  0  the task with that pid
 *   upid ==  0  any task in the caller's own process group
 *   upid == -1  any task (PIDTYPE_MAX, no pid lookup)
 *   upid  < -1  any task in process group -upid
 *
 * @options may only contain WNOHANG, WUNTRACED, WCONTINUED, __WNOTHREAD,
 * __WCLONE and __WALL, otherwise -EINVAL is returned.  WEXITED is always
 * added before calling do_wait(), whose result is returned unchanged.
 */
SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
		int, options, struct rusage __user *, ru)
{
	struct wait_opts wo;
	struct pid *pid = NULL;
	enum pid_type type;
	long ret;

	if (options & ~(WNOHANG|WUNTRACED|WCONTINUED|
			__WNOTHREAD|__WCLONE|__WALL))
		return -EINVAL;

	if (upid > 0) {
		type = PIDTYPE_PID;
		pid = find_get_pid(upid);
	} else if (upid == 0) {
		type = PIDTYPE_PGID;
		pid = get_task_pid(current, PIDTYPE_PGID);
	} else if (upid == -1) {
		type = PIDTYPE_MAX;
	} else /* upid < -1 */ {
		type = PIDTYPE_PGID;
		pid = find_get_pid(-upid);
	}

	wo.wo_type	= type;
	wo.wo_pid	= pid;
	wo.wo_flags	= options | WEXITED;
	wo.wo_info	= NULL;
	wo.wo_stat	= stat_addr;
	wo.wo_rusage	= ru;
	ret = do_wait(&wo);
	put_pid(pid);

	/* avoid REGPARM breakage on x86: */
	asmlinkage_protect(4, ret, upid, stat_addr, options, ru);
	return ret;
}

1849

1848

1850

#ifdef __ARCH_WANT_SYS_WAITPID

/*
 * sys_waitpid() is kept only for compatibility: it forwards to
 * sys_wait4() with a NULL rusage pointer.  waitpid() should be
 * implemented by calling sys_wait4() from libc.a.
 */
SYSCALL_DEFINE3(waitpid, pid_t, pid, int __user *, stat_addr, int, options)
{
	return sys_wait4(pid, stat_addr, options, NULL);
}

#endif

1862

1861

GITLAB

ptrace: wait_consider_task: s/same_thread_group/ptrace_reparented/

 /*
  *  linux/kernel/exit.c
  *
  *  Copyright (C) 1991, 1992  Linus Torvalds
  */
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/capability.h>
 #include <linux/completion.h>
 #include <linux/personality.h>
 #include <linux/tty.h>
 #include <linux/iocontext.h>
 #include <linux/key.h>
 #include <linux/security.h>
 #include <linux/cpu.h>
 #include <linux/acct.h>
 #include <linux/tsacct_kern.h>
 #include <linux/file.h>
 #include <linux/fdtable.h>
 #include <linux/binfmts.h>
 #include <linux/nsproxy.h>
 #include <linux/pid_namespace.h>
 #include <linux/ptrace.h>
 #include <linux/profile.h>
 #include <linux/mount.h>
 #include <linux/proc_fs.h>
 #include <linux/kthread.h>
 #include <linux/mempolicy.h>
 #include <linux/taskstats_kern.h>
 #include <linux/delayacct.h>
 #include <linux/freezer.h>
 #include <linux/cgroup.h>
 #include <linux/syscalls.h>
 #include <linux/signal.h>
 #include <linux/posix-timers.h>
 #include <linux/cn_proc.h>
 #include <linux/mutex.h>
 #include <linux/futex.h>
 #include <linux/pipe_fs_i.h>
 #include <linux/audit.h> /* for audit_free() */
 #include <linux/resource.h>
 #include <linux/blkdev.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/tracehook.h>
 #include <linux/fs_struct.h>
 #include <linux/init_task.h>
 #include <linux/perf_event.h>
 #include <trace/events/sched.h>
 #include <linux/hw_breakpoint.h>
 #include <linux/oom.h>
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/pgtable.h>
 #include <asm/mmu_context.h>
 static void exit_mm(struct task_struct * tsk);
 /*
  * Unlink @p from the pid links and task lists it is on.
  *
  * Every call decrements nr_threads, detaches the PIDTYPE_PID link and
  * removes @p from its thread_group list.  When @group_dead is true the
  * PGID and SID links are detached as well, @p is taken off the tasks
  * list, its ->sibling entry is unlinked and this CPU's process_counts
  * is decremented.
  */
 static void __unhash_process(struct task_struct *p, bool group_dead)
 {
 	nr_threads--;
 	detach_pid(p, PIDTYPE_PID);
 	if (group_dead) {
 		detach_pid(p, PIDTYPE_PGID);
 		detach_pid(p, PIDTYPE_SID);
 		list_del_rcu(&p->tasks);
 		list_del_init(&p->sibling);
 		__this_cpu_dec(process_counts);
 	}
 	list_del_rcu(&p->thread_group);
 }
 /*
  * Tear down the signal-related state of the exiting task @tsk and
  * unhash it.
  *
  * This function expects the tasklist_lock write-locked.
  *
  * @tsk counts as "group dead" when it is the thread group leader.  In
  * that case the group-wide CPU timers are cleaned up and the tty
  * pointer is taken out of the signal_struct so that its reference can
  * be dropped after ->siglock has been released.  Otherwise the
  * thread's resource counters are folded into the shared signal_struct.
  */
 static void __exit_signal(struct task_struct *tsk)
 {
 	struct signal_struct *sig = tsk->signal;
 	bool group_dead = thread_group_leader(tsk);
 	struct sighand_struct *sighand;
 	/* Only assigned and used on the group_dead paths below. */
 	struct tty_struct *uninitialized_var(tty);
 	sighand = rcu_dereference_check(tsk->sighand,
 					rcu_read_lock_held() ||
 					lockdep_tasklist_lock_is_held());
 	spin_lock(&sighand->siglock);
 	posix_cpu_timers_exit(tsk);
 	if (group_dead) {
 		posix_cpu_timers_exit_group(tsk);
 		tty = sig->tty;
 		sig->tty = NULL;
 	} else {
 		/*
 		 * This can only happen if the caller is de_thread().
 		 * FIXME: this is the temporary hack, we should teach
 		 * posix-cpu-timers to handle this case correctly.
 		 */
 		if (unlikely(has_group_leader_pid(tsk)))
 			posix_cpu_timers_exit_group(tsk);
 		/*
 		 * If there is any task waiting for the group exit
 		 * then notify it:
 		 */
 		if (sig->notify_count > 0 && !--sig->notify_count)
 			wake_up_process(sig->group_exit_task);
 		if (tsk == sig->curr_target)
 			sig->curr_target = next_thread(tsk);
 		/*
 		 * Accumulate here the counters for all threads but the
 		 * group leader as they die, so they can be added into
 		 * the process-wide totals when those are taken.
 		 * The group leader stays around as a zombie as long
 		 * as there are other threads.  When it gets reaped,
 		 * the exit.c code will add its counts into these totals.
 		 * We won't ever get here for the group leader, since it
 		 * will have been the last reference on the signal_struct.
 		 */
 		sig->utime = cputime_add(sig->utime, tsk->utime);
 		sig->stime = cputime_add(sig->stime, tsk->stime);
 		sig->gtime = cputime_add(sig->gtime, tsk->gtime);
 		sig->min_flt += tsk->min_flt;
 		sig->maj_flt += tsk->maj_flt;
 		sig->nvcsw += tsk->nvcsw;
 		sig->nivcsw += tsk->nivcsw;
 		sig->inblock += task_io_get_inblock(tsk);
 		sig->oublock += task_io_get_oublock(tsk);
 		task_io_accounting_add(&sig->ioac, &tsk->ioac);
 		sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
 	}
 	sig->nr_threads--;
 	__unhash_process(tsk, group_dead);
 	/*
 	 * Do this under ->siglock, we can race with another thread
 	 * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals.
 	 */
 	flush_sigqueue(&tsk->pending);
 	tsk->sighand = NULL;
 	spin_unlock(&sighand->siglock);
 	__cleanup_sighand(sighand);
 	clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
 	if (group_dead) {
 		/* Done after ->siglock has been dropped. */
 		flush_sigqueue(&sig->shared_pending);
 		tty_kref_put(tty);
 	}
 }
 /*
  * RCU callback (queued by release_task() via call_rcu()): runs the
  * deferred perf and trace hooks for the task that embeds @rhp and then
  * drops a task_struct reference.
  */
 static void delayed_put_task_struct(struct rcu_head *rhp)
 {
 	struct task_struct *task;
 	task = container_of(rhp, struct task_struct, rcu);
 	perf_event_delayed_put(task);
 	trace_sched_process_free(task);
 	put_task_struct(task);
 }
 /*
  * Release dead task @p: drop its per-user process count, call
  * proc_flush_task(), unlink it under tasklist_lock and queue the final
  * task_struct put through call_rcu().
  *
  * If @p was the last non-leader thread of its group and the leader is
  * a zombie, the leader's parent is notified.  When do_notify_parent()
  * returns nonzero the leader is marked EXIT_DEAD and is released as
  * well by looping back to "repeat" with p = leader.
  */
 void release_task(struct task_struct * p)
 {
 	struct task_struct *leader;
 	int zap_leader;
 repeat:
 	/* don't need to get the RCU readlock here - the process is dead and
 	 * can't be modifying its own credentials. But shut RCU-lockdep up */
 	rcu_read_lock();
 	atomic_dec(&__task_cred(p)->user->processes);
 	rcu_read_unlock();
 	proc_flush_task(p);
 	write_lock_irq(&tasklist_lock);
 	ptrace_release_task(p);
 	__exit_signal(p);
 	/*
 	 * If we are the last non-leader member of the thread
 	 * group, and the leader is zombie, then notify the
 	 * group leader's parent process. (if it wants notification.)
 	 */
 	zap_leader = 0;
 	leader = p->group_leader;
 	if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) {
 		/*
 		 * If we were the last child thread and the leader has
 		 * exited already, and the leader's parent ignores SIGCHLD,
 		 * then we are the one who should release the leader.
 		 */
 		zap_leader = do_notify_parent(leader, leader->exit_signal);
 		if (zap_leader)
 			leader->exit_state = EXIT_DEAD;
 	}
 	write_unlock_irq(&tasklist_lock);
 	release_thread(p);
 	call_rcu(&p->rcu, delayed_put_task_struct);
 	/* Go around again for the leader if it has to be released too. */
 	p = leader;
 	if (unlikely(zap_leader))
 		goto repeat;
 }
 /*
  * Look up the session that belongs to @pgrp.
  *
  * This checks not only the pgrp, but falls back on the pid if no
  * satisfactory pgrp is found (gdb doesn't work correctly without
  * this).  Returns NULL when neither lookup finds a task.
  *
  * The caller must hold rcu lock or the tasklist lock.
  */
 struct pid *session_of_pgrp(struct pid *pgrp)
 {
 	struct task_struct *task = pid_task(pgrp, PIDTYPE_PGID);
 	if (!task)
 		task = pid_task(pgrp, PIDTYPE_PID);
 	if (!task)
 		return NULL;
 	return task_session(task);
 }
 /*
  * Determine if a process group is "orphaned", according to the POSIX
  * definition in 2.2.2.52.  Orphaned process groups are not to be affected
  * by terminal-generated stop signals.  Newly orphaned process groups are
  * to receive a SIGHUP and a SIGCONT.
  *
  * Returns 0 as soon as a member of @pgrp is found whose real parent is
  * in a different process group but in the same session, 1 otherwise.
  * Skipped while scanning: @ignored_task, tasks with ->exit_state set and
  * an empty thread group, and tasks whose real parent is the global init.
  *
  * "I ask you, have you ever known what it is to be an orphan?"
  */
 static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignored_task)
 {
 	struct task_struct *p;
 	struct task_struct *parent;
 	do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
 		if (p == ignored_task)
 			continue;
 		if (p->exit_state && thread_group_empty(p))
 			continue;
 		parent = p->real_parent;
 		if (is_global_init(parent))
 			continue;
 		/* A link to the outside of the group keeps it non-orphaned. */
 		if (task_pgrp(parent) != pgrp &&
 		    task_session(parent) == task_session(p))
 			return 0;
 	} while_each_pid_task(pgrp, PIDTYPE_PGID, p);
 	return 1;
 }
 /*
  * Report whether the calling task's process group is orphaned.  The
  * check runs with tasklist_lock held for reading.
  */
 int is_current_pgrp_orphaned(void)
 {
 	int orphaned;
 	read_lock(&tasklist_lock);
 	orphaned = will_become_orphaned_pgrp(task_pgrp(current), NULL);
 	read_unlock(&tasklist_lock);
 	return orphaned;
 }
 /* Return 1 if at least one task in @pgrp is stopped, 0 otherwise. */
 static int has_stopped_jobs(struct pid *pgrp)
 {
 	struct task_struct *p;
 	do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
 		if (task_is_stopped(p))
 			return 1;
 	} while_each_pid_task(pgrp, PIDTYPE_PGID, p);
 	return 0;
 }
 /*
  * Check to see if any process groups have become orphaned as
  * a result of our exiting, and if they have any stopped jobs,
  * send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
  *
  * @parent == NULL selects the exit case, where @tsk itself is ignored
  * during the orphan check and its real parent is used.  A non-NULL
  * @parent selects the reparent case, where no task is ignored.
  */
 static void
 kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)
 {
 	struct pid *pgrp = task_pgrp(tsk);
 	struct task_struct *ignored_task;
 	if (parent) {
 		/* reparent: our child is in a different pgrp than
 		 * we are, and it was the only connection outside.
 		 */
 		ignored_task = NULL;
 	} else {
 		/* exit: our father is in a different pgrp than
 		 * we are and we were the only connection outside.
 		 */
 		ignored_task = tsk;
 		parent = tsk->real_parent;
 	}
 	if (task_pgrp(parent) == pgrp)
 		return;
 	if (task_session(parent) != task_session(tsk))
 		return;
 	if (!will_become_orphaned_pgrp(pgrp, ignored_task))
 		return;
 	if (!has_stopped_jobs(pgrp))
 		return;
 	__kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp);
 	__kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp);
 }
 /**
  * reparent_to_kthreadd - Reparent the calling kernel thread to kthreadd
  *
  * If a kernel thread is launched as a result of a system call, or if
  * it ever exits, it should generally reparent itself to kthreadd so it
  * isn't in the way of other processes and is correctly cleaned up on exit.
  *
  * The various task state such as scheduling policy and priority may have
  * been inherited from a user process, so we reset them to sane values here.
  *
  * The whole update, including the credential switch, runs with
  * tasklist_lock write-locked.
  *
  * NOTE that reparent_to_kthreadd() gives the caller full capabilities.
  */
 static void reparent_to_kthreadd(void)
 {
 	write_lock_irq(&tasklist_lock);
 	ptrace_unlink(current);
 	/* Reparent to kthreadd */
 	current->real_parent = current->parent = kthreadd_task;
 	list_move_tail(&current->sibling, &current->real_parent->children);
 	/* Set the exit signal to SIGCHLD so we signal the new parent on exit */
 	current->exit_signal = SIGCHLD;
 	/* Drop any negative nice value back to 0. */
 	if (task_nice(current) < 0)
 		set_user_nice(current, 0);
 	/* cpus_allowed? */
 	/* rt_priority? */
 	/* signals? */
 	/* Take over the resource limits of init_task. */
 	memcpy(current->signal->rlim, init_task.signal->rlim,
 	       sizeof(current->signal->rlim));
 	/* Take a reference on init_cred before installing it. */
 	atomic_inc(&init_cred.usage);
 	commit_creds(&init_cred);
 	write_unlock_irq(&tasklist_lock);
 }
 void __set_special_pids(struct pid *pid)
 {
 	struct task_struct *curr = current->group_leader;
 	if (task_session(curr) != pid)
 		change_pid(curr, PIDTYPE_SID, pid);
 	if (task_pgrp(curr) != pid)
 		change_pid(curr, PIDTYPE_PGID, pid);
 }
 /*
  * Locked wrapper around __set_special_pids(): performs the update with
  * tasklist_lock write-locked and interrupts disabled.
  */
 static void set_special_pids(struct pid *pid)
 {
 	write_lock_irq(&tasklist_lock);
 	__set_special_pids(pid);
 	write_unlock_irq(&tasklist_lock);
 }
 /*
  * Let kernel threads use this to say that they allow a certain signal.
  * Must not be used if kthread was cloned with CLONE_SIGHAND.
  *
  * Returns 0 on success, or -EINVAL when @sig is below 1 or is not a
  * valid signal number.
  */
 int allow_signal(int sig)
 {
 	if (sig < 1 || !valid_signal(sig))
 		return -EINVAL;
 	spin_lock_irq(&current->sighand->siglock);
 	/* This is only needed for daemonize()'ed kthreads */
 	sigdelset(&current->blocked, sig);
 	/*
 	 * Kernel threads handle their own signals. Let the signal code
 	 * know it'll be handled, so that they don't get converted to
 	 * SIGKILL or just silently dropped.
 	 */
 	current->sighand->action[sig - 1].sa.sa_handler = (void __user *)2;
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
 	return 0;
 }
 EXPORT_SYMBOL(allow_signal);
 /*
  * Counterpart of allow_signal(): set the calling task's handler for
  * @sig to SIG_IGN.
  *
  * Returns 0 on success, or -EINVAL when @sig is below 1 or is not a
  * valid signal number.
  */
 int disallow_signal(int sig)
 {
 	if (sig < 1 || !valid_signal(sig))
 		return -EINVAL;
 	spin_lock_irq(&current->sighand->siglock);
 	current->sighand->action[sig - 1].sa.sa_handler = SIG_IGN;
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
 	return 0;
 }
 EXPORT_SYMBOL(disallow_signal);
 /*
  *	Put all the gunge required to become a kernel thread without
  *	attached user resources in one place where it belongs.
  *
  *	@name and the following arguments are a printf-style format that
  *	becomes the task's ->comm (truncated to sizeof(current->comm)).
  */
 void daemonize(const char *name, ...)
 {
 	va_list args;
 	sigset_t blocked;
 	va_start(args, name);
 	vsnprintf(current->comm, sizeof(current->comm), name, args);
 	va_end(args);
 	/*
 	 * If we were started as result of loading a module, close all of the
 	 * user space pages.  We don't need them, and if we didn't close them
 	 * they would be locked into memory.
 	 */
 	exit_mm(current);
 	/*
 	 * We don't want to have TIF_FREEZE set if the system-wide hibernation
 	 * or suspend transition begins right now.
 	 */
 	current->flags |= (PF_NOFREEZE | PF_KTHREAD);
 	/* Switch to the initial namespaces unless we already use them. */
 	if (current->nsproxy != &init_nsproxy) {
 		get_nsproxy(&init_nsproxy);
 		switch_task_namespaces(current, &init_nsproxy);
 	}
 	/* Use init_struct_pid as both session and process group. */
 	set_special_pids(&init_struct_pid);
 	proc_clear_tty(current);
 	/* Block and flush all signals */
 	sigfillset(&blocked);
 	sigprocmask(SIG_BLOCK, &blocked, NULL);
 	flush_signals(current);
 	/* Become as one with the init task */
 	daemonize_fs_struct();
 	/* Drop our own files and share init_task's table, taking a reference. */
 	exit_files(current);
 	current->files = init_task.files;
 	atomic_inc(&current->files->count);
 	reparent_to_kthreadd();
 }
 EXPORT_SYMBOL(daemonize);
 /*
  * Close every file that is still installed in @files.  The open_fds
  * bitmap is walked one word at a time; each set bit's slot in the fd
  * array is cleared with xchg() and the file found there is closed.
  */
 static void close_files(struct files_struct * files)
 {
 	int fd, word = 0;
 	struct fdtable *fdt;
 	/*
 	 * It is safe to dereference the fd table without RCU or
 	 * ->file_lock because this is the last reference to the
 	 * files structure.  But use RCU to shut RCU-lockdep up.
 	 */
 	rcu_read_lock();
 	fdt = files_fdtable(files);
 	rcu_read_unlock();
 	for (;;) {
 		unsigned long bits;
 		/* First descriptor number covered by this bitmap word. */
 		fd = word * __NFDBITS;
 		if (fd >= fdt->max_fds)
 			break;
 		bits = fdt->open_fds->fds_bits[word++];
 		/* Stop early once no set bits remain in this word. */
 		for (; bits; bits >>= 1, fd++) {
 			struct file *file;
 			if (!(bits & 1))
 				continue;
 			file = xchg(&fdt->fd[fd], NULL);
 			if (file) {
 				filp_close(file, files);
 				cond_resched();
 			}
 		}
 	}
 }
 struct files_struct *get_files_struct(struct task_struct *task)
 {
 	struct files_struct *files;
 	task_lock(task);
 	files = task->files;
 	if (files)
 		atomic_inc(&files->count);
 	task_unlock(task);
 	return files;
 }
 /*
  * Drop one reference on @files.  When that was the last one, close all
  * files still open in it and free the structure and its fd table.
  */
 void put_files_struct(struct files_struct *files)
 {
 	struct fdtable *table;
 	if (!atomic_dec_and_test(&files->count))
 		return;
 	close_files(files);
 	/*
 	 * Free the fd and fdset arrays if we expanded them.
 	 * If the fdtable was embedded, pass files for freeing
 	 * at the end of the RCU grace period. Otherwise,
 	 * you can free files immediately.
 	 */
 	rcu_read_lock();
 	table = files_fdtable(files);
 	if (table != &files->fdtab)
 		kmem_cache_free(files_cachep, files);
 	free_fdtable(table);
 	rcu_read_unlock();
 }
 /*
  * Install @files as the calling task's files_struct and drop the
  * reference on the one it replaces.  The swap is done under task_lock().
  */
 void reset_files_struct(struct files_struct *files)
 {
 	struct task_struct *self = current;
 	struct files_struct *prev = self->files;
 	task_lock(self);
 	self->files = files;
 	task_unlock(self);
 	put_files_struct(prev);
 }
 /*
  * Detach @tsk from its files_struct, if it has one, and drop the
  * reference the task was holding.  ->files is cleared under task_lock().
  */
 void exit_files(struct task_struct *tsk)
 {
 	struct files_struct *fs = tsk->files;
 	if (!fs)
 		return;
 	task_lock(tsk);
 	tsk->files = NULL;
 	task_unlock(tsk);
 	put_files_struct(fs);
 }
 #ifdef CONFIG_MM_OWNER
 /*
  * Task p is exiting and may own mm; decide whether a new owner has to
  * be found.
  *
  * A new owner is needed only if there are other users of the mm
  * (mm_users > 1) and the owner is p itself.  Returns 1 in that case
  * and 0 otherwise.
  */
 static inline int
 mm_need_new_owner(struct mm_struct *mm, struct task_struct *p)
 {
 	return atomic_read(&mm->mm_users) > 1 && mm->owner == p;
 }
 /*
  * Hand ownership of @mm to another task when the current task owns it
  * and is going away.
  *
  * Candidates are searched in this order: the current task's children,
  * its siblings, then every thread in the system.  The first task whose
  * ->mm equals @mm becomes the owner.  If none is found, mm->owner is
  * set to NULL.  Nothing is done when mm_need_new_owner() says no new
  * owner is required.
  */
 void mm_update_next_owner(struct mm_struct *mm)
 {
 	struct task_struct *c, *g, *p = current;
 retry:
 	if (!mm_need_new_owner(mm, p))
 		return;
 	read_lock(&tasklist_lock);
 	/*
 	 * Search in the children
 	 */
 	list_for_each_entry(c, &p->children, sibling) {
 		if (c->mm == mm)
 			goto assign_new_owner;
 	}
 	/*
 	 * Search in the siblings
 	 */
 	list_for_each_entry(c, &p->real_parent->children, sibling) {
 		if (c->mm == mm)
 			goto assign_new_owner;
 	}
 	/*
 	 * Search through everything else. We should not get
 	 * here often
 	 */
 	do_each_thread(g, c) {
 		if (c->mm == mm)
 			goto assign_new_owner;
 	} while_each_thread(g, c);
 	read_unlock(&tasklist_lock);
 	/*
 	 * We found no owner yet mm_users > 1: this implies that we are
 	 * most likely racing with swapoff (try_to_unuse()) or /proc or
 	 * ptrace or page migration (get_task_mm()).  Mark owner as NULL.
 	 */
 	mm->owner = NULL;
 	return;
 assign_new_owner:
 	BUG_ON(c == p);
 	/* Pin c so it stays valid after tasklist_lock is dropped. */
 	get_task_struct(c);
 	/*
 	 * The task_lock protects c->mm from changing.
 	 * We always want mm->owner->mm == mm
 	 */
 	task_lock(c);
 	/*
 	 * Delay read_unlock() till we have the task_lock()
 	 * to ensure that c does not slip away underneath us
 	 */
 	read_unlock(&tasklist_lock);
 	/* c changed its mm before we got task_lock(): start over. */
 	if (c->mm != mm) {
 		task_unlock(c);
 		put_task_struct(c);
 		goto retry;
 	}
 	mm->owner = c;
 	task_unlock(c);
 	put_task_struct(c);
 }
 #endif /* CONFIG_MM_OWNER */
 /*
  * Turn us into a lazy TLB process if we
  * aren't already..
  *
  * Detaches @tsk from its address space: mm_release() is called first,
  * then, if the task has an mm, tsk->mm is cleared under mmap_sem and
  * task_lock() and the task's reference on the mm is dropped with
  * mmput().  If a core dump is pending on the mm, the task first
  * registers itself with the dumper and sleeps until the dumper clears
  * self.task.  Returns right after mm_release() when the task has no mm.
  */
 static void exit_mm(struct task_struct * tsk)
 {
 	struct mm_struct *mm = tsk->mm;
 	struct core_state *core_state;
 	mm_release(tsk, mm);
 	if (!mm)
 		return;
 	/*
 	 * Serialize with any possible pending coredump.
 	 * We must hold mmap_sem around checking core_state
 	 * and clearing tsk->mm.  The core-inducing thread
 	 * will increment ->nr_threads for each thread in the
 	 * group with ->mm != NULL.
 	 */
 	down_read(&mm->mmap_sem);
 	core_state = mm->core_state;
 	if (core_state) {
 		struct core_thread self;
 		up_read(&mm->mmap_sem);
 		/* Push ourselves onto the dumper's list of threads. */
 		self.task = tsk;
 		self.next = xchg(&core_state->dumper.next, &self);
 		/*
 		 * Implies mb(), the result of xchg() must be visible
 		 * to core_state->dumper.
 		 */
 		if (atomic_dec_and_test(&core_state->nr_threads))
 			complete(&core_state->startup);
 		/* Sleep until self.task has been cleared. */
 		for (;;) {
 			set_task_state(tsk, TASK_UNINTERRUPTIBLE);
 			if (!self.task) /* see coredump_finish() */
 				break;
 			schedule();
 		}
 		__set_task_state(tsk, TASK_RUNNING);
 		down_read(&mm->mmap_sem);
 	}
 	atomic_inc(&mm->mm_count);
 	BUG_ON(mm != tsk->active_mm);
 	/* more a memory barrier than a real lock */
 	task_lock(tsk);
 	tsk->mm = NULL;
 	up_read(&mm->mmap_sem);
 	enter_lazy_tlb(mm, current);
 	/* We don't want this task to be frozen prematurely */
 	clear_freeze_flag(tsk);
 	if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
 		atomic_dec(&mm->oom_disable_count);
 	task_unlock(tsk);
 	mm_update_next_owner(mm);
 	mmput(mm);
 }
 /*
  * When we die, we re-parent all our children.
  * Try to give them to another thread in our thread
  * group, and if no such member exists, give it to
  * the child reaper process (ie "init") in our pid
  * space.
  *
  * Returns the task that should adopt @father's children.  Entered and
  * left with tasklist_lock write-locked, but the lock is dropped and
  * retaken when @father is the child reaper of its pid namespace and no
  * other live thread can take over.
  */
 static struct task_struct *find_new_reaper(struct task_struct *father)
 	__releases(&tasklist_lock)
 	__acquires(&tasklist_lock)
 {
 	struct pid_namespace *pid_ns = task_active_pid_ns(father);
 	struct task_struct *thread;
 	thread = father;
 	/* Prefer the first other thread in the group that is not exiting. */
 	while_each_thread(father, thread) {
 		if (thread->flags & PF_EXITING)
 			continue;
 		if (unlikely(pid_ns->child_reaper == father))
 			pid_ns->child_reaper = thread;
 		return thread;
 	}
 	if (unlikely(pid_ns->child_reaper == father)) {
 		write_unlock_irq(&tasklist_lock);
 		if (unlikely(pid_ns == &init_pid_ns))
 			panic("Attempted to kill init!");
 		zap_pid_ns_processes(pid_ns);
 		write_lock_irq(&tasklist_lock);
 		/*
 		 * We can not clear ->child_reaper or leave it alone.
 		 * There may be stealth EXIT_DEAD tasks on ->children,
 		 * forget_original_parent() must move them somewhere.
 		 */
 		pid_ns->child_reaper = init_pid_ns.child_reaper;
 	}
 	return pid_ns->child_reaper;
 }
 /*
  * Finish reparenting thread group leader @p, whose ->real_parent has
  * already been switched away from the exiting @father.
  *
  * Any that need to be release_task'd are put on the @dead list.
  */
 static void reparent_leader(struct task_struct *father, struct task_struct *p,
 				struct list_head *dead)
 {
 	list_move_tail(&p->sibling, &p->real_parent->children);
 	/*
 	 * Nothing more to do for an EXIT_DEAD child.  If this is a
 	 * threaded reparent there is no need to notify anyone anything
 	 * has happened.
 	 */
 	if (p->exit_state == EXIT_DEAD ||
 	    same_thread_group(p->real_parent, father))
 		return;
 	/* We don't want people slaying init.  */
 	p->exit_signal = SIGCHLD;
 	/* If it has exited notify the new parent about this child's death. */
 	if (!p->ptrace && p->exit_state == EXIT_ZOMBIE &&
 	    thread_group_empty(p) && do_notify_parent(p, p->exit_signal)) {
 		p->exit_state = EXIT_DEAD;
 		list_move_tail(&p->sibling, dead);
 	}
 	kill_orphaned_pgrp(p, father);
 }
 /*
  * Hand all children of the exiting task @father over to the reaper
  * chosen by find_new_reaper().
  *
  * Under tasklist_lock every thread of every child gets its
  * ->real_parent (and ->parent, if that was @father) redirected and is
  * sent its pdeath_signal if one is set.  Children that reparent_leader()
  * moved to the local dead list are released once the lock is dropped.
  */
 static void forget_original_parent(struct task_struct *father)
 {
 	struct task_struct *p, *n, *reaper;
 	LIST_HEAD(dead_children);
 	write_lock_irq(&tasklist_lock);
 	/*
 	 * Note that exit_ptrace() and find_new_reaper() might
 	 * drop tasklist_lock and reacquire it.
 	 */
 	exit_ptrace(father);
 	reaper = find_new_reaper(father);
 	list_for_each_entry_safe(p, n, &father->children, sibling) {
 		struct task_struct *t = p;
 		/* Walk every thread in p's thread group, starting with p. */
 		do {
 			t->real_parent = reaper;
 			if (t->parent == father) {
 				BUG_ON(t->ptrace);
 				t->parent = t->real_parent;
 			}
 			if (t->pdeath_signal)
 				group_send_sig_info(t->pdeath_signal,
 						    SEND_SIG_NOINFO, t);
 		} while_each_thread(p, t);
 		reparent_leader(father, p, &dead_children);
 	}
 	write_unlock_irq(&tasklist_lock);
 	BUG_ON(!list_empty(&father->children));
 	/* release_task() is called only after tasklist_lock was dropped. */
 	list_for_each_entry_safe(p, n, &dead_children, sibling) {
 		list_del_init(&p->sibling);
 		release_task(p);
 	}
 }
 /*
  * Send signals to all our closest relatives so that they know
  * to properly mourn us..
  *
  * @tsk is the exiting task; a nonzero @group_dead additionally triggers
  * the orphaned process group check for its group leader.  On return
  * tsk->exit_state is EXIT_DEAD (and the task has been released) or
  * EXIT_ZOMBIE.
  */
 static void exit_notify(struct task_struct *tsk, int group_dead)
 {
 	bool autoreap;
 	/*
 	 * This does two things:
 	 *
 	 * A.  Make init inherit all the child processes
 	 * B.  Check to see if any process groups have become orphaned
 	 *	as a result of our exiting, and if they have any stopped
 	 *	jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
 	 */
 	forget_original_parent(tsk);
 	exit_task_namespaces(tsk);
 	write_lock_irq(&tasklist_lock);
 	if (group_dead)
 		kill_orphaned_pgrp(tsk->group_leader, NULL);
 	/* Let father know we died
 	 *
 	 * Thread signals are configurable, but you aren't going to use
 	 * that to send signals to arbitrary processes.
 	 * That stops right now.
 	 *
 	 * If the parent exec id doesn't match the exec id we saved
 	 * when we started then we know the parent has changed security
 	 * domain.
 	 *
 	 * If our self_exec id doesn't match our parent_exec_id then
 	 * we have changed execution domain as these two values started
 	 * the same after a fork.
 	 */
 	if (thread_group_leader(tsk) && tsk->exit_signal != SIGCHLD &&
 	    (tsk->parent_exec_id != tsk->real_parent->self_exec_id ||
 	     tsk->self_exec_id != tsk->parent_exec_id))
 		tsk->exit_signal = SIGCHLD;
 	if (unlikely(tsk->ptrace)) {
 		/*
 		 * Traced: use ->exit_signal only for a single-threaded
 		 * leader that is not ptrace-reparented, SIGCHLD otherwise.
 		 */
 		int sig = thread_group_leader(tsk) &&
 				thread_group_empty(tsk) &&
 				!ptrace_reparented(tsk) ?
 			tsk->exit_signal : SIGCHLD;
 		autoreap = do_notify_parent(tsk, sig);
 	} else if (thread_group_leader(tsk)) {
 		/* A leader notifies only once the rest of the group is gone. */
 		autoreap = thread_group_empty(tsk) &&
 			do_notify_parent(tsk, tsk->exit_signal);
 	} else {
 		/* Untraced sub-threads are always reaped here. */
 		autoreap = true;
 	}
 	tsk->exit_state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE;
 	/* mt-exec, de_thread() is waiting for group leader */
 	if (unlikely(tsk->signal->notify_count < 0))
 		wake_up_process(tsk->signal->group_exit_task);
 	write_unlock_irq(&tasklist_lock);
 	/* If the process is dead, release it - nobody will wait for it */
 	if (autoreap)
 		release_task(tsk);
 }
 #ifdef CONFIG_DEBUG_STACK_USAGE
 /*
  * Keep track of the smallest amount of unused stack reported by
  * stack_not_used() for an exiting task and print a warning each time
  * the current task sets a new low.
  */
 static void check_stack_usage(void)
 {
 	static DEFINE_SPINLOCK(low_water_lock);
 	static int lowest_to_date = THREAD_SIZE;
 	unsigned long unused = stack_not_used(current);
 	/* Unlocked fast path; the value is rechecked under the lock. */
 	if (unused >= lowest_to_date)
 		return;
 	spin_lock(&low_water_lock);
 	if (unused < lowest_to_date) {
 		printk(KERN_WARNING "%s used greatest stack depth: %lu bytes left\n",
 				current->comm, unused);
 		lowest_to_date = unused;
 	}
 	spin_unlock(&low_water_lock);
 }
 #else
 static inline void check_stack_usage(void) {}
 #endif
 /*
  * Terminate the calling task with exit code @code.
  *
  * This function does not return: after tearing down the task's
  * resources and notifying its relatives through exit_notify() it sets
  * the task state to TASK_DEAD and calls schedule().
  *
  * Panics when called from interrupt context or for the idle task
  * (pid 0).  A task that re-enters here while PF_EXITING is already set
  * is parked in TASK_UNINTERRUPTIBLE.
  */
 NORET_TYPE void do_exit(long code)
 {
 	struct task_struct *tsk = current;
 	int group_dead;
 	profile_task_exit(tsk);
 	WARN_ON(atomic_read(&tsk->fs_excl));
 	WARN_ON(blk_needs_flush_plug(tsk));
 	if (unlikely(in_interrupt()))
 		panic("Aiee, killing interrupt handler!");
 	if (unlikely(!tsk->pid))
 		panic("Attempted to kill the idle task!");
 	/*
 	 * If do_exit is called because this process oopsed, it's possible
 	 * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before
 	 * continuing. Amongst other possible reasons, this is to prevent
 	 * mm_release()->clear_child_tid() from writing to a user-controlled
 	 * kernel address.
 	 */
 	set_fs(USER_DS);
 	ptrace_event(PTRACE_EVENT_EXIT, code);
 	validate_creds_for_do_exit(tsk);
 	/*
 	 * We're taking recursive faults here in do_exit. Safest is to just
 	 * leave this task alone and wait for reboot.
 	 */
 	if (unlikely(tsk->flags & PF_EXITING)) {
 		printk(KERN_ALERT
 			"Fixing recursive fault but reboot is needed!\n");
 		/*
 		 * We can do this unlocked here. The futex code uses
 		 * this flag just to verify whether the pi state
 		 * cleanup has been done or not. In the worst case it
 		 * loops once more. We pretend that the cleanup was
 		 * done as there is no way to return. Either the
 		 * OWNER_DIED bit is set by now or we push the blocked
 		 * task into the wait for ever nirwana as well.
 		 */
 		tsk->flags |= PF_EXITPIDONE;
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		schedule();
 	}
 	exit_irq_thread();
 	exit_signals(tsk);  /* sets PF_EXITING */
 	/*
 	 * tsk->flags are checked in the futex code to protect against
 	 * an exiting task cleaning up the robust pi futexes.
 	 */
 	smp_mb();
 	raw_spin_unlock_wait(&tsk->pi_lock);
 	if (unlikely(in_atomic()))
 		printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
 				current->comm, task_pid_nr(current),
 				preempt_count());
 	acct_update_integrals(tsk);
 	/* sync mm's RSS info before statistics gathering */
 	if (tsk->mm)
 		sync_mm_rss(tsk, tsk->mm);
 	/* True when signal->live drops to zero with this exit. */
 	group_dead = atomic_dec_and_test(&tsk->signal->live);
 	if (group_dead) {
 		hrtimer_cancel(&tsk->signal->real_timer);
 		exit_itimers(tsk->signal);
 		if (tsk->mm)
 			setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm);
 	}
 	acct_collect(code, group_dead);
 	if (group_dead)
 		tty_audit_exit();
 	if (unlikely(tsk->audit_context))
 		audit_free(tsk);
 	tsk->exit_code = code;
 	taskstats_exit(tsk, group_dead);
 	exit_mm(tsk);
 	if (group_dead)
 		acct_process();
 	trace_sched_process_exit(tsk);
 	exit_sem(tsk);
 	exit_files(tsk);
 	exit_fs(tsk);
 	check_stack_usage();
 	exit_thread();
 	/*
 	 * Flush inherited counters to the parent - before the parent
 	 * gets woken up by child-exit notifications.
 	 *
 	 * because of cgroup mode, must be called before cgroup_exit()
 	 */
 	perf_event_exit_task(tsk);
 	cgroup_exit(tsk, 1);
 	if (group_dead)
 		disassociate_ctty(1);
 	module_put(task_thread_info(tsk)->exec_domain->module);
 	proc_exit_connector(tsk);
 	/*
 	 * FIXME: do that only when needed, using sched_exit tracepoint
 	 */
 	ptrace_put_breakpoints(tsk);
 	exit_notify(tsk, group_dead);
 #ifdef CONFIG_NUMA
 	task_lock(tsk);
 	mpol_put(tsk->mempolicy);
 	tsk->mempolicy = NULL;
 	task_unlock(tsk);
 #endif
 #ifdef CONFIG_FUTEX
 	if (unlikely(current->pi_state_cache))
 		kfree(current->pi_state_cache);
 #endif
 	/*
 	 * Make sure we are holding no locks:
 	 */
 	debug_check_no_locks_held(tsk);
 	/*
 	 * We can do this unlocked here. The futex code uses this flag
 	 * just to verify whether the pi state cleanup has been done
 	 * or not. In the worst case it loops once more.
 	 */
 	tsk->flags |= PF_EXITPIDONE;
 	if (tsk->io_context)
 		exit_io_context(tsk);
 	if (tsk->splice_pipe)
 		__free_pipe_info(tsk->splice_pipe);
 	validate_creds_for_do_exit(tsk);
 	preempt_disable();
 	exit_rcu();
 	/* causes final put_task_struct in finish_task_switch(). */
 	tsk->state = TASK_DEAD;
 	schedule();
 	BUG();
 	/* Avoid "noreturn function does return".  */
 	for (;;)
 		cpu_relax();	/* For when BUG is null */
 }
 EXPORT_SYMBOL_GPL(do_exit);
 /*
  * Signal @comp, if it is non-NULL, and then exit the calling task with
  * @code through do_exit().  Does not return.
  */
 NORET_TYPE void complete_and_exit(struct completion *comp, long code)
 {
 	if (comp)
 		complete(comp);
 	do_exit(code);
 }
 EXPORT_SYMBOL(complete_and_exit);
 /*
  * exit() system call: terminate the calling task.  Only the low 8 bits
  * of @error_code are kept; they are passed to do_exit() shifted into
  * bits 8-15 of the exit code.
  */
 SYSCALL_DEFINE1(exit, int, error_code)
 {
 	do_exit((error_code&0xff)<<8);
 }
 /*
  * Take down every thread in the group.  This is called by fatal signals
  * as well as by sys_exit_group (below).
  *
  * If a group exit is already in progress, the exit code recorded for
  * it replaces @exit_code.  Otherwise, when the group has other
  * threads, @exit_code is recorded, SIGNAL_GROUP_EXIT is set and the
  * other threads are zapped, all under ->siglock.  Does not return.
  */
 NORET_TYPE void
 do_group_exit(int exit_code)
 {
 	struct signal_struct *sig = current->signal;
 	BUG_ON(exit_code & 0x80); /* core dumps don't get here */
 	if (signal_group_exit(sig)) {
 		exit_code = sig->group_exit_code;
 	} else if (!thread_group_empty(current)) {
 		struct sighand_struct *const sighand = current->sighand;
 		spin_lock_irq(&sighand->siglock);
 		if (!signal_group_exit(sig)) {
 			/* We are first: publish the code, zap the rest. */
 			sig->group_exit_code = exit_code;
 			sig->flags = SIGNAL_GROUP_EXIT;
 			zap_other_threads(current);
 		} else {
 			/* Another thread got here before we took the lock.  */
 			exit_code = sig->group_exit_code;
 		}
 		spin_unlock_irq(&sighand->siglock);
 	}
 	do_exit(exit_code);
 	/* NOTREACHED */
 }
 /*
  * This kills every thread in the thread group. Note that any externally
  * wait4()-ing process will get the correct exit code - even if this
  * thread is not the thread group leader.
  *
  * Only the low 8 bits of @error_code are kept; they are passed to
  * do_group_exit() shifted into bits 8-15 of the exit code.
  */
 SYSCALL_DEFINE1(exit_group, int, error_code)
 {
 	do_group_exit((error_code & 0xff) << 8);
 	/* NOTREACHED */
 	return 0;
 }
 /*
  * Arguments and state of one wait request, passed down through the
  * wait helpers in this file.
  */
 struct wait_opts {
 	/* pid type to match against; PIDTYPE_MAX matches any task */
 	enum pid_type		wo_type;
 	/* wait option bits (WEXITED, WNOWAIT, __WCLONE, __WALL, ...) */
 	int			wo_flags;
 	/* pid that a task's pid of type wo_type must equal */
 	struct pid		*wo_pid;
 	/* user siginfo to fill in, or NULL */
 	struct siginfo __user	*wo_info;
 	/* user status word pointer (sys_wait4 stores its stat_addr here) */
 	int __user		*wo_stat;
 	/* user rusage buffer to fill in, or NULL */
 	struct rusage __user	*wo_rusage;
 	/* NOTE(review): presumably the wait queue entry used while sleeping - confirm */
 	wait_queue_t		child_wait;
 	/* NOTE(review): presumably the error returned when no task qualifies - confirm */
 	int			notask_error;
 };
 /*
  * Return the struct pid of @task for the given @type.  For every type
  * other than PIDTYPE_PID the pid is taken from the thread group leader.
  */
 static inline struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
 {
 	struct task_struct *owner;
 	owner = (type == PIDTYPE_PID) ? task : task->group_leader;
 	return owner->pids[type].pid;
 }
 /*
  * Return nonzero if @p matches the pid selection in @wo: either any
  * task is accepted (PIDTYPE_MAX) or @p's pid of the requested type is
  * the requested pid.
  */
 static int eligible_pid(struct wait_opts *wo, struct task_struct *p)
 {
 	if (wo->wo_type == PIDTYPE_MAX)
 		return 1;
 	return task_pid_type(p, wo->wo_type) == wo->wo_pid;
 }
 /*
  * Return 1 if @p is a child this wait request is interested in, 0 if
  * it must be skipped.
  */
 static int eligible_child(struct wait_opts *wo, struct task_struct *p)
 {
 	int is_clone, want_clone;
 	if (!eligible_pid(wo, p))
 		return 0;
 	/* Wait for all children (clone and not) if __WALL is set;
 	 * otherwise, wait for clone children *only* if __WCLONE is
 	 * set; otherwise, wait for non-clone children *only*.  (Note:
 	 * A "clone" child here is one that reports to its parent
 	 * using a signal other than SIGCHLD.) */
 	is_clone = p->exit_signal != SIGCHLD;
 	want_clone = !!(wo->wo_flags & __WCLONE);
 	if (wo->wo_flags & __WALL)
 		return 1;
 	return is_clone == want_clone;
 }
 /*
  * Copy the results of a wait to user space without reaping the task.
  *
  * Fills wo->wo_rusage (if set) from @p, drops a reference on @p, and
  * then fills the siginfo at wo->wo_info (if set) with SIGCHLD, @why,
  * @pid, @uid and @status.  Copying stops at the first failure.
  * Returns @pid on success or the first error encountered.
  */
 static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p,
 				pid_t pid, uid_t uid, int why, int status)
 {
 	struct siginfo __user *infop;
 	int retval = 0;
 	if (wo->wo_rusage)
 		retval = getrusage(p, RUSAGE_BOTH, wo->wo_rusage);
 	put_task_struct(p);
 	infop = wo->wo_info;
 	if (infop && !retval) {
 		retval = put_user(SIGCHLD, &infop->si_signo);
 		if (!retval)
 			retval = put_user(0, &infop->si_errno);
 		if (!retval)
 			retval = put_user((short)why, &infop->si_code);
 		if (!retval)
 			retval = put_user(pid, &infop->si_pid);
 		if (!retval)
 			retval = put_user(uid, &infop->si_uid);
 		if (!retval)
 			retval = put_user(status, &infop->si_status);
 	}
 	return retval ? retval : pid;
 }
 /*
  * Handle sys_wait4 work for one task in state EXIT_ZOMBIE.
  *
  * @wo: wait options (flags plus the optional user buffers for the status
  *      word, siginfo and rusage)
  * @p:  the zombie task being considered
  *
  * We hold read_lock(&tasklist_lock) on entry.  If we return zero, we still
  * hold the lock and this task is uninteresting.  If we return nonzero, we
  * have released the lock and the system call should return; the value is
  * @p's pid on success or the first error hit while copying results out.
  */
 static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 {
 	unsigned long state;
 	int retval, status, traced;
 	/* pid and uid are sampled up front, while tasklist_lock is held. */
 	pid_t pid = task_pid_vnr(p);
 	uid_t uid = __task_cred(p)->uid;
 	struct siginfo __user *infop;
 	/* Zombies are only reported to waiters that asked for WEXITED. */
 	if (!likely(wo->wo_flags & WEXITED))
 		return 0;
 	/*
 	 * WNOWAIT: report the exit but leave the zombie in place.  Pin the
 	 * task, drop the lock and let wait_noreap_copyout() fill the user
 	 * buffers and release the reference.
 	 */
 	if (unlikely(wo->wo_flags & WNOWAIT)) {
 		int exit_code = p->exit_code;
 		int why;
 		get_task_struct(p);
 		read_unlock(&tasklist_lock);
 		/*
 		 * Low 7 bits clear: CLD_EXITED, status taken from bits 8
 		 * and up.  Otherwise the low 7 bits are the status and bit
 		 * 0x80 selects CLD_DUMPED over CLD_KILLED.
 		 */
 		if ((exit_code & 0x7f) == 0) {
 			why = CLD_EXITED;
 			status = exit_code >> 8;
 		} else {
 			why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED;
 			status = exit_code & 0x7f;
 		}
 		return wait_noreap_copyout(wo, p, pid, uid, why, status);
 	}
 	/*
 	 * Try to move the task's state to DEAD
 	 * only one thread is allowed to do this:
 	 */
 	state = xchg(&p->exit_state, EXIT_DEAD);
 	if (state != EXIT_ZOMBIE) {
 		/* The only other state expected here is EXIT_DEAD. */
 		BUG_ON(state != EXIT_DEAD);
 		return 0;
 	}
 	traced = ptrace_reparented(p);
 	/*
 	 * It can be ptraced but not reparented, check
 	 * thread_group_leader() to filter out sub-threads.
 	 */
 	if (likely(!traced) && thread_group_leader(p)) {
 		struct signal_struct *psig;
 		struct signal_struct *sig;
 		unsigned long maxrss;
 		cputime_t tgutime, tgstime;
 		/*
 		 * The resource counters for the group leader are in its
 		 * own task_struct.  Those for dead threads in the group
 		 * are in its signal_struct, as are those for the child
 		 * processes it has previously reaped.  All these
 		 * accumulate in the parent's signal_struct c* fields.
 		 *
 		 * We don't bother to take a lock here to protect these
 		 * p->signal fields, because they are only touched by
 		 * __exit_signal, which runs with tasklist_lock
 		 * write-locked anyway, and so is excluded here.  We do
 		 * need to protect the access to parent->signal fields,
 		 * as other threads in the parent group can be right
 		 * here reaping other children at the same time.
 		 *
 		 * We use thread_group_times() to get times for the thread
 		 * group, which consolidates times for all threads in the
 		 * group including the group leader.
 		 */
 		thread_group_times(p, &tgutime, &tgstime);
 		spin_lock_irq(&p->real_parent->sighand->siglock);
 		psig = p->real_parent->signal;
 		sig = p->signal;
 		psig->cutime =
 			cputime_add(psig->cutime,
 			cputime_add(tgutime,
 				    sig->cutime));
 		psig->cstime =
 			cputime_add(psig->cstime,
 			cputime_add(tgstime,
 				    sig->cstime));
 		psig->cgtime =
 			cputime_add(psig->cgtime,
 			cputime_add(p->gtime,
 			cputime_add(sig->gtime,
 				    sig->cgtime)));
 		psig->cmin_flt +=
 			p->min_flt + sig->min_flt + sig->cmin_flt;
 		psig->cmaj_flt +=
 			p->maj_flt + sig->maj_flt + sig->cmaj_flt;
 		psig->cnvcsw +=
 			p->nvcsw + sig->nvcsw + sig->cnvcsw;
 		psig->cnivcsw +=
 			p->nivcsw + sig->nivcsw + sig->cnivcsw;
 		psig->cinblock +=
 			task_io_get_inblock(p) +
 			sig->inblock + sig->cinblock;
 		psig->coublock +=
 			task_io_get_oublock(p) +
 			sig->oublock + sig->coublock;
 		/* cmaxrss is a high-water mark, so it is maxed, not summed. */
 		maxrss = max(sig->maxrss, sig->cmaxrss);
 		if (psig->cmaxrss < maxrss)
 			psig->cmaxrss = maxrss;
 		task_io_accounting_add(&psig->ioac, &p->ioac);
 		task_io_accounting_add(&psig->ioac, &sig->ioac);
 		spin_unlock_irq(&p->real_parent->sighand->siglock);
 	}
 	/*
 	 * Now we are sure this task is interesting, and no other
 	 * thread can reap it because we set its state to EXIT_DEAD.
 	 */
 	read_unlock(&tasklist_lock);
 	/*
 	 * Copy the results out to user space.  Every step is guarded by
 	 * !retval, so nothing further is written after the first failure.
 	 */
 	retval = wo->wo_rusage
 		? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
 	status = (p->signal->flags & SIGNAL_GROUP_EXIT)
 		? p->signal->group_exit_code : p->exit_code;
 	if (!retval && wo->wo_stat)
 		retval = put_user(status, wo->wo_stat);
 	infop = wo->wo_info;
 	if (!retval && infop)
 		retval = put_user(SIGCHLD, &infop->si_signo);
 	if (!retval && infop)
 		retval = put_user(0, &infop->si_errno);
 	if (!retval && infop) {
 		int why;
 		/* Same status decoding as in the WNOWAIT branch above. */
 		if ((status & 0x7f) == 0) {
 			why = CLD_EXITED;
 			status >>= 8;
 		} else {
 			why = (status & 0x80) ? CLD_DUMPED : CLD_KILLED;
 			status &= 0x7f;
 		}
 		retval = put_user((short)why, &infop->si_code);
 		if (!retval)
 			retval = put_user(status, &infop->si_status);
 	}
 	if (!retval && infop)
 		retval = put_user(pid, &infop->si_pid);
 	if (!retval && infop)
 		retval = put_user(uid, &infop->si_uid);
 	if (!retval)
 		retval = pid;
 	if (traced) {
 		write_lock_irq(&tasklist_lock);
 		/* We dropped tasklist, ptracer could die and untrace */
 		ptrace_unlink(p);
 		/*
 		 * If this is not a sub-thread, notify the parent.
 		 * If parent wants a zombie, don't release it now.
 		 */
 		if (thread_group_leader(p) &&
 		    !do_notify_parent(p, p->exit_signal)) {
 			p->exit_state = EXIT_ZOMBIE;
 			p = NULL;
 		}
 		write_unlock_irq(&tasklist_lock);
 	}
 	/* p is NULL only when the task was put back to EXIT_ZOMBIE above. */
 	if (p != NULL)
 		release_task(p);
 	return retval;
 }
 /*
  * Return a pointer to the stop code that a waiter should look at for @p,
  * or NULL when @p has no stop to report.
  *
  * For a ptrace wait this is @p's own exit_code, provided the task is
  * stopped or traced and JOBCTL_LISTENING is not set.  Otherwise it is
  * the signal struct's group_exit_code, provided SIGNAL_STOP_STOPPED is set.
  */
 static int *task_stopped_code(struct task_struct *p, bool ptrace)
 {
 	if (!ptrace) {
 		if (p->signal->flags & SIGNAL_STOP_STOPPED)
 			return &p->signal->group_exit_code;
 		return NULL;
 	}
 	if (!task_is_stopped_or_traced(p))
 		return NULL;
 	if (p->jobctl & JOBCTL_LISTENING)
 		return NULL;
 	return &p->exit_code;
 }
 /**
  * wait_task_stopped - Wait for %TASK_STOPPED or %TASK_TRACED
  * @wo: wait options
  * @ptrace: is the wait for ptrace
  * @p: task to wait for
  *
  * Handle sys_wait4() work for %p in state %TASK_STOPPED or %TASK_TRACED.
  *
  * CONTEXT:
  * read_lock(&tasklist_lock), which is released if return value is
  * non-zero.  Also, grabs and releases @p->sighand->siglock.
  *
  * RETURNS:
  * 0 if wait condition didn't exist and search for other wait conditions
  * should continue.  Non-zero return, -errno on failure and @p's pid on
  * success, implies that tasklist_lock is released and wait condition
  * search should terminate.
  */
 static int wait_task_stopped(struct wait_opts *wo,
 				int ptrace, struct task_struct *p)
 {
 	struct siginfo __user *infop;
 	int retval, exit_code, *p_code, why;
 	uid_t uid = 0; /* unneeded, required by compiler */
 	pid_t pid;
 	/*
 	 * Traditionally we see ptrace'd stopped tasks regardless of options.
 	 */
 	if (!ptrace && !(wo->wo_flags & WUNTRACED))
 		return 0;
 	if (!task_stopped_code(p, ptrace))
 		return 0;
 	exit_code = 0;
 	spin_lock_irq(&p->sighand->siglock);
 	p_code = task_stopped_code(p, ptrace);
 	if (unlikely(!p_code))
 		goto unlock_sig;
 	exit_code = *p_code;
 	if (!exit_code)
 		goto unlock_sig;
 	if (!unlikely(wo->wo_flags & WNOWAIT))
 		*p_code = 0;
 	uid = task_uid(p);
 unlock_sig:
 	spin_unlock_irq(&p->sighand->siglock);
 	if (!exit_code)
 		return 0;
 	/*
 	 * Now we are pretty sure this task is interesting.
 	 * Make sure it doesn't get reaped out from under us while we
 	 * give up the lock and then examine it below.  We don't want to
 	 * keep holding onto the tasklist_lock while we call getrusage and
 	 * possibly take page faults for user memory.
 	 */
 	get_task_struct(p);
 	pid = task_pid_vnr(p);
 	why = ptrace ? CLD_TRAPPED : CLD_STOPPED;
 	read_unlock(&tasklist_lock);
 	if (unlikely(wo->wo_flags & WNOWAIT))
 		return wait_noreap_copyout(wo, p, pid, uid, why, exit_code);
 	retval = wo->wo_rusage
 		? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
 	if (!retval && wo->wo_stat)
 		retval = put_user((exit_code << 8) | 0x7f, wo->wo_stat);
 	infop = wo->wo_info;
 	if (!retval && infop)
 		retval = put_user(SIGCHLD, &infop->si_signo);
 	if (!retval && infop)
 		retval = put_user(0, &infop->si_errno);
 	if (!retval && infop)
 		retval = put_user((short)why, &infop->si_code);
 	if (!retval && infop)
 		retval = put_user(exit_code, &infop->si_status);
 	if (!retval && infop)
 		retval = put_user(pid, &infop->si_pid);
 	if (!retval && infop)
 		retval = put_user(uid, &infop->si_uid);
 	if (!retval)
 		retval = pid;
 	put_task_struct(p);
 	BUG_ON(!retval);
 	return retval;
 }
 /*
  * Handle do_wait work for one task in a live, non-stopped state, i.e.
  * report a pending "continued" event when the waiter asked for
  * WCONTINUED.
  *
  * Entered with read_lock(&tasklist_lock) held.  A zero return means the
  * lock is still held and the task is uninteresting; a nonzero return
  * means the lock has been released and the system call should return.
  */
 static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
 {
 	int err;
 	pid_t pid;
 	uid_t uid;
 	if (!unlikely(wo->wo_flags & WCONTINUED))
 		return 0;
 	/* Unlocked peek; confirmed below with siglock held. */
 	if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))
 		return 0;
 	spin_lock_irq(&p->sighand->siglock);
 	if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) {
 		spin_unlock_irq(&p->sighand->siglock);
 		return 0;
 	}
 	/* Consume the event unless the caller only wants to peek. */
 	if (!unlikely(wo->wo_flags & WNOWAIT))
 		p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
 	uid = task_uid(p);
 	spin_unlock_irq(&p->sighand->siglock);
 	pid = task_pid_vnr(p);
 	get_task_struct(p);
 	read_unlock(&tasklist_lock);
 	if (wo->wo_info) {
 		/* siginfo path: the helper also drops our reference on p. */
 		err = wait_noreap_copyout(wo, p, pid, uid,
 					  CLD_CONTINUED, SIGCONT);
 		BUG_ON(err == 0);
 		return err;
 	}
 	err = 0;
 	if (wo->wo_rusage)
 		err = getrusage(p, RUSAGE_BOTH, wo->wo_rusage);
 	put_task_struct(p);
 	if (!err && wo->wo_stat)
 		err = put_user(0xffff, wo->wo_stat);
 	return err ? err : pid;
 }
 /*
  * Consider @p for a wait by @parent.
  *
  * @wo:     wait options; ->notask_error is updated as described below
  * @ptrace: nonzero when @p is being examined as a ptracee
  *          (ptrace_do_wait() passes 1, do_wait_thread() passes 0)
  * @p:      the candidate task
  *
  * -ECHILD should be in ->notask_error before the first call.
  * Returns nonzero for a final return, when we have unlocked tasklist_lock.
  * Returns zero if the search for a child should continue;
  * then ->notask_error is 0 if @p is an eligible child,
  * or another error from security_task_wait(), or still -ECHILD.
  */
 static int wait_consider_task(struct wait_opts *wo, int ptrace,
 				struct task_struct *p)
 {
 	int ret = eligible_child(wo, p);
 	if (!ret)
 		return ret;
 	ret = security_task_wait(p);
 	if (unlikely(ret < 0)) {
 		/*
 		 * If we have not yet seen any eligible child,
 		 * then let this error code replace -ECHILD.
 		 * A permission error will give the user a clue
 		 * to look for security policy problems, rather
 		 * than for mysterious wait bugs.
 		 */
 		if (wo->notask_error)
 			wo->notask_error = ret;
 		return 0;
 	}
 	/* dead body doesn't have much to contribute */
 	if (p->exit_state == EXIT_DEAD)
 		return 0;
 	/* slay zombie? */
 	if (p->exit_state == EXIT_ZOMBIE) {
 		/*
 		 * A zombie ptracee is only visible to its ptracer.
 		 * Notification and reaping will be cascaded to the real
 		 * parent when the ptracer detaches.
 		 */
 		if (likely(!ptrace) && unlikely(p->ptrace)) {
 			/* it will become visible, clear notask_error */
 			wo->notask_error = 0;
 			return 0;
 		}
 		/* we don't reap group leaders with subthreads */
 		if (!delay_group_leader(p))
 			return wait_task_zombie(wo, p);
 		/*
 		 * Allow access to stopped/continued state via zombie by
 		 * falling through.  Clearing of notask_error is complex.
 		 *
 		 * When !@ptrace:
 		 *
 		 * If WEXITED is set, notask_error should naturally be
 		 * cleared.  If not, subset of WSTOPPED|WCONTINUED is set,
 		 * so, if there are live subthreads, there are events to
 		 * wait for.  If all subthreads are dead, it's still safe
 		 * to clear - this function will be called again in finite
 		 * amount time once all the subthreads are released and
 		 * will then return without clearing.
 		 *
 		 * When @ptrace:
 		 *
 		 * Stopped state is per-task and thus can't change once the
 		 * target task dies.  Only continued and exited can happen.
 		 * Clear notask_error if WCONTINUED | WEXITED.
 		 */
 		if (likely(!ptrace) || (wo->wo_flags & (WCONTINUED | WEXITED)))
 			wo->notask_error = 0;
 	} else {
 		/*
 		 * If @p is ptraced by a task in its real parent's group,
 		 * hide group stop/continued state when looking at @p as
 		 * the real parent; otherwise, a single stop can be
 		 * reported twice as group and ptrace stops.
 		 *
 		 * If a ptracer wants to distinguish the two events for its
 		 * own children, it should create a separate process which
 		 * takes the role of real parent.
 		 *
 		 * NOTE(review): the condition below is present as a
 		 * removed/added pair of lines ('-' and '+' markers); confirm
 		 * which form is intended before building this file.
 		 */
-		if (likely(!ptrace) && p->ptrace &&
+		if (likely(!ptrace) && p->ptrace && !ptrace_reparented(p))
-		    same_thread_group(p->parent, p->real_parent))
 			return 0;
 		/*
 		 * @p is alive and it's gonna stop, continue or exit, so
 		 * there always is something to wait for.
 		 */
 		wo->notask_error = 0;
 	}
 	/*
 	 * Wait for stopped.  Depending on @ptrace, different stopped state
 	 * is used and the two don't interact with each other.
 	 */
 	ret = wait_task_stopped(wo, ptrace, p);
 	if (ret)
 		return ret;
 	/*
 	 * Wait for continued.  There's only one continued state and the
 	 * ptracer can consume it which can confuse the real parent.  Don't
 	 * use WCONTINUED from ptracer.  You don't need or want it.
 	 */
 	return wait_task_continued(wo, p);
 }
 /*
  * Run the do_wait() checks over the children of one thread, @tsk.
  *
  * ->notask_error must hold -ECHILD before the first call.
  * A nonzero return is final and means tasklist_lock has been dropped.
  * A zero return means the caller should keep searching; in that case
  * ->notask_error is 0 if any eligible child was seen, an error from
  * security_task_wait(), or still -ECHILD.
  */
 static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)
 {
 	struct task_struct *child;
 	int ret = 0;
 	list_for_each_entry(child, &tsk->children, sibling) {
 		ret = wait_consider_task(wo, 0, child);
 		/* First nonzero result ends the walk. */
 		if (ret)
 			break;
 	}
 	return ret;
 }
 /*
  * Same walk as do_wait_thread(), but over the tasks on @tsk's ->ptraced
  * list, each considered as a ptracee.  Returns the first nonzero result
  * from wait_consider_task(), or 0 when the whole list was walked.
  */
 static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
 {
 	struct task_struct *tracee;
 	int ret = 0;
 	list_for_each_entry(tracee, &tsk->ptraced, ptrace_entry) {
 		ret = wait_consider_task(wo, 1, tracee);
 		if (ret)
 			break;
 	}
 	return ret;
 }
 /*
  * Wake function for the ->child_wait entry of a struct wait_opts.
  *
  * @key is the task the wakeup is about.  The waiter is woken through
  * default_wake_function() only if that task passes eligible_pid() and,
  * for __WNOTHREAD waits, its ->parent is the task stored in
  * wait->private.  Otherwise 0 is returned and the waiter is left alone.
  */
 static int child_wait_callback(wait_queue_t *wait, unsigned mode,
 				int sync, void *key)
 {
 	struct wait_opts *opts = container_of(wait, struct wait_opts,
 						child_wait);
 	struct task_struct *child = key;
 	if (eligible_pid(opts, child) &&
 	    (!(opts->wo_flags & __WNOTHREAD) ||
 	     wait->private == child->parent))
 		return default_wake_function(wait, mode, sync, key);
 	return 0;
 }
 void __wake_up_parent(struct task_struct *p, struct task_struct *parent)
 {
 	__wake_up_sync_key(&parent->signal->wait_chldexit,
 				TASK_INTERRUPTIBLE, 1, p);
 }
 /*
  * Common wait loop behind the wait system calls in this file.
  *
  * @wo: wait options filled in by the caller (type, pid, flags and the
  *      optional user buffers); ->child_wait and ->notask_error are
  *      set up here.
  *
  * Queues wo->child_wait on current's wait_chldexit queue, then scans the
  * children and ptracees of the thread group until something reportable
  * is found or there is nothing left to wait for.
  *
  * Returns the nonzero result of the per-task handlers when one of them
  * finished the wait, ->notask_error when no task was reported (this is 0
  * for a WNOHANG call that saw an eligible child), or -ERESTARTSYS when a
  * signal is pending.
  */
 static long do_wait(struct wait_opts *wo)
 {
 	struct task_struct *tsk;
 	int retval;
 	trace_sched_process_wait(wo->wo_pid);
 	init_waitqueue_func_entry(&wo->child_wait, child_wait_callback);
 	wo->child_wait.private = current;
 	add_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);
 repeat:
 	/*
 	 * If there is nothing that can match our criteria just get out.
 	 * We will clear ->notask_error to zero if we see any child that
 	 * might later match our criteria, even if we are not able to reap
 	 * it yet.
 	 */
 	wo->notask_error = -ECHILD;
 	if ((wo->wo_type < PIDTYPE_MAX) &&
 	   (!wo->wo_pid || hlist_empty(&wo->wo_pid->tasks[wo->wo_type])))
 		goto notask;
 	/*
 	 * The task state is set before the scan; presumably so that a
 	 * wakeup arriving while we scan is not lost before schedule().
 	 */
 	set_current_state(TASK_INTERRUPTIBLE);
 	read_lock(&tasklist_lock);
 	tsk = current;
 	/*
 	 * Walk every thread in the group, checking its children and then
 	 * its ptracees.  With __WNOTHREAD only the calling thread is
 	 * examined.  A nonzero result means the handler already dropped
 	 * tasklist_lock, hence the jump past read_unlock().
 	 */
 	do {
 		retval = do_wait_thread(wo, tsk);
 		if (retval)
 			goto end;
 		retval = ptrace_do_wait(wo, tsk);
 		if (retval)
 			goto end;
 		if (wo->wo_flags & __WNOTHREAD)
 			break;
 	} while_each_thread(current, tsk);
 	read_unlock(&tasklist_lock);
 notask:
 	retval = wo->notask_error;
 	/*
 	 * notask_error == 0 means something may become reportable later:
 	 * sleep and rescan unless WNOHANG was given or a signal is pending.
 	 */
 	if (!retval && !(wo->wo_flags & WNOHANG)) {
 		retval = -ERESTARTSYS;
 		if (!signal_pending(current)) {
 			schedule();
 			goto repeat;
 		}
 	}
 end:
 	__set_current_state(TASK_RUNNING);
 	remove_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);
 	return retval;
 }
 /*
  * waitid - wait for a state change in a child selected by @which/@upid
  *
  * @which:   P_ALL, P_PID or P_PGID
  * @upid:    pid or process group id for P_PID/P_PGID; must be positive
  * @infop:   optional user siginfo buffer describing the event
  * @options: WNOHANG/WNOWAIT plus at least one of
  *           WEXITED, WSTOPPED, WCONTINUED
  * @ru:      optional user rusage buffer
  *
  * Returns 0 when a child was reported (a positive do_wait() result is
  * folded to 0), -EINVAL for bad arguments, otherwise the do_wait()
  * result or an error from clearing @infop.
  */
 SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
 		infop, int, options, struct rusage __user *, ru)
 {
 	struct wait_opts wo;
 	struct pid *pid = NULL;
 	enum pid_type type;
 	long ret;
 	/* Reject unknown bits, and calls that name no event to wait for. */
 	if ((options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED)) ||
 	    !(options & (WEXITED|WSTOPPED|WCONTINUED)))
 		return -EINVAL;
 	if (which == P_ALL)
 		type = PIDTYPE_MAX;
 	else if (which == P_PID)
 		type = PIDTYPE_PID;
 	else if (which == P_PGID)
 		type = PIDTYPE_PGID;
 	else
 		return -EINVAL;
 	/* P_PID and P_PGID need a positive id to look up. */
 	if (type < PIDTYPE_MAX) {
 		if (upid <= 0)
 			return -EINVAL;
 		pid = find_get_pid(upid);
 	}
 	wo.wo_type	= type;
 	wo.wo_pid	= pid;
 	wo.wo_flags	= options;
 	wo.wo_info	= infop;
 	wo.wo_stat	= NULL;
 	wo.wo_rusage	= ru;
 	ret = do_wait(&wo);
 	if (ret > 0) {
 		ret = 0;
 	} else if (infop && !ret) {
 		/*
 		 * For a WNOHANG return, clear out all the fields
 		 * we would set so the user can easily tell the
 		 * difference.
 		 */
 		ret = put_user(0, &infop->si_signo);
 		if (!ret)
 			ret = put_user(0, &infop->si_errno);
 		if (!ret)
 			ret = put_user(0, &infop->si_code);
 		if (!ret)
 			ret = put_user(0, &infop->si_pid);
 		if (!ret)
 			ret = put_user(0, &infop->si_uid);
 		if (!ret)
 			ret = put_user(0, &infop->si_status);
 	}
 	put_pid(pid);
 	/* avoid REGPARM breakage on x86: */
 	asmlinkage_protect(5, ret, which, upid, infop, options, ru);
 	return ret;
 }
 /*
  * wait4 - wait for a state change in a child selected by @upid
  *
  * @upid selects the children to wait for:
  *   -1    any child
  *   < -1  any child in process group -@upid
  *   0     any child in the caller's own process group
  *   > 0   the child whose pid is @upid
  * @stat_addr: optional user buffer for the status word
  * @options:   WNOHANG, WUNTRACED, WCONTINUED, __WNOTHREAD, __WCLONE,
  *             __WALL; WEXITED is always added
  * @ru:        optional user rusage buffer
  *
  * Returns the do_wait() result, -EINVAL for unsupported option bits, or
  * -ESRCH when @upid is INT_MIN (no process group can have that id).
  */
 SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
 		int, options, struct rusage __user *, ru)
 {
 	struct wait_opts wo;
 	struct pid *pid = NULL;
 	enum pid_type type;
 	long ret;
 	if (options & ~(WNOHANG|WUNTRACED|WCONTINUED|
 			__WNOTHREAD|__WCLONE|__WALL))
 		return -EINVAL;
 	/*
 	 * -INT_MIN is not representable in an int, so negating it below
 	 * would be signed overflow (undefined behaviour).  Reject it here.
 	 */
 	if (upid == INT_MIN)
 		return -ESRCH;
 	if (upid == -1)
 		type = PIDTYPE_MAX;
 	else if (upid < 0) {
 		type = PIDTYPE_PGID;
 		pid = find_get_pid(-upid);
 	} else if (upid == 0) {
 		type = PIDTYPE_PGID;
 		pid = get_task_pid(current, PIDTYPE_PGID);
 	} else /* upid > 0 */ {
 		type = PIDTYPE_PID;
 		pid = find_get_pid(upid);
 	}
 	wo.wo_type	= type;
 	wo.wo_pid	= pid;
 	wo.wo_flags	= options | WEXITED;
 	wo.wo_info	= NULL;
 	wo.wo_stat	= stat_addr;
 	wo.wo_rusage	= ru;
 	ret = do_wait(&wo);
 	put_pid(pid);
 	/* avoid REGPARM breakage on x86: */
 	asmlinkage_protect(4, ret, upid, stat_addr, options, ru);
 	return ret;
 }
 #ifdef __ARCH_WANT_SYS_WAITPID
 /*
  * Compatibility entry point: sys_waitpid() is sys_wait4() without an
  * rusage buffer.  waitpid() should be implemented by calling
  * sys_wait4() from libc.a.
  */
 SYSCALL_DEFINE3(waitpid, pid_t, pid, int __user *, stat_addr, int, options)
 {
 	long ret = sys_wait4(pid, stat_addr, options, NULL);
 	return ret;
 }
 #endif