Commit abd50b39e783e1b6c75c7534c37f1eb2d94a89cd
Committed by
Linus Torvalds
1 parent
dfccbb5e49
Exists in
master
and in
13 other branches
wait: introduce EXIT_TRACE to avoid the racy EXIT_DEAD->EXIT_ZOMBIE transition
wait_task_zombie() first does EXIT_ZOMBIE->EXIT_DEAD transition and drops tasklist_lock. If this task is not the natural child and it is traced, we change its state back to EXIT_ZOMBIE for ->real_parent. The last transition is racy, this is even documented in 50b8d257486a "ptrace: partially fix the do_wait(WEXITED) vs EXIT_DEAD->EXIT_ZOMBIE race". wait_consider_task() tries to detect this transition and clear ->notask_error but we can't rely on ptrace_reparented(), debugger can exit and do ptrace_unlink() before its sub-thread sets EXIT_ZOMBIE. And there is another problem which was missed before: this transition can also race with reparent_leader() which doesn't reset ->exit_signal if EXIT_DEAD, assuming that this task must be reaped by someone else. So the tracee can be re-parented with ->exit_signal != SIGCHLD, and if /sbin/init doesn't use __WALL it becomes unreapable. This was fixed by the previous commit, but it was a temporary hack. 1. Add the new exit_state, EXIT_TRACE. It means that the task is the traced zombie, debugger is going to detach and notify its natural parent. This new state is actually EXIT_ZOMBIE | EXIT_DEAD. This way we can avoid the changes in proc/kgdb code, get_task_state() still reports "X (dead)" in this case. Note: with or without this change userspace can see Z -> X -> Z transition. Not really bad, but probably makes sense to fix. 2. Change wait_task_zombie() to use EXIT_TRACE instead of EXIT_DEAD if we need to notify the ->real_parent. 3. Revert the previous hack in reparent_leader(), now that EXIT_DEAD is always the final state we can safely ignore such a task. 4. Change wait_consider_task() to check EXIT_TRACE separately and kill the racy and no longer needed ptrace_reparented() case. If ptrace == T an EXIT_TRACE thread should be simply ignored, the owner of this state is going to ptrace_unlink() this task. We can pretend that it was already removed from ->ptraced list.
Otherwise we should skip this thread too but clear ->notask_error, we must be the natural parent and debugger is going to untrace and notify us. IOW, this doesn't differ from "EXIT_ZOMBIE && p->ptrace" even if the task was already untraced. Signed-off-by: Oleg Nesterov <oleg@redhat.com> Reported-by: Jan Kratochvil <jan.kratochvil@redhat.com> Reported-by: Michal Schmidt <mschmidt@redhat.com> Tested-by: Michal Schmidt <mschmidt@redhat.com> Cc: Al Viro <viro@ZenIV.linux.org.uk> Cc: Lennart Poettering <lpoetter@redhat.com> Cc: Roland McGrath <roland@hack.frob.com> Cc: Tejun Heo <tj@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 2 changed files with 22 additions and 29 deletions Side-by-side Diff
include/linux/sched.h
kernel/exit.c
... | ... | @@ -560,6 +560,9 @@ |
560 | 560 | struct list_head *dead) |
561 | 561 | { |
562 | 562 | list_move_tail(&p->sibling, &p->real_parent->children); |
563 | + | |
564 | + if (p->exit_state == EXIT_DEAD) | |
565 | + return; | |
563 | 566 | /* |
564 | 567 | * If this is a threaded reparent there is no need to |
565 | 568 | * notify anyone anything has happened. |
566 | 569 | |
... | ... | @@ -567,19 +570,9 @@ |
567 | 570 | if (same_thread_group(p->real_parent, father)) |
568 | 571 | return; |
569 | 572 | |
570 | - /* | |
571 | - * We don't want people slaying init. | |
572 | - * | |
573 | - * Note: we do this even if it is EXIT_DEAD, wait_task_zombie() | |
574 | - * can change ->exit_state to EXIT_ZOMBIE. If this is the final | |
575 | - * state, do_notify_parent() was already called and ->exit_signal | |
576 | - * doesn't matter. | |
577 | - */ | |
573 | + /* We don't want people slaying init. */ | |
578 | 574 | p->exit_signal = SIGCHLD; |
579 | 575 | |
580 | - if (p->exit_state == EXIT_DEAD) | |
581 | - return; | |
582 | - | |
583 | 576 | /* If it has exited notify the new parent about this child's death. */ |
584 | 577 | if (!p->ptrace && |
585 | 578 | p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { |
586 | 579 | |
587 | 580 | |
588 | 581 | |
... | ... | @@ -1043,17 +1036,13 @@ |
1043 | 1036 | return wait_noreap_copyout(wo, p, pid, uid, why, status); |
1044 | 1037 | } |
1045 | 1038 | |
1039 | + traced = ptrace_reparented(p); | |
1046 | 1040 | /* |
1047 | - * Try to move the task's state to DEAD | |
1048 | - * only one thread is allowed to do this: | |
1041 | + * Move the task's state to DEAD/TRACE, only one thread can do this. | |
1049 | 1042 | */ |
1050 | - state = xchg(&p->exit_state, EXIT_DEAD); | |
1051 | - if (state != EXIT_ZOMBIE) { | |
1052 | - BUG_ON(state != EXIT_DEAD); | |
1043 | + state = traced ? EXIT_TRACE : EXIT_DEAD; | |
1044 | + if (cmpxchg(&p->exit_state, EXIT_ZOMBIE, state) != EXIT_ZOMBIE) | |
1053 | 1045 | return 0; |
1054 | - } | |
1055 | - | |
1056 | - traced = ptrace_reparented(p); | |
1057 | 1046 | /* |
1058 | 1047 | * It can be ptraced but not reparented, check |
1059 | 1048 | * thread_group_leader() to filter out sub-threads. |
... | ... | @@ -1114,7 +1103,7 @@ |
1114 | 1103 | |
1115 | 1104 | /* |
1116 | 1105 | * Now we are sure this task is interesting, and no other |
1117 | - * thread can reap it because we set its state to EXIT_DEAD. | |
1106 | + * thread can reap it because we its state == DEAD/TRACE. | |
1118 | 1107 | */ |
1119 | 1108 | read_unlock(&tasklist_lock); |
1120 | 1109 | |
1121 | 1110 | |
1122 | 1111 | |
... | ... | @@ -1159,14 +1148,14 @@ |
1159 | 1148 | * If this is not a sub-thread, notify the parent. |
1160 | 1149 | * If parent wants a zombie, don't release it now. |
1161 | 1150 | */ |
1151 | + state = EXIT_DEAD; | |
1162 | 1152 | if (thread_group_leader(p) && |
1163 | - !do_notify_parent(p, p->exit_signal)) { | |
1164 | - p->exit_state = EXIT_ZOMBIE; | |
1165 | - p = NULL; | |
1166 | - } | |
1153 | + !do_notify_parent(p, p->exit_signal)) | |
1154 | + state = EXIT_ZOMBIE; | |
1155 | + p->exit_state = state; | |
1167 | 1156 | write_unlock_irq(&tasklist_lock); |
1168 | 1157 | } |
1169 | - if (p != NULL) | |
1158 | + if (state == EXIT_DEAD) | |
1170 | 1159 | release_task(p); |
1171 | 1160 | |
1172 | 1161 | return retval; |
1173 | 1162 | |
1174 | 1163 | |
... | ... | @@ -1362,12 +1351,15 @@ |
1362 | 1351 | } |
1363 | 1352 | |
1364 | 1353 | /* dead body doesn't have much to contribute */ |
1365 | - if (unlikely(p->exit_state == EXIT_DEAD)) { | |
1354 | + if (unlikely(p->exit_state == EXIT_DEAD)) | |
1355 | + return 0; | |
1356 | + | |
1357 | + if (unlikely(p->exit_state == EXIT_TRACE)) { | |
1366 | 1358 | /* |
1367 | - * But do not ignore this task until the tracer does | |
1368 | - * wait_task_zombie()->do_notify_parent(). | |
1359 | + * ptrace == 0 means we are the natural parent. In this case | |
1360 | + * we should clear notask_error, debugger will notify us. | |
1369 | 1361 | */ |
1370 | - if (likely(!ptrace) && unlikely(ptrace_reparented(p))) | |
1362 | + if (likely(!ptrace)) | |
1371 | 1363 | wo->notask_error = 0; |
1372 | 1364 | return 0; |
1373 | 1365 | } |