Commit 9b84cca2564b9a5b2d064fb44d2a55a5b44473a0
1 parent
823b018e5b
Exists in
master
and in
7 other branches
job control: Fix ptracer wait(2) hang and explain notask_error clearing
wait(2) and friends allow access to stopped/continued states through zombies, which is required as the states are process-wide and should be accessible whether the leader task is alive or undead. wait_consider_task() implements this by always clearing notask_error and going through wait_task_stopped/continued() for unreaped zombies. However, while ptraced, the stopped state is per-task and as such if the ptracee became a zombie, there's no further stopped event to listen to and wait(2) and friends should return -ECHILD on the tracee. Fix it by clearing notask_error only if WCONTINUED | WEXITED is set for ptraced zombies. While at it, document why clearing notask_error is safe for each case. Test case follows. #include <stdio.h> #include <unistd.h> #include <pthread.h> #include <time.h> #include <sys/types.h> #include <sys/ptrace.h> #include <sys/wait.h> static void *nooper(void *arg) { pause(); return NULL; } int main(void) { const struct timespec ts1s = { .tv_sec = 1 }; pid_t tracee, tracer; siginfo_t si; tracee = fork(); if (tracee == 0) { pthread_t thr; pthread_create(&thr, NULL, nooper, NULL); nanosleep(&ts1s, NULL); printf("tracee exiting\n"); pthread_exit(NULL); /* let subthread run */ } tracer = fork(); if (tracer == 0) { ptrace(PTRACE_ATTACH, tracee, NULL, NULL); while (1) { if (waitid(P_PID, tracee, &si, WSTOPPED) < 0) { perror("waitid"); break; } ptrace(PTRACE_CONT, tracee, NULL, (void *)(long)si.si_status); } return 0; } waitid(P_PID, tracer, &si, WEXITED); kill(tracee, SIGKILL); return 0; } Before the patch, after the tracee becomes a zombie, the tracer's waitid(WSTOPPED) never returns and the program doesn't terminate. tracee exiting ^C After the patch, tracee exiting triggers waitid() to fail. tracee exiting waitid: No child processes -v2: Oleg pointed out that exited in addition to continued can happen for ptraced dead group leader. Clear notask_error for ptraced child on WEXITED too. Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Oleg Nesterov <oleg@redhat.com>
Showing 1 changed file with 34 additions and 10 deletions Side-by-side Diff
kernel/exit.c
... | ... | @@ -1550,17 +1550,41 @@ |
1550 | 1550 | return 0; |
1551 | 1551 | } |
1552 | 1552 | |
1553 | - /* | |
1554 | - * We don't reap group leaders with subthreads. | |
1555 | - */ | |
1556 | - if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p)) | |
1557 | - return wait_task_zombie(wo, p); | |
1553 | + /* slay zombie? */ | |
1554 | + if (p->exit_state == EXIT_ZOMBIE) { | |
1555 | + /* we don't reap group leaders with subthreads */ | |
1556 | + if (!delay_group_leader(p)) | |
1557 | + return wait_task_zombie(wo, p); | |
1558 | 1558 | |
1559 | - /* | |
1560 | - * It's stopped or running now, so it might | |
1561 | - * later continue, exit, or stop again. | |
1562 | - */ | |
1563 | - wo->notask_error = 0; | |
1559 | + /* | |
1560 | + * Allow access to stopped/continued state via zombie by | |
1561 | + * falling through. Clearing of notask_error is complex. | |
1562 | + * | |
1563 | + * When !@ptrace: | |
1564 | + * | |
1565 | + * If WEXITED is set, notask_error should naturally be | |
1566 | + * cleared. If not, subset of WSTOPPED|WCONTINUED is set, | |
1567 | + * so, if there are live subthreads, there are events to | |
1568 | + * wait for. If all subthreads are dead, it's still safe | |
1569 | + * to clear - this function will be called again in finite | |
1570 | + * amount time once all the subthreads are released and | |
1571 | + * will then return without clearing. | |
1572 | + * | |
1573 | + * When @ptrace: | |
1574 | + * | |
1575 | + * Stopped state is per-task and thus can't change once the | |
1576 | + * target task dies. Only continued and exited can happen. | |
1577 | + * Clear notask_error if WCONTINUED | WEXITED. | |
1578 | + */ | |
1579 | + if (likely(!ptrace) || (wo->wo_flags & (WCONTINUED | WEXITED))) | |
1580 | + wo->notask_error = 0; | |
1581 | + } else { | |
1582 | + /* | |
1583 | + * @p is alive and it's gonna stop, continue or exit, so | |
1584 | + * there always is something to wait for. | |
1585 | + */ | |
1586 | + wo->notask_error = 0; | |
1587 | + } | |
1564 | 1588 | |
1565 | 1589 | if (task_stopped_code(p, ptrace)) |
1566 | 1590 | return wait_task_stopped(wo, ptrace, p); |