Commit 6db840fa7887980ef68a649640d506fe069eef0c
Committed by
Linus Torvalds
1 parent
356d6d5058
Exists in
master
and in
39 other branches
exec: RT sub-thread can livelock and monopolize CPU on exec
de_thread() calls yield() in a loop while waiting for ->group_leader to become a zombie. This deadlocks if an rt-prio execer shares the same cpu with ->group_leader: yield() never hands the cpu to a lower-priority task, so the leader never gets to run and exit, and the execer spins forever.

Change the code to use the ->group_exit_task/notify_count mechanics instead, so that the execer sleeps and is woken up by the exiting leader.

This patch certainly uglifies the code; perhaps someone can suggest something better.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 2 changed files with 22 additions and 16 deletions Side-by-side Diff
fs/exec.c
... | ... | @@ -801,16 +801,15 @@ |
801 | 801 | hrtimer_restart(&sig->real_timer); |
802 | 802 | spin_lock_irq(lock); |
803 | 803 | } |
804 | + | |
805 | + sig->notify_count = count; | |
806 | + sig->group_exit_task = tsk; | |
804 | 807 | while (atomic_read(&sig->count) > count) { |
805 | - sig->group_exit_task = tsk; | |
806 | - sig->notify_count = count; | |
807 | 808 | __set_current_state(TASK_UNINTERRUPTIBLE); |
808 | 809 | spin_unlock_irq(lock); |
809 | 810 | schedule(); |
810 | 811 | spin_lock_irq(lock); |
811 | 812 | } |
812 | - sig->group_exit_task = NULL; | |
813 | - sig->notify_count = 0; | |
814 | 813 | spin_unlock_irq(lock); |
815 | 814 | |
816 | 815 | /* |
817 | 816 | |
818 | 817 | |
... | ... | @@ -819,15 +818,18 @@ |
819 | 818 | * and to assume its PID: |
820 | 819 | */ |
821 | 820 | if (!thread_group_leader(tsk)) { |
822 | - /* | |
823 | - * Wait for the thread group leader to be a zombie. | |
824 | - * It should already be zombie at this point, most | |
825 | - * of the time. | |
826 | - */ | |
827 | 821 | leader = tsk->group_leader; |
828 | - while (leader->exit_state != EXIT_ZOMBIE) | |
829 | - yield(); | |
830 | 822 | |
823 | + sig->notify_count = -1; | |
824 | + for (;;) { | |
825 | + write_lock_irq(&tasklist_lock); | |
826 | + if (likely(leader->exit_state)) | |
827 | + break; | |
828 | + __set_current_state(TASK_UNINTERRUPTIBLE); | |
829 | + write_unlock_irq(&tasklist_lock); | |
830 | + schedule(); | |
831 | + } | |
832 | + | |
831 | 833 | /* |
832 | 834 | * The only record we have of the real-time age of a |
833 | 835 | * process, regardless of execs it's done, is start_time. |
... | ... | @@ -840,8 +842,6 @@ |
840 | 842 | */ |
841 | 843 | tsk->start_time = leader->start_time; |
842 | 844 | |
843 | - write_lock_irq(&tasklist_lock); | |
844 | - | |
845 | 845 | BUG_ON(leader->tgid != tsk->tgid); |
846 | 846 | BUG_ON(tsk->pid == tsk->tgid); |
847 | 847 | /* |
... | ... | @@ -874,6 +874,8 @@ |
874 | 874 | write_unlock_irq(&tasklist_lock); |
875 | 875 | } |
876 | 876 | |
877 | + sig->group_exit_task = NULL; | |
878 | + sig->notify_count = 0; | |
877 | 879 | /* |
878 | 880 | * There may be one thread left which is just exiting, |
879 | 881 | * but it's safe to stop telling the group to kill themselves. |
kernel/exit.c
... | ... | @@ -92,10 +92,9 @@ |
92 | 92 | * If there is any task waiting for the group exit |
93 | 93 | * then notify it: |
94 | 94 | */ |
95 | - if (sig->group_exit_task && atomic_read(&sig->count) == sig->notify_count) { | |
95 | + if (sig->group_exit_task && atomic_read(&sig->count) == sig->notify_count) | |
96 | 96 | wake_up_process(sig->group_exit_task); |
97 | - sig->group_exit_task = NULL; | |
98 | - } | |
97 | + | |
99 | 98 | if (tsk == sig->curr_target) |
100 | 99 | sig->curr_target = next_thread(tsk); |
101 | 100 | /* |
... | ... | @@ -826,6 +825,11 @@ |
826 | 825 | if (tsk->exit_signal == -1 && likely(!tsk->ptrace)) |
827 | 826 | state = EXIT_DEAD; |
828 | 827 | tsk->exit_state = state; |
828 | + | |
829 | + if (thread_group_leader(tsk) && | |
830 | + tsk->signal->notify_count < 0 && | |
831 | + tsk->signal->group_exit_task) | |
832 | + wake_up_process(tsk->signal->group_exit_task); | |
829 | 833 | |
830 | 834 | write_unlock_irq(&tasklist_lock); |
831 | 835 |