Commit 6db840fa7887980ef68a649640d506fe069eef0c

Authored by Oleg Nesterov
Committed by Linus Torvalds
1 parent 356d6d5058

exec: RT sub-thread can livelock and monopolize CPU on exec

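de_thread() calls yield() in a loop while waiting for ->group_leader to become
a zombie. This livelocks if an rt-prio execer shares the same cpu with
->group_leader: the higher-priority execer keeps getting the cpu back from
yield(), so the leader never runs far enough to exit. Change the code to sleep
and be woken up via the ->group_exit_task/notify_count mechanics instead.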

This patch certainly uglifies the code; perhaps someone can suggest something
better.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 2 changed files with 22 additions and 16 deletions Side-by-side Diff

... ... @@ -801,16 +801,15 @@
801 801 hrtimer_restart(&sig->real_timer);
802 802 spin_lock_irq(lock);
803 803 }
  804 +
  805 + sig->notify_count = count;
  806 + sig->group_exit_task = tsk;
804 807 while (atomic_read(&sig->count) > count) {
805   - sig->group_exit_task = tsk;
806   - sig->notify_count = count;
807 808 __set_current_state(TASK_UNINTERRUPTIBLE);
808 809 spin_unlock_irq(lock);
809 810 schedule();
810 811 spin_lock_irq(lock);
811 812 }
812   - sig->group_exit_task = NULL;
813   - sig->notify_count = 0;
814 813 spin_unlock_irq(lock);
815 814  
816 815 /*
817 816  
818 817  
... ... @@ -819,15 +818,18 @@
819 818 * and to assume its PID:
820 819 */
821 820 if (!thread_group_leader(tsk)) {
822   - /*
823   - * Wait for the thread group leader to be a zombie.
824   - * It should already be zombie at this point, most
825   - * of the time.
826   - */
827 821 leader = tsk->group_leader;
828   - while (leader->exit_state != EXIT_ZOMBIE)
829   - yield();
830 822  
  823 + sig->notify_count = -1;
  824 + for (;;) {
  825 + write_lock_irq(&tasklist_lock);
  826 + if (likely(leader->exit_state))
  827 + break;
  828 + __set_current_state(TASK_UNINTERRUPTIBLE);
  829 + write_unlock_irq(&tasklist_lock);
  830 + schedule();
  831 + }
  832 +
831 833 /*
832 834 * The only record we have of the real-time age of a
833 835 * process, regardless of execs it's done, is start_time.
... ... @@ -840,8 +842,6 @@
840 842 */
841 843 tsk->start_time = leader->start_time;
842 844  
843   - write_lock_irq(&tasklist_lock);
844   -
845 845 BUG_ON(leader->tgid != tsk->tgid);
846 846 BUG_ON(tsk->pid == tsk->tgid);
847 847 /*
... ... @@ -874,6 +874,8 @@
874 874 write_unlock_irq(&tasklist_lock);
875 875 }
876 876  
  877 + sig->group_exit_task = NULL;
  878 + sig->notify_count = 0;
877 879 /*
878 880 * There may be one thread left which is just exiting,
879 881 * but it's safe to stop telling the group to kill themselves.
... ... @@ -92,10 +92,9 @@
92 92 * If there is any task waiting for the group exit
93 93 * then notify it:
94 94 */
95   - if (sig->group_exit_task && atomic_read(&sig->count) == sig->notify_count) {
  95 + if (sig->group_exit_task && atomic_read(&sig->count) == sig->notify_count)
96 96 wake_up_process(sig->group_exit_task);
97   - sig->group_exit_task = NULL;
98   - }
  97 +
99 98 if (tsk == sig->curr_target)
100 99 sig->curr_target = next_thread(tsk);
101 100 /*
... ... @@ -826,6 +825,11 @@
826 825 if (tsk->exit_signal == -1 && likely(!tsk->ptrace))
827 826 state = EXIT_DEAD;
828 827 tsk->exit_state = state;
  828 +
  829 + if (thread_group_leader(tsk) &&
  830 + tsk->signal->notify_count < 0 &&
  831 + tsk->signal->group_exit_task)
  832 + wake_up_process(tsk->signal->group_exit_task);
829 833  
830 834 write_unlock_irq(&tasklist_lock);
831 835