Commit 4ab6c08336535f8c8e42cf45d7adeda882eff06e

Authored by Oleg Nesterov
Committed by Linus Torvalds
1 parent 03ef83af52

clone(): fix race between copy_process() and de_thread()

Spotted by Hiroshi Shimamoto who also provided the test-case below.

copy_process() uses signal->count as a reference counter, but it is not one.
This test case

	#include <sys/types.h>
	#include <sys/wait.h>
	#include <unistd.h>
	#include <stdio.h>
	#include <errno.h>
	#include <pthread.h>

	/*
	 * Thread body that never finishes: it sleeps in one-second steps
	 * forever.  The argument p is not used.
	 */
	void *null_thread(void *p)
	{
		for (;;)
			sleep(1);

		/* Not reached: the loop above has no exit. */
		return NULL;
	}

	/*
	 * Thread body that tries to replace the process image with /bin/true.
	 * execl() only returns on failure; in that case the thread falls back
	 * to idling forever via null_thread().
	 */
	void *exec_thread(void *p)
	{
		/*
		 * The argument list of a variadic exec call must be terminated
		 * by a null pointer of type char *; a bare NULL may expand to an
		 * integer constant, so cast it explicitly.
		 */
		execl("/bin/true", "/bin/true", (char *)NULL);

		return null_thread(p);
	}

	/*
	 * Reproducer driver: fork a child over and over.  Each child starts
	 * one thread that calls exec while the child's main thread keeps
	 * creating new threads, which is the thread-creation vs. exec race
	 * described in this commit message.  argc and argv are not used.
	 */
	int main(int argc, char **argv)
	{
		for (;;) {
			pid_t pid;
			int ret, status;

			pid = fork();
			/* Stop the whole test once fork() fails. */
			if (pid < 0)
				break;

			if (!pid) {
				pthread_t tid;

				/* Child: one thread attempts the exec ... */
				pthread_create(&tid, NULL, exec_thread, NULL);
				/*
				 * ... while this thread creates idle threads without
				 * end.  Return values are ignored and the loop has no
				 * exit, so the child never falls through to the
				 * waitpid() below.
				 */
				for (;;)
					pthread_create(&tid, NULL, null_thread, NULL);
			}

			/* Parent: wait for this child, retrying if interrupted (EINTR). */
			do {
				ret = waitpid(pid, &status, 0);
			} while (ret == -1 && errno == EINTR);
		}

		return 0;
	}

quickly creates an unkillable task.

If copy_process(CLONE_THREAD) races with de_thread(), the
atomic_inc(&signal->count) done by copy_signal() breaks the
signal->notify_count logic, and the execing thread can hang forever in
kernel space.

Change copy_process() to increment count/live only when we know for sure
we can't fail.  In this case the forked thread will take care of its
reference to signal correctly.

If copy_process() fails, check the CLONE_THREAD flag.  If it is set - do
nothing, the counters were not changed and current belongs to the same
thread group.  If it is not set, ->signal must be released in any case
(and ->count must be == 1), the forked child is the only thread in the
thread group.

We need more cleanups here, in particular signal->count should not be used
by de_thread/__exit_signal at all.  This patch only fixes the bug.

Reported-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Tested-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Roland McGrath <roland@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 1 changed file with 5 additions and 15 deletions Side-by-side Diff

... ... @@ -815,11 +815,8 @@
815 815 {
816 816 struct signal_struct *sig;
817 817  
818   - if (clone_flags & CLONE_THREAD) {
819   - atomic_inc(&current->signal->count);
820   - atomic_inc(&current->signal->live);
  818 + if (clone_flags & CLONE_THREAD)
821 819 return 0;
822   - }
823 820  
824 821 sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
825 822 tsk->signal = sig;
... ... @@ -877,16 +874,6 @@
877 874 kmem_cache_free(signal_cachep, sig);
878 875 }
879 876  
880   -static void cleanup_signal(struct task_struct *tsk)
881   -{
882   - struct signal_struct *sig = tsk->signal;
883   -
884   - atomic_dec(&sig->live);
885   -
886   - if (atomic_dec_and_test(&sig->count))
887   - __cleanup_signal(sig);
888   -}
889   -
890 877 static void copy_flags(unsigned long clone_flags, struct task_struct *p)
891 878 {
892 879 unsigned long new_flags = p->flags;
... ... @@ -1239,6 +1226,8 @@
1239 1226 }
1240 1227  
1241 1228 if (clone_flags & CLONE_THREAD) {
  1229 + atomic_inc(&current->signal->count);
  1230 + atomic_inc(&current->signal->live);
1242 1231 p->group_leader = current->group_leader;
1243 1232 list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
1244 1233 }
... ... @@ -1282,7 +1271,8 @@
1282 1271 if (p->mm)
1283 1272 mmput(p->mm);
1284 1273 bad_fork_cleanup_signal:
1285   - cleanup_signal(p);
  1274 + if (!(clone_flags & CLONE_THREAD))
  1275 + __cleanup_signal(p->signal);
1286 1276 bad_fork_cleanup_sighand:
1287 1277 __cleanup_sighand(p->sighand);
1288 1278 bad_fork_cleanup_fs: