From 7f1a169b88f513e32a432ca0f85bfd282d117bd6 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 25 Dec 2014 15:51:21 +0900 Subject: [PATCH] sched/fair: Fix RCU stall upon -ENOMEM in sched_create_group() When alloc_fair_sched_group() in sched_create_group() fails, free_sched_group() is called, and free_fair_sched_group() is called by free_sched_group(). Since destroy_cfs_bandwidth() is called by free_fair_sched_group() without calling init_cfs_bandwidth(), RCU stall occurs at hrtimer_cancel(): INFO: rcu_sched self-detected stall on CPU { 1} (t=60000 jiffies g=13074 c=13073 q=0) Task dump for CPU 1: (fprintd) R running task 0 6249 1 0x00000088 ... Call Trace: [] sched_show_task+0xa8/0x110 [] dump_cpu_task+0x3d/0x50 [] rcu_dump_cpu_stacks+0x90/0xd0 [] rcu_check_callbacks+0x491/0x700 [] update_process_times+0x4b/0x80 [] tick_sched_handle.isra.20+0x36/0x50 [] tick_sched_timer+0x42/0x70 [] __run_hrtimer+0x69/0x1a0 [] ? tick_sched_handle.isra.20+0x50/0x50 [] hrtimer_interrupt+0xef/0x230 [] local_apic_timer_interrupt+0x3b/0x70 [] smp_apic_timer_interrupt+0x45/0x60 [] apic_timer_interrupt+0x6d/0x80 [] ? lock_hrtimer_base.isra.23+0x18/0x50 [] ? __kmalloc+0x211/0x230 [] hrtimer_try_to_cancel+0x22/0xd0 [] ? __kmalloc+0x211/0x230 [] hrtimer_cancel+0x22/0x30 [] free_fair_sched_group+0x25/0xd0 [] free_sched_group+0x16/0x40 [] sched_create_group+0x4b/0x80 [] sched_autogroup_create_attach+0x43/0x1c0 [] sys_setsid+0x7c/0x110 [] system_call_fastpath+0x12/0x17 Check whether init_cfs_bandwidth() was called before calling destroy_cfs_bandwidth(). Signed-off-by: Tetsuo Handa [ Move the check into destroy_cfs_bandwidth() to aid compilability. ] Signed-off-by: Peter Zijlstra (Intel) Cc: Paul Turner Cc: Ben Segall Cc: Linus Torvalds Link: http://lkml.kernel.org/r/201412252210.GCC30204.SOMVFFOtQJFLOH@I-love.SAKURA.ne.jp Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 6b99659..40667cb 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4005,6 +4005,10 @@ void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b, bool force) static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) { + /* init_cfs_bandwidth() was not called */ + if (!cfs_b->throttled_cfs_rq.next) + return; + hrtimer_cancel(&cfs_b->period_timer); hrtimer_cancel(&cfs_b->slack_timer); } -- 1.9.1