Commit dfee3a7b92208b30f77876068aece9ea571270c2

Authored by Jason Wessel
1 parent 39a0715f5a

debug_core: refactor locking for master/slave cpus

For quite some time there have been problems with memory barriers and
various races with NMI on multiprocessor systems using the kernel
debugger.  The algorithm for entering the kernel debug core and
resuming kernel execution was racy, and it had several known edge-case
problems when attempting to debug a heavily loaded system with
breakpoints that are hit repeatedly and quickly.

The prior "locking" design's entry sequence worked as follows (a
condensed sketch follows the list):

  * The atomic counter kgdb_active was used with atomic exchange in
    order to elect a master cpu out of all the cpus that may have
    taken a debug exception.
  * The master cpu increments all elements of passive_cpu_wait[].
  * The master cpu issues the round up cpus message.
  * Each "slave cpu" that enters the debug core increments its own
    element in cpu_in_kgdb[].
  * Each "slave cpu" spins on passive_cpu_wait[] until it becomes 0.
  * The master cpu debugs the system.
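
Condensed, that old path looked roughly like this (a simplified
sketch distilled from the code removed in the diff below; irq
handling, single-step logic, and error paths are omitted):

    /* Old scheme: election by cmpxchg, per-cpu atomic wait flags. */
    static atomic_t passive_cpu_wait[NR_CPUS];
    static atomic_t cpu_in_kgdb[NR_CPUS];

    /* Election on the kgdb_active counter (as in the removed code). */
    if (atomic_cmpxchg(&kgdb_active, -1, cpu) == cpu)
            break;              /* this cpu proceeds as the master */

    /* Master: raise every slave's wait flag before the round up. */
    for (i = 0; i < NR_CPUS; i++)
            atomic_inc(&passive_cpu_wait[i]);

    /* Slave: mark itself present, then spin on its own flag. */
    atomic_inc(&cpu_in_kgdb[cpu]);
    while (atomic_read(&passive_cpu_wait[cpu]))
            cpu_relax();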

The new scheme removes the two arrays of atomic counters and replaces
them with two single counters.  One counter is used to count the
number of cpus waiting to become a master cpu (because one or more
cpus hit an exception).  The second counter is used to indicate how
many cpus have entered as slave cpus.

The new entry logic works as follows (a condensed sketch follows the
list):

  * One or more cpus enter via kgdb_handle_exception() and increment
    masters_in_kgdb.  Each cpu attempts to take the spinlock called
    dbg_master_lock.
  * The master cpu sets kgdb_active to the current cpu.
  * The master cpu takes the spinlock dbg_slave_lock.
  * The master cpu asks to round up all the other cpus.
  * Each slave cpu that is not already in kgdb_handle_exception()
    enters and increments slaves_in_kgdb.  Each slave then spins until
    the master releases dbg_slave_lock.
  * The master cpu waits for the sum of masters_in_kgdb and
    slaves_in_kgdb to equal the number of online cpus.
  * The master cpu debugs the system.
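
Distilled from the code added in the diff below, the new entry path
is roughly (re-entry, single-step, and error handling are omitted):

    static DEFINE_RAW_SPINLOCK(dbg_master_lock);
    static DEFINE_RAW_SPINLOCK(dbg_slave_lock);
    static atomic_t masters_in_kgdb;
    static atomic_t slaves_in_kgdb;

    /* Every entering cpu first announces itself on one counter. */
    if (exception_state == DCPU_WANT_MASTER)
            atomic_inc(&masters_in_kgdb);
    else
            atomic_inc(&slaves_in_kgdb);

    /* Election: whoever takes dbg_master_lock becomes the master. */
    if (raw_spin_trylock(&dbg_master_lock))
            atomic_xchg(&kgdb_active, cpu);

    /* Master: hold the slaves, then wait for all cpus to check in. */
    raw_spin_lock(&dbg_slave_lock);
    while (kgdb_do_roundup && (atomic_read(&masters_in_kgdb) +
            atomic_read(&slaves_in_kgdb)) != num_online_cpus())
            cpu_relax();

    /* Slave: spin until the master releases dbg_slave_lock. */
    while (raw_spin_is_locked(&dbg_slave_lock))
            cpu_relax();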

In the new design kgdb_active can only be changed while holding
dbg_master_lock.  Stress testing has not turned up any of the
entry/exit races that existed in the prior locking design, which
suffered from atomic variables that were not truly atomic in the way
kgdb used them, along with memory barrier races.
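
The exit path unwinds in the matching order, condensed from the diff
below: the per-cpu bookkeeping is dropped first, and kgdb_active is
freed only while dbg_master_lock is still held, with the lock
released last.

    /* Master exit: release the slaves and wait for them to leave. */
    raw_spin_unlock(&dbg_slave_lock);
    while (kgdb_do_roundup && atomic_read(&slaves_in_kgdb))
            cpu_relax();

    /* Drop this cpu's claim on the debug core... */
    kgdb_info[cpu].exception_state &=
            ~(DCPU_WANT_MASTER | DCPU_IS_SLAVE);
    kgdb_info[cpu].enter_kgdb--;
    smp_mb__before_atomic_dec();
    atomic_dec(&masters_in_kgdb);

    /* ...then free kgdb_active under dbg_master_lock and unlock. */
    atomic_set(&kgdb_active, -1);
    raw_spin_unlock(&dbg_master_lock);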

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Acked-by: Dongdong Deng <dongdong.deng@windriver.com>

Showing 2 changed files with 56 additions and 50 deletions

kernel/debug/debug_core.c
@@ -110,13 +110,15 @@
  */
 atomic_t kgdb_active = ATOMIC_INIT(-1);
 EXPORT_SYMBOL_GPL(kgdb_active);
+static DEFINE_RAW_SPINLOCK(dbg_master_lock);
+static DEFINE_RAW_SPINLOCK(dbg_slave_lock);
 
 /*
  * We use NR_CPUs not PERCPU, in case kgdb is used to debug early
  * bootup code (which might not have percpu set up yet):
  */
-static atomic_t passive_cpu_wait[NR_CPUS];
-static atomic_t cpu_in_kgdb[NR_CPUS];
+static atomic_t masters_in_kgdb;
+static atomic_t slaves_in_kgdb;
 static atomic_t kgdb_break_tasklet_var;
 atomic_t kgdb_setting_breakpoint;
 
@@ -478,14 +480,23 @@
        rcu_cpu_stall_reset();
 }
 
-static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs)
+static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs,
+                          int exception_state)
 {
        unsigned long flags;
        int sstep_tries = 100;
        int error;
-       int i, cpu;
+       int cpu;
        int trace_on = 0;
+       int online_cpus = num_online_cpus();
 
+       kgdb_info[ks->cpu].enter_kgdb++;
+       kgdb_info[ks->cpu].exception_state |= exception_state;
+
+       if (exception_state == DCPU_WANT_MASTER)
+               atomic_inc(&masters_in_kgdb);
+       else
+               atomic_inc(&slaves_in_kgdb);
        kgdb_disable_hw_debug(ks->linux_regs);
 
 acquirelock:
@@ -500,14 +511,15 @@
        kgdb_info[cpu].task = current;
        kgdb_info[cpu].ret_state = 0;
        kgdb_info[cpu].irq_depth = hardirq_count() >> HARDIRQ_SHIFT;
-       /*
-        * Make sure the above info reaches the primary CPU before
-        * our cpu_in_kgdb[] flag setting does:
-        */
-       atomic_inc(&cpu_in_kgdb[cpu]);
 
-       if (exception_level == 1)
+       /* Make sure the above info reaches the primary CPU */
+       smp_mb();
+
+       if (exception_level == 1) {
+               if (raw_spin_trylock(&dbg_master_lock))
+                       atomic_xchg(&kgdb_active, cpu);
                goto cpu_master_loop;
+       }
 
        /*
         * CPU will loop if it is a slave or request to become a kgdb
@@ -519,10 +531,12 @@
                        kgdb_info[cpu].exception_state &= ~DCPU_NEXT_MASTER;
                        goto cpu_master_loop;
                } else if (kgdb_info[cpu].exception_state & DCPU_WANT_MASTER) {
-                       if (atomic_cmpxchg(&kgdb_active, -1, cpu) == cpu)
+                       if (raw_spin_trylock(&dbg_master_lock)) {
+                               atomic_xchg(&kgdb_active, cpu);
                                break;
+                       }
                } else if (kgdb_info[cpu].exception_state & DCPU_IS_SLAVE) {
-                       if (!atomic_read(&passive_cpu_wait[cpu]))
+                       if (!raw_spin_is_locked(&dbg_slave_lock))
                                goto return_normal;
                } else {
 return_normal:
@@ -533,7 +547,11 @@
                        arch_kgdb_ops.correct_hw_break();
                        if (trace_on)
                                tracing_on();
-                       atomic_dec(&cpu_in_kgdb[cpu]);
+                       kgdb_info[cpu].exception_state &=
+                               ~(DCPU_WANT_MASTER | DCPU_IS_SLAVE);
+                       kgdb_info[cpu].enter_kgdb--;
+                       smp_mb__before_atomic_dec();
+                       atomic_dec(&slaves_in_kgdb);
                        dbg_touch_watchdogs();
                        local_irq_restore(flags);
                        return 0;
@@ -551,6 +569,7 @@
                    (kgdb_info[cpu].task &&
                     kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) {
                        atomic_set(&kgdb_active, -1);
+                       raw_spin_unlock(&dbg_master_lock);
                        dbg_touch_watchdogs();
                        local_irq_restore(flags);
 
@@ -576,10 +595,8 @@
         * Get the passive CPU lock which will hold all the non-primary
         * CPU in a spin state while the debugger is active
         */
-       if (!kgdb_single_step) {
-               for (i = 0; i < NR_CPUS; i++)
-                       atomic_inc(&passive_cpu_wait[i]);
-       }
+       if (!kgdb_single_step)
+               raw_spin_lock(&dbg_slave_lock);
 
 #ifdef CONFIG_SMP
        /* Signal the other CPUs to enter kgdb_wait() */
@@ -590,10 +607,9 @@
        /*
         * Wait for the other CPUs to be notified and be waiting for us:
         */
-       for_each_online_cpu(i) {
-               while (kgdb_do_roundup && !atomic_read(&cpu_in_kgdb[i]))
-                       cpu_relax();
-       }
+       while (kgdb_do_roundup && (atomic_read(&masters_in_kgdb) +
+               atomic_read(&slaves_in_kgdb)) != online_cpus)
+               cpu_relax();
 
        /*
         * At this point the primary processor is completely
@@ -634,24 +650,11 @@
        if (dbg_io_ops->post_exception)
                dbg_io_ops->post_exception();
 
-       atomic_dec(&cpu_in_kgdb[ks->cpu]);
-
        if (!kgdb_single_step) {
-               for (i = NR_CPUS-1; i >= 0; i--)
-                       atomic_dec(&passive_cpu_wait[i]);
-               /*
-                * Wait till all the CPUs have quit from the debugger,
-                * but allow a CPU that hit an exception and is
-                * waiting to become the master to remain in the debug
-                * core.
-                */
-               for_each_online_cpu(i) {
-                       while (kgdb_do_roundup &&
-                              atomic_read(&cpu_in_kgdb[i]) &&
-                              !(kgdb_info[i].exception_state &
-                                DCPU_WANT_MASTER))
-                               cpu_relax();
-               }
+               raw_spin_unlock(&dbg_slave_lock);
+               /* Wait till all the CPUs have quit from the debugger. */
+               while (kgdb_do_roundup && atomic_read(&slaves_in_kgdb))
+                       cpu_relax();
        }
 
 kgdb_restore:
@@ -666,8 +669,15 @@
                arch_kgdb_ops.correct_hw_break();
        if (trace_on)
                tracing_on();
+
+       kgdb_info[cpu].exception_state &=
+               ~(DCPU_WANT_MASTER | DCPU_IS_SLAVE);
+       kgdb_info[cpu].enter_kgdb--;
+       smp_mb__before_atomic_dec();
+       atomic_dec(&masters_in_kgdb);
        /* Free kgdb_active */
        atomic_set(&kgdb_active, -1);
+       raw_spin_unlock(&dbg_master_lock);
        dbg_touch_watchdogs();
        local_irq_restore(flags);
 
@@ -686,7 +696,6 @@
 {
        struct kgdb_state kgdb_var;
        struct kgdb_state *ks = &kgdb_var;
-       int ret;
 
        ks->cpu = raw_smp_processor_id();
        ks->ex_vector = evector;
@@ -697,11 +706,10 @@
 
        if (kgdb_reenter_check(ks))
                return 0; /* Ouch, double exception ! */
-       kgdb_info[ks->cpu].exception_state |= DCPU_WANT_MASTER;
-       ret = kgdb_cpu_enter(ks, regs);
-       kgdb_info[ks->cpu].exception_state &= ~(DCPU_WANT_MASTER |
-                                               DCPU_IS_SLAVE);
-       return ret;
+       if (kgdb_info[ks->cpu].enter_kgdb != 0)
+               return 0;
+
+       return kgdb_cpu_enter(ks, regs, DCPU_WANT_MASTER);
 }
 
 int kgdb_nmicallback(int cpu, void *regs)
@@ -714,12 +722,9 @@
        ks->cpu = cpu;
        ks->linux_regs = regs;
 
-       if (!atomic_read(&cpu_in_kgdb[cpu]) &&
-           atomic_read(&kgdb_active) != -1 &&
-           atomic_read(&kgdb_active) != cpu) {
-               kgdb_info[cpu].exception_state |= DCPU_IS_SLAVE;
-               kgdb_cpu_enter(ks, regs);
-               kgdb_info[cpu].exception_state &= ~DCPU_IS_SLAVE;
+       if (kgdb_info[ks->cpu].enter_kgdb == 0 &&
+           raw_spin_is_locked(&dbg_master_lock)) {
+               kgdb_cpu_enter(ks, regs, DCPU_IS_SLAVE);
                return 0;
        }
 #endif
kernel/debug/debug_core.h
@@ -40,6 +40,7 @@
        int exception_state;
        int ret_state;
        int irq_depth;
+       int enter_kgdb;
 };
 
 extern struct debuggerinfo_struct kgdb_info[];