Commit dfee3a7b92208b30f77876068aece9ea571270c2

Authored by Jason Wessel
1 parent 39a0715f5a

debug_core: refactor locking for master/slave cpus

For quite some time there have been problems with memory barriers and
various races with NMI on multiprocessor systems using the kernel
debugger.  The algorithm for entering the kernel debug core and
resuming kernel execution was racy, and it had several known edge-case
problems when attempting to debug a heavily loaded system with
breakpoints that are hit repeatedly and quickly.

The prior "locking" design's entry sequence worked as follows (a
condensed sketch follows the list):

  * The atomic counter kgdb_active was used with atomic exchange in
    order to elect a master cpu out of all the cpus that may have
    taken a debug exception.
  * The master cpu increments all elements of passive_cpu_wait[].
  * The master cpu issues the round up cpus message.
  * Each "slave cpu" that enters the debug core increments its own
    element in cpu_in_kgdb[].
  * Each "slave cpu" spins on passive_cpu_wait[] until it becomes 0.
  * The master cpu debugs the system.
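
Condensed, that old path looked roughly like this (a simplified
sketch distilled from the code removed in the diff below; irq
handling, single-step logic, and error paths are omitted):

    /* Old scheme: election by cmpxchg, per-cpu atomic wait flags. */
    static atomic_t passive_cpu_wait[NR_CPUS];
    static atomic_t cpu_in_kgdb[NR_CPUS];

    /* Election on the kgdb_active counter (as in the removed code). */
    if (atomic_cmpxchg(&kgdb_active, -1, cpu) == cpu)
            break;              /* this cpu proceeds as the master */

    /* Master: raise every slave's wait flag before the round up. */
    for (i = 0; i < NR_CPUS; i++)
            atomic_inc(&passive_cpu_wait[i]);

    /* Slave: mark itself present, then spin on its own flag. */
    atomic_inc(&cpu_in_kgdb[cpu]);
    while (atomic_read(&passive_cpu_wait[cpu]))
            cpu_relax();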

The new scheme removes the two arrays of atomic counters and replaces
them with two single counters.  One counter is used to count the
number of cpus waiting to become a master cpu (because one or more
cpus hit an exception).  The second counter is used to indicate how
many cpus have entered as slave cpus.

The new entry logic works as follows (a condensed sketch follows the
list):

  * One or more cpus enter via kgdb_handle_exception() and increment
    masters_in_kgdb.  Each cpu attempts to take the spinlock called
    dbg_master_lock.
  * The master cpu sets kgdb_active to the current cpu.
  * The master cpu takes the spinlock dbg_slave_lock.
  * The master cpu asks to round up all the other cpus.
  * Each slave cpu that is not already in kgdb_handle_exception()
    enters and increments slaves_in_kgdb.  Each slave then spins until
    the master releases dbg_slave_lock.
  * The master cpu waits for the sum of masters_in_kgdb and
    slaves_in_kgdb to equal the number of online cpus.
  * The master cpu debugs the system.
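
Distilled from the code added in the diff below, the new entry path
is roughly (re-entry, single-step, and error handling are omitted):

    static DEFINE_RAW_SPINLOCK(dbg_master_lock);
    static DEFINE_RAW_SPINLOCK(dbg_slave_lock);
    static atomic_t masters_in_kgdb;
    static atomic_t slaves_in_kgdb;

    /* Every entering cpu first announces itself on one counter. */
    if (exception_state == DCPU_WANT_MASTER)
            atomic_inc(&masters_in_kgdb);
    else
            atomic_inc(&slaves_in_kgdb);

    /* Election: whoever takes dbg_master_lock becomes the master. */
    if (raw_spin_trylock(&dbg_master_lock))
            atomic_xchg(&kgdb_active, cpu);

    /* Master: hold the slaves, then wait for all cpus to check in. */
    raw_spin_lock(&dbg_slave_lock);
    while (kgdb_do_roundup && (atomic_read(&masters_in_kgdb) +
            atomic_read(&slaves_in_kgdb)) != num_online_cpus())
            cpu_relax();

    /* Slave: spin until the master releases dbg_slave_lock. */
    while (raw_spin_is_locked(&dbg_slave_lock))
            cpu_relax();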

In the new design kgdb_active can only be changed while holding
dbg_master_lock.  Stress testing has not turned up any of the
entry/exit races that existed in the prior locking design, which
suffered from atomic variables that were not truly atomic in the way
kgdb used them, along with memory barrier races.
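
The exit path unwinds in the matching order, condensed from the diff
below: the per-cpu bookkeeping is dropped first, and kgdb_active is
freed only while dbg_master_lock is still held, with the lock
released last.

    /* Master exit: release the slaves and wait for them to leave. */
    raw_spin_unlock(&dbg_slave_lock);
    while (kgdb_do_roundup && atomic_read(&slaves_in_kgdb))
            cpu_relax();

    /* Drop this cpu's claim on the debug core... */
    kgdb_info[cpu].exception_state &=
            ~(DCPU_WANT_MASTER | DCPU_IS_SLAVE);
    kgdb_info[cpu].enter_kgdb--;
    smp_mb__before_atomic_dec();
    atomic_dec(&masters_in_kgdb);

    /* ...then free kgdb_active under dbg_master_lock and unlock. */
    atomic_set(&kgdb_active, -1);
    raw_spin_unlock(&dbg_master_lock);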

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Acked-by: Dongdong Deng <dongdong.deng@windriver.com>

Showing 2 changed files with 56 additions and 50 deletions

kernel/debug/debug_core.c
@@ -110,13 +110,15 @@
  */
 atomic_t kgdb_active = ATOMIC_INIT(-1);
 EXPORT_SYMBOL_GPL(kgdb_active);
+static DEFINE_RAW_SPINLOCK(dbg_master_lock);
+static DEFINE_RAW_SPINLOCK(dbg_slave_lock);
 
 /*
  * We use NR_CPUs not PERCPU, in case kgdb is used to debug early
  * bootup code (which might not have percpu set up yet):
  */
-static atomic_t passive_cpu_wait[NR_CPUS];
-static atomic_t cpu_in_kgdb[NR_CPUS];
+static atomic_t masters_in_kgdb;
+static atomic_t slaves_in_kgdb;
 static atomic_t kgdb_break_tasklet_var;
 atomic_t kgdb_setting_breakpoint;
 
@@ -478,14 +480,23 @@
        rcu_cpu_stall_reset();
 }
 
-static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs)
+static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs,
+                          int exception_state)
 {
        unsigned long flags;
        int sstep_tries = 100;
        int error;
-       int i, cpu;
+       int cpu;
        int trace_on = 0;
+       int online_cpus = num_online_cpus();
 
+       kgdb_info[ks->cpu].enter_kgdb++;
+       kgdb_info[ks->cpu].exception_state |= exception_state;
+
+       if (exception_state == DCPU_WANT_MASTER)
+               atomic_inc(&masters_in_kgdb);
+       else
+               atomic_inc(&slaves_in_kgdb);
        kgdb_disable_hw_debug(ks->linux_regs);
 
 acquirelock:
@@ -500,14 +511,15 @@
        kgdb_info[cpu].task = current;
        kgdb_info[cpu].ret_state = 0;
        kgdb_info[cpu].irq_depth = hardirq_count() >> HARDIRQ_SHIFT;
-       /*
-        * Make sure the above info reaches the primary CPU before
-        * our cpu_in_kgdb[] flag setting does:
-        */
-       atomic_inc(&cpu_in_kgdb[cpu]);
 
-       if (exception_level == 1)
+       /* Make sure the above info reaches the primary CPU */
+       smp_mb();
+
+       if (exception_level == 1) {
+               if (raw_spin_trylock(&dbg_master_lock))
+                       atomic_xchg(&kgdb_active, cpu);
                goto cpu_master_loop;
+       }
 
        /*
         * CPU will loop if it is a slave or request to become a kgdb
@@ -519,10 +531,12 @@
                        kgdb_info[cpu].exception_state &= ~DCPU_NEXT_MASTER;
                        goto cpu_master_loop;
                } else if (kgdb_info[cpu].exception_state & DCPU_WANT_MASTER) {
-                       if (atomic_cmpxchg(&kgdb_active, -1, cpu) == cpu)
+                       if (raw_spin_trylock(&dbg_master_lock)) {
+                               atomic_xchg(&kgdb_active, cpu);
                                break;
+                       }
                } else if (kgdb_info[cpu].exception_state & DCPU_IS_SLAVE) {
-                       if (!atomic_read(&passive_cpu_wait[cpu]))
+                       if (!raw_spin_is_locked(&dbg_slave_lock))
                                goto return_normal;
                } else {
 return_normal:
@@ -533,7 +547,11 @@
                        arch_kgdb_ops.correct_hw_break();
                        if (trace_on)
                                tracing_on();
-                       atomic_dec(&cpu_in_kgdb[cpu]);
+                       kgdb_info[cpu].exception_state &=
+                               ~(DCPU_WANT_MASTER | DCPU_IS_SLAVE);
+                       kgdb_info[cpu].enter_kgdb--;
+                       smp_mb__before_atomic_dec();
+                       atomic_dec(&slaves_in_kgdb);
                        dbg_touch_watchdogs();
                        local_irq_restore(flags);
                        return 0;
@@ -551,6 +569,7 @@
                    (kgdb_info[cpu].task &&
                     kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) {
                        atomic_set(&kgdb_active, -1);
+                       raw_spin_unlock(&dbg_master_lock);
                        dbg_touch_watchdogs();
                        local_irq_restore(flags);
 
@@ -576,10 +595,8 @@
         * Get the passive CPU lock which will hold all the non-primary
         * CPU in a spin state while the debugger is active
         */
-       if (!kgdb_single_step) {
-               for (i = 0; i < NR_CPUS; i++)
-                       atomic_inc(&passive_cpu_wait[i]);
-       }
+       if (!kgdb_single_step)
+               raw_spin_lock(&dbg_slave_lock);
 
 #ifdef CONFIG_SMP
        /* Signal the other CPUs to enter kgdb_wait() */
@@ -590,10 +607,9 @@
        /*
         * Wait for the other CPUs to be notified and be waiting for us:
         */
-       for_each_online_cpu(i) {
-               while (kgdb_do_roundup && !atomic_read(&cpu_in_kgdb[i]))
-                       cpu_relax();
-       }
+       while (kgdb_do_roundup && (atomic_read(&masters_in_kgdb) +
+               atomic_read(&slaves_in_kgdb)) != online_cpus)
+               cpu_relax();
 
        /*
         * At this point the primary processor is completely
@@ -634,24 +650,11 @@
        if (dbg_io_ops->post_exception)
                dbg_io_ops->post_exception();
 
-       atomic_dec(&cpu_in_kgdb[ks->cpu]);
-
        if (!kgdb_single_step) {
-               for (i = NR_CPUS-1; i >= 0; i--)
-                       atomic_dec(&passive_cpu_wait[i]);
-               /*
-                * Wait till all the CPUs have quit from the debugger,
-                * but allow a CPU that hit an exception and is
-                * waiting to become the master to remain in the debug
-                * core.
-                */
-               for_each_online_cpu(i) {
-                       while (kgdb_do_roundup &&
-                              atomic_read(&cpu_in_kgdb[i]) &&
-                              !(kgdb_info[i].exception_state &
-                                DCPU_WANT_MASTER))
-                               cpu_relax();
-               }
+               raw_spin_unlock(&dbg_slave_lock);
+               /* Wait till all the CPUs have quit from the debugger. */
+               while (kgdb_do_roundup && atomic_read(&slaves_in_kgdb))
+                       cpu_relax();
        }
 
 kgdb_restore:
@@ -666,8 +669,15 @@
                arch_kgdb_ops.correct_hw_break();
        if (trace_on)
                tracing_on();
+
+       kgdb_info[cpu].exception_state &=
+               ~(DCPU_WANT_MASTER | DCPU_IS_SLAVE);
+       kgdb_info[cpu].enter_kgdb--;
+       smp_mb__before_atomic_dec();
+       atomic_dec(&masters_in_kgdb);
        /* Free kgdb_active */
        atomic_set(&kgdb_active, -1);
+       raw_spin_unlock(&dbg_master_lock);
        dbg_touch_watchdogs();
        local_irq_restore(flags);
 
@@ -686,7 +696,6 @@
 {
        struct kgdb_state kgdb_var;
        struct kgdb_state *ks = &kgdb_var;
-       int ret;
 
        ks->cpu = raw_smp_processor_id();
        ks->ex_vector = evector;
@@ -697,11 +706,10 @@
 
        if (kgdb_reenter_check(ks))
                return 0; /* Ouch, double exception ! */
-       kgdb_info[ks->cpu].exception_state |= DCPU_WANT_MASTER;
-       ret = kgdb_cpu_enter(ks, regs);
-       kgdb_info[ks->cpu].exception_state &= ~(DCPU_WANT_MASTER |
-                                               DCPU_IS_SLAVE);
-       return ret;
+       if (kgdb_info[ks->cpu].enter_kgdb != 0)
+               return 0;
+
+       return kgdb_cpu_enter(ks, regs, DCPU_WANT_MASTER);
 }
 
 int kgdb_nmicallback(int cpu, void *regs)
@@ -714,12 +722,9 @@
        ks->cpu = cpu;
        ks->linux_regs = regs;
 
-       if (!atomic_read(&cpu_in_kgdb[cpu]) &&
-           atomic_read(&kgdb_active) != -1 &&
-           atomic_read(&kgdb_active) != cpu) {
-               kgdb_info[cpu].exception_state |= DCPU_IS_SLAVE;
-               kgdb_cpu_enter(ks, regs);
-               kgdb_info[cpu].exception_state &= ~DCPU_IS_SLAVE;
+       if (kgdb_info[ks->cpu].enter_kgdb == 0 &&
+           raw_spin_is_locked(&dbg_master_lock)) {
+               kgdb_cpu_enter(ks, regs, DCPU_IS_SLAVE);
                return 0;
        }
 #endif
kernel/debug/debug_core.h
@@ -40,6 +40,7 @@
        int exception_state;
        int ret_state;
        int irq_depth;
+       int enter_kgdb;
 };
 
 extern struct debuggerinfo_struct kgdb_info[];