Commit 0fe1ac48bef018bed896307cd12f6ca9b5e704ab

Authored by Paul Mackerras
Committed by Benjamin Herrenschmidt
1 parent cea0d767c2

powerpc/perf_event: Fix oops due to perf_event_do_pending call

Anton Blanchard found that large POWER systems would occasionally
crash in the exception exit path when profiling with perf_events.
The symptom was that an interrupt would occur late in the exit path
when the MSR[RI] (recoverable interrupt) bit was clear.  Interrupts
should be hard-disabled at this point but they were enabled.  Because
the interrupt was not recoverable the system panicked.

The reason is that the exception exit path was calling
perf_event_do_pending after hard-disabling interrupts, and
perf_event_do_pending will re-enable interrupts.
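
In other words, the removed entry_64.S hunk below amounted to roughly the
following (a simplified C-level sketch of the assembly; the condition name is
illustrative, not real code):

	/* old 64-bit exception exit path, interrupts already hard-disabled */
	if (get_paca()->perf_event_pending && restoring_soft_enabled_state) {
		perf_event_do_pending();	/* may re-enable interrupts... */
	}
	/* ...so an interrupt can arrive later in the exit path, after
	 * MSR[RI] has been cleared, making the exception unrecoverable. */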

The simplest and cleanest fix for this is to use the same mechanism
that 32-bit powerpc does, namely to cause a self-IPI by setting the
decrementer to 1.  This means we can remove the tests in the exception
exit path and raw_local_irq_restore.
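
The new mechanism can be seen in the time.c hunk below: set_perf_event_pending()
now just sets a per-CPU flag (a byte in the PACA on 64-bit, a per-cpu variable
on 32-bit) and arms the decrementer, paraphrased here in C:

	void set_perf_event_pending(void)
	{
		preempt_disable();
		set_perf_event_pending_flag();	/* mark this CPU's pending flag */
		set_dec(1);			/* decrementer fires within one timebase tick */
		preempt_enable();
	}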

This also makes sure that the call to perf_event_do_pending from
timer_interrupt() happens within irq_enter/irq_exit.  (Note that
calling perf_event_do_pending from timer_interrupt does not mean that
there is a possible 1/HZ latency; setting the decrementer to 1 ensures
that the timer interrupt will happen immediately, i.e. within one
timebase tick, which is a few nanoseconds to a few tens of nanoseconds.)
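
Correspondingly, the pending work is drained from timer_interrupt() right after
irq_enter(), as the final time.c hunk below adds:

	/* in timer_interrupt(), after irq_enter() and calculate_steal_time() */
	if (test_perf_event_pending()) {
		clear_perf_event_pending();
		perf_event_do_pending();
	}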

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: stable@kernel.org
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Showing 5 changed files with 48 additions and 66 deletions

arch/powerpc/include/asm/hw_irq.h
... ... @@ -130,44 +130,6 @@
130 130 */
131 131 struct irq_chip;
132 132  
133   -#ifdef CONFIG_PERF_EVENTS
134   -
135   -#ifdef CONFIG_PPC64
136   -static inline unsigned long test_perf_event_pending(void)
137   -{
138   - unsigned long x;
139   -
140   - asm volatile("lbz %0,%1(13)"
141   - : "=r" (x)
142   - : "i" (offsetof(struct paca_struct, perf_event_pending)));
143   - return x;
144   -}
145   -
146   -static inline void set_perf_event_pending(void)
147   -{
148   - asm volatile("stb %0,%1(13)" : :
149   - "r" (1),
150   - "i" (offsetof(struct paca_struct, perf_event_pending)));
151   -}
152   -
153   -static inline void clear_perf_event_pending(void)
154   -{
155   - asm volatile("stb %0,%1(13)" : :
156   - "r" (0),
157   - "i" (offsetof(struct paca_struct, perf_event_pending)));
158   -}
159   -#endif /* CONFIG_PPC64 */
160   -
161   -#else /* CONFIG_PERF_EVENTS */
162   -
163   -static inline unsigned long test_perf_event_pending(void)
164   -{
165   - return 0;
166   -}
167   -
168   -static inline void clear_perf_event_pending(void) {}
169   -#endif /* CONFIG_PERF_EVENTS */
170   -
171 133 #endif /* __KERNEL__ */
172 134 #endif /* _ASM_POWERPC_HW_IRQ_H */
arch/powerpc/kernel/asm-offsets.c
... ... @@ -133,7 +133,6 @@
133 133 DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
134 134 DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
135 135 DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled));
136   - DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_event_pending));
137 136 DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
138 137 #ifdef CONFIG_PPC_MM_SLICES
139 138 DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct,
arch/powerpc/kernel/entry_64.S
... ... @@ -556,15 +556,6 @@
556 556 2:
557 557 TRACE_AND_RESTORE_IRQ(r5);
558 558  
559   -#ifdef CONFIG_PERF_EVENTS
560   - /* check paca->perf_event_pending if we're enabling ints */
561   - lbz r3,PACAPERFPEND(r13)
562   - and. r3,r3,r5
563   - beq 27f
564   - bl .perf_event_do_pending
565   -27:
566   -#endif /* CONFIG_PERF_EVENTS */
567   -
568 559 /* extract EE bit and use it to restore paca->hard_enabled */
569 560 ld r3,_MSR(r1)
570 561 rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */
arch/powerpc/kernel/irq.c
... ... @@ -53,7 +53,6 @@
53 53 #include <linux/bootmem.h>
54 54 #include <linux/pci.h>
55 55 #include <linux/debugfs.h>
56   -#include <linux/perf_event.h>
57 56  
58 57 #include <asm/uaccess.h>
59 58 #include <asm/system.h>
... ... @@ -144,11 +143,6 @@
144 143 iseries_handle_interrupts();
145 144 }
146 145 #endif /* CONFIG_PPC_STD_MMU_64 */
147   -
148   - if (test_perf_event_pending()) {
149   - clear_perf_event_pending();
150   - perf_event_do_pending();
151   - }
152 146  
153 147 /*
154 148 * if (get_paca()->hard_enabled) return;
arch/powerpc/kernel/time.c
... ... @@ -532,25 +532,60 @@
532 532 }
533 533 #endif /* CONFIG_PPC_ISERIES */
534 534  
535   -#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_PPC32)
536   -DEFINE_PER_CPU(u8, perf_event_pending);
  535 +#ifdef CONFIG_PERF_EVENTS
537 536  
538   -void set_perf_event_pending(void)
  537 +/*
  538 + * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
  539 + */
  540 +#ifdef CONFIG_PPC64
  541 +static inline unsigned long test_perf_event_pending(void)
539 542 {
540   - get_cpu_var(perf_event_pending) = 1;
541   - set_dec(1);
542   - put_cpu_var(perf_event_pending);
  543 + unsigned long x;
  544 +
  545 + asm volatile("lbz %0,%1(13)"
  546 + : "=r" (x)
  547 + : "i" (offsetof(struct paca_struct, perf_event_pending)));
  548 + return x;
543 549 }
544 550  
  551 +static inline void set_perf_event_pending_flag(void)
  552 +{
  553 + asm volatile("stb %0,%1(13)" : :
  554 + "r" (1),
  555 + "i" (offsetof(struct paca_struct, perf_event_pending)));
  556 +}
  557 +
  558 +static inline void clear_perf_event_pending(void)
  559 +{
  560 + asm volatile("stb %0,%1(13)" : :
  561 + "r" (0),
  562 + "i" (offsetof(struct paca_struct, perf_event_pending)));
  563 +}
  564 +
  565 +#else /* 32-bit */
  566 +
  567 +DEFINE_PER_CPU(u8, perf_event_pending);
  568 +
  569 +#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1
545 570 #define test_perf_event_pending() __get_cpu_var(perf_event_pending)
546 571 #define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0
547 572  
548   -#else /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */
  573 +#endif /* 32 vs 64 bit */
549 574  
  575 +void set_perf_event_pending(void)
  576 +{
  577 + preempt_disable();
  578 + set_perf_event_pending_flag();
  579 + set_dec(1);
  580 + preempt_enable();
  581 +}
  582 +
  583 +#else /* CONFIG_PERF_EVENTS */
  584 +
550 585 #define test_perf_event_pending() 0
551 586 #define clear_perf_event_pending()
552 587  
553   -#endif /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */
  588 +#endif /* CONFIG_PERF_EVENTS */
554 589  
555 590 /*
556 591 * For iSeries shared processors, we have to let the hypervisor
... ... @@ -582,10 +617,6 @@
582 617 set_dec(DECREMENTER_MAX);
583 618  
584 619 #ifdef CONFIG_PPC32
585   - if (test_perf_event_pending()) {
586   - clear_perf_event_pending();
587   - perf_event_do_pending();
588   - }
589 620 if (atomic_read(&ppc_n_lost_interrupts) != 0)
590 621 do_IRQ(regs);
591 622 #endif
... ... @@ -603,6 +634,11 @@
603 634 irq_enter();
604 635  
605 636 calculate_steal_time();
  637 +
  638 + if (test_perf_event_pending()) {
  639 + clear_perf_event_pending();
  640 + perf_event_do_pending();
  641 + }
606 642  
607 643 #ifdef CONFIG_PPC_ISERIES
608 644 if (firmware_has_feature(FW_FEATURE_ISERIES))