Commit 0fe1ac48bef018bed896307cd12f6ca9b5e704ab
Committed by Benjamin Herrenschmidt
1 parent: cea0d767c2
Exists in master and in 7 other branches
powerpc/perf_event: Fix oops due to perf_event_do_pending call
Anton Blanchard found that large POWER systems would occasionally crash in the exception exit path when profiling with perf_events. The symptom was that an interrupt would occur late in the exit path when the MSR[RI] (recoverable interrupt) bit was clear. Interrupts should be hard-disabled at this point but they were enabled. Because the interrupt was not recoverable the system panicked.

The reason is that the exception exit path was calling perf_event_do_pending after hard-disabling interrupts, and perf_event_do_pending will re-enable interrupts.

The simplest and cleanest fix for this is to use the same mechanism that 32-bit powerpc does, namely to cause a self-IPI by setting the decrementer to 1. This means we can remove the tests in the exception exit path and raw_local_irq_restore.

This also makes sure that the call to perf_event_do_pending from timer_interrupt() happens within irq_enter/irq_exit. (Note that calling perf_event_do_pending from timer_interrupt does not mean that there is a possible 1/HZ latency; setting the decrementer to 1 ensures that the timer interrupt will happen immediately, i.e. within one timebase tick, which is a few nanoseconds or 10s of nanoseconds.)

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: stable@kernel.org
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Showing 5 changed files with 48 additions and 66 deletions (side-by-side diff)
arch/powerpc/include/asm/hw_irq.h
... | ... | @@ -130,44 +130,6 @@ |
130 | 130 | */ |
131 | 131 | struct irq_chip; |
132 | 132 | |
133 | -#ifdef CONFIG_PERF_EVENTS | |
134 | - | |
135 | -#ifdef CONFIG_PPC64 | |
136 | -static inline unsigned long test_perf_event_pending(void) | |
137 | -{ | |
138 | - unsigned long x; | |
139 | - | |
140 | - asm volatile("lbz %0,%1(13)" | |
141 | - : "=r" (x) | |
142 | - : "i" (offsetof(struct paca_struct, perf_event_pending))); | |
143 | - return x; | |
144 | -} | |
145 | - | |
146 | -static inline void set_perf_event_pending(void) | |
147 | -{ | |
148 | - asm volatile("stb %0,%1(13)" : : | |
149 | - "r" (1), | |
150 | - "i" (offsetof(struct paca_struct, perf_event_pending))); | |
151 | -} | |
152 | - | |
153 | -static inline void clear_perf_event_pending(void) | |
154 | -{ | |
155 | - asm volatile("stb %0,%1(13)" : : | |
156 | - "r" (0), | |
157 | - "i" (offsetof(struct paca_struct, perf_event_pending))); | |
158 | -} | |
159 | -#endif /* CONFIG_PPC64 */ | |
160 | - | |
161 | -#else /* CONFIG_PERF_EVENTS */ | |
162 | - | |
163 | -static inline unsigned long test_perf_event_pending(void) | |
164 | -{ | |
165 | - return 0; | |
166 | -} | |
167 | - | |
168 | -static inline void clear_perf_event_pending(void) {} | |
169 | -#endif /* CONFIG_PERF_EVENTS */ | |
170 | - | |
171 | 133 | #endif /* __KERNEL__ */ |
172 | 134 | #endif /* _ASM_POWERPC_HW_IRQ_H */ |
arch/powerpc/kernel/asm-offsets.c
... | ... | @@ -133,7 +133,6 @@ |
133 | 133 | DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); |
134 | 134 | DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); |
135 | 135 | DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled)); |
136 | - DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_event_pending)); | |
137 | 136 | DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); |
138 | 137 | #ifdef CONFIG_PPC_MM_SLICES |
139 | 138 | DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct, |
arch/powerpc/kernel/entry_64.S
... | ... | @@ -556,15 +556,6 @@ |
556 | 556 | 2: |
557 | 557 | TRACE_AND_RESTORE_IRQ(r5); |
558 | 558 | |
559 | -#ifdef CONFIG_PERF_EVENTS | |
560 | - /* check paca->perf_event_pending if we're enabling ints */ | |
561 | - lbz r3,PACAPERFPEND(r13) | |
562 | - and. r3,r3,r5 | |
563 | - beq 27f | |
564 | - bl .perf_event_do_pending | |
565 | -27: | |
566 | -#endif /* CONFIG_PERF_EVENTS */ | |
567 | - | |
568 | 559 | /* extract EE bit and use it to restore paca->hard_enabled */ |
569 | 560 | ld r3,_MSR(r1) |
570 | 561 | rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */ |
arch/powerpc/kernel/irq.c
... | ... | @@ -53,7 +53,6 @@ |
53 | 53 | #include <linux/bootmem.h> |
54 | 54 | #include <linux/pci.h> |
55 | 55 | #include <linux/debugfs.h> |
56 | -#include <linux/perf_event.h> | |
57 | 56 | |
58 | 57 | #include <asm/uaccess.h> |
59 | 58 | #include <asm/system.h> |
... | ... | @@ -144,11 +143,6 @@ |
144 | 143 | iseries_handle_interrupts(); |
145 | 144 | } |
146 | 145 | #endif /* CONFIG_PPC_STD_MMU_64 */ |
147 | - | |
148 | - if (test_perf_event_pending()) { | |
149 | - clear_perf_event_pending(); | |
150 | - perf_event_do_pending(); | |
151 | - } | |
152 | 146 | |
153 | 147 | /* |
154 | 148 | * if (get_paca()->hard_enabled) return; |
arch/powerpc/kernel/time.c
... | ... | @@ -532,25 +532,60 @@ |
532 | 532 | } |
533 | 533 | #endif /* CONFIG_PPC_ISERIES */ |
534 | 534 | |
535 | -#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_PPC32) | |
536 | -DEFINE_PER_CPU(u8, perf_event_pending); | |
535 | +#ifdef CONFIG_PERF_EVENTS | |
537 | 536 | |
538 | -void set_perf_event_pending(void) | |
537 | +/* | |
538 | + * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable... | |
539 | + */ | |
540 | +#ifdef CONFIG_PPC64 | |
541 | +static inline unsigned long test_perf_event_pending(void) | |
539 | 542 | { |
540 | - get_cpu_var(perf_event_pending) = 1; | |
541 | - set_dec(1); | |
542 | - put_cpu_var(perf_event_pending); | |
543 | + unsigned long x; | |
544 | + | |
545 | + asm volatile("lbz %0,%1(13)" | |
546 | + : "=r" (x) | |
547 | + : "i" (offsetof(struct paca_struct, perf_event_pending))); | |
548 | + return x; | |
543 | 549 | } |
544 | 550 | |
551 | +static inline void set_perf_event_pending_flag(void) | |
552 | +{ | |
553 | + asm volatile("stb %0,%1(13)" : : | |
554 | + "r" (1), | |
555 | + "i" (offsetof(struct paca_struct, perf_event_pending))); | |
556 | +} | |
557 | + | |
558 | +static inline void clear_perf_event_pending(void) | |
559 | +{ | |
560 | + asm volatile("stb %0,%1(13)" : : | |
561 | + "r" (0), | |
562 | + "i" (offsetof(struct paca_struct, perf_event_pending))); | |
563 | +} | |
564 | + | |
565 | +#else /* 32-bit */ | |
566 | + | |
567 | +DEFINE_PER_CPU(u8, perf_event_pending); | |
568 | + | |
569 | +#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1 | |
545 | 570 | #define test_perf_event_pending() __get_cpu_var(perf_event_pending) |
546 | 571 | #define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0 |
547 | 572 | |
548 | -#else /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */ | |
573 | +#endif /* 32 vs 64 bit */ | |
549 | 574 | |
575 | +void set_perf_event_pending(void) | |
576 | +{ | |
577 | + preempt_disable(); | |
578 | + set_perf_event_pending_flag(); | |
579 | + set_dec(1); | |
580 | + preempt_enable(); | |
581 | +} | |
582 | + | |
583 | +#else /* CONFIG_PERF_EVENTS */ | |
584 | + | |
550 | 585 | #define test_perf_event_pending() 0 |
551 | 586 | #define clear_perf_event_pending() |
552 | 587 | |
553 | -#endif /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */ | |
588 | +#endif /* CONFIG_PERF_EVENTS */ | |
554 | 589 | |
555 | 590 | /* |
556 | 591 | * For iSeries shared processors, we have to let the hypervisor |
... | ... | @@ -582,10 +617,6 @@ |
582 | 617 | set_dec(DECREMENTER_MAX); |
583 | 618 | |
584 | 619 | #ifdef CONFIG_PPC32 |
585 | - if (test_perf_event_pending()) { | |
586 | - clear_perf_event_pending(); | |
587 | - perf_event_do_pending(); | |
588 | - } | |
589 | 620 | if (atomic_read(&ppc_n_lost_interrupts) != 0) |
590 | 621 | do_IRQ(regs); |
591 | 622 | #endif |
... | ... | @@ -603,6 +634,11 @@ |
603 | 634 | irq_enter(); |
604 | 635 | |
605 | 636 | calculate_steal_time(); |
637 | + | |
638 | + if (test_perf_event_pending()) { | |
639 | + clear_perf_event_pending(); | |
640 | + perf_event_do_pending(); | |
641 | + } | |
606 | 642 | |
607 | 643 | #ifdef CONFIG_PPC_ISERIES |
608 | 644 | if (firmware_has_feature(FW_FEATURE_ISERIES)) |