Commit 73ed148aea9dc0508be7e30e7a447f55c1b2f378

Authored by Benjamin Herrenschmidt
1 parent 1de1455f33

powerpc/powernv: Improve kexec reliability

We add a machine_shutdown hook that frees the OPAL interrupts
(so they get masked at the source and don't fire while kexec'ing)
and triggers an IODA reset on all the PCIe host bridges, which
has the effect of blocking all DMAs and subsequent PCI
interrupts.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Showing 7 changed files with 56 additions and 0 deletions Side-by-side Diff

arch/powerpc/include/asm/opal.h
... ... @@ -563,6 +563,8 @@
563 563  
564 564 extern int opal_machine_check(struct pt_regs *regs);
565 565  
  566 +extern void opal_shutdown(void);
  567 +
566 568 #endif /* __ASSEMBLY__ */
567 569  
568 570 #endif /* __OPAL_H */
arch/powerpc/platforms/powernv/opal.c
... ... @@ -15,6 +15,7 @@
15 15 #include <linux/of.h>
16 16 #include <linux/of_platform.h>
17 17 #include <linux/interrupt.h>
  18 +#include <linux/slab.h>
18 19 #include <asm/opal.h>
19 20 #include <asm/firmware.h>
20 21  
... ... @@ -28,6 +29,8 @@
28 29 static struct device_node *opal_node;
29 30 static DEFINE_SPINLOCK(opal_write_lock);
30 31 extern u64 opal_mc_secondary_handler[];
  32 +static unsigned int *opal_irqs;
  33 +static unsigned int opal_irq_count;
31 34  
32 35 int __init early_init_dt_scan_opal(unsigned long node,
33 36 const char *uname, int depth, void *data)
... ... @@ -323,6 +326,8 @@
323 326 irqs = of_get_property(opal_node, "opal-interrupts", &irqlen);
324 327 pr_debug("opal: Found %d interrupts reserved for OPAL\n",
325 328 irqs ? (irqlen / 4) : 0);
  329 + opal_irq_count = irqlen / 4;
  330 + opal_irqs = kzalloc(opal_irq_count * sizeof(unsigned int), GFP_KERNEL);
326 331 for (i = 0; irqs && i < (irqlen / 4); i++, irqs++) {
327 332 unsigned int hwirq = be32_to_cpup(irqs);
328 333 unsigned int irq = irq_create_mapping(NULL, hwirq);
329 334  
... ... @@ -334,8 +339,20 @@
334 339 if (rc)
335 340 pr_warning("opal: Error %d requesting irq %d"
336 341 " (0x%x)\n", rc, irq, hwirq);
  342 + opal_irqs[i] = irq;
337 343 }
338 344 return 0;
339 345 }
340 346 subsys_initcall(opal_init);
  347 +
  348 +void opal_shutdown(void)
  349 +{
  350 + unsigned int i;
  351 +
  352 + for (i = 0; i < opal_irq_count; i++) {
  353 + if (opal_irqs[i])
  354 + free_irq(opal_irqs[i], 0);
  355 + opal_irqs[i] = 0;
  356 + }
  357 +}
arch/powerpc/platforms/powernv/pci-ioda.c
... ... @@ -1048,6 +1048,12 @@
1048 1048 return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
1049 1049 }
1050 1050  
  1051 +static void pnv_pci_ioda_shutdown(struct pnv_phb *phb)
  1052 +{
  1053 + opal_pci_reset(phb->opal_id, OPAL_PCI_IODA_TABLE_RESET,
  1054 + OPAL_ASSERT_RESET);
  1055 +}
  1056 +
1051 1057 void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
1052 1058 {
1053 1059 struct pci_controller *hose;
... ... @@ -1177,6 +1183,9 @@
1177 1183  
1178 1184 /* Setup TCEs */
1179 1185 phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
  1186 +
  1187 + /* Setup shutdown function for kexec */
  1188 + phb->shutdown = pnv_pci_ioda_shutdown;
1180 1189  
1181 1190 /* Setup MSI support */
1182 1191 pnv_pci_init_ioda_msis(phb);
arch/powerpc/platforms/powernv/pci.c
... ... @@ -450,6 +450,18 @@
450 450 pnv_pci_dma_fallback_setup(hose, pdev);
451 451 }
452 452  
  453 +void pnv_pci_shutdown(void)
  454 +{
  455 + struct pci_controller *hose;
  456 +
  457 + list_for_each_entry(hose, &hose_list, list_node) {
  458 + struct pnv_phb *phb = hose->private_data;
  459 +
  460 + if (phb && phb->shutdown)
  461 + phb->shutdown(phb);
  462 + }
  463 +}
  464 +
453 465 /* Fixup wrong class code in p7ioc and p8 root complex */
454 466 static void pnv_p7ioc_rc_quirk(struct pci_dev *dev)
455 467 {
arch/powerpc/platforms/powernv/pci.h
... ... @@ -86,6 +86,7 @@
86 86 void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev);
87 87 void (*fixup_phb)(struct pci_controller *hose);
88 88 u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn);
  89 + void (*shutdown)(struct pnv_phb *phb);
89 90  
90 91 union {
91 92 struct {
... ... @@ -158,5 +159,6 @@
158 159 extern void pnv_pci_init_ioda2_phb(struct device_node *np);
159 160 extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
160 161 u64 *startp, u64 *endp);
  162 +
161 163 #endif /* __POWERNV_PCI_H */
arch/powerpc/platforms/powernv/powernv.h
... ... @@ -9,8 +9,10 @@
9 9  
10 10 #ifdef CONFIG_PCI
11 11 extern void pnv_pci_init(void);
  12 +extern void pnv_pci_shutdown(void);
12 13 #else
13 14 static inline void pnv_pci_init(void) { }
  15 +static inline void pnv_pci_shutdown(void) { }
14 16 #endif
15 17  
16 18 #endif /* _POWERNV_H */
arch/powerpc/platforms/powernv/setup.c
... ... @@ -126,6 +126,17 @@
126 126 {
127 127 }
128 128  
  129 +static void pnv_shutdown(void)
  130 +{
  131 + /* Let the PCI code clear up IODA tables */
  132 + pnv_pci_shutdown();
  133 +
  134 + /* And unregister all OPAL interrupts so they don't fire
  135 + * up while we kexec
  136 + */
  137 + opal_shutdown();
  138 +}
  139 +
129 140 #ifdef CONFIG_KEXEC
130 141 static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
131 142 {
... ... @@ -187,6 +198,7 @@
187 198 .init_IRQ = pnv_init_IRQ,
188 199 .show_cpuinfo = pnv_show_cpuinfo,
189 200 .progress = pnv_progress,
  201 + .machine_shutdown = pnv_shutdown,
190 202 .power_save = power7_idle,
191 203 .calibrate_decr = generic_calibrate_decr,
192 204 #ifdef CONFIG_KEXEC