Commit 8d63f375051bfb1506fa546db840af8510d1cd60
Committed by
James Bottomley
1 parent
4520b0089b
[SCSI] lpfc: add PCI error recovery support
This patch adds PCI Error recovery support to the Emulex Lightpulse Fibrechannel (lpfc) SCSI device driver. Lightly tested at this point, works. Signed-off-by: Linas Vepstas <linas@austin.ibm.com> Acked-by: Bino.Sebastian@Emulex.Com Acked-by: James Smart <james.smart@emulex.com> Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
Showing 2 changed files with 109 additions and 0 deletions Side-by-side Diff
drivers/scsi/lpfc/lpfc_init.c
... | ... | @@ -518,6 +518,10 @@ |
518 | 518 | struct lpfc_sli *psli = &phba->sli; |
519 | 519 | struct lpfc_sli_ring *pring; |
520 | 520 | uint32_t event_data; |
521 | + /* If the pci channel is offline, ignore possible errors, | |
522 | + * since we cannot communicate with the pci card anyway. */ | |
523 | + if (pci_channel_offline(phba->pcidev)) | |
524 | + return; | |
521 | 525 | |
522 | 526 | if (phba->work_hs & HS_FFER6 || |
523 | 527 | phba->work_hs & HS_FFER5) { |
... | ... | @@ -1797,6 +1801,92 @@ |
1797 | 1801 | pci_set_drvdata(pdev, NULL); |
1798 | 1802 | } |
1799 | 1803 | |
1804 | +/** | |
1805 | + * lpfc_io_error_detected - called when PCI error is detected | |
1806 | + * @pdev: Pointer to PCI device | |
1807 | + * @state: The current pci connection state | |
1808 | + * | |
1809 | + * This function is called after a PCI bus error affecting | |
1810 | + * this device has been detected. | |
1811 | + */ | |
1812 | +static pci_ers_result_t lpfc_io_error_detected(struct pci_dev *pdev, | |
1813 | + pci_channel_state_t state) | |
1814 | +{ | |
1815 | + struct Scsi_Host *host = pci_get_drvdata(pdev); | |
1816 | + struct lpfc_hba *phba = (struct lpfc_hba *)host->hostdata; | |
1817 | + struct lpfc_sli *psli = &phba->sli; | |
1818 | + struct lpfc_sli_ring *pring; | |
1819 | + | |
1820 | + if (state == pci_channel_io_perm_failure) { | |
1821 | + lpfc_pci_remove_one(pdev); | |
1822 | + return PCI_ERS_RESULT_DISCONNECT; | |
1823 | + } | |
1824 | + pci_disable_device(pdev); | |
1825 | + /* | |
1826 | + * There may be I/Os dropped by the firmware. | |
1827 | + * Error iocb (I/O) on txcmplq and let the SCSI layer | |
1828 | + * retry it after re-establishing link. | |
1829 | + */ | |
1830 | + pring = &psli->ring[psli->fcp_ring]; | |
1831 | + lpfc_sli_abort_iocb_ring(phba, pring); | |
1832 | + | |
1833 | + /* Request a slot reset. */ | |
1834 | + return PCI_ERS_RESULT_NEED_RESET; | |
1835 | +} | |
1836 | + | |
1837 | +/** | |
1838 | + * lpfc_io_slot_reset - called after the pci bus has been reset. | |
1839 | + * @pdev: Pointer to PCI device | |
1840 | + * | |
1841 | + * Restart the card from scratch, as if from a cold-boot. | |
1842 | + */ | |
1843 | +static pci_ers_result_t lpfc_io_slot_reset(struct pci_dev *pdev) | |
1844 | +{ | |
1845 | + struct Scsi_Host *host = pci_get_drvdata(pdev); | |
1846 | + struct lpfc_hba *phba = (struct lpfc_hba *)host->hostdata; | |
1847 | + struct lpfc_sli *psli = &phba->sli; | |
1848 | + int bars = pci_select_bars(pdev, IORESOURCE_MEM); | |
1849 | + | |
1850 | + dev_printk(KERN_INFO, &pdev->dev, "recovering from a slot reset.\n"); | |
1851 | + if (pci_enable_device_bars(pdev, bars)) { | |
1852 | + printk(KERN_ERR "lpfc: Cannot re-enable " | |
1853 | + "PCI device after reset.\n"); | |
1854 | + return PCI_ERS_RESULT_DISCONNECT; | |
1855 | + } | |
1856 | + | |
1857 | + pci_set_master(pdev); | |
1858 | + | |
1859 | + /* Re-establishing Link */ | |
1860 | + spin_lock_irq(phba->host->host_lock); | |
1861 | + phba->fc_flag |= FC_ESTABLISH_LINK; | |
1862 | + psli->sli_flag &= ~LPFC_SLI2_ACTIVE; | |
1863 | + spin_unlock_irq(phba->host->host_lock); | |
1864 | + | |
1865 | + | |
1866 | + /* Take device offline; this will perform cleanup */ | |
1867 | + lpfc_offline(phba); | |
1868 | + lpfc_sli_brdrestart(phba); | |
1869 | + | |
1870 | + return PCI_ERS_RESULT_RECOVERED; | |
1871 | +} | |
1872 | + | |
1873 | +/** | |
1874 | + * lpfc_io_resume - called when traffic can start flowing again. | |
1875 | + * @pdev: Pointer to PCI device | |
1876 | + * | |
1877 | + * This callback is called when the error recovery driver tells us that | |
1878 | + * it's OK to resume normal operation. | |
1879 | + */ | |
1880 | +static void lpfc_io_resume(struct pci_dev *pdev) | |
1881 | +{ | |
1882 | + struct Scsi_Host *host = pci_get_drvdata(pdev); | |
1883 | + struct lpfc_hba *phba = (struct lpfc_hba *)host->hostdata; | |
1884 | + | |
1885 | + if (lpfc_online(phba) == 0) { | |
1886 | + mod_timer(&phba->fc_estabtmo, jiffies + HZ * 60); | |
1887 | + } | |
1888 | +} | |
1889 | + | |
1800 | 1890 | static struct pci_device_id lpfc_id_table[] = { |
1801 | 1891 | {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_VIPER, |
1802 | 1892 | PCI_ANY_ID, PCI_ANY_ID, }, |
1803 | 1893 | |
... | ... | @@ -1857,11 +1947,18 @@ |
1857 | 1947 | |
1858 | 1948 | MODULE_DEVICE_TABLE(pci, lpfc_id_table); |
1859 | 1949 | |
1950 | +static struct pci_error_handlers lpfc_err_handler = { | |
1951 | + .error_detected = lpfc_io_error_detected, | |
1952 | + .slot_reset = lpfc_io_slot_reset, | |
1953 | + .resume = lpfc_io_resume, | |
1954 | +}; | |
1955 | + | |
1860 | 1956 | static struct pci_driver lpfc_driver = { |
1861 | 1957 | .name = LPFC_DRIVER_NAME, |
1862 | 1958 | .id_table = lpfc_id_table, |
1863 | 1959 | .probe = lpfc_pci_probe_one, |
1864 | 1960 | .remove = __devexit_p(lpfc_pci_remove_one), |
1961 | + .err_handler = &lpfc_err_handler, | |
1865 | 1962 | }; |
1866 | 1963 | |
1867 | 1964 | static int __init |
drivers/scsi/lpfc/lpfc_sli.c
... | ... | @@ -2104,6 +2104,10 @@ |
2104 | 2104 | volatile uint32_t word0, ldata; |
2105 | 2105 | void __iomem *to_slim; |
2106 | 2106 | |
2107 | + /* If the PCI channel is in offline state, do not post mbox. */ | |
2108 | + if (unlikely(pci_channel_offline(phba->pcidev))) | |
2109 | + return MBX_NOT_FINISHED; | |
2110 | + | |
2107 | 2111 | psli = &phba->sli; |
2108 | 2112 | |
2109 | 2113 | spin_lock_irqsave(phba->host->host_lock, drvr_flag); |
... | ... | @@ -2407,6 +2411,10 @@ |
2407 | 2411 | struct lpfc_iocbq *nextiocb; |
2408 | 2412 | IOCB_t *iocb; |
2409 | 2413 | |
2414 | + /* If the PCI channel is in offline state, do not post iocbs. */ | |
2415 | + if (unlikely(pci_channel_offline(phba->pcidev))) | |
2416 | + return IOCB_ERROR; | |
2417 | + | |
2410 | 2418 | /* |
2411 | 2419 | * We should never get an IOCB if we are in a < LINK_DOWN state |
2412 | 2420 | */ |
... | ... | @@ -3152,6 +3160,10 @@ |
3152 | 3160 | phba = (struct lpfc_hba *) dev_id; |
3153 | 3161 | |
3154 | 3162 | if (unlikely(!phba)) |
3163 | + return IRQ_NONE; | |
3164 | + | |
3165 | + /* If the pci channel is offline, ignore all the interrupts. */ | |
3166 | + if (unlikely(pci_channel_offline(phba->pcidev))) | |
3155 | 3167 | return IRQ_NONE; |
3156 | 3168 | |
3157 | 3169 | phba->sli.slistat.sli_intr++; |