Commit 8d63f375051bfb1506fa546db840af8510d1cd60

Authored by Linas Vepstas
Committed by James Bottomley
1 parent 4520b0089b

[SCSI] lpfc: add PCI error recovery support

This patch adds PCI Error recovery support to the
Emulex Lightpulse Fibrechannel (lpfc) SCSI device driver.
Lightly tested at this point, works.

Signed-off-by: Linas Vepstas <linas@austin.ibm.com>
Acked-by: Bino.Sebastian@Emulex.Com
Acked-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

Showing 2 changed files with 109 additions and 0 deletions Side-by-side Diff

drivers/scsi/lpfc/lpfc_init.c
... ... @@ -518,6 +518,10 @@
518 518 struct lpfc_sli *psli = &phba->sli;
519 519 struct lpfc_sli_ring *pring;
520 520 uint32_t event_data;
  521 + /* If the pci channel is offline, ignore possible errors,
  522 + * since we cannot communicate with the pci card anyway. */
  523 + if (pci_channel_offline(phba->pcidev))
  524 + return;
521 525  
522 526 if (phba->work_hs & HS_FFER6 ||
523 527 phba->work_hs & HS_FFER5) {
... ... @@ -1797,6 +1801,92 @@
1797 1801 pci_set_drvdata(pdev, NULL);
1798 1802 }
1799 1803  
  1804 +/**
  1805 + * lpfc_io_error_detected - called when PCI error is detected
  1806 + * @pdev: Pointer to PCI device
  1807 + * @state: The current pci connection state
  1808 + *
  1809 + * This function is called after a PCI bus error affecting
  1810 + * this device has been detected.
  1811 + */
  1812 +static pci_ers_result_t lpfc_io_error_detected(struct pci_dev *pdev,
  1813 + pci_channel_state_t state)
  1814 +{
  1815 + struct Scsi_Host *host = pci_get_drvdata(pdev);
  1816 + struct lpfc_hba *phba = (struct lpfc_hba *)host->hostdata;
  1817 + struct lpfc_sli *psli = &phba->sli;
  1818 + struct lpfc_sli_ring *pring;
  1819 +
  1820 + if (state == pci_channel_io_perm_failure) {
  1821 + lpfc_pci_remove_one(pdev);
  1822 + return PCI_ERS_RESULT_DISCONNECT;
  1823 + }
  1824 + pci_disable_device(pdev);
  1825 + /*
  1826 + * There may be I/Os dropped by the firmware.
  1827 + * Error iocb (I/O) on txcmplq and let the SCSI layer
  1828 + * retry it after re-establishing link.
  1829 + */
  1830 + pring = &psli->ring[psli->fcp_ring];
  1831 + lpfc_sli_abort_iocb_ring(phba, pring);
  1832 +
  1833 + /* Request a slot reset. */
  1834 + return PCI_ERS_RESULT_NEED_RESET;
  1835 +}
  1836 +
  1837 +/**
  1838 + * lpfc_io_slot_reset - called after the pci bus has been reset.
  1839 + * @pdev: Pointer to PCI device
  1840 + *
  1841 + * Restart the card from scratch, as if from a cold-boot.
  1842 + */
  1843 +static pci_ers_result_t lpfc_io_slot_reset(struct pci_dev *pdev)
  1844 +{
  1845 + struct Scsi_Host *host = pci_get_drvdata(pdev);
  1846 + struct lpfc_hba *phba = (struct lpfc_hba *)host->hostdata;
  1847 + struct lpfc_sli *psli = &phba->sli;
  1848 + int bars = pci_select_bars(pdev, IORESOURCE_MEM);
  1849 +
  1850 + dev_printk(KERN_INFO, &pdev->dev, "recovering from a slot reset.\n");
  1851 + if (pci_enable_device_bars(pdev, bars)) {
  1852 + printk(KERN_ERR "lpfc: Cannot re-enable "
  1853 + "PCI device after reset.\n");
  1854 + return PCI_ERS_RESULT_DISCONNECT;
  1855 + }
  1856 +
  1857 + pci_set_master(pdev);
  1858 +
  1859 + /* Re-establishing Link */
  1860 + spin_lock_irq(phba->host->host_lock);
  1861 + phba->fc_flag |= FC_ESTABLISH_LINK;
  1862 + psli->sli_flag &= ~LPFC_SLI2_ACTIVE;
  1863 + spin_unlock_irq(phba->host->host_lock);
  1864 +
  1865 +
  1866 + /* Take device offline; this will perform cleanup */
  1867 + lpfc_offline(phba);
  1868 + lpfc_sli_brdrestart(phba);
  1869 +
  1870 + return PCI_ERS_RESULT_RECOVERED;
  1871 +}
  1872 +
  1873 +/**
  1874 + * lpfc_io_resume - called when traffic can start flowing again.
  1875 + * @pdev: Pointer to PCI device
  1876 + *
  1877 + * This callback is called when the error recovery driver tells us that
  1878 + * it's OK to resume normal operation.
  1879 + */
  1880 +static void lpfc_io_resume(struct pci_dev *pdev)
  1881 +{
  1882 + struct Scsi_Host *host = pci_get_drvdata(pdev);
  1883 + struct lpfc_hba *phba = (struct lpfc_hba *)host->hostdata;
  1884 +
  1885 + if (lpfc_online(phba) == 0) {
  1886 + mod_timer(&phba->fc_estabtmo, jiffies + HZ * 60);
  1887 + }
  1888 +}
  1889 +
1800 1890 static struct pci_device_id lpfc_id_table[] = {
1801 1891 {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_VIPER,
1802 1892 PCI_ANY_ID, PCI_ANY_ID, },
1803 1893  
... ... @@ -1857,11 +1947,18 @@
1857 1947  
1858 1948 MODULE_DEVICE_TABLE(pci, lpfc_id_table);
1859 1949  
  1950 +static struct pci_error_handlers lpfc_err_handler = {
  1951 + .error_detected = lpfc_io_error_detected,
  1952 + .slot_reset = lpfc_io_slot_reset,
  1953 + .resume = lpfc_io_resume,
  1954 +};
  1955 +
1860 1956 static struct pci_driver lpfc_driver = {
1861 1957 .name = LPFC_DRIVER_NAME,
1862 1958 .id_table = lpfc_id_table,
1863 1959 .probe = lpfc_pci_probe_one,
1864 1960 .remove = __devexit_p(lpfc_pci_remove_one),
  1961 + .err_handler = &lpfc_err_handler,
1865 1962 };
1866 1963  
1867 1964 static int __init
drivers/scsi/lpfc/lpfc_sli.c
... ... @@ -2104,6 +2104,10 @@
2104 2104 volatile uint32_t word0, ldata;
2105 2105 void __iomem *to_slim;
2106 2106  
  2107 + /* If the PCI channel is in offline state, do not post mbox. */
  2108 + if (unlikely(pci_channel_offline(phba->pcidev)))
  2109 + return MBX_NOT_FINISHED;
  2110 +
2107 2111 psli = &phba->sli;
2108 2112  
2109 2113 spin_lock_irqsave(phba->host->host_lock, drvr_flag);
... ... @@ -2407,6 +2411,10 @@
2407 2411 struct lpfc_iocbq *nextiocb;
2408 2412 IOCB_t *iocb;
2409 2413  
  2414 + /* If the PCI channel is in offline state, do not post iocbs. */
  2415 + if (unlikely(pci_channel_offline(phba->pcidev)))
  2416 + return IOCB_ERROR;
  2417 +
2410 2418 /*
2411 2419 * We should never get an IOCB if we are in a < LINK_DOWN state
2412 2420 */
... ... @@ -3152,6 +3160,10 @@
3152 3160 phba = (struct lpfc_hba *) dev_id;
3153 3161  
3154 3162 if (unlikely(!phba))
  3163 + return IRQ_NONE;
  3164 +
  3165 + /* If the pci channel is offline, ignore all the interrupts. */
  3166 + if (unlikely(pci_channel_offline(phba->pcidev)))
3155 3167 return IRQ_NONE;
3156 3168  
3157 3169 phba->sli.slistat.sli_intr++;