Commit fb51ccbf217c1c994607b6519c7d85250928553d

Authored by Jan Kiszka
Committed by Jesse Barnes
1 parent ae5cd86455

PCI: Rework config space blocking services

pci_block_user_cfg_access was designed for the use case that a single
context, the IPR driver, temporarily delays user space accesses to the
config space via sysfs. This assumption became invalid when
pci_dev_reset was added as a second locking instance. Today, if you run
two loops in parallel that reset the same device via sysfs, you end up
with a kernel BUG because pci_block_user_cfg_access detects the broken
assumption.
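
For illustration only, a minimal userspace reproducer might look like the
sketch below; the device address is an assumption, any device exposing the
sysfs reset attribute will do. Running two instances in parallel triggers
the BUG described above:

	/* repro.c - hypothetical reproducer, not part of this patch */
	#include <fcntl.h>
	#include <unistd.h>

	int main(void)
	{
		const char *path = "/sys/bus/pci/devices/0000:00:1f.0/reset";

		for (;;) {
			int fd = open(path, O_WRONLY);

			if (fd < 0)
				return 1;
			if (write(fd, "1", 1) != 1)	/* ends up in pci_dev_reset() */
				return 1;
			close(fd);
		}
	}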

This reworks pci_block_user_cfg_access into a sleeping service,
pci_cfg_access_lock, and an atomic-compatible variant called
pci_cfg_access_trylock. The former not only blocks user space access as
before but also waits if the lock is already held by another caller. The
latter simply returns false in this case, allowing the caller to resolve
the conflict instead of raising a BUG.
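
As a rough sketch of the intended usage (mydrv_reset and mydrv_try_reset
are hypothetical, not part of this patch):

	/* Process context: may sleep until the lock becomes free. */
	static void mydrv_reset(struct pci_dev *pdev)
	{
		pci_cfg_access_lock(pdev);
		/* ... reset the device; config space is protected ... */
		pci_cfg_access_unlock(pdev);	/* wakes all waiters */
	}

	/* Atomic context: fail instead of sleeping. */
	static bool mydrv_try_reset(struct pci_dev *pdev)
	{
		if (!pci_cfg_access_trylock(pdev))
			return false;	/* caller resolves the conflict */
		/* ... reset the device ... */
		pci_cfg_access_unlock(pdev);
		return true;
	}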

Adaptations of the ipr driver were originally written by Brian King.

Acked-by: Brian King <brking@linux.vnet.ibm.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>

Showing 7 changed files with 131 additions and 50 deletions

drivers/pci/access.c
... ... @@ -127,20 +127,20 @@
127 127 * We have a bit per device to indicate it's blocked and a global wait queue
128 128 * for callers to sleep on until devices are unblocked.
129 129 */
130   -static DECLARE_WAIT_QUEUE_HEAD(pci_ucfg_wait);
  130 +static DECLARE_WAIT_QUEUE_HEAD(pci_cfg_wait);
131 131  
132   -static noinline void pci_wait_ucfg(struct pci_dev *dev)
  132 +static noinline void pci_wait_cfg(struct pci_dev *dev)
133 133 {
134 134 DECLARE_WAITQUEUE(wait, current);
135 135  
136   - __add_wait_queue(&pci_ucfg_wait, &wait);
  136 + __add_wait_queue(&pci_cfg_wait, &wait);
137 137 do {
138 138 set_current_state(TASK_UNINTERRUPTIBLE);
139 139 raw_spin_unlock_irq(&pci_lock);
140 140 schedule();
141 141 raw_spin_lock_irq(&pci_lock);
142   - } while (dev->block_ucfg_access);
143   - __remove_wait_queue(&pci_ucfg_wait, &wait);
  142 + } while (dev->block_cfg_access);
  143 + __remove_wait_queue(&pci_cfg_wait, &wait);
144 144 }
145 145  
146 146 /* Returns 0 on success, negative values indicate error. */
... ... @@ -153,7 +153,8 @@
153 153 if (PCI_##size##_BAD) \
154 154 return -EINVAL; \
155 155 raw_spin_lock_irq(&pci_lock); \
156   - if (unlikely(dev->block_ucfg_access)) pci_wait_ucfg(dev); \
  156 + if (unlikely(dev->block_cfg_access)) \
  157 + pci_wait_cfg(dev); \
157 158 ret = dev->bus->ops->read(dev->bus, dev->devfn, \
158 159 pos, sizeof(type), &data); \
159 160 raw_spin_unlock_irq(&pci_lock); \
... ... @@ -172,7 +173,8 @@
172 173 if (PCI_##size##_BAD) \
173 174 return -EINVAL; \
174 175 raw_spin_lock_irq(&pci_lock); \
175   - if (unlikely(dev->block_ucfg_access)) pci_wait_ucfg(dev); \
  176 + if (unlikely(dev->block_cfg_access)) \
  177 + pci_wait_cfg(dev); \
176 178 ret = dev->bus->ops->write(dev->bus, dev->devfn, \
177 179 pos, sizeof(type), val); \
178 180 raw_spin_unlock_irq(&pci_lock); \
... ... @@ -401,36 +403,56 @@
401 403 EXPORT_SYMBOL(pci_vpd_truncate);
402 404  
403 405 /**
404   - * pci_block_user_cfg_access - Block userspace PCI config reads/writes
  406 + * pci_cfg_access_lock - Lock PCI config reads/writes
405 407 * @dev: pci device struct
406 408 *
407   - * When user access is blocked, any reads or writes to config space will
408   - * sleep until access is unblocked again. We don't allow nesting of
409   - * block/unblock calls.
  409 + * When access is locked, any userspace reads or writes to config
  410 + * space and concurrent lock requests will sleep until access is
  411 + * allowed via pci_cfg_access_unlock() again.
410 412 */
411   -void pci_block_user_cfg_access(struct pci_dev *dev)
  413 +void pci_cfg_access_lock(struct pci_dev *dev)
412 414 {
  415 + might_sleep();
  416 +
  417 + raw_spin_lock_irq(&pci_lock);
  418 + if (dev->block_cfg_access)
  419 + pci_wait_cfg(dev);
  420 + dev->block_cfg_access = 1;
  421 + raw_spin_unlock_irq(&pci_lock);
  422 +}
  423 +EXPORT_SYMBOL_GPL(pci_cfg_access_lock);
  424 +
  425 +/**
  426 + * pci_cfg_access_trylock - try to lock PCI config reads/writes
  427 + * @dev: pci device struct
  428 + *
  429 + * Same as pci_cfg_access_lock, but will return 0 if access is
  430 + * already locked, 1 otherwise. This function can be used from
  431 + * atomic contexts.
  432 + */
  433 +bool pci_cfg_access_trylock(struct pci_dev *dev)
  434 +{
413 435 unsigned long flags;
414   - int was_blocked;
  436 + bool locked = true;
415 437  
416 438 raw_spin_lock_irqsave(&pci_lock, flags);
417   - was_blocked = dev->block_ucfg_access;
418   - dev->block_ucfg_access = 1;
  439 + if (dev->block_cfg_access)
  440 + locked = false;
  441 + else
  442 + dev->block_cfg_access = 1;
419 443 raw_spin_unlock_irqrestore(&pci_lock, flags);
420 444  
421   - /* If we BUG() inside the pci_lock, we're guaranteed to hose
422   - * the machine */
423   - BUG_ON(was_blocked);
  445 + return locked;
424 446 }
425   -EXPORT_SYMBOL_GPL(pci_block_user_cfg_access);
  447 +EXPORT_SYMBOL_GPL(pci_cfg_access_trylock);
426 448  
427 449 /**
428   - * pci_unblock_user_cfg_access - Unblock userspace PCI config reads/writes
  450 + * pci_cfg_access_unlock - Unlock PCI config reads/writes
429 451 * @dev: pci device struct
430 452 *
431   - * This function allows userspace PCI config accesses to resume.
  453 + * This function allows PCI config accesses to resume.
432 454 */
433   -void pci_unblock_user_cfg_access(struct pci_dev *dev)
  455 +void pci_cfg_access_unlock(struct pci_dev *dev)
434 456 {
435 457 unsigned long flags;
436 458  
437 459 raw_spin_lock_irqsave(&pci_lock, flags);
... ... @@ -438,11 +460,11 @@
438 460  
439 461 /* This indicates a problem in the caller, but we don't need
440 462 * to kill them, unlike a double-block above. */
441   - WARN_ON(!dev->block_ucfg_access);
  463 + WARN_ON(!dev->block_cfg_access);
442 464  
443   - dev->block_ucfg_access = 0;
444   - wake_up_all(&pci_ucfg_wait);
  465 + dev->block_cfg_access = 0;
  466 + wake_up_all(&pci_cfg_wait);
445 467 raw_spin_unlock_irqrestore(&pci_lock, flags);
446 468 }
447   -EXPORT_SYMBOL_GPL(pci_unblock_user_cfg_access);
  469 +EXPORT_SYMBOL_GPL(pci_cfg_access_unlock);
drivers/pci/iov.c
... ... @@ -348,10 +348,10 @@
348 348 }
349 349  
350 350 iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE;
351   - pci_block_user_cfg_access(dev);
  351 + pci_cfg_access_lock(dev);
352 352 pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
353 353 msleep(100);
354   - pci_unblock_user_cfg_access(dev);
  354 + pci_cfg_access_unlock(dev);
355 355  
356 356 iov->initial = initial;
357 357 if (nr_virtfn < initial)
358 358 initial = nr_virtfn;
... ... @@ -379,10 +379,10 @@
379 379 virtfn_remove(dev, j, 0);
380 380  
381 381 iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
382   - pci_block_user_cfg_access(dev);
  382 + pci_cfg_access_lock(dev);
383 383 pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
384 384 ssleep(1);
385   - pci_unblock_user_cfg_access(dev);
  385 + pci_cfg_access_unlock(dev);
386 386  
387 387 if (iov->link != dev->devfn)
388 388 sysfs_remove_link(&dev->dev.kobj, "dep_link");
... ... @@ -405,10 +405,10 @@
405 405 virtfn_remove(dev, i, 0);
406 406  
407 407 iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
408   - pci_block_user_cfg_access(dev);
  408 + pci_cfg_access_lock(dev);
409 409 pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
410 410 ssleep(1);
411   - pci_unblock_user_cfg_access(dev);
  411 + pci_cfg_access_unlock(dev);
412 412  
413 413 if (iov->link != dev->devfn)
414 414 sysfs_remove_link(&dev->dev.kobj, "dep_link");
drivers/pci/pci.c
... ... @@ -2965,7 +2965,7 @@
2965 2965 might_sleep();
2966 2966  
2967 2967 if (!probe) {
2968   - pci_block_user_cfg_access(dev);
  2968 + pci_cfg_access_lock(dev);
2969 2969 /* block PM suspend, driver probe, etc. */
2970 2970 device_lock(&dev->dev);
2971 2971 }
... ... @@ -2990,7 +2990,7 @@
2990 2990 done:
2991 2991 if (!probe) {
2992 2992 device_unlock(&dev->dev);
2993   - pci_unblock_user_cfg_access(dev);
  2993 + pci_cfg_access_unlock(dev);
2994 2994 }
2995 2995  
2996 2996 return rc;
drivers/scsi/ipr.c
... ... @@ -7638,8 +7638,12 @@
7638 7638 **/
7639 7639 static int ipr_reset_bist_done(struct ipr_cmnd *ipr_cmd)
7640 7640 {
  7641 + struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
  7642 +
7641 7643 ENTER;
7642   - pci_unblock_user_cfg_access(ipr_cmd->ioa_cfg->pdev);
  7644 + if (ioa_cfg->cfg_locked)
  7645 + pci_cfg_access_unlock(ioa_cfg->pdev);
  7646 + ioa_cfg->cfg_locked = 0;
7643 7647 ipr_cmd->job_step = ipr_reset_restore_cfg_space;
7644 7648 LEAVE;
7645 7649 return IPR_RC_JOB_CONTINUE;
... ... @@ -7660,8 +7664,6 @@
7660 7664 int rc = PCIBIOS_SUCCESSFUL;
7661 7665  
7662 7666 ENTER;
7663   - pci_block_user_cfg_access(ioa_cfg->pdev);
7664   -
7665 7667 if (ioa_cfg->ipr_chip->bist_method == IPR_MMIO)
7666 7668 writel(IPR_UPROCI_SIS64_START_BIST,
7667 7669 ioa_cfg->regs.set_uproc_interrupt_reg32);
... ... @@ -7673,7 +7675,9 @@
7673 7675 ipr_reset_start_timer(ipr_cmd, IPR_WAIT_FOR_BIST_TIMEOUT);
7674 7676 rc = IPR_RC_JOB_RETURN;
7675 7677 } else {
7676   - pci_unblock_user_cfg_access(ipr_cmd->ioa_cfg->pdev);
  7678 + if (ioa_cfg->cfg_locked)
  7679 + pci_cfg_access_unlock(ipr_cmd->ioa_cfg->pdev);
  7680 + ioa_cfg->cfg_locked = 0;
7677 7681 ipr_cmd->s.ioasa.hdr.ioasc = cpu_to_be32(IPR_IOASC_PCI_ACCESS_ERROR);
7678 7682 rc = IPR_RC_JOB_CONTINUE;
7679 7683 }
... ... @@ -7716,7 +7720,6 @@
7716 7720 struct pci_dev *pdev = ioa_cfg->pdev;
7717 7721  
7718 7722 ENTER;
7719   - pci_block_user_cfg_access(pdev);
7720 7723 pci_set_pcie_reset_state(pdev, pcie_warm_reset);
7721 7724 ipr_cmd->job_step = ipr_reset_slot_reset_done;
7722 7725 ipr_reset_start_timer(ipr_cmd, IPR_PCI_RESET_TIMEOUT);
... ... @@ -7725,6 +7728,56 @@
7725 7728 }
7726 7729  
7727 7730 /**
  7731 + * ipr_reset_block_config_access_wait - Wait for permission to block config access
  7732 + * @ipr_cmd: ipr command struct
  7733 + *
  7734 + * Description: This attempts to block config access to the IOA.
  7735 + *
  7736 + * Return value:
  7737 + * IPR_RC_JOB_CONTINUE / IPR_RC_JOB_RETURN
  7738 + **/
  7739 +static int ipr_reset_block_config_access_wait(struct ipr_cmnd *ipr_cmd)
  7740 +{
  7741 + struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
  7742 + int rc = IPR_RC_JOB_CONTINUE;
  7743 +
  7744 + if (pci_cfg_access_trylock(ioa_cfg->pdev)) {
  7745 + ioa_cfg->cfg_locked = 1;
  7746 + ipr_cmd->job_step = ioa_cfg->reset;
  7747 + } else {
  7748 + if (ipr_cmd->u.time_left) {
  7749 + rc = IPR_RC_JOB_RETURN;
  7750 + ipr_cmd->u.time_left -= IPR_CHECK_FOR_RESET_TIMEOUT;
  7751 + ipr_reset_start_timer(ipr_cmd,
  7752 + IPR_CHECK_FOR_RESET_TIMEOUT);
  7753 + } else {
  7754 + ipr_cmd->job_step = ioa_cfg->reset;
  7755 + dev_err(&ioa_cfg->pdev->dev,
  7756 + "Timed out waiting to lock config access. Resetting anyway.\n");
  7757 + }
  7758 + }
  7759 +
  7760 + return rc;
  7761 +}
  7762 +
  7763 +/**
  7764 + * ipr_reset_block_config_access - Block config access to the IOA
  7765 + * @ipr_cmd: ipr command struct
  7766 + *
  7767 + * Description: This attempts to block config access to the IOA
  7768 + *
  7769 + * Return value:
  7770 + * IPR_RC_JOB_CONTINUE
  7771 + **/
  7772 +static int ipr_reset_block_config_access(struct ipr_cmnd *ipr_cmd)
  7773 +{
  7774 + ipr_cmd->ioa_cfg->cfg_locked = 0;
  7775 + ipr_cmd->job_step = ipr_reset_block_config_access_wait;
  7776 + ipr_cmd->u.time_left = IPR_WAIT_FOR_RESET_TIMEOUT;
  7777 + return IPR_RC_JOB_CONTINUE;
  7778 +}
  7779 +
  7780 +/**
7728 7781 * ipr_reset_allowed - Query whether or not IOA can be reset
7729 7782 * @ioa_cfg: ioa config struct
7730 7783 *
... ... @@ -7763,7 +7816,7 @@
7763 7816 ipr_cmd->u.time_left -= IPR_CHECK_FOR_RESET_TIMEOUT;
7764 7817 ipr_reset_start_timer(ipr_cmd, IPR_CHECK_FOR_RESET_TIMEOUT);
7765 7818 } else {
7766   - ipr_cmd->job_step = ioa_cfg->reset;
  7819 + ipr_cmd->job_step = ipr_reset_block_config_access;
7767 7820 rc = IPR_RC_JOB_CONTINUE;
7768 7821 }
7769 7822  
... ... @@ -7796,7 +7849,7 @@
7796 7849 writel(IPR_UPROCI_RESET_ALERT, ioa_cfg->regs.set_uproc_interrupt_reg32);
7797 7850 ipr_cmd->job_step = ipr_reset_wait_to_start_bist;
7798 7851 } else {
7799   - ipr_cmd->job_step = ioa_cfg->reset;
  7852 + ipr_cmd->job_step = ipr_reset_block_config_access;
7800 7853 }
7801 7854  
7802 7855 ipr_cmd->u.time_left = IPR_WAIT_FOR_RESET_TIMEOUT;
drivers/scsi/ipr.h
... ... @@ -1387,6 +1387,7 @@
1387 1387 u8 msi_received:1;
1388 1388 u8 sis64:1;
1389 1389 u8 dump_timeout:1;
  1390 + u8 cfg_locked:1;
1390 1391  
1391 1392 u8 revid;
1392 1393  
drivers/uio/uio_pci_generic.c
... ... @@ -55,7 +55,8 @@
55 55 BUILD_BUG_ON(PCI_COMMAND % 4);
56 56 BUILD_BUG_ON(PCI_COMMAND + 2 != PCI_STATUS);
57 57  
58   - pci_block_user_cfg_access(pdev);
  58 + if (!pci_cfg_access_trylock(pdev))
  59 + goto error;
59 60  
60 61 /* Read both command and status registers in a single 32-bit operation.
61 62 * Note: we could cache the value for command and move the status read
... ... @@ -79,7 +80,7 @@
79 80 ret = IRQ_HANDLED;
80 81 done:
81 82  
82   - pci_unblock_user_cfg_access(pdev);
  83 + pci_cfg_access_unlock(pdev);
83 84 return ret;
84 85 }
85 86  
... ... @@ -91,7 +92,7 @@
91 92 u16 orig, new;
92 93 int err = 0;
93 94  
94   - pci_block_user_cfg_access(pdev);
  95 + pci_cfg_access_lock(pdev);
95 96 pci_read_config_word(pdev, PCI_COMMAND, &orig);
96 97 pci_write_config_word(pdev, PCI_COMMAND,
97 98 orig ^ PCI_COMMAND_INTX_DISABLE);
... ... @@ -114,7 +115,7 @@
114 115 /* Now restore the original value. */
115 116 pci_write_config_word(pdev, PCI_COMMAND, orig);
116 117 err:
117   - pci_unblock_user_cfg_access(pdev);
  118 + pci_cfg_access_unlock(pdev);
118 119 return err;
119 120 }
120 121  
include/linux/pci.h
... ... @@ -308,7 +308,7 @@
308 308 unsigned int is_added:1;
309 309 unsigned int is_busmaster:1; /* device is busmaster */
310 310 unsigned int no_msi:1; /* device may not use msi */
311   - unsigned int block_ucfg_access:1; /* userspace config space access is blocked */
  311 + unsigned int block_cfg_access:1; /* config space access is blocked */
312 312 unsigned int broken_parity_status:1; /* Device generates false positive parity */
313 313 unsigned int irq_reroute_variant:2; /* device needs IRQ rerouting variant */
314 314 unsigned int msi_enabled:1;
... ... @@ -1085,8 +1085,9 @@
1085 1085 void ht_destroy_irq(unsigned int irq);
1086 1086 #endif /* CONFIG_HT_IRQ */
1087 1087  
1088   -extern void pci_block_user_cfg_access(struct pci_dev *dev);
1089   -extern void pci_unblock_user_cfg_access(struct pci_dev *dev);
  1088 +extern void pci_cfg_access_lock(struct pci_dev *dev);
  1089 +extern bool pci_cfg_access_trylock(struct pci_dev *dev);
  1090 +extern void pci_cfg_access_unlock(struct pci_dev *dev);
1090 1091  
1091 1092 /*
1092 1093 * PCI domain support. Sometimes called PCI segment (eg by ACPI),
1093 1094  
... ... @@ -1283,10 +1284,13 @@
1283 1284  
1284 1285 #define pci_dma_burst_advice(pdev, strat, strategy_parameter) do { } while (0)
1285 1286  
1286   -static inline void pci_block_user_cfg_access(struct pci_dev *dev)
  1287 +static inline void pci_cfg_access_lock(struct pci_dev *dev)
1287 1288 { }
1288 1289  
1289   -static inline void pci_unblock_user_cfg_access(struct pci_dev *dev)
  1290 +static inline bool pci_cfg_access_trylock(struct pci_dev *dev)
  1291 +{ return true; }
  1292 +
  1293 +static inline void pci_cfg_access_unlock(struct pci_dev *dev)
1290 1294 { }
1291 1295  
1292 1296 static inline struct pci_bus *pci_find_next_bus(const struct pci_bus *from)