Commit a2d8617dff910d5afe2964b722d71fc6e1bb1c09

Authored by Brian King
Committed by Greg Kroah-Hartman
1 parent 587a7ba37c

ipr: wait for aborted command responses

commit 6cdb08172bc89f0a39e1643c5e7eab362692fd1b upstream.

Fixes a race condition in abort handling that was introduced
when multiple interrupt support was added. When only a single
interrupt is present, the adapter guarantees it will send
responses for aborted commands prior to the response for the
abort command itself. With multiple interrupts, these responses
generally come back on different interrupts, so the abort thread
must wait until the aborted command completes in order to avoid
a double completion. This race condition was being hit frequently
in environments that trigger command timeouts, where the resulting
double completion caused a kernel oops.

Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Reviewed-by: Wendy Xiong <wenxiong@linux.vnet.ibm.com>
Tested-by: Wendy Xiong <wenxiong@linux.vnet.ibm.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Showing 2 changed files with 93 additions and 0 deletions
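The race described above is closed with the kernel's completion primitive: the error-handling thread parks an on-stack struct completion in each matching in-flight command, and the response path fires it once the aborted command's own response has been processed. Below is a minimal standalone sketch of that handshake; the struct cmd, cmd_response() and eh_wait() names are illustrative stand-ins, not part of the patch.

#include <linux/completion.h>
#include <linux/errno.h>
#include <linux/jiffies.h>

struct cmd {
	struct completion *eh_comp;	/* non-NULL only while an eh thread waits */
};

/* Response path (interrupt context): runs whenever the command's own
 * response arrives, possibly on a different interrupt than the abort's. */
static void cmd_response(struct cmd *c)
{
	/* ... normal completion processing of the command ... */
	if (c->eh_comp)
		complete(c->eh_comp);	/* wake the waiting eh thread */
}

/* Error-handler path (process context): hook an on-stack completion into
 * the command, then sleep until the response path fires it. In the real
 * patch, attaching and detaching happen under hrrq->lock. */
static int eh_wait(struct cmd *c)
{
	DECLARE_COMPLETION_ONSTACK(comp);

	c->eh_comp = &comp;
	if (!wait_for_completion_timeout(&comp, 30 * HZ)) {	/* arbitrary timeout */
		c->eh_comp = NULL;	/* detach before 'comp' leaves scope */
		return -ETIMEDOUT;
	}
	return 0;
}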

--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -683,6 +683,7 @@
 	ipr_reinit_ipr_cmnd(ipr_cmd);
 	ipr_cmd->u.scratch = 0;
 	ipr_cmd->sibling = NULL;
+	ipr_cmd->eh_comp = NULL;
 	ipr_cmd->fast_done = fast_done;
 	init_timer(&ipr_cmd->timer);
 }
@@ -848,6 +849,8 @@
 
 	scsi_dma_unmap(ipr_cmd->scsi_cmd);
 	scsi_cmd->scsi_done(scsi_cmd);
+	if (ipr_cmd->eh_comp)
+		complete(ipr_cmd->eh_comp);
 	list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
 }
 
@@ -4853,6 +4856,84 @@
 	return rc;
 }
 
+/**
+ * ipr_match_lun - Match function for specified LUN
+ * @ipr_cmd:	ipr command struct
+ * @device:	device to match (sdev)
+ *
+ * Returns:
+ *	1 if command matches sdev / 0 if command does not match sdev
+ **/
+static int ipr_match_lun(struct ipr_cmnd *ipr_cmd, void *device)
+{
+	if (ipr_cmd->scsi_cmd && ipr_cmd->scsi_cmd->device == device)
+		return 1;
+	return 0;
+}
+
+/**
+ * ipr_wait_for_ops - Wait for matching commands to complete
+ * @ioa_cfg:	ioa config struct
+ * @device:	device to match (sdev)
+ * @match:	match function to use
+ *
+ * Returns:
+ *	SUCCESS / FAILED
+ **/
+static int ipr_wait_for_ops(struct ipr_ioa_cfg *ioa_cfg, void *device,
+			    int (*match)(struct ipr_cmnd *, void *))
+{
+	struct ipr_cmnd *ipr_cmd;
+	int wait;
+	unsigned long flags;
+	struct ipr_hrr_queue *hrrq;
+	signed long timeout = IPR_ABORT_TASK_TIMEOUT;
+	DECLARE_COMPLETION_ONSTACK(comp);
+
+	ENTER;
+	do {
+		wait = 0;
+
+		for_each_hrrq(hrrq, ioa_cfg) {
+			spin_lock_irqsave(hrrq->lock, flags);
+			list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) {
+				if (match(ipr_cmd, device)) {
+					ipr_cmd->eh_comp = &comp;
+					wait++;
+				}
+			}
+			spin_unlock_irqrestore(hrrq->lock, flags);
+		}
+
+		if (wait) {
+			timeout = wait_for_completion_timeout(&comp, timeout);
+
+			if (!timeout) {
+				wait = 0;
+
+				for_each_hrrq(hrrq, ioa_cfg) {
+					spin_lock_irqsave(hrrq->lock, flags);
+					list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) {
+						if (match(ipr_cmd, device)) {
+							ipr_cmd->eh_comp = NULL;
+							wait++;
+						}
+					}
+					spin_unlock_irqrestore(hrrq->lock, flags);
+				}
+
+				if (wait)
+					dev_err(&ioa_cfg->pdev->dev, "Timed out waiting for aborted commands\n");
+				LEAVE;
+				return wait ? FAILED : SUCCESS;
+			}
+		}
+	} while (wait);
+
+	LEAVE;
+	return SUCCESS;
+}
+
 static int ipr_eh_host_reset(struct scsi_cmnd *cmd)
 {
 	struct ipr_ioa_cfg *ioa_cfg;
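A note on the shape of ipr_wait_for_ops() above: one on-stack struct completion is shared by every matching command, and wait_for_completion_timeout() returns as soon as the first complete() fires, so the outer do/while re-walks the pending queues and sleeps again until no match remains. On timeout, the second walk clears each eh_comp under hrrq->lock before returning: comp lives on this stack frame, so any pointer left behind would let a late interrupt-context response call complete() on a completion that no longer exists.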
@@ -5072,11 +5153,17 @@
 static int ipr_eh_dev_reset(struct scsi_cmnd *cmd)
 {
 	int rc;
+	struct ipr_ioa_cfg *ioa_cfg;
 
+	ioa_cfg = (struct ipr_ioa_cfg *) cmd->device->host->hostdata;
+
 	spin_lock_irq(cmd->device->host->host_lock);
 	rc = __ipr_eh_dev_reset(cmd);
 	spin_unlock_irq(cmd->device->host->host_lock);
 
+	if (rc == SUCCESS)
+		rc = ipr_wait_for_ops(ioa_cfg, cmd->device, ipr_match_lun);
+
 	return rc;
 }
 
@@ -5254,13 +5341,18 @@
 {
 	unsigned long flags;
 	int rc;
+	struct ipr_ioa_cfg *ioa_cfg;
 
 	ENTER;
 
+	ioa_cfg = (struct ipr_ioa_cfg *) scsi_cmd->device->host->hostdata;
+
 	spin_lock_irqsave(scsi_cmd->device->host->host_lock, flags);
 	rc = ipr_cancel_op(scsi_cmd);
 	spin_unlock_irqrestore(scsi_cmd->device->host->host_lock, flags);
 
+	if (rc == SUCCESS)
+		rc = ipr_wait_for_ops(ioa_cfg, scsi_cmd->device, ipr_match_lun);
 	LEAVE;
 	return rc;
 }
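Both hunks above follow the same pattern: the reset or cancel itself runs under host_lock, and only when it returns SUCCESS does the handler call ipr_wait_for_ops(). The wait has to happen after host_lock is dropped, since ipr_wait_for_ops() sleeps in wait_for_completion_timeout() and takes the per-hrrq locks.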
--- a/drivers/scsi/ipr.h
+++ b/drivers/scsi/ipr.h
@@ -1608,6 +1608,7 @@
 		struct scsi_device *sdev;
 	} u;
 
+	struct completion *eh_comp;
 	struct ipr_hrr_queue *hrrq;
 	struct ipr_ioa_cfg *ioa_cfg;
 };