Commit 397497dd61948b0d59d1d21812b93c97b0eeb2dd

Authored by Jeff Skirvin
Committed by Dan Williams
1 parent 87805162b6

isci: Check IDEV_GONE before performing abort path operations.

In the link fail path, set IDEV_GONE for every device on the domain
when the last link in the port fails.

In the abort path functions like isci_reset_device, make sure that
there has not already been a detected domain failure with the device
by checking IDEV_GONE, before performing any kind of hard reset, SMP
phy control, or TMF operation.

The check for IDEV_GONE makes sure that the device in the abort path
really has control of the port with which it is associated.  This
prevents starting hard resets at incorrect times and scheduling
unnecessary LUN resets for SATA devices.

Signed-off-by: Jeff Skirvin <jeffrey.d.skirvin@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

Showing 2 changed files with 57 additions and 21 deletions Side-by-side Diff

drivers/scsi/isci/port.c
... ... @@ -240,8 +240,31 @@
240 240 struct isci_phy *isci_phy,
241 241 struct isci_port *isci_port)
242 242 {
  243 + struct isci_remote_device *isci_device;
  244 +
243 245 dev_dbg(&isci_host->pdev->dev,
244 246 "%s: isci_port = %p\n", __func__, isci_port);
  247 +
  248 + if (isci_port) {
  249 +
  250 + /* check to see if this is the last phy on this port. */
  251 + if (isci_phy->sas_phy.port &&
  252 + isci_phy->sas_phy.port->num_phys == 1) {
  253 + /* change the state for all devices on this port. The
  254 + * next task sent to this device will be returned as
  255 + * SAS_TASK_UNDELIVERED, and the scsi mid layer will
  256 + * remove the target
  257 + */
  258 + list_for_each_entry(isci_device,
  259 + &isci_port->remote_dev_list,
  260 + node) {
  261 + dev_dbg(&isci_host->pdev->dev,
  262 + "%s: isci_device = %p\n",
  263 + __func__, isci_device);
  264 + set_bit(IDEV_GONE, &isci_device->flags);
  265 + }
  266 + }
  267 + }
245 268  
246 269 /* Notify libsas of the broken link, this will trigger calls to our
247 270 * isci_port_deformed and isci_dev_gone functions.
drivers/scsi/isci/task.c
... ... @@ -421,7 +421,7 @@
421 421 struct isci_host *ihost = dev_to_ihost(dev);
422 422 struct isci_remote_device *idev;
423 423 unsigned long flags;
424   - int ret;
  424 + int ret = TMF_RESP_FUNC_COMPLETE;
425 425  
426 426 spin_lock_irqsave(&ihost->scic_lock, flags);
427 427 idev = isci_get_device(dev->lldd_dev);
... ... @@ -447,12 +447,12 @@
447 447 goto out;
448 448 }
449 449 /* All pending I/Os have been terminated and cleaned up. */
450   - if (dev_is_sata(dev)) {
451   - sas_ata_schedule_reset(dev);
452   - ret = TMF_RESP_FUNC_COMPLETE;
453   - } else {
454   - /* Send the task management part of the reset. */
455   - ret = isci_task_send_lu_reset_sas(ihost, idev, lun);
  450 + if (!test_bit(IDEV_GONE, &idev->flags)) {
  451 + if (dev_is_sata(dev))
  452 + sas_ata_schedule_reset(dev);
  453 + else
  454 + /* Send the task management part of the reset. */
  455 + ret = isci_task_send_lu_reset_sas(ihost, idev, lun);
456 456 }
457 457 out:
458 458 isci_put_device(idev);
... ... @@ -512,8 +512,17 @@
512 512 spin_unlock_irqrestore(&ihost->scic_lock, flags);
513 513  
514 514 dev_warn(&ihost->pdev->dev,
515   - "%s: dev = %p, task = %p, old_request == %p\n",
516   - __func__, idev, task, old_request);
  515 + "%s: dev = %p (%s%s), task = %p, old_request == %p\n",
  516 + __func__, idev,
  517 + (dev_is_sata(task->dev) ? "STP/SATA"
  518 + : ((dev_is_expander(task->dev))
  519 + ? "SMP"
  520 + : "SSP")),
  521 + ((idev) ? ((test_bit(IDEV_GONE, &idev->flags))
  522 + ? " IDEV_GONE"
  523 + : "")
  524 + : " <NULL>"),
  525 + task, old_request);
517 526  
518 527 /* Device reset conditions signalled in task_state_flags are the
519 528 * responsibility of libsas to observe at the start of the error
... ... @@ -552,7 +561,8 @@
552 561  
553 562 if (task->task_proto == SAS_PROTOCOL_SMP ||
554 563 sas_protocol_ata(task->task_proto) ||
555   - test_bit(IREQ_COMPLETE_IN_TARGET, &old_request->flags)) {
  564 + test_bit(IREQ_COMPLETE_IN_TARGET, &old_request->flags) ||
  565 + test_bit(IDEV_GONE, &idev->flags)) {
556 566  
557 567 spin_unlock_irqrestore(&ihost->scic_lock, flags);
558 568  
... ... @@ -561,7 +571,8 @@
561 571  
562 572 dev_warn(&ihost->pdev->dev,
563 573 "%s: %s request"
564   - " or complete_in_target (%d), thus no TMF\n",
  574 + " or complete_in_target (%d), "
  575 + "or IDEV_GONE (%d), thus no TMF\n",
565 576 __func__,
566 577 ((task->task_proto == SAS_PROTOCOL_SMP)
567 578 ? "SMP"
... ... @@ -570,7 +581,8 @@
570 581 : "<other>")
571 582 ),
572 583 test_bit(IREQ_COMPLETE_IN_TARGET,
573   - &old_request->flags));
  584 + &old_request->flags),
  585 + test_bit(IDEV_GONE, &idev->flags));
574 586  
575 587 spin_lock_irqsave(&task->task_state_lock, flags);
576 588 task->task_state_flags &= ~(SAS_TASK_AT_INITIATOR |
... ... @@ -734,7 +746,7 @@
734 746 struct domain_device *dev,
735 747 struct isci_remote_device *idev)
736 748 {
737   - int rc = TMF_RESP_FUNC_COMPLETE, reset_stat;
  749 + int rc = TMF_RESP_FUNC_COMPLETE, reset_stat = -1;
738 750 struct sas_phy *phy = sas_get_local_phy(dev);
739 751 struct isci_port *iport = dev->port->lldd_port;
740 752  
741 753  
... ... @@ -752,14 +764,15 @@
752 764 * primary duty of this function is to cleanup tasks, so that is the
753 765 * relevant status.
754 766 */
  767 + if (!test_bit(IDEV_GONE, &idev->flags)) {
  768 + if (scsi_is_sas_phy_local(phy)) {
  769 + struct isci_phy *iphy = &ihost->phys[phy->number];
755 770  
756   - if (scsi_is_sas_phy_local(phy)) {
757   - struct isci_phy *iphy = &ihost->phys[phy->number];
758   -
759   - reset_stat = isci_port_perform_hard_reset(ihost, iport, iphy);
760   - } else
761   - reset_stat = sas_phy_reset(phy, !dev_is_sata(dev));
762   -
  771 + reset_stat = isci_port_perform_hard_reset(ihost, iport,
  772 + iphy);
  773 + } else
  774 + reset_stat = sas_phy_reset(phy, !dev_is_sata(dev));
  775 + }
763 776 /* Explicitly resume the RNC here, since there was no task sent. */
764 777 isci_remote_device_resume_from_abort(ihost, idev);
765 778