Commit 2f2eb58762b4dcddfe25c90800323765c1257eca

Authored by Christof Schmitt
Committed by James Bottomley
1 parent 7794a5af67

[SCSI] Allow FC LLD to fast-fail scsi eh by introducing new eh return

If the scsi eh is running and then a FC LLD calls
fc_remote_port_delete, the SCSI commands sent from the eh will fail.
To prevent this, a FC LLD can call fc_block_scsi_eh from the eh
callback, blocking the eh thread until the dev_loss_tmo fires or the
remote port is available again.

If (e.g. for a multipathing setup) the dev_loss_tmo is set to a very
large value, thus preventing the scsi device removal, the scsi eh can
block for a long time. For multipathing, the fast_io_fail_tmo is then
set to a low value to detect path problems sooner.

This patch introduces a new return code FAST_IO_FAIL. The function
fc_block_scsi_eh now returns FAST_IO_FAIL when the fast_io_fail_tmo
fires. This indicates that the LLD terminated all pending I/O requests
and there are no more pending SCSI commands for the scsi eh to wait
for. This return code can be passed back to the scsi eh to stop the
escalation and finish the recovery process for this device.

Signed-off-by: Christof Schmitt <christof.schmitt@de.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>

Showing 4 changed files with 27 additions and 11 deletions Side-by-side Diff

drivers/scsi/scsi_error.c
... ... @@ -957,9 +957,10 @@
957 957 "0x%p\n", current->comm,
958 958 scmd));
959 959 rtn = scsi_try_to_abort_cmd(scmd);
960   - if (rtn == SUCCESS) {
  960 + if (rtn == SUCCESS || rtn == FAST_IO_FAIL) {
961 961 scmd->eh_eflags &= ~SCSI_EH_CANCEL_CMD;
962 962 if (!scsi_device_online(scmd->device) ||
  963 + rtn == FAST_IO_FAIL ||
963 964 !scsi_eh_tur(scmd)) {
964 965 scsi_eh_finish_cmd(scmd, done_q);
965 966 }
966 967  
... ... @@ -1086,8 +1087,9 @@
1086 1087 " 0x%p\n", current->comm,
1087 1088 sdev));
1088 1089 rtn = scsi_try_bus_device_reset(bdr_scmd);
1089   - if (rtn == SUCCESS) {
  1090 + if (rtn == SUCCESS || rtn == FAST_IO_FAIL) {
1090 1091 if (!scsi_device_online(sdev) ||
  1092 + rtn == FAST_IO_FAIL ||
1091 1093 !scsi_eh_tur(bdr_scmd)) {
1092 1094 list_for_each_entry_safe(scmd, next,
1093 1095 work_q, eh_entry) {
1094 1096  
... ... @@ -1150,10 +1152,11 @@
1150 1152 "to target %d\n",
1151 1153 current->comm, id));
1152 1154 rtn = scsi_try_target_reset(tgtr_scmd);
1153   - if (rtn == SUCCESS) {
  1155 + if (rtn == SUCCESS || rtn == FAST_IO_FAIL) {
1154 1156 list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
1155 1157 if (id == scmd_id(scmd))
1156 1158 if (!scsi_device_online(scmd->device) ||
  1159 + rtn == FAST_IO_FAIL ||
1157 1160 !scsi_eh_tur(tgtr_scmd))
1158 1161 scsi_eh_finish_cmd(scmd,
1159 1162 done_q);
1160 1163  
... ... @@ -1209,10 +1212,11 @@
1209 1212 " %d\n", current->comm,
1210 1213 channel));
1211 1214 rtn = scsi_try_bus_reset(chan_scmd);
1212   - if (rtn == SUCCESS) {
  1215 + if (rtn == SUCCESS || rtn == FAST_IO_FAIL) {
1213 1216 list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
1214 1217 if (channel == scmd_channel(scmd))
1215 1218 if (!scsi_device_online(scmd->device) ||
  1219 + rtn == FAST_IO_FAIL ||
1216 1220 !scsi_eh_tur(scmd))
1217 1221 scsi_eh_finish_cmd(scmd,
1218 1222 done_q);
1219 1223  
... ... @@ -1246,9 +1250,10 @@
1246 1250 , current->comm));
1247 1251  
1248 1252 rtn = scsi_try_host_reset(scmd);
1249   - if (rtn == SUCCESS) {
  1253 + if (rtn == SUCCESS || rtn == FAST_IO_FAIL) {
1250 1254 list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
1251 1255 if (!scsi_device_online(scmd->device) ||
  1256 + rtn == FAST_IO_FAIL ||
1252 1257 (!scsi_eh_try_stu(scmd) && !scsi_eh_tur(scmd)) ||
1253 1258 !scsi_eh_tur(scmd))
1254 1259 scsi_eh_finish_cmd(scmd, done_q);
drivers/scsi/scsi_transport_fc.c
... ... @@ -3197,23 +3197,33 @@
3197 3197 *
3198 3198 * This routine can be called from a FC LLD scsi_eh callback. It
3199 3199 * blocks the scsi_eh thread until the fc_rport leaves the
3200   - * FC_PORTSTATE_BLOCKED. This is necessary to avoid the scsi_eh
3201   - * failing recovery actions for blocked rports which would lead to
3202   - * offlined SCSI devices.
  3200 + * FC_PORTSTATE_BLOCKED, or the fast_io_fail_tmo fires. This is
  3201 + * necessary to avoid the scsi_eh failing recovery actions for blocked
  3202 + * rports which would lead to offlined SCSI devices.
  3203 + *
  3204 + * Returns: 0 if the fc_rport left the state FC_PORTSTATE_BLOCKED.
  3205 + * FAST_IO_FAIL if the fast_io_fail_tmo fired, this should be
  3206 + * passed back to scsi_eh.
3203 3207 */
3204   -void fc_block_scsi_eh(struct scsi_cmnd *cmnd)
  3208 +int fc_block_scsi_eh(struct scsi_cmnd *cmnd)
3205 3209 {
3206 3210 struct Scsi_Host *shost = cmnd->device->host;
3207 3211 struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device));
3208 3212 unsigned long flags;
3209 3213  
3210 3214 spin_lock_irqsave(shost->host_lock, flags);
3211   - while (rport->port_state == FC_PORTSTATE_BLOCKED) {
  3215 + while (rport->port_state == FC_PORTSTATE_BLOCKED &&
  3216 + !(rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT)) {
3212 3217 spin_unlock_irqrestore(shost->host_lock, flags);
3213 3218 msleep(1000);
3214 3219 spin_lock_irqsave(shost->host_lock, flags);
3215 3220 }
3216 3221 spin_unlock_irqrestore(shost->host_lock, flags);
  3222 +
  3223 + if (rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT)
  3224 + return FAST_IO_FAIL;
  3225 +
  3226 + return 0;
3217 3227 }
3218 3228 EXPORT_SYMBOL(fc_block_scsi_eh);
3219 3229  
include/scsi/scsi.h
... ... @@ -423,6 +423,7 @@
423 423 #define ADD_TO_MLQUEUE 0x2006
424 424 #define TIMEOUT_ERROR 0x2007
425 425 #define SCSI_RETURN_NOT_HANDLED 0x2008
  426 +#define FAST_IO_FAIL 0x2009
426 427  
427 428 /*
428 429 * Midlevel queue return values.
include/scsi/scsi_transport_fc.h
... ... @@ -807,7 +807,7 @@
807 807 struct fc_vport *fc_vport_create(struct Scsi_Host *shost, int channel,
808 808 struct fc_vport_identifiers *);
809 809 int fc_vport_terminate(struct fc_vport *vport);
810   -void fc_block_scsi_eh(struct scsi_cmnd *cmnd);
  810 +int fc_block_scsi_eh(struct scsi_cmnd *cmnd);
811 811  
812 812 #endif /* SCSI_TRANSPORT_FC_H */