[SCSI] Add 'eh_deadline' to limit SCSI EH runtime

This patch adds an 'eh_deadline' sysfs attribute to the scsi host which limits the overall runtime of the SCSI EH. The 'eh_deadline' value is stored in the now obsolete field 'resetting'. When a command is failed the start time of the EH is stored in 'last_reset'. If the overall runtime of the SCSI EH is longer than last_reset + eh_deadline, the EH is short-circuited and falls through to issue a host reset only. [jejb: add comments in Scsi_Host about new fields] Signed-off-by: Hannes Reinecke <hare@suse.de> Signed-off-by: James Bottomley <JBottomley@Parallels.com>

[SCSI] Add 'eh_deadline' to limit SCSI EH runtime
This patch adds an 'eh_deadline' sysfs attribute to the scsi host which limits the overall runtime of the SCSI EH. The 'eh_deadline' value is stored in the now obsolete field 'resetting'. When a command is failed the start time of the EH is stored in 'last_reset'. If the overall runtime of the SCSI EH is longer than last_reset + eh_deadline, the EH is short-circuited and falls through to issue a host reset only. [jejb: add comments in Scsi_Host about new fields] Signed-off-by: Hannes Reinecke <hare@suse.de> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
Hannes Reinecke · James Bottomley
1 parent 6b1e5a45d4
Showing 4 changed files with 173 additions and 6 deletions Side-by-side Diff
drivers/scsi/hosts.c
drivers/scsi/scsi_error.c
drivers/scsi/scsi_sysfs.c
include/scsi/scsi_host.h
@@ -316,6 +316,12 @@
 	kfree(shost);
 }
  
+static unsigned int shost_eh_deadline;
+/*
+ * Default error-handler deadline, in seconds, exposed as the 'eh_deadline'
+ * module parameter (readable by everyone, writable by the owner).
+ * NOTE(review): the description advertises values up to 2^32-1, but the
+ * visible consumer multiplies this by HZ and stores the product in an int
+ * field, so the usable upper bound is presumably much smaller - confirm.
+ */
+module_param_named(eh_deadline, shost_eh_deadline, uint, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(eh_deadline,
+		 "SCSI EH timeout in seconds (should be between 1 and 2^32-1)");
+
 static struct device_type scsi_host_type = {
 	.name =		"scsi_host",
 	.release =	scsi_host_dev_release,
@@ -388,6 +394,7 @@
 	shost->unchecked_isa_dma = sht->unchecked_isa_dma;
 	shost->use_clustering = sht->use_clustering;
 	shost->ordered_tag = sht->ordered_tag;
+	shost->eh_deadline = shost_eh_deadline * HZ;
  
 	if (sht->supported_mode == MODE_UNKNOWN)
 		/* means we didn't set it ... default to INITIATOR */
@@ -87,6 +87,18 @@
 }
 EXPORT_SYMBOL_GPL(scsi_schedule_eh);
  
+/*
+ * scsi_host_eh_past_deadline - has error handling on @shost run too long?
+ *
+ * Returns 1 once jiffies has reached last_reset + eh_deadline, and 0
+ * otherwise.  A zero eh_deadline (no limit configured) or a zero
+ * last_reset (no start time recorded) always yields 0.
+ *
+ * The callers visible alongside this helper hold shost->host_lock around
+ * the call.
+ */
+static int scsi_host_eh_past_deadline(struct Scsi_Host *shost)
+{
+	unsigned long expires;
+
+	/* Nothing to compare against unless both values are set. */
+	if (!shost->eh_deadline || !shost->last_reset)
+		return 0;
+
+	expires = shost->last_reset + shost->eh_deadline;
+
+	/* time_before() copes with jiffies wrapping around. */
+	return time_before(jiffies, expires) ? 0 : 1;
+}
+
 /**
  * scsi_eh_scmd_add - add scsi cmd to error handling.
  * @scmd:	scmd to run eh on.
@@ -109,6 +121,9 @@
 		if (scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY))
 			goto out_unlock;
  
+	if (shost->eh_deadline && !shost->last_reset)
+		shost->last_reset = jiffies;
+
 	ret = 1;
 	scmd->eh_eflags |= eh_flag;
 	list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q);
@@ -138,6 +153,9 @@
 	trace_scsi_dispatch_cmd_timeout(scmd);
 	scsi_log_completion(scmd, TIMEOUT_ERROR);
  
+	if (host->eh_deadline && !host->last_reset)
+		host->last_reset = jiffies;
+
 	if (host->transportt->eh_timed_out)
 		rtn = host->transportt->eh_timed_out(scmd);
 	else if (host->hostt->eh_timed_out)
  
  
@@ -990,13 +1008,26 @@
 		      struct list_head *done_q)
 {
 	struct scsi_cmnd *scmd, *next;
+	struct Scsi_Host *shost;
 	int rtn;
+	unsigned long flags;
  
 	list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
 		if ((scmd->eh_eflags & SCSI_EH_CANCEL_CMD) ||
 		    SCSI_SENSE_VALID(scmd))
 			continue;
  
+		shost = scmd->device->host;
+		spin_lock_irqsave(shost->host_lock, flags);
+		if (scsi_host_eh_past_deadline(shost)) {
+			spin_unlock_irqrestore(shost->host_lock, flags);
+			SCSI_LOG_ERROR_RECOVERY(3,
+				shost_printk(KERN_INFO, shost,
+					    "skip %s, past eh deadline\n",
+					     __func__));
+			break;
+		}
+		spin_unlock_irqrestore(shost->host_lock, flags);
 		SCSI_LOG_ERROR_RECOVERY(2, scmd_printk(KERN_INFO, scmd,
 						  "%s: requesting sense\n",
 						  current->comm));
  
@@ -1082,11 +1113,28 @@
 	struct scsi_cmnd *scmd, *next;
 	struct scsi_device *sdev;
 	int finish_cmds;
+	unsigned long flags;
  
 	while (!list_empty(cmd_list)) {
 		scmd = list_entry(cmd_list->next, struct scsi_cmnd, eh_entry);
 		sdev = scmd->device;
  
+		if (!try_stu) {
+			spin_lock_irqsave(sdev->host->host_lock, flags);
+			if (scsi_host_eh_past_deadline(sdev->host)) {
+				/* Push items back onto work_q */
+				list_splice_init(cmd_list, work_q);
+				spin_unlock_irqrestore(sdev->host->host_lock,
+						       flags);
+				SCSI_LOG_ERROR_RECOVERY(3,
+					shost_printk(KERN_INFO, sdev->host,
+						     "skip %s, past eh deadline",
+						     __func__));
+				break;
+			}
+			spin_unlock_irqrestore(sdev->host->host_lock, flags);
+		}
+
 		finish_cmds = !scsi_device_online(scmd->device) ||
 			(try_stu && !scsi_eh_try_stu(scmd) &&
 			 !scsi_eh_tur(scmd)) ||
  
  
@@ -1122,14 +1170,28 @@
 	struct scsi_cmnd *scmd, *next;
 	LIST_HEAD(check_list);
 	int rtn;
+	struct Scsi_Host *shost;
+	unsigned long flags;
  
 	list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
 		if (!(scmd->eh_eflags & SCSI_EH_CANCEL_CMD))
 			continue;
+		shost = scmd->device->host;
+		spin_lock_irqsave(shost->host_lock, flags);
+		if (scsi_host_eh_past_deadline(shost)) {
+			spin_unlock_irqrestore(shost->host_lock, flags);
+			list_splice_init(&check_list, work_q);
+			SCSI_LOG_ERROR_RECOVERY(3,
+				shost_printk(KERN_INFO, shost,
+					    "skip %s, past eh deadline\n",
+					     __func__));
+			return list_empty(work_q);
+		}
+		spin_unlock_irqrestore(shost->host_lock, flags);
 		SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting cmd:"
 						  "0x%p\n", current->comm,
 						  scmd));
-		rtn = scsi_try_to_abort_cmd(scmd->device->host->hostt, scmd);
+		rtn = scsi_try_to_abort_cmd(shost->hostt, scmd);
 		if (rtn == SUCCESS || rtn == FAST_IO_FAIL) {
 			scmd->eh_eflags &= ~SCSI_EH_CANCEL_CMD;
 			if (rtn == FAST_IO_FAIL)
  
@@ -1187,8 +1249,19 @@
 {
 	struct scsi_cmnd *scmd, *stu_scmd, *next;
 	struct scsi_device *sdev;
+	unsigned long flags;
  
 	shost_for_each_device(sdev, shost) {
+		spin_lock_irqsave(shost->host_lock, flags);
+		if (scsi_host_eh_past_deadline(shost)) {
+			spin_unlock_irqrestore(shost->host_lock, flags);
+			SCSI_LOG_ERROR_RECOVERY(3,
+				shost_printk(KERN_INFO, shost,
+					    "skip %s, past eh deadline\n",
+					     __func__));
+			break;
+		}
+		spin_unlock_irqrestore(shost->host_lock, flags);
 		stu_scmd = NULL;
 		list_for_each_entry(scmd, work_q, eh_entry)
 			if (scmd->device == sdev && SCSI_SENSE_VALID(scmd) &&
  
@@ -1241,9 +1314,20 @@
 {
 	struct scsi_cmnd *scmd, *bdr_scmd, *next;
 	struct scsi_device *sdev;
+	unsigned long flags;
 	int rtn;
  
 	shost_for_each_device(sdev, shost) {
+		spin_lock_irqsave(shost->host_lock, flags);
+		if (scsi_host_eh_past_deadline(shost)) {
+			spin_unlock_irqrestore(shost->host_lock, flags);
+			SCSI_LOG_ERROR_RECOVERY(3,
+				shost_printk(KERN_INFO, shost,
+					    "skip %s, past eh deadline\n",
+					     __func__));
+			break;
+		}
+		spin_unlock_irqrestore(shost->host_lock, flags);
 		bdr_scmd = NULL;
 		list_for_each_entry(scmd, work_q, eh_entry)
 			if (scmd->device == sdev) {
  
@@ -1303,7 +1387,22 @@
 		struct scsi_cmnd *next, *scmd;
 		int rtn;
 		unsigned int id;
+		unsigned long flags;
  
+		spin_lock_irqsave(shost->host_lock, flags);
+		if (scsi_host_eh_past_deadline(shost)) {
+			spin_unlock_irqrestore(shost->host_lock, flags);
+			/* push back on work queue for further processing */
+			list_splice_init(&check_list, work_q);
+			list_splice_init(&tmp_list, work_q);
+			SCSI_LOG_ERROR_RECOVERY(3,
+				shost_printk(KERN_INFO, shost,
+					    "skip %s, past eh deadline\n",
+					     __func__));
+			return list_empty(work_q);
+		}
+		spin_unlock_irqrestore(shost->host_lock, flags);
+
 		scmd = list_entry(tmp_list.next, struct scsi_cmnd, eh_entry);
 		id = scmd_id(scmd);
  
@@ -1347,6 +1446,7 @@
 	LIST_HEAD(check_list);
 	unsigned int channel;
 	int rtn;
+	unsigned long flags;
  
 	/*
 	 * we really want to loop over the various channels, and do this on
@@ -1356,6 +1456,18 @@
 	 */
  
 	for (channel = 0; channel <= shost->max_channel; channel++) {
+		spin_lock_irqsave(shost->host_lock, flags);
+		if (scsi_host_eh_past_deadline(shost)) {
+			spin_unlock_irqrestore(shost->host_lock, flags);
+			list_splice_init(&check_list, work_q);
+			SCSI_LOG_ERROR_RECOVERY(3,
+				shost_printk(KERN_INFO, shost,
+					    "skip %s, past eh deadline\n",
+					     __func__));
+			return list_empty(work_q);
+		}
+		spin_unlock_irqrestore(shost->host_lock, flags);
+
 		chan_scmd = NULL;
 		list_for_each_entry(scmd, work_q, eh_entry) {
 			if (channel == scmd_channel(scmd)) {
@@ -1755,8 +1867,9 @@
 	 * will be requests for character device operations, and also for
 	 * ioctls to queued block devices.
 	 */
-	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: waking up host to restart\n",
-					  __func__));
+	SCSI_LOG_ERROR_RECOVERY(3,
+		printk("scsi_eh_%d waking up host to restart\n",
+		       shost->host_no));
  
 	spin_lock_irqsave(shost->host_lock, flags);
 	if (scsi_host_set_state(shost, SHOST_RUNNING))
@@ -1883,6 +1996,10 @@
 		if (!scsi_eh_abort_cmds(&eh_work_q, &eh_done_q))
 			scsi_eh_ready_devs(shost, &eh_work_q, &eh_done_q);
  
+	spin_lock_irqsave(shost->host_lock, flags);
+	if (shost->eh_deadline)
+		shost->last_reset = 0;
+	spin_unlock_irqrestore(shost->host_lock, flags);
 	scsi_eh_flush_done_q(&eh_done_q);
 }
  
@@ -1909,7 +2026,7 @@
 		if ((shost->host_failed == 0 && shost->host_eh_scheduled == 0) ||
 		    shost->host_failed != shost->host_busy) {
 			SCSI_LOG_ERROR_RECOVERY(1,
-				printk("Error handler scsi_eh_%d sleeping\n",
+				printk("scsi_eh_%d: sleeping\n",
 					shost->host_no));
 			schedule();
 			continue;
@@ -1917,8 +2034,9 @@
  
 		__set_current_state(TASK_RUNNING);
 		SCSI_LOG_ERROR_RECOVERY(1,
-			printk("Error handler scsi_eh_%d waking up\n",
-				shost->host_no));
+			printk("scsi_eh_%d: waking up %d/%d/%d\n",
+			       shost->host_no, shost->host_eh_scheduled,
+			       shost->host_failed, shost->host_busy));
  
 		/*
 		 * We have a host that is failing for some reason.  Figure out
@@ -281,6 +281,42 @@
  
 static DEVICE_ATTR(host_reset, S_IWUSR, NULL, store_host_reset);
  
+/*
+ * show_shost_eh_deadline - sysfs read handler for a host's 'eh_deadline'.
+ *
+ * The value is kept internally in jiffies; it is converted back to whole
+ * seconds and printed into @buf as a decimal number followed by a newline.
+ * Returns the number of characters written.
+ */
+static ssize_t
+show_shost_eh_deadline(struct device *dev,
+		      struct device_attribute *attr, char *buf)
+{
+	struct Scsi_Host *host = class_to_shost(dev);
+	int seconds = host->eh_deadline / HZ;
+
+	return sprintf(buf, "%d\n", seconds);
+}
+
+/*
+ * store_shost_eh_deadline - sysfs write handler for a host's 'eh_deadline'.
+ *
+ * Parses a decimal number of seconds from @buf and stores it, converted to
+ * jiffies, in shost->eh_deadline.  Zero disables the deadline.
+ *
+ * Returns @count on success, or:
+ *   -EINVAL  if the transport template provides its own eh_strategy_handler,
+ *            if @buf does not start with a number, or if the number is
+ *            negative or too large to be expressed in jiffies as an int;
+ *   -EBUSY   if the host is currently in recovery.
+ */
+static ssize_t
+store_shost_eh_deadline(struct device *dev, struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct Scsi_Host *shost = class_to_shost(dev);
+	int ret = -EINVAL;
+	int deadline;
+	unsigned long flags;
+
+	if (shost->transportt && shost->transportt->eh_strategy_handler)
+		return ret;
+
+	if (sscanf(buf, "%d\n", &deadline) != 1)
+		return ret;
+
+	/*
+	 * eh_deadline is an int holding jiffies.  A negative value, or one
+	 * that overflows when scaled by HZ, would yield a bogus deadline and
+	 * make error handling look expired straight away, so refuse both.
+	 */
+	if (deadline < 0 || deadline > INT_MAX / HZ)
+		return ret;
+
+	/* Do not change the limit underneath a running error handler. */
+	spin_lock_irqsave(shost->host_lock, flags);
+	if (scsi_host_in_recovery(shost))
+		ret = -EBUSY;
+	else {
+		shost->eh_deadline = deadline * HZ;
+		ret = count;
+	}
+	spin_unlock_irqrestore(shost->host_lock, flags);
+
+	return ret;
+}
+
+static DEVICE_ATTR(eh_deadline, S_IRUGO | S_IWUSR, show_shost_eh_deadline, store_shost_eh_deadline);
+
 shost_rd_attr(unique_id, "%u\n");
 shost_rd_attr(host_busy, "%hu\n");
 shost_rd_attr(cmd_per_lun, "%hd\n");
@@ -308,6 +344,7 @@
 	&dev_attr_prot_capabilities.attr,
 	&dev_attr_prot_guard_type.attr,
 	&dev_attr_host_reset.attr,
+	&dev_attr_eh_deadline.attr,
 	NULL
 };
  
@@ -599,6 +599,11 @@
  
 	unsigned int host_no;  /* Used for IOCTL_GET_IDLUN, /proc/scsi et al. */
  
+	/* next two fields are used to bound the time spent in error handling */
+	int eh_deadline;
+	unsigned long last_reset;
+
+
 	/*
 	 * These three parameters can be used to allow for wide scsi,
 	 * and for host adapters that support multiple busses
...	...	@@ -316,6 +316,12 @@
316	316	kfree(shost);
317	317	}
318	318
	319	+static unsigned int shost_eh_deadline;
	320	+
	321	+module_param_named(eh_deadline, shost_eh_deadline, uint, S_IRUGO\|S_IWUSR);
	322	+MODULE_PARM_DESC(eh_deadline,
	323	+ "SCSI EH timeout in seconds (should be between 1 and 2^32-1)");
	324	+
319	325	static struct device_type scsi_host_type = {
320	326	.name = "scsi_host",
321	327	.release = scsi_host_dev_release,
...	...	@@ -388,6 +394,7 @@
388	394	shost->unchecked_isa_dma = sht->unchecked_isa_dma;
389	395	shost->use_clustering = sht->use_clustering;
390	396	shost->ordered_tag = sht->ordered_tag;
	397	+ shost->eh_deadline = shost_eh_deadline * HZ;
391	398
392	399	if (sht->supported_mode == MODE_UNKNOWN)
393	400	/* means we didn't set it ... default to INITIATOR */
...	...	@@ -87,6 +87,18 @@
87	87	}
88	88	EXPORT_SYMBOL_GPL(scsi_schedule_eh);
89	89
	90	+static int scsi_host_eh_past_deadline(struct Scsi_Host *shost)
	91	+{
	92	+ if (!shost->last_reset \|\| !shost->eh_deadline)
	93	+ return 0;
	94	+
	95	+ if (time_before(jiffies,
	96	+ shost->last_reset + shost->eh_deadline))
	97	+ return 0;
	98	+
	99	+ return 1;
	100	+}
	101	+
90	102	/**
91	103	* scsi_eh_scmd_add - add scsi cmd to error handling.
92	104	* @scmd: scmd to run eh on.
...	...	@@ -109,6 +121,9 @@
109	121	if (scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY))
110	122	goto out_unlock;
111	123
	124	+ if (shost->eh_deadline && !shost->last_reset)
	125	+ shost->last_reset = jiffies;
	126	+
112	127	ret = 1;
113	128	scmd->eh_eflags \|= eh_flag;
114	129	list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q);
...	...	@@ -138,6 +153,9 @@
138	153	trace_scsi_dispatch_cmd_timeout(scmd);
139	154	scsi_log_completion(scmd, TIMEOUT_ERROR);
140	155
	156	+ if (host->eh_deadline && !host->last_reset)
	157	+ host->last_reset = jiffies;
	158	+
141	159	if (host->transportt->eh_timed_out)
142	160	rtn = host->transportt->eh_timed_out(scmd);
143	161	else if (host->hostt->eh_timed_out)
144	162
145	163
...	...	@@ -990,13 +1008,26 @@
990	1008	struct list_head *done_q)
991	1009	{
992	1010	struct scsi_cmnd scmd, next;
	1011	+ struct Scsi_Host *shost;
993	1012	int rtn;
	1013	+ unsigned long flags;
994	1014
995	1015	list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
996	1016	if ((scmd->eh_eflags & SCSI_EH_CANCEL_CMD) \|\|
997	1017	SCSI_SENSE_VALID(scmd))
998	1018	continue;
999	1019
	1020	+ shost = scmd->device->host;
	1021	+ spin_lock_irqsave(shost->host_lock, flags);
	1022	+ if (scsi_host_eh_past_deadline(shost)) {
	1023	+ spin_unlock_irqrestore(shost->host_lock, flags);
	1024	+ SCSI_LOG_ERROR_RECOVERY(3,
	1025	+ shost_printk(KERN_INFO, shost,
	1026	+ "skip %s, past eh deadline\n",
	1027	+ __func__));
	1028	+ break;
	1029	+ }
	1030	+ spin_unlock_irqrestore(shost->host_lock, flags);
1000	1031	SCSI_LOG_ERROR_RECOVERY(2, scmd_printk(KERN_INFO, scmd,
1001	1032	"%s: requesting sense\n",
1002	1033	current->comm));
1003	1034
...	...	@@ -1082,11 +1113,28 @@
1082	1113	struct scsi_cmnd scmd, next;
1083	1114	struct scsi_device *sdev;
1084	1115	int finish_cmds;
	1116	+ unsigned long flags;
1085	1117
1086	1118	while (!list_empty(cmd_list)) {
1087	1119	scmd = list_entry(cmd_list->next, struct scsi_cmnd, eh_entry);
1088	1120	sdev = scmd->device;
1089	1121
	1122	+ if (!try_stu) {
	1123	+ spin_lock_irqsave(sdev->host->host_lock, flags);
	1124	+ if (scsi_host_eh_past_deadline(sdev->host)) {
	1125	+ /* Push items back onto work_q */
	1126	+ list_splice_init(cmd_list, work_q);
	1127	+ spin_unlock_irqrestore(sdev->host->host_lock,
	1128	+ flags);
	1129	+ SCSI_LOG_ERROR_RECOVERY(3,
	1130	+ shost_printk(KERN_INFO, sdev->host,
	1131	+ "skip %s, past eh deadline",
	1132	+ __func__));
	1133	+ break;
	1134	+ }
	1135	+ spin_unlock_irqrestore(sdev->host->host_lock, flags);
	1136	+ }
	1137	+
1090	1138	finish_cmds = !scsi_device_online(scmd->device) \|\|
1091	1139	(try_stu && !scsi_eh_try_stu(scmd) &&
1092	1140	!scsi_eh_tur(scmd)) \|\|
1093	1141
1094	1142
...	...	@@ -1122,14 +1170,28 @@
1122	1170	struct scsi_cmnd scmd, next;
1123	1171	LIST_HEAD(check_list);
1124	1172	int rtn;
	1173	+ struct Scsi_Host *shost;
	1174	+ unsigned long flags;
1125	1175
1126	1176	list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
1127	1177	if (!(scmd->eh_eflags & SCSI_EH_CANCEL_CMD))
1128	1178	continue;
	1179	+ shost = scmd->device->host;
	1180	+ spin_lock_irqsave(shost->host_lock, flags);
	1181	+ if (scsi_host_eh_past_deadline(shost)) {
	1182	+ spin_unlock_irqrestore(shost->host_lock, flags);
	1183	+ list_splice_init(&check_list, work_q);
	1184	+ SCSI_LOG_ERROR_RECOVERY(3,
	1185	+ shost_printk(KERN_INFO, shost,
	1186	+ "skip %s, past eh deadline\n",
	1187	+ __func__));
	1188	+ return list_empty(work_q);
	1189	+ }
	1190	+ spin_unlock_irqrestore(shost->host_lock, flags);
1129	1191	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting cmd:"
1130	1192	"0x%p\n", current->comm,
1131	1193	scmd));
1132		- rtn = scsi_try_to_abort_cmd(scmd->device->host->hostt, scmd);
	1194	+ rtn = scsi_try_to_abort_cmd(shost->hostt, scmd);
1133	1195	if (rtn == SUCCESS \|\| rtn == FAST_IO_FAIL) {
1134	1196	scmd->eh_eflags &= ~SCSI_EH_CANCEL_CMD;
1135	1197	if (rtn == FAST_IO_FAIL)
1136	1198
...	...	@@ -1187,8 +1249,19 @@
1187	1249	{
1188	1250	struct scsi_cmnd scmd, stu_scmd, *next;
1189	1251	struct scsi_device *sdev;
	1252	+ unsigned long flags;
1190	1253
1191	1254	shost_for_each_device(sdev, shost) {
	1255	+ spin_lock_irqsave(shost->host_lock, flags);
	1256	+ if (scsi_host_eh_past_deadline(shost)) {
	1257	+ spin_unlock_irqrestore(shost->host_lock, flags);
	1258	+ SCSI_LOG_ERROR_RECOVERY(3,
	1259	+ shost_printk(KERN_INFO, shost,
	1260	+ "skip %s, past eh deadline\n",
	1261	+ __func__));
	1262	+ break;
	1263	+ }
	1264	+ spin_unlock_irqrestore(shost->host_lock, flags);
1192	1265	stu_scmd = NULL;
1193	1266	list_for_each_entry(scmd, work_q, eh_entry)
1194	1267	if (scmd->device == sdev && SCSI_SENSE_VALID(scmd) &&
1195	1268
...	...	@@ -1241,9 +1314,20 @@
1241	1314	{
1242	1315	struct scsi_cmnd scmd, bdr_scmd, *next;
1243	1316	struct scsi_device *sdev;
	1317	+ unsigned long flags;
1244	1318	int rtn;
1245	1319
1246	1320	shost_for_each_device(sdev, shost) {
	1321	+ spin_lock_irqsave(shost->host_lock, flags);
	1322	+ if (scsi_host_eh_past_deadline(shost)) {
	1323	+ spin_unlock_irqrestore(shost->host_lock, flags);
	1324	+ SCSI_LOG_ERROR_RECOVERY(3,
	1325	+ shost_printk(KERN_INFO, shost,
	1326	+ "skip %s, past eh deadline\n",
	1327	+ __func__));
	1328	+ break;
	1329	+ }
	1330	+ spin_unlock_irqrestore(shost->host_lock, flags);
1247	1331	bdr_scmd = NULL;
1248	1332	list_for_each_entry(scmd, work_q, eh_entry)
1249	1333	if (scmd->device == sdev) {
1250	1334
...	...	@@ -1303,7 +1387,22 @@
1303	1387	struct scsi_cmnd next, scmd;
1304	1388	int rtn;
1305	1389	unsigned int id;
	1390	+ unsigned long flags;
1306	1391
	1392	+ spin_lock_irqsave(shost->host_lock, flags);
	1393	+ if (scsi_host_eh_past_deadline(shost)) {
	1394	+ spin_unlock_irqrestore(shost->host_lock, flags);
	1395	+ /* push back on work queue for further processing */
	1396	+ list_splice_init(&check_list, work_q);
	1397	+ list_splice_init(&tmp_list, work_q);
	1398	+ SCSI_LOG_ERROR_RECOVERY(3,
	1399	+ shost_printk(KERN_INFO, shost,
	1400	+ "skip %s, past eh deadline\n",
	1401	+ __func__));
	1402	+ return list_empty(work_q);
	1403	+ }
	1404	+ spin_unlock_irqrestore(shost->host_lock, flags);
	1405	+
1307	1406	scmd = list_entry(tmp_list.next, struct scsi_cmnd, eh_entry);
1308	1407	id = scmd_id(scmd);
1309	1408
...	...	@@ -1347,6 +1446,7 @@
1347	1446	LIST_HEAD(check_list);
1348	1447	unsigned int channel;
1349	1448	int rtn;
	1449	+ unsigned long flags;
1350	1450
1351	1451	/*
1352	1452	* we really want to loop over the various channels, and do this on
...	...	@@ -1356,6 +1456,18 @@
1356	1456	*/
1357	1457
1358	1458	for (channel = 0; channel <= shost->max_channel; channel++) {
	1459	+ spin_lock_irqsave(shost->host_lock, flags);
	1460	+ if (scsi_host_eh_past_deadline(shost)) {
	1461	+ spin_unlock_irqrestore(shost->host_lock, flags);
	1462	+ list_splice_init(&check_list, work_q);
	1463	+ SCSI_LOG_ERROR_RECOVERY(3,
	1464	+ shost_printk(KERN_INFO, shost,
	1465	+ "skip %s, past eh deadline\n",
	1466	+ __func__));
	1467	+ return list_empty(work_q);
	1468	+ }
	1469	+ spin_unlock_irqrestore(shost->host_lock, flags);
	1470	+
1359	1471	chan_scmd = NULL;
1360	1472	list_for_each_entry(scmd, work_q, eh_entry) {
1361	1473	if (channel == scmd_channel(scmd)) {
...	...	@@ -1755,8 +1867,9 @@
1755	1867	* will be requests for character device operations, and also for
1756	1868	* ioctls to queued block devices.
1757	1869	*/
1758		- SCSI_LOG_ERROR_RECOVERY(3, printk("%s: waking up host to restart\n",
1759		- __func__));
	1870	+ SCSI_LOG_ERROR_RECOVERY(3,
	1871	+ printk("scsi_eh_%d waking up host to restart\n",
	1872	+ shost->host_no));
1760	1873
1761	1874	spin_lock_irqsave(shost->host_lock, flags);
1762	1875	if (scsi_host_set_state(shost, SHOST_RUNNING))
...	...	@@ -1883,6 +1996,10 @@
1883	1996	if (!scsi_eh_abort_cmds(&eh_work_q, &eh_done_q))
1884	1997	scsi_eh_ready_devs(shost, &eh_work_q, &eh_done_q);
1885	1998
	1999	+ spin_lock_irqsave(shost->host_lock, flags);
	2000	+ if (shost->eh_deadline)
	2001	+ shost->last_reset = 0;
	2002	+ spin_unlock_irqrestore(shost->host_lock, flags);
1886	2003	scsi_eh_flush_done_q(&eh_done_q);
1887	2004	}
1888	2005
...	...	@@ -1909,7 +2026,7 @@
1909	2026	if ((shost->host_failed == 0 && shost->host_eh_scheduled == 0) \|\|
1910	2027	shost->host_failed != shost->host_busy) {
1911	2028	SCSI_LOG_ERROR_RECOVERY(1,
1912		- printk("Error handler scsi_eh_%d sleeping\n",
	2029	+ printk("scsi_eh_%d: sleeping\n",
1913	2030	shost->host_no));
1914	2031	schedule();
1915	2032	continue;
...	...	@@ -1917,8 +2034,9 @@
1917	2034
1918	2035	__set_current_state(TASK_RUNNING);
1919	2036	SCSI_LOG_ERROR_RECOVERY(1,
1920		- printk("Error handler scsi_eh_%d waking up\n",
1921		- shost->host_no));
	2037	+ printk("scsi_eh_%d: waking up %d/%d/%d\n",
	2038	+ shost->host_no, shost->host_eh_scheduled,
	2039	+ shost->host_failed, shost->host_busy));
1922	2040
1923	2041	/*
1924	2042	* We have a host that is failing for some reason. Figure out
...	...	@@ -281,6 +281,42 @@
281	281
282	282	static DEVICE_ATTR(host_reset, S_IWUSR, NULL, store_host_reset);
283	283
	284	+static ssize_t
	285	+show_shost_eh_deadline(struct device *dev,
	286	+ struct device_attribute attr, char buf)
	287	+{
	288	+ struct Scsi_Host *shost = class_to_shost(dev);
	289	+
	290	+ return sprintf(buf, "%d\n", shost->eh_deadline / HZ);
	291	+}
	292	+
	293	+static ssize_t
	294	+store_shost_eh_deadline(struct device dev, struct device_attribute attr,
	295	+ const char *buf, size_t count)
	296	+{
	297	+ struct Scsi_Host *shost = class_to_shost(dev);
	298	+ int ret = -EINVAL;
	299	+ int deadline;
	300	+ unsigned long flags;
	301	+
	302	+ if (shost->transportt && shost->transportt->eh_strategy_handler)
	303	+ return ret;
	304	+
	305	+ if (sscanf(buf, "%d\n", &deadline) == 1) {
	306	+ spin_lock_irqsave(shost->host_lock, flags);
	307	+ if (scsi_host_in_recovery(shost))
	308	+ ret = -EBUSY;
	309	+ else {
	310	+ shost->eh_deadline = deadline * HZ;
	311	+ ret = count;
	312	+ }
	313	+ spin_unlock_irqrestore(shost->host_lock, flags);
	314	+ }
	315	+ return ret;
	316	+}
	317	+
	318	+static DEVICE_ATTR(eh_deadline, S_IRUGO \| S_IWUSR, show_shost_eh_deadline, store_shost_eh_deadline);
	319	+
284	320	shost_rd_attr(unique_id, "%u\n");
285	321	shost_rd_attr(host_busy, "%hu\n");
286	322	shost_rd_attr(cmd_per_lun, "%hd\n");
...	...	@@ -308,6 +344,7 @@
308	344	&dev_attr_prot_capabilities.attr,
309	345	&dev_attr_prot_guard_type.attr,
310	346	&dev_attr_host_reset.attr,
	347	+ &dev_attr_eh_deadline.attr,
311	348	NULL
312	349	};
313	350
...	...	@@ -599,6 +599,11 @@
599	599
600	600	unsigned int host_no; /* Used for IOCTL_GET_IDLUN, /proc/scsi et al. */
601	601
	602	+ /* next two fields are used to bound the time spent in error handling */
	603	+ int eh_deadline;
	604	+ unsigned long last_reset;
	605	+
	606	+
602	607	/*
603	608	* These three parameters can be used to allow for wide scsi,
604	609	* and for host adapters that support multiple busses