Commit 5f38433885245dce82aa53c20a6b2efbe81ae350

Authored by Michael Holzheu
Committed by Linus Torvalds
1 parent 4cd190a736

[PATCH] s390: fix endless retry loop in tape driver

If a tape device is assigned to another host, the interrupt for the assign
operation comes back with deferred condition code 1.  Under some conditions
this can lead to an endless loop of retries.  In the deferred condition code
handling, check whether the current request is still in I/O, and do not retry
a request that has already been cancelled.

Signed-off-by: Michael Holzheu <holzheu@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Showing 3 changed files with 35 additions and 13 deletions Side-by-side Diff

drivers/s390/char/tape.h
... ... @@ -250,6 +250,7 @@
250 250 extern int tape_do_io(struct tape_device *, struct tape_request *);
251 251 extern int tape_do_io_async(struct tape_device *, struct tape_request *);
252 252 extern int tape_do_io_interruptible(struct tape_device *, struct tape_request *);
  253 +extern int tape_cancel_io(struct tape_device *, struct tape_request *);
253 254 void tape_hotplug_event(struct tape_device *, int major, int action);
254 255  
255 256 static inline int
drivers/s390/char/tape_core.c
... ... @@ -761,6 +761,13 @@
761 761 */
762 762 if (request->status == TAPE_REQUEST_IN_IO)
763 763 return;
  764 + /*
  765 + * Request has already been stopped. We have to wait until
  766 + * the request is removed from the queue in the interrupt
  767 + * handling.
  768 + */
  769 + if (request->status == TAPE_REQUEST_DONE)
  770 + return;
764 771  
765 772 /*
766 773 * We wanted to cancel the request but the common I/O layer
... ... @@ -1024,6 +1031,20 @@
1024 1031 }
1025 1032  
1026 1033 /*
  1034 + * Stop running ccw: call __tape_cancel_io() with the ccw device lock held and return its result.
  1035 + */
  1036 +int
  1037 +tape_cancel_io(struct tape_device *device, struct tape_request *request)
  1038 +{
  1039 + int rc;
  1040 +
  1041 + spin_lock_irq(get_ccwdev_lock(device->cdev));
  1042 + rc = __tape_cancel_io(device, request);
  1043 + spin_unlock_irq(get_ccwdev_lock(device->cdev));
  1044 + return rc;
  1045 +}
  1046 +
  1047 +/*
1027 1048 * Tape interrupt routine, called from the ccw_device layer
1028 1049 */
1029 1050 static void
1030 1051  
... ... @@ -1068,12 +1089,12 @@
1068 1089 * error might still apply. So we just schedule the request to be
1069 1090 * started later.
1070 1091 */
1071   - if (irb->scsw.cc != 0 && (irb->scsw.fctl & SCSW_FCTL_START_FUNC)) {
1072   - PRINT_WARN("(%s): deferred cc=%i. restaring\n",
1073   - cdev->dev.bus_id,
1074   - irb->scsw.cc);
  1092 + if (irb->scsw.cc != 0 && (irb->scsw.fctl & SCSW_FCTL_START_FUNC) &&
  1093 + (request->status == TAPE_REQUEST_IN_IO)) {
  1094 + DBF_EVENT(3,"(%08x): deferred cc=%i, fctl=%i. restarting\n",
  1095 + device->cdev_id, irb->scsw.cc, irb->scsw.fctl);
1075 1096 request->status = TAPE_REQUEST_QUEUED;
1076   - schedule_work(&device->tape_dnr);
  1097 + schedule_delayed_work(&device->tape_dnr, HZ);
1077 1098 return;
1078 1099 }
1079 1100  
... ... @@ -1287,5 +1308,6 @@
1287 1308 EXPORT_SYMBOL(tape_do_io);
1288 1309 EXPORT_SYMBOL(tape_do_io_async);
1289 1310 EXPORT_SYMBOL(tape_do_io_interruptible);
  1311 +EXPORT_SYMBOL(tape_cancel_io);
1290 1312 EXPORT_SYMBOL(tape_mtop);
drivers/s390/char/tape_std.c
... ... @@ -37,20 +37,19 @@
37 37 {
38 38 struct tape_request * request;
39 39 struct tape_device * device;
  40 + int rc;
40 41  
41 42 request = (struct tape_request *) data;
42 43 if ((device = request->device) == NULL)
43 44 BUG();
44 45  
45   - spin_lock_irq(get_ccwdev_lock(device->cdev));
46   - if (request->callback != NULL) {
47   - DBF_EVENT(3, "%08x: Assignment timeout. Device busy.\n",
  46 + DBF_EVENT(3, "%08x: Assignment timeout. Device busy.\n",
48 47 device->cdev_id);
49   - PRINT_ERR("%s: Assignment timeout. Device busy.\n",
50   - device->cdev->dev.bus_id);
51   - ccw_device_clear(device->cdev, (long) request);
52   - }
53   - spin_unlock_irq(get_ccwdev_lock(device->cdev));
  48 + rc = tape_cancel_io(device, request);
  49 + if(rc)
  50 + PRINT_ERR("(%s): Assign timeout: Cancel failed with rc = %i\n",
  51 + device->cdev->dev.bus_id, rc);
  52 +
54 53 }
55 54  
56 55 int