Commit cd9070c9c512ff7995f9019392e0ae548df3a088

Authored by Christoph Hellwig
1 parent 71e75c97f9

scsi: fix the {host,target,device}_blocked counter mess

Seems like these counters are missing any sort of synchronization for
updates, as an over-10-year-old comment from me noted.  Fix this by
using atomic counters, and while we're at it also make sure they are
in the same cacheline as the _busy counters and not needlessly stored
to in every I/O completion.

With the new model the _blocked counters can temporarily go negative,
so all the readers are updated to check for > 0 values.  Longer
term every successful I/O completion will reset the counters to zero,
so the temporarily negative values will not cause any harm.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Webb Scales <webbnh@hp.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Tested-by: Bart Van Assche <bvanassche@acm.org>
Tested-by: Robert Elliott <elliott@hp.com>

Showing 5 changed files with 58 additions and 53 deletions Side-by-side Diff

... ... @@ -726,17 +726,16 @@
726 726  
727 727 scsi_device_unbusy(sdev);
728 728  
729   - /*
730   - * Clear the flags which say that the device/host is no longer
731   - * capable of accepting new commands. These are set in scsi_queue.c
732   - * for both the queue full condition on a device, and for a
733   - * host full condition on the host.
734   - *
735   - * XXX(hch): What about locking?
736   - */
737   - shost->host_blocked = 0;
738   - starget->target_blocked = 0;
739   - sdev->device_blocked = 0;
  729 + /*
  730 + * Clear the flags that say that the device/target/host is no longer
  731 + * capable of accepting new commands.
  732 + */
  733 + if (atomic_read(&shost->host_blocked))
  734 + atomic_set(&shost->host_blocked, 0);
  735 + if (atomic_read(&starget->target_blocked))
  736 + atomic_set(&starget->target_blocked, 0);
  737 + if (atomic_read(&sdev->device_blocked))
  738 + atomic_set(&sdev->device_blocked, 0);
740 739  
741 740 /*
742 741 * If we have valid sense information, then some kind of recovery
drivers/scsi/scsi_lib.c
... ... @@ -99,14 +99,16 @@
99 99 */
100 100 switch (reason) {
101 101 case SCSI_MLQUEUE_HOST_BUSY:
102   - host->host_blocked = host->max_host_blocked;
  102 + atomic_set(&host->host_blocked, host->max_host_blocked);
103 103 break;
104 104 case SCSI_MLQUEUE_DEVICE_BUSY:
105 105 case SCSI_MLQUEUE_EH_RETRY:
106   - device->device_blocked = device->max_device_blocked;
  106 + atomic_set(&device->device_blocked,
  107 + device->max_device_blocked);
107 108 break;
108 109 case SCSI_MLQUEUE_TARGET_BUSY:
109   - starget->target_blocked = starget->max_target_blocked;
  110 + atomic_set(&starget->target_blocked,
  111 + starget->max_target_blocked);
110 112 break;
111 113 }
112 114 }
113 115  
114 116  
115 117  
116 118  
117 119  
... ... @@ -351,29 +353,35 @@
351 353 spin_unlock_irqrestore(shost->host_lock, flags);
352 354 }
353 355  
354   -static inline int scsi_device_is_busy(struct scsi_device *sdev)
  356 +static inline bool scsi_device_is_busy(struct scsi_device *sdev)
355 357 {
356   - if (atomic_read(&sdev->device_busy) >= sdev->queue_depth ||
357   - sdev->device_blocked)
358   - return 1;
359   - return 0;
  358 + if (atomic_read(&sdev->device_busy) >= sdev->queue_depth)
  359 + return true;
  360 + if (atomic_read(&sdev->device_blocked) > 0)
  361 + return true;
  362 + return false;
360 363 }
361 364  
362   -static inline int scsi_target_is_busy(struct scsi_target *starget)
  365 +static inline bool scsi_target_is_busy(struct scsi_target *starget)
363 366 {
364   - return ((starget->can_queue > 0 &&
365   - atomic_read(&starget->target_busy) >= starget->can_queue) ||
366   - starget->target_blocked);
  367 + if (starget->can_queue > 0 &&
  368 + atomic_read(&starget->target_busy) >= starget->can_queue)
  369 + return true;
  370 + if (atomic_read(&starget->target_blocked) > 0)
  371 + return true;
  372 + return false;
367 373 }
368 374  
369   -static inline int scsi_host_is_busy(struct Scsi_Host *shost)
  375 +static inline bool scsi_host_is_busy(struct Scsi_Host *shost)
370 376 {
371   - if ((shost->can_queue > 0 &&
372   - atomic_read(&shost->host_busy) >= shost->can_queue) ||
373   - shost->host_blocked || shost->host_self_blocked)
374   - return 1;
375   -
376   - return 0;
  377 + if (shost->can_queue > 0 &&
  378 + atomic_read(&shost->host_busy) >= shost->can_queue)
  379 + return true;
  380 + if (atomic_read(&shost->host_blocked) > 0)
  381 + return true;
  382 + if (shost->host_self_blocked)
  383 + return true;
  384 + return false;
377 385 }
378 386  
379 387 static void scsi_starved_list_run(struct Scsi_Host *shost)
380 388  
... ... @@ -1256,14 +1264,14 @@
1256 1264 unsigned int busy;
1257 1265  
1258 1266 busy = atomic_inc_return(&sdev->device_busy) - 1;
1259   - if (sdev->device_blocked) {
  1267 + if (atomic_read(&sdev->device_blocked)) {
1260 1268 if (busy)
1261 1269 goto out_dec;
1262 1270  
1263 1271 /*
1264 1272 * unblock after device_blocked iterates to zero
1265 1273 */
1266   - if (--sdev->device_blocked != 0) {
  1274 + if (atomic_dec_return(&sdev->device_blocked) > 0) {
1267 1275 blk_delay_queue(q, SCSI_QUEUE_DELAY);
1268 1276 goto out_dec;
1269 1277 }
1270 1278  
1271 1279  
... ... @@ -1302,19 +1310,15 @@
1302 1310 }
1303 1311  
1304 1312 busy = atomic_inc_return(&starget->target_busy) - 1;
1305   - if (starget->target_blocked) {
  1313 + if (atomic_read(&starget->target_blocked) > 0) {
1306 1314 if (busy)
1307 1315 goto starved;
1308 1316  
1309 1317 /*
1310 1318 * unblock after target_blocked iterates to zero
1311 1319 */
1312   - spin_lock_irq(shost->host_lock);
1313   - if (--starget->target_blocked != 0) {
1314   - spin_unlock_irq(shost->host_lock);
  1320 + if (atomic_dec_return(&starget->target_blocked) > 0)
1315 1321 goto out_dec;
1316   - }
1317   - spin_unlock_irq(shost->host_lock);
1318 1322  
1319 1323 SCSI_LOG_MLQUEUE(3, starget_printk(KERN_INFO, starget,
1320 1324 "unblocking target at zero depth\n"));
1321 1325  
1322 1326  
... ... @@ -1349,19 +1353,15 @@
1349 1353 return 0;
1350 1354  
1351 1355 busy = atomic_inc_return(&shost->host_busy) - 1;
1352   - if (shost->host_blocked) {
  1356 + if (atomic_read(&shost->host_blocked) > 0) {
1353 1357 if (busy)
1354 1358 goto starved;
1355 1359  
1356 1360 /*
1357 1361 * unblock after host_blocked iterates to zero
1358 1362 */
1359   - spin_lock_irq(shost->host_lock);
1360   - if (--shost->host_blocked != 0) {
1361   - spin_unlock_irq(shost->host_lock);
  1363 + if (atomic_dec_return(&shost->host_blocked) > 0)
1362 1364 goto out_dec;
1363   - }
1364   - spin_unlock_irq(shost->host_lock);
1365 1365  
1366 1366 SCSI_LOG_MLQUEUE(3,
1367 1367 shost_printk(KERN_INFO, shost,
drivers/scsi/scsi_sysfs.c
... ... @@ -584,7 +584,6 @@
584 584 /*
585 585 * Create the actual show/store functions and data structures.
586 586 */
587   -sdev_rd_attr (device_blocked, "%d\n");
588 587 sdev_rd_attr (type, "%d\n");
589 588 sdev_rd_attr (scsi_level, "%d\n");
590 589 sdev_rd_attr (vendor, "%.8s\n");
... ... @@ -599,6 +598,15 @@
599 598 return snprintf(buf, 20, "%d\n", atomic_read(&sdev->device_busy));
600 599 }
601 600 static DEVICE_ATTR(device_busy, S_IRUGO, sdev_show_device_busy, NULL);
  601 +
  602 +static ssize_t
  603 +sdev_show_device_blocked(struct device *dev, struct device_attribute *attr,
  604 + char *buf)
  605 +{
  606 + struct scsi_device *sdev = to_scsi_device(dev);
  607 + return snprintf(buf, 20, "%d\n", atomic_read(&sdev->device_blocked));
  608 +}
  609 +static DEVICE_ATTR(device_blocked, S_IRUGO, sdev_show_device_blocked, NULL);
602 610  
603 611 /*
604 612 * TODO: can we make these symlinks to the block layer ones?
include/scsi/scsi_device.h
... ... @@ -82,6 +82,8 @@
82 82 struct list_head same_target_siblings; /* just the devices sharing same target id */
83 83  
84 84 atomic_t device_busy; /* commands actually active on LLDD */
  85 + atomic_t device_blocked; /* Device returned QUEUE_FULL. */
  86 +
85 87 spinlock_t list_lock;
86 88 struct list_head cmd_list; /* queue of in use SCSI Command structures */
87 89 struct list_head starved_entry;
... ... @@ -180,8 +182,6 @@
180 182 struct list_head event_list; /* asserted events */
181 183 struct work_struct event_work;
182 184  
183   - unsigned int device_blocked; /* Device returned QUEUE_FULL. */
184   -
185 185 unsigned int max_device_blocked; /* what device_blocked counts down from */
186 186 #define SCSI_DEFAULT_DEVICE_BLOCKED 3
187 187  
188 188  
... ... @@ -291,12 +291,13 @@
291 291 * the same target will also. */
292 292 /* commands actually active on LLD. */
293 293 atomic_t target_busy;
  294 + atomic_t target_blocked;
  295 +
294 296 /*
295 297 * LLDs should set this in the slave_alloc host template callout.
296 298 * If set to zero then there is not limit.
297 299 */
298 300 unsigned int can_queue;
299   - unsigned int target_blocked;
300 301 unsigned int max_target_blocked;
301 302 #define SCSI_DEFAULT_TARGET_BLOCKED 3
302 303  
include/scsi/scsi_host.h
... ... @@ -583,6 +583,8 @@
583 583 struct blk_queue_tag *bqt;
584 584  
585 585 atomic_t host_busy; /* commands actually active on low-level */
  586 + atomic_t host_blocked;
  587 +
586 588 unsigned int host_failed; /* commands that failed.
587 589 protected by host_lock */
588 590 unsigned int host_eh_scheduled; /* EH scheduled without command */
... ... @@ -680,11 +682,6 @@
680 682 * Task management function work queue
681 683 */
682 684 struct workqueue_struct *tmf_work_q;
683   -
684   - /*
685   - * Host has rejected a command because it was busy.
686   - */
687   - unsigned int host_blocked;
688 685  
689 686 /*
690 687 * Value host_blocked counts down from