Commit cd9070c9c512ff7995f9019392e0ae548df3a088
1 parent
71e75c97f9
scsi: fix the {host,target,device}_blocked counter mess
Seems like these counters are missing any sort of synchronization for updates, as an over-10-year-old comment from me noted. Fix this by using atomic counters, and while we're at it also make sure they are in the same cacheline as the _busy counters and not needlessly stored to in every I/O completion. With the new model the _blocked counters can temporarily go negative, so all the readers are updated to check for > 0 values. Longer term every successful I/O completion will reset the counters to zero, so the temporarily negative values will not cause any harm. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com> Reviewed-by: Webb Scales <webbnh@hp.com> Acked-by: Jens Axboe <axboe@kernel.dk> Tested-by: Bart Van Assche <bvanassche@acm.org> Tested-by: Robert Elliott <elliott@hp.com>
Showing 5 changed files with 58 additions and 53 deletions Side-by-side Diff
drivers/scsi/scsi.c
... | ... | @@ -726,17 +726,16 @@ |
726 | 726 | |
727 | 727 | scsi_device_unbusy(sdev); |
728 | 728 | |
729 | - /* | |
730 | - * Clear the flags which say that the device/host is no longer | |
731 | - * capable of accepting new commands. These are set in scsi_queue.c | |
732 | - * for both the queue full condition on a device, and for a | |
733 | - * host full condition on the host. | |
734 | - * | |
735 | - * XXX(hch): What about locking? | |
736 | - */ | |
737 | - shost->host_blocked = 0; | |
738 | - starget->target_blocked = 0; | |
739 | - sdev->device_blocked = 0; | |
729 | + /* | |
730 | + * Clear the flags that say that the device/target/host is no longer | |
731 | + * capable of accepting new commands. | |
732 | + */ | |
733 | + if (atomic_read(&shost->host_blocked)) | |
734 | + atomic_set(&shost->host_blocked, 0); | |
735 | + if (atomic_read(&starget->target_blocked)) | |
736 | + atomic_set(&starget->target_blocked, 0); | |
737 | + if (atomic_read(&sdev->device_blocked)) | |
738 | + atomic_set(&sdev->device_blocked, 0); | |
740 | 739 | |
741 | 740 | /* |
742 | 741 | * If we have valid sense information, then some kind of recovery |
drivers/scsi/scsi_lib.c
... | ... | @@ -99,14 +99,16 @@ |
99 | 99 | */ |
100 | 100 | switch (reason) { |
101 | 101 | case SCSI_MLQUEUE_HOST_BUSY: |
102 | - host->host_blocked = host->max_host_blocked; | |
102 | + atomic_set(&host->host_blocked, host->max_host_blocked); | |
103 | 103 | break; |
104 | 104 | case SCSI_MLQUEUE_DEVICE_BUSY: |
105 | 105 | case SCSI_MLQUEUE_EH_RETRY: |
106 | - device->device_blocked = device->max_device_blocked; | |
106 | + atomic_set(&device->device_blocked, | |
107 | + device->max_device_blocked); | |
107 | 108 | break; |
108 | 109 | case SCSI_MLQUEUE_TARGET_BUSY: |
109 | - starget->target_blocked = starget->max_target_blocked; | |
110 | + atomic_set(&starget->target_blocked, | |
111 | + starget->max_target_blocked); | |
110 | 112 | break; |
111 | 113 | } |
112 | 114 | } |
113 | 115 | |
114 | 116 | |
115 | 117 | |
116 | 118 | |
117 | 119 | |
... | ... | @@ -351,29 +353,35 @@ |
351 | 353 | spin_unlock_irqrestore(shost->host_lock, flags); |
352 | 354 | } |
353 | 355 | |
354 | -static inline int scsi_device_is_busy(struct scsi_device *sdev) | |
356 | +static inline bool scsi_device_is_busy(struct scsi_device *sdev) | |
355 | 357 | { |
356 | - if (atomic_read(&sdev->device_busy) >= sdev->queue_depth || | |
357 | - sdev->device_blocked) | |
358 | - return 1; | |
359 | - return 0; | |
358 | + if (atomic_read(&sdev->device_busy) >= sdev->queue_depth) | |
359 | + return true; | |
360 | + if (atomic_read(&sdev->device_blocked) > 0) | |
361 | + return true; | |
362 | + return false; | |
360 | 363 | } |
361 | 364 | |
362 | -static inline int scsi_target_is_busy(struct scsi_target *starget) | |
365 | +static inline bool scsi_target_is_busy(struct scsi_target *starget) | |
363 | 366 | { |
364 | - return ((starget->can_queue > 0 && | |
365 | - atomic_read(&starget->target_busy) >= starget->can_queue) || | |
366 | - starget->target_blocked); | |
367 | + if (starget->can_queue > 0 && | |
368 | + atomic_read(&starget->target_busy) >= starget->can_queue) | |
369 | + return true; | |
370 | + if (atomic_read(&starget->target_blocked) > 0) | |
371 | + return true; | |
372 | + return false; | |
367 | 373 | } |
368 | 374 | |
369 | -static inline int scsi_host_is_busy(struct Scsi_Host *shost) | |
375 | +static inline bool scsi_host_is_busy(struct Scsi_Host *shost) | |
370 | 376 | { |
371 | - if ((shost->can_queue > 0 && | |
372 | - atomic_read(&shost->host_busy) >= shost->can_queue) || | |
373 | - shost->host_blocked || shost->host_self_blocked) | |
374 | - return 1; | |
375 | - | |
376 | - return 0; | |
377 | + if (shost->can_queue > 0 && | |
378 | + atomic_read(&shost->host_busy) >= shost->can_queue) | |
379 | + return true; | |
380 | + if (atomic_read(&shost->host_blocked) > 0) | |
381 | + return true; | |
382 | + if (shost->host_self_blocked) | |
383 | + return true; | |
384 | + return false; | |
377 | 385 | } |
378 | 386 | |
379 | 387 | static void scsi_starved_list_run(struct Scsi_Host *shost) |
380 | 388 | |
... | ... | @@ -1256,14 +1264,14 @@ |
1256 | 1264 | unsigned int busy; |
1257 | 1265 | |
1258 | 1266 | busy = atomic_inc_return(&sdev->device_busy) - 1; |
1259 | - if (sdev->device_blocked) { | |
1267 | + if (atomic_read(&sdev->device_blocked)) { | |
1260 | 1268 | if (busy) |
1261 | 1269 | goto out_dec; |
1262 | 1270 | |
1263 | 1271 | /* |
1264 | 1272 | * unblock after device_blocked iterates to zero |
1265 | 1273 | */ |
1266 | - if (--sdev->device_blocked != 0) { | |
1274 | + if (atomic_dec_return(&sdev->device_blocked) > 0) { | |
1267 | 1275 | blk_delay_queue(q, SCSI_QUEUE_DELAY); |
1268 | 1276 | goto out_dec; |
1269 | 1277 | } |
1270 | 1278 | |
1271 | 1279 | |
... | ... | @@ -1302,19 +1310,15 @@ |
1302 | 1310 | } |
1303 | 1311 | |
1304 | 1312 | busy = atomic_inc_return(&starget->target_busy) - 1; |
1305 | - if (starget->target_blocked) { | |
1313 | + if (atomic_read(&starget->target_blocked) > 0) { | |
1306 | 1314 | if (busy) |
1307 | 1315 | goto starved; |
1308 | 1316 | |
1309 | 1317 | /* |
1310 | 1318 | * unblock after target_blocked iterates to zero |
1311 | 1319 | */ |
1312 | - spin_lock_irq(shost->host_lock); | |
1313 | - if (--starget->target_blocked != 0) { | |
1314 | - spin_unlock_irq(shost->host_lock); | |
1320 | + if (atomic_dec_return(&starget->target_blocked) > 0) | |
1315 | 1321 | goto out_dec; |
1316 | - } | |
1317 | - spin_unlock_irq(shost->host_lock); | |
1318 | 1322 | |
1319 | 1323 | SCSI_LOG_MLQUEUE(3, starget_printk(KERN_INFO, starget, |
1320 | 1324 | "unblocking target at zero depth\n")); |
1321 | 1325 | |
1322 | 1326 | |
... | ... | @@ -1349,19 +1353,15 @@ |
1349 | 1353 | return 0; |
1350 | 1354 | |
1351 | 1355 | busy = atomic_inc_return(&shost->host_busy) - 1; |
1352 | - if (shost->host_blocked) { | |
1356 | + if (atomic_read(&shost->host_blocked) > 0) { | |
1353 | 1357 | if (busy) |
1354 | 1358 | goto starved; |
1355 | 1359 | |
1356 | 1360 | /* |
1357 | 1361 | * unblock after host_blocked iterates to zero |
1358 | 1362 | */ |
1359 | - spin_lock_irq(shost->host_lock); | |
1360 | - if (--shost->host_blocked != 0) { | |
1361 | - spin_unlock_irq(shost->host_lock); | |
1363 | + if (atomic_dec_return(&shost->host_blocked) > 0) | |
1362 | 1364 | goto out_dec; |
1363 | - } | |
1364 | - spin_unlock_irq(shost->host_lock); | |
1365 | 1365 | |
1366 | 1366 | SCSI_LOG_MLQUEUE(3, |
1367 | 1367 | shost_printk(KERN_INFO, shost, |
drivers/scsi/scsi_sysfs.c
... | ... | @@ -584,7 +584,6 @@ |
584 | 584 | /* |
585 | 585 | * Create the actual show/store functions and data structures. |
586 | 586 | */ |
587 | -sdev_rd_attr (device_blocked, "%d\n"); | |
588 | 587 | sdev_rd_attr (type, "%d\n"); |
589 | 588 | sdev_rd_attr (scsi_level, "%d\n"); |
590 | 589 | sdev_rd_attr (vendor, "%.8s\n"); |
... | ... | @@ -599,6 +598,15 @@ |
599 | 598 | return snprintf(buf, 20, "%d\n", atomic_read(&sdev->device_busy)); |
600 | 599 | } |
601 | 600 | static DEVICE_ATTR(device_busy, S_IRUGO, sdev_show_device_busy, NULL); |
601 | + | |
602 | +static ssize_t | |
603 | +sdev_show_device_blocked(struct device *dev, struct device_attribute *attr, | |
604 | + char *buf) | |
605 | +{ | |
606 | + struct scsi_device *sdev = to_scsi_device(dev); | |
607 | + return snprintf(buf, 20, "%d\n", atomic_read(&sdev->device_blocked)); | |
608 | +} | |
609 | +static DEVICE_ATTR(device_blocked, S_IRUGO, sdev_show_device_blocked, NULL); | |
602 | 610 | |
603 | 611 | /* |
604 | 612 | * TODO: can we make these symlinks to the block layer ones? |
include/scsi/scsi_device.h
... | ... | @@ -82,6 +82,8 @@ |
82 | 82 | struct list_head same_target_siblings; /* just the devices sharing same target id */ |
83 | 83 | |
84 | 84 | atomic_t device_busy; /* commands actually active on LLDD */ |
85 | + atomic_t device_blocked; /* Device returned QUEUE_FULL. */ | |
86 | + | |
85 | 87 | spinlock_t list_lock; |
86 | 88 | struct list_head cmd_list; /* queue of in use SCSI Command structures */ |
87 | 89 | struct list_head starved_entry; |
... | ... | @@ -180,8 +182,6 @@ |
180 | 182 | struct list_head event_list; /* asserted events */ |
181 | 183 | struct work_struct event_work; |
182 | 184 | |
183 | - unsigned int device_blocked; /* Device returned QUEUE_FULL. */ | |
184 | - | |
185 | 185 | unsigned int max_device_blocked; /* what device_blocked counts down from */ |
186 | 186 | #define SCSI_DEFAULT_DEVICE_BLOCKED 3 |
187 | 187 | |
188 | 188 | |
... | ... | @@ -291,12 +291,13 @@ |
291 | 291 | * the same target will also. */ |
292 | 292 | /* commands actually active on LLD. */ |
293 | 293 | atomic_t target_busy; |
294 | + atomic_t target_blocked; | |
295 | + | |
294 | 296 | /* |
295 | 297 | * LLDs should set this in the slave_alloc host template callout. |
296 | 298 | * If set to zero then there is not limit. |
297 | 299 | */ |
298 | 300 | unsigned int can_queue; |
299 | - unsigned int target_blocked; | |
300 | 301 | unsigned int max_target_blocked; |
301 | 302 | #define SCSI_DEFAULT_TARGET_BLOCKED 3 |
302 | 303 |
include/scsi/scsi_host.h
... | ... | @@ -583,6 +583,8 @@ |
583 | 583 | struct blk_queue_tag *bqt; |
584 | 584 | |
585 | 585 | atomic_t host_busy; /* commands actually active on low-level */ |
586 | + atomic_t host_blocked; | |
587 | + | |
586 | 588 | unsigned int host_failed; /* commands that failed. |
587 | 589 | protected by host_lock */ |
588 | 590 | unsigned int host_eh_scheduled; /* EH scheduled without command */ |
... | ... | @@ -680,11 +682,6 @@ |
680 | 682 | * Task management function work queue |
681 | 683 | */ |
682 | 684 | struct workqueue_struct *tmf_work_q; |
683 | - | |
684 | - /* | |
685 | - * Host has rejected a command because it was busy. | |
686 | - */ | |
687 | - unsigned int host_blocked; | |
688 | 685 | |
689 | 686 | /* |
690 | 687 | * Value host_blocked counts down from |