Commit 9937a5e2f32892db0dbeefc2b3bc74b3ae3ea9c7

Authored by Jens Axboe
1 parent 70087dc38c

scsi: remove performance regression due to async queue run

Commit c21e6beb removed our queue request_fn re-enter
protection, and defaulted to always running the queues from
kblockd to be safe. This was a known potential slowdown,
but should be safe.

Unfortunately this is causing big performance regressions for
some, so we need to improve this logic. Looking into the details
of the re-enter, the real issue is on requeue of requests.

Requeue of requests upon seeing a BUSY condition from the device
ends up re-running the queue, causing traces like this:

scsi_request_fn()
        scsi_dispatch_cmd()
                scsi_queue_insert()
                        __scsi_queue_insert()
                                scsi_run_queue()
                                        scsi_request_fn()
                                                ...

potentially causing the issue we want to avoid. So special
case the requeue re-run of the queue, but improve it to offload
the entire run of local queue and starved queue from a single
workqueue callback. This is a lot better than potentially
kicking off a workqueue run for each device seen.

This also fixes the issue of the local device going into recursion,
since the above-mentioned commit never moved that queue run out
of line.

Signed-off-by: Jens Axboe <jaxboe@fusionio.com>

Showing 3 changed files with 19 additions and 4 deletions Side-by-side Diff

drivers/scsi/scsi_lib.c
... ... @@ -74,8 +74,6 @@
74 74 */
75 75 #define SCSI_QUEUE_DELAY 3
76 76  
77   -static void scsi_run_queue(struct request_queue *q);
78   -
79 77 /*
80 78 * Function: scsi_unprep_request()
81 79 *
... ... @@ -161,7 +159,7 @@
161 159 blk_requeue_request(q, cmd->request);
162 160 spin_unlock_irqrestore(q->queue_lock, flags);
163 161  
164   - scsi_run_queue(q);
  162 + kblockd_schedule_work(q, &device->requeue_work);
165 163  
166 164 return 0;
167 165 }
168 166  
... ... @@ -433,13 +431,27 @@
433 431 continue;
434 432 }
435 433  
436   - blk_run_queue_async(sdev->request_queue);
  434 + spin_unlock(shost->host_lock);
  435 + spin_lock(sdev->request_queue->queue_lock);
  436 + __blk_run_queue(sdev->request_queue);
  437 + spin_unlock(sdev->request_queue->queue_lock);
  438 + spin_lock(shost->host_lock);
437 439 }
438 440 /* put any unprocessed entries back */
439 441 list_splice(&starved_list, &shost->starved_list);
440 442 spin_unlock_irqrestore(shost->host_lock, flags);
441 443  
442 444 blk_run_queue(q);
  445 +}
  446 +
  447 +void scsi_requeue_run_queue(struct work_struct *work)
  448 +{
  449 + struct scsi_device *sdev;
  450 + struct request_queue *q;
  451 +
  452 + sdev = container_of(work, struct scsi_device, requeue_work);
  453 + q = sdev->request_queue;
  454 + scsi_run_queue(q);
443 455 }
444 456  
445 457 /*
drivers/scsi/scsi_scan.c
... ... @@ -242,6 +242,7 @@
242 242 int display_failure_msg = 1, ret;
243 243 struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
244 244 extern void scsi_evt_thread(struct work_struct *work);
  245 + extern void scsi_requeue_run_queue(struct work_struct *work);
245 246  
246 247 sdev = kzalloc(sizeof(*sdev) + shost->transportt->device_size,
247 248 GFP_ATOMIC);
... ... @@ -264,6 +265,7 @@
264 265 INIT_LIST_HEAD(&sdev->event_list);
265 266 spin_lock_init(&sdev->list_lock);
266 267 INIT_WORK(&sdev->event_work, scsi_evt_thread);
  268 + INIT_WORK(&sdev->requeue_work, scsi_requeue_run_queue);
267 269  
268 270 sdev->sdev_gendev.parent = get_device(&starget->dev);
269 271 sdev->sdev_target = starget;
include/scsi/scsi_device.h
... ... @@ -169,6 +169,7 @@
169 169 sdev_dev;
170 170  
171 171 struct execute_work ew; /* used to get process context on put */
  172 + struct work_struct requeue_work;
172 173  
173 174 struct scsi_dh_data *scsi_dh_data;
174 175 enum scsi_device_state sdev_state;