Commit 797e7dbbee0a91fa1349192f18ad5c454997d876

Authored by Tejun Heo
Committed by Jens Axboe
1 parent 52d9e67536

[BLOCK] reimplement handling of barrier request

Reimplement handling of barrier requests.

* Flexible handling to deal with the various capabilities of
  target devices.
* Retry support for falling back to a less strict ordering mode.
* Tagged queues which don't support ordered tags can still perform
  ordered writes by draining the queue.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jens Axboe <axboe@suse.de>

Showing 4 changed files with 359 additions and 192 deletions
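
For context: with this change a driver opts into barrier support by picking one of the QUEUE_ORDERED_* modes and, for the flush-based modes, supplying a prepare_flush_fn that turns a pre-initialized request into a device cache-flush command. A minimal, hypothetical registration sketch; the driver name, opcode, timeout and the choice of QUEUE_ORDERED_DRAIN_FLUSH are invented for illustration, not taken from this patch:

    #include <linux/blkdev.h>

    #define MYDRV_FLUSH_TIMEOUT  (60 * HZ)   /* hypothetical timeout */
    #define MYDRV_FLUSH_OPCODE   0x35        /* hypothetical opcode  */

    /* Turn @rq into a cache-flush command for the device.  Everything
     * below is device specific and only a placeholder. */
    static void mydrv_prepare_flush(request_queue_t *q, struct request *rq)
    {
            rq->flags |= REQ_BLOCK_PC;
            rq->timeout = MYDRV_FLUSH_TIMEOUT;
            rq->cmd[0] = MYDRV_FLUSH_OPCODE;
            rq->cmd_len = 10;
    }

    static int mydrv_init_queue(request_queue_t *q)
    {
            /* Drain-based ordering with explicit pre- and post-flush.
             * Returns -EINVAL for an invalid mode or for a missing
             * prepare_flush_fn when the mode needs one. */
            return blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH,
                                     mydrv_prepare_flush);
    }

For modes without flush phases (QUEUE_ORDERED_DRAIN, QUEUE_ORDERED_TAG) the callback may be NULL.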

block/elevator.c
... ... @@ -304,15 +304,7 @@
304 304  
305 305 rq->flags &= ~REQ_STARTED;
306 306  
307   - /*
308   - * if this is the flush, requeue the original instead and drop the flush
309   - */
310   - if (rq->flags & REQ_BAR_FLUSH) {
311   - clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
312   - rq = rq->end_io_data;
313   - }
314   -
315   - __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
  307 + __elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE, 0);
316 308 }
317 309  
318 310 static void elv_drain_elevator(request_queue_t *q)
319 311  
... ... @@ -332,8 +324,19 @@
332 324 void __elv_add_request(request_queue_t *q, struct request *rq, int where,
333 325 int plug)
334 326 {
  327 + struct list_head *pos;
  328 + unsigned ordseq;
  329 +
  330 + if (q->ordcolor)
  331 + rq->flags |= REQ_ORDERED_COLOR;
  332 +
335 333 if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
336 334 /*
  335 + * toggle ordered color
  336 + */
  337 + q->ordcolor ^= 1;
  338 +
  339 + /*
337 340 * barriers implicitly indicate back insertion
338 341 */
339 342 if (where == ELEVATOR_INSERT_SORT)
... ... @@ -393,6 +396,30 @@
393 396 q->elevator->ops->elevator_add_req_fn(q, rq);
394 397 break;
395 398  
  399 + case ELEVATOR_INSERT_REQUEUE:
  400 + /*
  401 + * If ordered flush isn't in progress, we do front
  402 + * insertion; otherwise, requests should be requeued
  403 + * in ordseq order.
  404 + */
  405 + rq->flags |= REQ_SOFTBARRIER;
  406 +
  407 + if (q->ordseq == 0) {
  408 + list_add(&rq->queuelist, &q->queue_head);
  409 + break;
  410 + }
  411 +
  412 + ordseq = blk_ordered_req_seq(rq);
  413 +
  414 + list_for_each(pos, &q->queue_head) {
  415 + struct request *pos_rq = list_entry_rq(pos);
  416 + if (ordseq <= blk_ordered_req_seq(pos_rq))
  417 + break;
  418 + }
  419 +
  420 + list_add_tail(&rq->queuelist, pos);
  421 + break;
  422 +
396 423 default:
397 424 printk(KERN_ERR "%s: bad insertion point %d\n",
398 425 __FUNCTION__, where);
399 426  
400 427  
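
The ordered-color toggle added here is the subtle part: every queued request is stamped with the queue's current color, and the color flips whenever a barrier is queued, so blk_ordered_req_seq() (in ll_rw_blk.c below) can tell requests issued before the barrier (same color as the barrier, still have to be drained) from requests issued after it (opposite color, classified QUEUE_ORDSEQ_DONE). A small userspace model of that classification, with invented names and a stand-in flag bit:

    #include <stdio.h>

    #define ORDERED_COLOR 0x1       /* stand-in for REQ_ORDERED_COLOR */

    /* Stamp a request with the current color, then flip the color if it
     * is a barrier, mirroring the new __elv_add_request() logic. */
    static unsigned add_request(unsigned *ordcolor, int is_barrier)
    {
            unsigned flags = *ordcolor ? ORDERED_COLOR : 0;

            if (is_barrier)
                    *ordcolor ^= 1;
            return flags;
    }

    int main(void)
    {
            unsigned color = 0;
            unsigned rq1 = add_request(&color, 0);  /* before the barrier */
            unsigned bar = add_request(&color, 1);  /* the barrier itself */
            unsigned rq2 = add_request(&color, 0);  /* after the barrier  */

            /* Same color as the barrier => in front of it => DRAIN;
             * different color => queued after it => DONE. */
            printf("rq1: %s\n", (rq1 ^ bar) & ORDERED_COLOR ? "DONE" : "DRAIN");
            printf("rq2: %s\n", (rq2 ^ bar) & ORDERED_COLOR ? "DONE" : "DRAIN");
            return 0;
    }
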
... ... @@ -422,25 +449,16 @@
422 449 {
423 450 struct request *rq;
424 451  
425   - if (unlikely(list_empty(&q->queue_head) &&
426   - !q->elevator->ops->elevator_dispatch_fn(q, 0)))
427   - return NULL;
  452 + while (1) {
  453 + while (!list_empty(&q->queue_head)) {
  454 + rq = list_entry_rq(q->queue_head.next);
  455 + if (blk_do_ordered(q, &rq))
  456 + return rq;
  457 + }
428 458  
429   - rq = list_entry_rq(q->queue_head.next);
430   -
431   - /*
432   - * if this is a barrier write and the device has to issue a
433   - * flush sequence to support it, check how far we are
434   - */
435   - if (blk_fs_request(rq) && blk_barrier_rq(rq)) {
436   - BUG_ON(q->ordered == QUEUE_ORDERED_NONE);
437   -
438   - if (q->ordered == QUEUE_ORDERED_FLUSH &&
439   - !blk_barrier_preflush(rq))
440   - rq = blk_start_pre_flush(q, rq);
  459 + if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
  460 + return NULL;
441 461 }
442   -
443   - return rq;
444 462 }
445 463  
446 464 struct request *elv_next_request(request_queue_t *q)
447 465  
... ... @@ -593,7 +611,21 @@
593 611 * request is released from the driver, io must be done
594 612 */
595 613 if (blk_account_rq(rq)) {
  614 + struct request *first_rq = list_entry_rq(q->queue_head.next);
  615 +
596 616 q->in_flight--;
  617 +
  618 + /*
  619 + * Check if the queue is waiting for fs requests to be
  620 + * drained for flush sequence.
  621 + */
  622 + if (q->ordseq && q->in_flight == 0 &&
  623 + blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
  624 + blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
  625 + blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
  626 + q->request_fn(q);
  627 + }
  628 +
597 629 if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
598 630 e->ops->elevator_completed_req_fn(q, rq);
599 631 }
block/ll_rw_blk.c
... ... @@ -290,8 +290,8 @@
290 290  
291 291 /**
292 292 * blk_queue_ordered - does this queue support ordered writes
293   - * @q: the request queue
294   - * @flag: see below
  293 + * @q: the request queue
  294 + * @ordered: one of QUEUE_ORDERED_*
295 295 *
296 296 * Description:
297 297 * For journalled file systems, doing ordered writes on a commit
298 298  
299 299  
... ... @@ -300,28 +300,30 @@
300 300 * feature should call this function and indicate so.
301 301 *
302 302 **/
303   -void blk_queue_ordered(request_queue_t *q, int flag)
  303 +int blk_queue_ordered(request_queue_t *q, unsigned ordered,
  304 + prepare_flush_fn *prepare_flush_fn)
304 305 {
305   - switch (flag) {
306   - case QUEUE_ORDERED_NONE:
307   - if (q->flush_rq)
308   - kmem_cache_free(request_cachep, q->flush_rq);
309   - q->flush_rq = NULL;
310   - q->ordered = flag;
311   - break;
312   - case QUEUE_ORDERED_TAG:
313   - q->ordered = flag;
314   - break;
315   - case QUEUE_ORDERED_FLUSH:
316   - q->ordered = flag;
317   - if (!q->flush_rq)
318   - q->flush_rq = kmem_cache_alloc(request_cachep,
319   - GFP_KERNEL);
320   - break;
321   - default:
322   - printk("blk_queue_ordered: bad value %d\n", flag);
323   - break;
  306 + if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) &&
  307 + prepare_flush_fn == NULL) {
  308 + printk(KERN_ERR "blk_queue_ordered: prepare_flush_fn required\n");
  309 + return -EINVAL;
324 310 }
  311 +
  312 + if (ordered != QUEUE_ORDERED_NONE &&
  313 + ordered != QUEUE_ORDERED_DRAIN &&
  314 + ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
  315 + ordered != QUEUE_ORDERED_DRAIN_FUA &&
  316 + ordered != QUEUE_ORDERED_TAG &&
  317 + ordered != QUEUE_ORDERED_TAG_FLUSH &&
  318 + ordered != QUEUE_ORDERED_TAG_FUA) {
  319 + printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered);
  320 + return -EINVAL;
  321 + }
  322 +
  323 + q->next_ordered = ordered;
  324 + q->prepare_flush_fn = prepare_flush_fn;
  325 +
  326 + return 0;
325 327 }
326 328  
327 329 EXPORT_SYMBOL(blk_queue_ordered);
328 330  
... ... @@ -346,169 +348,267 @@
346 348 /*
347 349 * Cache flushing for ordered writes handling
348 350 */
349   -static void blk_pre_flush_end_io(struct request *flush_rq, int error)
  351 +inline unsigned blk_ordered_cur_seq(request_queue_t *q)
350 352 {
351   - struct request *rq = flush_rq->end_io_data;
  353 + if (!q->ordseq)
  354 + return 0;
  355 + return 1 << ffz(q->ordseq);
  356 +}
  357 +
  358 +unsigned blk_ordered_req_seq(struct request *rq)
  359 +{
352 360 request_queue_t *q = rq->q;
353 361  
354   - elv_completed_request(q, flush_rq);
  362 + BUG_ON(q->ordseq == 0);
355 363  
356   - rq->flags |= REQ_BAR_PREFLUSH;
  364 + if (rq == &q->pre_flush_rq)
  365 + return QUEUE_ORDSEQ_PREFLUSH;
  366 + if (rq == &q->bar_rq)
  367 + return QUEUE_ORDSEQ_BAR;
  368 + if (rq == &q->post_flush_rq)
  369 + return QUEUE_ORDSEQ_POSTFLUSH;
357 370  
358   - if (!flush_rq->errors)
359   - elv_requeue_request(q, rq);
360   - else {
361   - q->end_flush_fn(q, flush_rq);
362   - clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
363   - q->request_fn(q);
364   - }
  371 + if ((rq->flags & REQ_ORDERED_COLOR) ==
  372 + (q->orig_bar_rq->flags & REQ_ORDERED_COLOR))
  373 + return QUEUE_ORDSEQ_DRAIN;
  374 + else
  375 + return QUEUE_ORDSEQ_DONE;
365 376 }
366 377  
367   -static void blk_post_flush_end_io(struct request *flush_rq, int error)
  378 +void blk_ordered_complete_seq(request_queue_t *q, unsigned seq, int error)
368 379 {
369   - struct request *rq = flush_rq->end_io_data;
370   - request_queue_t *q = rq->q;
  380 + struct request *rq;
  381 + int uptodate;
371 382  
372   - elv_completed_request(q, flush_rq);
  383 + if (error && !q->orderr)
  384 + q->orderr = error;
373 385  
374   - rq->flags |= REQ_BAR_POSTFLUSH;
  386 + BUG_ON(q->ordseq & seq);
  387 + q->ordseq |= seq;
375 388  
376   - q->end_flush_fn(q, flush_rq);
377   - clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
378   - q->request_fn(q);
  389 + if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
  390 + return;
  391 +
  392 + /*
  393 + * Okay, sequence complete.
  394 + */
  395 + rq = q->orig_bar_rq;
  396 + uptodate = q->orderr ? q->orderr : 1;
  397 +
  398 + q->ordseq = 0;
  399 +
  400 + end_that_request_first(rq, uptodate, rq->hard_nr_sectors);
  401 + end_that_request_last(rq, uptodate);
379 402 }
380 403  
381   -struct request *blk_start_pre_flush(request_queue_t *q, struct request *rq)
  404 +static void pre_flush_end_io(struct request *rq, int error)
382 405 {
383   - struct request *flush_rq = q->flush_rq;
  406 + elv_completed_request(rq->q, rq);
  407 + blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
  408 +}
384 409  
385   - BUG_ON(!blk_barrier_rq(rq));
  410 +static void bar_end_io(struct request *rq, int error)
  411 +{
  412 + elv_completed_request(rq->q, rq);
  413 + blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
  414 +}
386 415  
387   - if (test_and_set_bit(QUEUE_FLAG_FLUSH, &q->queue_flags))
388   - return NULL;
  416 +static void post_flush_end_io(struct request *rq, int error)
  417 +{
  418 + elv_completed_request(rq->q, rq);
  419 + blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
  420 +}
389 421  
390   - rq_init(q, flush_rq);
391   - flush_rq->elevator_private = NULL;
392   - flush_rq->flags = REQ_BAR_FLUSH;
393   - flush_rq->rq_disk = rq->rq_disk;
394   - flush_rq->rl = NULL;
  422 +static void queue_flush(request_queue_t *q, unsigned which)
  423 +{
  424 + struct request *rq;
  425 + rq_end_io_fn *end_io;
395 426  
  427 + if (which == QUEUE_ORDERED_PREFLUSH) {
  428 + rq = &q->pre_flush_rq;
  429 + end_io = pre_flush_end_io;
  430 + } else {
  431 + rq = &q->post_flush_rq;
  432 + end_io = post_flush_end_io;
  433 + }
  434 +
  435 + rq_init(q, rq);
  436 + rq->flags = REQ_HARDBARRIER;
  437 + rq->elevator_private = NULL;
  438 + rq->rq_disk = q->bar_rq.rq_disk;
  439 + rq->rl = NULL;
  440 + rq->end_io = end_io;
  441 + q->prepare_flush_fn(q, rq);
  442 +
  443 + __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
  444 +}
  445 +
  446 +static inline struct request *start_ordered(request_queue_t *q,
  447 + struct request *rq)
  448 +{
  449 + q->bi_size = 0;
  450 + q->orderr = 0;
  451 + q->ordered = q->next_ordered;
  452 + q->ordseq |= QUEUE_ORDSEQ_STARTED;
  453 +
396 454 /*
397   - * prepare_flush returns 0 if no flush is needed, just mark both
398   - * pre and post flush as done in that case
  455 + * Prep proxy barrier request.
399 456 */
400   - if (!q->prepare_flush_fn(q, flush_rq)) {
401   - rq->flags |= REQ_BAR_PREFLUSH | REQ_BAR_POSTFLUSH;
402   - clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
403   - return rq;
404   - }
  457 + blkdev_dequeue_request(rq);
  458 + q->orig_bar_rq = rq;
  459 + rq = &q->bar_rq;
  460 + rq_init(q, rq);
  461 + rq->flags = bio_data_dir(q->orig_bar_rq->bio);
  462 + rq->flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;
  463 + rq->elevator_private = NULL;
  464 + rq->rl = NULL;
  465 + init_request_from_bio(rq, q->orig_bar_rq->bio);
  466 + rq->end_io = bar_end_io;
405 467  
406 468 /*
407   - * some drivers dequeue requests right away, some only after io
408   - * completion. make sure the request is dequeued.
  469 + * Queue ordered sequence. As we stack them at the head, we
  470 + * need to queue in reverse order. Note that we rely on that
  471 + * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
  472 + * request gets inbetween ordered sequence.
409 473 */
410   - if (!list_empty(&rq->queuelist))
411   - blkdev_dequeue_request(rq);
  474 + if (q->ordered & QUEUE_ORDERED_POSTFLUSH)
  475 + queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
  476 + else
  477 + q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
412 478  
413   - flush_rq->end_io_data = rq;
414   - flush_rq->end_io = blk_pre_flush_end_io;
  479 + __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
415 480  
416   - __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0);
417   - return flush_rq;
  481 + if (q->ordered & QUEUE_ORDERED_PREFLUSH) {
  482 + queue_flush(q, QUEUE_ORDERED_PREFLUSH);
  483 + rq = &q->pre_flush_rq;
  484 + } else
  485 + q->ordseq |= QUEUE_ORDSEQ_PREFLUSH;
  486 +
  487 + if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0)
  488 + q->ordseq |= QUEUE_ORDSEQ_DRAIN;
  489 + else
  490 + rq = NULL;
  491 +
  492 + return rq;
418 493 }
419 494  
420   -static void blk_start_post_flush(request_queue_t *q, struct request *rq)
  495 +int blk_do_ordered(request_queue_t *q, struct request **rqp)
421 496 {
422   - struct request *flush_rq = q->flush_rq;
  497 + struct request *rq = *rqp, *allowed_rq;
  498 + int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
423 499  
424   - BUG_ON(!blk_barrier_rq(rq));
  500 + if (!q->ordseq) {
  501 + if (!is_barrier)
  502 + return 1;
425 503  
426   - rq_init(q, flush_rq);
427   - flush_rq->elevator_private = NULL;
428   - flush_rq->flags = REQ_BAR_FLUSH;
429   - flush_rq->rq_disk = rq->rq_disk;
430   - flush_rq->rl = NULL;
  504 + if (q->next_ordered != QUEUE_ORDERED_NONE) {
  505 + *rqp = start_ordered(q, rq);
  506 + return 1;
  507 + } else {
  508 + /*
  509 + * This can happen when the queue switches to
  510 + * ORDERED_NONE while this request is on it.
  511 + */
  512 + blkdev_dequeue_request(rq);
  513 + end_that_request_first(rq, -EOPNOTSUPP,
  514 + rq->hard_nr_sectors);
  515 + end_that_request_last(rq, -EOPNOTSUPP);
  516 + *rqp = NULL;
  517 + return 0;
  518 + }
  519 + }
431 520  
432   - if (q->prepare_flush_fn(q, flush_rq)) {
433   - flush_rq->end_io_data = rq;
434   - flush_rq->end_io = blk_post_flush_end_io;
  521 + if (q->ordered & QUEUE_ORDERED_TAG) {
  522 + if (is_barrier && rq != &q->bar_rq)
  523 + *rqp = NULL;
  524 + return 1;
  525 + }
435 526  
436   - __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0);
437   - q->request_fn(q);
  527 + switch (blk_ordered_cur_seq(q)) {
  528 + case QUEUE_ORDSEQ_PREFLUSH:
  529 + allowed_rq = &q->pre_flush_rq;
  530 + break;
  531 + case QUEUE_ORDSEQ_BAR:
  532 + allowed_rq = &q->bar_rq;
  533 + break;
  534 + case QUEUE_ORDSEQ_POSTFLUSH:
  535 + allowed_rq = &q->post_flush_rq;
  536 + break;
  537 + default:
  538 + allowed_rq = NULL;
  539 + break;
438 540 }
  541 +
  542 + if (rq != allowed_rq &&
  543 + (blk_fs_request(rq) || rq == &q->pre_flush_rq ||
  544 + rq == &q->post_flush_rq))
  545 + *rqp = NULL;
  546 +
  547 + return 1;
439 548 }
440 549  
441   -static inline int blk_check_end_barrier(request_queue_t *q, struct request *rq,
442   - int sectors)
  550 +static int flush_dry_bio_endio(struct bio *bio, unsigned int bytes, int error)
443 551 {
444   - if (sectors > rq->nr_sectors)
445   - sectors = rq->nr_sectors;
  552 + request_queue_t *q = bio->bi_private;
  553 + struct bio_vec *bvec;
  554 + int i;
446 555  
447   - rq->nr_sectors -= sectors;
448   - return rq->nr_sectors;
  556 + /*
  557 + * This is dry run, restore bio_sector and size. We'll finish
  558 + * this request again with the original bi_end_io after an
  559 + * error occurs or post flush is complete.
  560 + */
  561 + q->bi_size += bytes;
  562 +
  563 + if (bio->bi_size)
  564 + return 1;
  565 +
  566 + /* Rewind bvec's */
  567 + bio->bi_idx = 0;
  568 + bio_for_each_segment(bvec, bio, i) {
  569 + bvec->bv_len += bvec->bv_offset;
  570 + bvec->bv_offset = 0;
  571 + }
  572 +
  573 + /* Reset bio */
  574 + set_bit(BIO_UPTODATE, &bio->bi_flags);
  575 + bio->bi_size = q->bi_size;
  576 + bio->bi_sector -= (q->bi_size >> 9);
  577 + q->bi_size = 0;
  578 +
  579 + return 0;
449 580 }
450 581  
451   -static int __blk_complete_barrier_rq(request_queue_t *q, struct request *rq,
452   - int sectors, int queue_locked)
  582 +static inline int ordered_bio_endio(struct request *rq, struct bio *bio,
  583 + unsigned int nbytes, int error)
453 584 {
454   - if (q->ordered != QUEUE_ORDERED_FLUSH)
  585 + request_queue_t *q = rq->q;
  586 + bio_end_io_t *endio;
  587 + void *private;
  588 +
  589 + if (&q->bar_rq != rq)
455 590 return 0;
456   - if (!blk_fs_request(rq) || !blk_barrier_rq(rq))
457   - return 0;
458   - if (blk_barrier_postflush(rq))
459   - return 0;
460 591  
461   - if (!blk_check_end_barrier(q, rq, sectors)) {
462   - unsigned long flags = 0;
  592 + /*
  593 + * Okay, this is the barrier request in progress, dry finish it.
  594 + */
  595 + if (error && !q->orderr)
  596 + q->orderr = error;
463 597  
464   - if (!queue_locked)
465   - spin_lock_irqsave(q->queue_lock, flags);
  598 + endio = bio->bi_end_io;
  599 + private = bio->bi_private;
  600 + bio->bi_end_io = flush_dry_bio_endio;
  601 + bio->bi_private = q;
466 602  
467   - blk_start_post_flush(q, rq);
  603 + bio_endio(bio, nbytes, error);
468 604  
469   - if (!queue_locked)
470   - spin_unlock_irqrestore(q->queue_lock, flags);
471   - }
  605 + bio->bi_end_io = endio;
  606 + bio->bi_private = private;
472 607  
473 608 return 1;
474 609 }
475 610  
476 611 /**
477   - * blk_complete_barrier_rq - complete possible barrier request
478   - * @q: the request queue for the device
479   - * @rq: the request
480   - * @sectors: number of sectors to complete
481   - *
482   - * Description:
483   - * Used in driver end_io handling to determine whether to postpone
484   - * completion of a barrier request until a post flush has been done. This
485   - * is the unlocked variant, used if the caller doesn't already hold the
486   - * queue lock.
487   - **/
488   -int blk_complete_barrier_rq(request_queue_t *q, struct request *rq, int sectors)
489   -{
490   - return __blk_complete_barrier_rq(q, rq, sectors, 0);
491   -}
492   -EXPORT_SYMBOL(blk_complete_barrier_rq);
493   -
494   -/**
495   - * blk_complete_barrier_rq_locked - complete possible barrier request
496   - * @q: the request queue for the device
497   - * @rq: the request
498   - * @sectors: number of sectors to complete
499   - *
500   - * Description:
501   - * See blk_complete_barrier_rq(). This variant must be used if the caller
502   - * holds the queue lock.
503   - **/
504   -int blk_complete_barrier_rq_locked(request_queue_t *q, struct request *rq,
505   - int sectors)
506   -{
507   - return __blk_complete_barrier_rq(q, rq, sectors, 1);
508   -}
509   -EXPORT_SYMBOL(blk_complete_barrier_rq_locked);
510   -
511   -/**
512 612 * blk_queue_bounce_limit - set bounce buffer limit for queue
513 613 * @q: the request queue for the device
514 614 * @dma_addr: bus address limit
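
A note on the sequencing machinery in the hunk above: q->ordseq accumulates one bit per completed stage, and blk_ordered_cur_seq() reports the lowest stage whose bit is still clear (1 << ffz()), so stages a given mode does not need can be pre-marked done (as start_ordered() does for the missing flushes) while the sequence still advances strictly in order. A tiny userspace model of that bit arithmetic, with a stand-in for ffz():

    #include <stdio.h>

    /* Stage bits, same values as the new QUEUE_ORDSEQ_* enum. */
    enum {
            ORDSEQ_STARTED   = 0x01,
            ORDSEQ_DRAIN     = 0x02,
            ORDSEQ_PREFLUSH  = 0x04,
            ORDSEQ_BAR       = 0x08,
            ORDSEQ_POSTFLUSH = 0x10,
            ORDSEQ_DONE      = 0x20,
    };

    /* Lowest clear bit, like the kernel's ffz(). */
    static unsigned ffz_u(unsigned x)
    {
            unsigned bit = 0;

            while (x & 1) {
                    x >>= 1;
                    bit++;
            }
            return bit;
    }

    /* Current stage = lowest stage whose bit is not yet set. */
    static unsigned cur_seq(unsigned ordseq)
    {
            return ordseq ? 1U << ffz_u(ordseq) : 0;
    }

    int main(void)
    {
            unsigned ordseq = ORDSEQ_STARTED;           /* start_ordered()        */

            printf("0x%02x\n", cur_seq(ordseq));        /* 0x02: draining         */
            ordseq |= ORDSEQ_POSTFLUSH;                 /* pre-set, e.g. FUA mode */
            printf("0x%02x\n", cur_seq(ordseq));        /* still 0x02             */
            ordseq |= ORDSEQ_DRAIN | ORDSEQ_PREFLUSH | ORDSEQ_BAR;
            printf("0x%02x\n", cur_seq(ordseq));        /* 0x20: sequence done    */
            return 0;
    }
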
... ... @@ -1047,6 +1147,7 @@
1047 1147 "REQ_SORTED",
1048 1148 "REQ_SOFTBARRIER",
1049 1149 "REQ_HARDBARRIER",
  1150 + "REQ_FUA",
1050 1151 "REQ_CMD",
1051 1152 "REQ_NOMERGE",
1052 1153 "REQ_STARTED",
... ... @@ -1066,6 +1167,7 @@
1066 1167 "REQ_PM_SUSPEND",
1067 1168 "REQ_PM_RESUME",
1068 1169 "REQ_PM_SHUTDOWN",
  1170 + "REQ_ORDERED_COLOR",
1069 1171 };
1070 1172  
1071 1173 void blk_dump_rq_flags(struct request *rq, char *msg)
... ... @@ -1643,8 +1745,6 @@
1643 1745 if (q->queue_tags)
1644 1746 __blk_queue_free_tags(q);
1645 1747  
1646   - blk_queue_ordered(q, QUEUE_ORDERED_NONE);
1647   -
1648 1748 kmem_cache_free(requestq_cachep, q);
1649 1749 }
1650 1750  
... ... @@ -2714,7 +2814,7 @@
2714 2814 spin_lock_prefetch(q->queue_lock);
2715 2815  
2716 2816 barrier = bio_barrier(bio);
2717   - if (unlikely(barrier) && (q->ordered == QUEUE_ORDERED_NONE)) {
  2817 + if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
2718 2818 err = -EOPNOTSUPP;
2719 2819 goto end_io;
2720 2820 }
... ... @@ -3075,7 +3175,8 @@
3075 3175 if (nr_bytes >= bio->bi_size) {
3076 3176 req->bio = bio->bi_next;
3077 3177 nbytes = bio->bi_size;
3078   - bio_endio(bio, nbytes, error);
  3178 + if (!ordered_bio_endio(req, bio, nbytes, error))
  3179 + bio_endio(bio, nbytes, error);
3079 3180 next_idx = 0;
3080 3181 bio_nbytes = 0;
3081 3182 } else {
... ... @@ -3130,7 +3231,8 @@
3130 3231 * if the request wasn't completed, update state
3131 3232 */
3132 3233 if (bio_nbytes) {
3133   - bio_endio(bio, bio_nbytes, error);
  3234 + if (!ordered_bio_endio(req, bio, bio_nbytes, error))
  3235 + bio_endio(bio, bio_nbytes, error);
3134 3236 bio->bi_idx += next_idx;
3135 3237 bio_iovec(bio)->bv_offset += nr_bytes;
3136 3238 bio_iovec(bio)->bv_len -= nr_bytes;
include/linux/blkdev.h
... ... @@ -207,6 +207,7 @@
207 207 __REQ_SORTED, /* elevator knows about this request */
208 208 __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */
209 209 __REQ_HARDBARRIER, /* may not be passed by drive either */
  210 + __REQ_FUA, /* forced unit access */
210 211 __REQ_CMD, /* is a regular fs rw request */
211 212 __REQ_NOMERGE, /* don't touch this for merging */
212 213 __REQ_STARTED, /* drive already may have started this one */
... ... @@ -230,9 +231,7 @@
230 231 __REQ_PM_SUSPEND, /* suspend request */
231 232 __REQ_PM_RESUME, /* resume request */
232 233 __REQ_PM_SHUTDOWN, /* shutdown request */
233   - __REQ_BAR_PREFLUSH, /* barrier pre-flush done */
234   - __REQ_BAR_POSTFLUSH, /* barrier post-flush */
235   - __REQ_BAR_FLUSH, /* rq is the flush request */
  234 + __REQ_ORDERED_COLOR, /* is before or after barrier */
236 235 __REQ_NR_BITS, /* stops here */
237 236 };
238 237  
... ... @@ -241,6 +240,7 @@
241 240 #define REQ_SORTED (1 << __REQ_SORTED)
242 241 #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER)
243 242 #define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER)
  243 +#define REQ_FUA (1 << __REQ_FUA)
244 244 #define REQ_CMD (1 << __REQ_CMD)
245 245 #define REQ_NOMERGE (1 << __REQ_NOMERGE)
246 246 #define REQ_STARTED (1 << __REQ_STARTED)
... ... @@ -260,9 +260,7 @@
260 260 #define REQ_PM_SUSPEND (1 << __REQ_PM_SUSPEND)
261 261 #define REQ_PM_RESUME (1 << __REQ_PM_RESUME)
262 262 #define REQ_PM_SHUTDOWN (1 << __REQ_PM_SHUTDOWN)
263   -#define REQ_BAR_PREFLUSH (1 << __REQ_BAR_PREFLUSH)
264   -#define REQ_BAR_POSTFLUSH (1 << __REQ_BAR_POSTFLUSH)
265   -#define REQ_BAR_FLUSH (1 << __REQ_BAR_FLUSH)
  263 +#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR)
266 264  
267 265 /*
268 266 * State information carried for REQ_PM_SUSPEND and REQ_PM_RESUME
... ... @@ -292,8 +290,7 @@
292 290 typedef int (merge_bvec_fn) (request_queue_t *, struct bio *, struct bio_vec *);
293 291 typedef void (activity_fn) (void *data, int rw);
294 292 typedef int (issue_flush_fn) (request_queue_t *, struct gendisk *, sector_t *);
295   -typedef int (prepare_flush_fn) (request_queue_t *, struct request *);
296   -typedef void (end_flush_fn) (request_queue_t *, struct request *);
  293 +typedef void (prepare_flush_fn) (request_queue_t *, struct request *);
297 294  
298 295 enum blk_queue_state {
299 296 Queue_down,
... ... @@ -335,7 +332,6 @@
335 332 activity_fn *activity_fn;
336 333 issue_flush_fn *issue_flush_fn;
337 334 prepare_flush_fn *prepare_flush_fn;
338   - end_flush_fn *end_flush_fn;
339 335  
340 336 /*
341 337 * Dispatch queue sorting
342 338  
... ... @@ -420,16 +416,13 @@
420 416 /*
421 417 * reserved for flush operations
422 418 */
423   - struct request *flush_rq;
424   - unsigned char ordered;
  419 + unsigned int ordered, next_ordered, ordseq;
  420 + int orderr, ordcolor;
  421 + struct request pre_flush_rq, bar_rq, post_flush_rq;
  422 + struct request *orig_bar_rq;
  423 + unsigned int bi_size;
425 424 };
426 425  
427   -enum {
428   - QUEUE_ORDERED_NONE,
429   - QUEUE_ORDERED_TAG,
430   - QUEUE_ORDERED_FLUSH,
431   -};
432   -
433 426 #define RQ_INACTIVE (-1)
434 427 #define RQ_ACTIVE 1
435 428 #define RQ_SCSI_BUSY 0xffff
436 429  
437 430  
... ... @@ -445,12 +438,51 @@
445 438 #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */
446 439 #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */
447 440 #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */
448   -#define QUEUE_FLAG_FLUSH 9 /* doing barrier flush sequence */
449 441  
  442 +enum {
  443 + /*
  444 + * Hardbarrier is supported with one of the following methods.
  445 + *
  446 + * NONE : hardbarrier unsupported
  447 + * DRAIN : ordering by draining is enough
  448 + * DRAIN_FLUSH : ordering by draining w/ pre and post flushes
  449 + * DRAIN_FUA : ordering by draining w/ pre flush and FUA write
  450 + * TAG : ordering by tag is enough
  451 + * TAG_FLUSH : ordering by tag w/ pre and post flushes
  452 + * TAG_FUA : ordering by tag w/ pre flush and FUA write
  453 + */
  454 + QUEUE_ORDERED_NONE = 0x00,
  455 + QUEUE_ORDERED_DRAIN = 0x01,
  456 + QUEUE_ORDERED_TAG = 0x02,
  457 +
  458 + QUEUE_ORDERED_PREFLUSH = 0x10,
  459 + QUEUE_ORDERED_POSTFLUSH = 0x20,
  460 + QUEUE_ORDERED_FUA = 0x40,
  461 +
  462 + QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN |
  463 + QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH,
  464 + QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN |
  465 + QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA,
  466 + QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG |
  467 + QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH,
  468 + QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG |
  469 + QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA,
  470 +
  471 + /*
  472 + * Ordered operation sequence
  473 + */
  474 + QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */
  475 + QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */
  476 + QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */
  477 + QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */
  478 + QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */
  479 + QUEUE_ORDSEQ_DONE = 0x20,
  480 +};
  481 +
450 482 #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
451 483 #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
452 484 #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
453   -#define blk_queue_flushing(q) test_bit(QUEUE_FLAG_FLUSH, &(q)->queue_flags)
  485 +#define blk_queue_flushing(q) ((q)->ordseq)
454 486  
455 487 #define blk_fs_request(rq) ((rq)->flags & REQ_CMD)
456 488 #define blk_pc_request(rq) ((rq)->flags & REQ_BLOCK_PC)
... ... @@ -466,8 +498,7 @@
466 498  
467 499 #define blk_sorted_rq(rq) ((rq)->flags & REQ_SORTED)
468 500 #define blk_barrier_rq(rq) ((rq)->flags & REQ_HARDBARRIER)
469   -#define blk_barrier_preflush(rq) ((rq)->flags & REQ_BAR_PREFLUSH)
470   -#define blk_barrier_postflush(rq) ((rq)->flags & REQ_BAR_POSTFLUSH)
  501 +#define blk_fua_rq(rq) ((rq)->flags & REQ_FUA)
471 502  
472 503 #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist)
473 504  
474 505  
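
The composite QUEUE_ORDERED_* values above are simply a base method (DRAIN or TAG) OR'ed with the PREFLUSH/POSTFLUSH/FUA feature bits, which is what lets start_ordered() and blk_do_ordered() test individual bits of q->ordered instead of switching on whole modes. A trivial standalone check of that decomposition, with the enum values copied from above:

    #include <assert.h>

    enum {
            ORD_DRAIN     = 0x01,
            ORD_TAG       = 0x02,
            ORD_PREFLUSH  = 0x10,
            ORD_POSTFLUSH = 0x20,
            ORD_FUA       = 0x40,
            ORD_DRAIN_FUA = ORD_DRAIN | ORD_PREFLUSH | ORD_FUA,
    };

    int main(void)
    {
            /* DRAIN_FUA wants a pre-flush and a FUA barrier write ... */
            assert(ORD_DRAIN_FUA & ORD_PREFLUSH);
            assert(ORD_DRAIN_FUA & ORD_FUA);
            /* ... but no post-flush and no tag ordering. */
            assert(!(ORD_DRAIN_FUA & ORD_POSTFLUSH));
            assert(!(ORD_DRAIN_FUA & ORD_TAG));
            return 0;
    }
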
... ... @@ -665,11 +696,12 @@
665 696 extern void blk_queue_merge_bvec(request_queue_t *, merge_bvec_fn *);
666 697 extern void blk_queue_dma_alignment(request_queue_t *, int);
667 698 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
668   -extern void blk_queue_ordered(request_queue_t *, int);
  699 +extern int blk_queue_ordered(request_queue_t *, unsigned, prepare_flush_fn *);
669 700 extern void blk_queue_issue_flush_fn(request_queue_t *, issue_flush_fn *);
670   -extern struct request *blk_start_pre_flush(request_queue_t *,struct request *);
671   -extern int blk_complete_barrier_rq(request_queue_t *, struct request *, int);
672   -extern int blk_complete_barrier_rq_locked(request_queue_t *, struct request *, int);
  701 +extern int blk_do_ordered(request_queue_t *, struct request **);
  702 +extern unsigned blk_ordered_cur_seq(request_queue_t *);
  703 +extern unsigned blk_ordered_req_seq(struct request *);
  704 +extern void blk_ordered_complete_seq(request_queue_t *, unsigned, int);
673 705  
674 706 extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *);
675 707 extern void blk_dump_rq_flags(struct request *, char *);
include/linux/elevator.h
... ... @@ -130,6 +130,7 @@
130 130 #define ELEVATOR_INSERT_FRONT 1
131 131 #define ELEVATOR_INSERT_BACK 2
132 132 #define ELEVATOR_INSERT_SORT 3
  133 +#define ELEVATOR_INSERT_REQUEUE 4
133 134  
134 135 /*
135 136 * return values from elevator_may_queue_fn