Commit 4913efe456c987057e5d36a3f0a55422a9072cae

Authored by Tejun Heo
Committed by Jens Axboe
1 parent 6958f14545

block: deprecate barrier and replace blk_queue_ordered() with blk_queue_flush()

Barrier is deemed too heavy and will soon be replaced by FLUSH/FUA
requests.  Deprecate barrier.  All REQ_HARDBARRIERs are failed with
-EOPNOTSUPP and blk_queue_ordered() is replaced with the simpler
blk_queue_flush().

blk_queue_flush() takes combinations of REQ_FLUSH and REQ_FUA.  If a
device has a write cache and can flush it, it should set REQ_FLUSH.  If
the device can handle FUA writes, it should also set REQ_FUA.

All blk_queue_ordered() users are converted.

* ORDERED_DRAIN is mapped to 0 which is the default value.
* ORDERED_DRAIN_FLUSH is mapped to REQ_FLUSH.
* ORDERED_DRAIN_FUA is mapped to REQ_FLUSH | REQ_FUA.
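
For illustration, under the new interface a driver advertises its cache
capabilities directly instead of selecting an ordered mode.  A minimal
sketch (the mydev_configure_flush() helper and its has_wcache/has_fua
parameters are hypothetical, not part of this patch):

/*
 * Sketch only: how a driver might choose its flush flags.  The helper
 * name and the has_wcache/has_fua parameters are hypothetical.
 */
static void mydev_configure_flush(struct request_queue *q,
				  bool has_wcache, bool has_fua)
{
	unsigned int flush = 0;

	if (has_wcache) {
		flush |= REQ_FLUSH;	/* volatile cache that can be flushed */
		if (has_fua)
			flush |= REQ_FUA;	/* FUA writes bypass the cache */
	}
	/* was: blk_queue_ordered(q, QUEUE_ORDERED_DRAIN / _FLUSH / _FUA) */
	blk_queue_flush(q, flush);
}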

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Boaz Harrosh <bharrosh@panasas.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Alasdair G Kergon <agk@redhat.com>
Cc: Pierre Ossman <drzeus@drzeus.cx>
Cc: Stefan Weinhuber <wein@de.ibm.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>

Showing 15 changed files with 67 additions and 102 deletions

block/blk-barrier.c
... ... @@ -9,35 +9,6 @@
9 9  
10 10 #include "blk.h"
11 11  
12   -/**
13   - * blk_queue_ordered - does this queue support ordered writes
14   - * @q: the request queue
15   - * @ordered: one of QUEUE_ORDERED_*
16   - *
17   - * Description:
18   - * For journalled file systems, doing ordered writes on a commit
19   - * block instead of explicitly doing wait_on_buffer (which is bad
20   - * for performance) can be a big win. Block drivers supporting this
21   - * feature should call this function and indicate so.
22   - *
23   - **/
24   -int blk_queue_ordered(struct request_queue *q, unsigned ordered)
25   -{
26   - if (ordered != QUEUE_ORDERED_NONE &&
27   - ordered != QUEUE_ORDERED_DRAIN &&
28   - ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
29   - ordered != QUEUE_ORDERED_DRAIN_FUA) {
30   - printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered);
31   - return -EINVAL;
32   - }
33   -
34   - q->ordered = ordered;
35   - q->next_ordered = ordered;
36   -
37   - return 0;
38   -}
39   -EXPORT_SYMBOL(blk_queue_ordered);
40   -
41 12 /*
42 13 * Cache flushing for ordered writes handling
43 14 */
block/blk-core.c
... ... @@ -1203,11 +1203,13 @@
1203 1203 const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK;
1204 1204 int rw_flags;
1205 1205  
1206   - if ((bio->bi_rw & REQ_HARDBARRIER) &&
1207   - (q->next_ordered == QUEUE_ORDERED_NONE)) {
  1206 + /* REQ_HARDBARRIER is no more */
  1207 + if (WARN_ONCE(bio->bi_rw & REQ_HARDBARRIER,
  1208 + "block: HARDBARRIER is deprecated, use FLUSH/FUA instead\n")) {
1208 1209 bio_endio(bio, -EOPNOTSUPP);
1209 1210 return 0;
1210 1211 }
  1212 +
1211 1213 /*
1212 1214 * low level driver can indicate that it wants pages above a
1213 1215 * certain limit bounced to low memory (ie for highmem, or even
block/blk-settings.c
... ... @@ -794,6 +794,26 @@
794 794 }
795 795 EXPORT_SYMBOL(blk_queue_update_dma_alignment);
796 796  
  797 +/**
  798 + * blk_queue_flush - configure queue's cache flush capability
  799 + * @q: the request queue for the device
  800 + * @flush: 0, REQ_FLUSH or REQ_FLUSH | REQ_FUA
  801 + *
  802 + * Tell block layer cache flush capability of @q. If it supports
  803 + * flushing, REQ_FLUSH should be set. If it supports bypassing
  804 + * write cache for individual writes, REQ_FUA should be set.
  805 + */
  806 +void blk_queue_flush(struct request_queue *q, unsigned int flush)
  807 +{
  808 + WARN_ON_ONCE(flush & ~(REQ_FLUSH | REQ_FUA));
  809 +
  810 + if (WARN_ON_ONCE(!(flush & REQ_FLUSH) && (flush & REQ_FUA)))
  811 + flush &= ~REQ_FUA;
  812 +
  813 + q->flush_flags = flush & (REQ_FLUSH | REQ_FUA);
  814 +}
  815 +EXPORT_SYMBOL_GPL(blk_queue_flush);
  816 +
797 817 static int __init blk_settings_init(void)
798 818 {
799 819 blk_max_low_pfn = max_low_pfn - 1;
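
Of the helper just added above, only three settings are meaningful: 0,
REQ_FLUSH, and REQ_FLUSH | REQ_FUA; passing REQ_FUA alone trips the second
WARN_ON_ONCE and the FUA bit is dropped.  Illustrative calls (not part of
this patch):

blk_queue_flush(q, 0);				/* write-through device, nothing to flush */
blk_queue_flush(q, REQ_FLUSH);			/* volatile write cache, flushable */
blk_queue_flush(q, REQ_FLUSH | REQ_FUA);	/* flushable cache plus FUA writes */
/* blk_queue_flush(q, REQ_FUA) warns and leaves q->flush_flags == 0 */
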
drivers/block/brd.c
... ... @@ -482,7 +482,6 @@
482 482 if (!brd->brd_queue)
483 483 goto out_free_dev;
484 484 blk_queue_make_request(brd->brd_queue, brd_make_request);
485   - blk_queue_ordered(brd->brd_queue, QUEUE_ORDERED_DRAIN);
486 485 blk_queue_max_hw_sectors(brd->brd_queue, 1024);
487 486 blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY);
488 487  
drivers/block/loop.c
... ... @@ -832,7 +832,7 @@
832 832 lo->lo_queue->unplug_fn = loop_unplug;
833 833  
834 834 if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
835   - blk_queue_ordered(lo->lo_queue, QUEUE_ORDERED_DRAIN_FLUSH);
  835 + blk_queue_flush(lo->lo_queue, REQ_FLUSH);
836 836  
837 837 set_capacity(lo->lo_disk, size);
838 838 bd_set_size(bdev, size << 9);
drivers/block/osdblk.c
... ... @@ -439,7 +439,7 @@
439 439 blk_queue_stack_limits(q, osd_request_queue(osdev->osd));
440 440  
441 441 blk_queue_prep_rq(q, blk_queue_start_tag);
442   - blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH);
  442 + blk_queue_flush(q, REQ_FLUSH);
443 443  
444 444 disk->queue = q;
445 445  
drivers/block/ps3disk.c
... ... @@ -468,7 +468,7 @@
468 468 blk_queue_dma_alignment(queue, dev->blk_size-1);
469 469 blk_queue_logical_block_size(queue, dev->blk_size);
470 470  
471   - blk_queue_ordered(queue, QUEUE_ORDERED_DRAIN_FLUSH);
  471 + blk_queue_flush(queue, REQ_FLUSH);
472 472  
473 473 blk_queue_max_segments(queue, -1);
474 474 blk_queue_max_segment_size(queue, dev->bounce_size);
drivers/block/virtio_blk.c
... ... @@ -388,22 +388,15 @@
388 388 vblk->disk->driverfs_dev = &vdev->dev;
389 389 index++;
390 390  
391   - if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH)) {
392   - /*
393   - * If the FLUSH feature is supported we do have support for
394   - * flushing a volatile write cache on the host. Use that
395   - * to implement write barrier support.
396   - */
397   - blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH);
398   - } else {
399   - /*
400   - * If the FLUSH feature is not supported we must assume that
401   - * the host does not perform any kind of volatile write
402   - * caching. We still need to drain the queue to provider
403   - * proper barrier semantics.
404   - */
405   - blk_queue_ordered(q, QUEUE_ORDERED_DRAIN);
406   - }
  391 + /*
  392 + * If the FLUSH feature is supported we do have support for
  393 + * flushing a volatile write cache on the host. Use that to
  394 + * implement write barrier support; otherwise, we must assume
  395 + * that the host does not perform any kind of volatile write
  396 + * caching.
  397 + */
  398 + if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH))
  399 + blk_queue_flush(q, REQ_FLUSH);
407 400  
408 401 /* If disk is read-only in the host, the guest should obey */
409 402 if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
drivers/block/xen-blkfront.c
... ... @@ -95,7 +95,7 @@
95 95 struct gnttab_free_callback callback;
96 96 struct blk_shadow shadow[BLK_RING_SIZE];
97 97 unsigned long shadow_free;
98   - int feature_barrier;
  98 + unsigned int feature_flush;
99 99 int is_ready;
100 100 };
101 101  
102 102  
103 103  
... ... @@ -418,25 +418,12 @@
418 418 }
419 419  
420 420  
421   -static int xlvbd_barrier(struct blkfront_info *info)
  421 +static void xlvbd_flush(struct blkfront_info *info)
422 422 {
423   - int err;
424   - const char *barrier;
425   -
426   - switch (info->feature_barrier) {
427   - case QUEUE_ORDERED_DRAIN: barrier = "enabled"; break;
428   - case QUEUE_ORDERED_NONE: barrier = "disabled"; break;
429   - default: return -EINVAL;
430   - }
431   -
432   - err = blk_queue_ordered(info->rq, info->feature_barrier);
433   -
434   - if (err)
435   - return err;
436   -
  423 + blk_queue_flush(info->rq, info->feature_flush);
437 424 printk(KERN_INFO "blkfront: %s: barriers %s\n",
438   - info->gd->disk_name, barrier);
439   - return 0;
  425 + info->gd->disk_name,
  426 + info->feature_flush ? "enabled" : "disabled");
440 427 }
441 428  
442 429  
... ... @@ -515,7 +502,7 @@
515 502 info->rq = gd->queue;
516 503 info->gd = gd;
517 504  
518   - xlvbd_barrier(info);
  505 + xlvbd_flush(info);
519 506  
520 507 if (vdisk_info & VDISK_READONLY)
521 508 set_disk_ro(gd, 1);
... ... @@ -661,8 +648,8 @@
661 648 printk(KERN_WARNING "blkfront: %s: write barrier op failed\n",
662 649 info->gd->disk_name);
663 650 error = -EOPNOTSUPP;
664   - info->feature_barrier = QUEUE_ORDERED_NONE;
665   - xlvbd_barrier(info);
  651 + info->feature_flush = 0;
  652 + xlvbd_flush(info);
666 653 }
667 654 /* fall through */
668 655 case BLKIF_OP_READ:
669 656  
670 657  
... ... @@ -1075,19 +1062,13 @@
1075 1062 /*
1076 1063 * If there's no "feature-barrier" defined, then it means
1077 1064 * we're dealing with a very old backend which writes
1078   - * synchronously; draining will do what needs to get done.
  1065 + * synchronously; nothing to do.
1079 1066 *
1080 1067 * If there are barriers, then we use flush.
1081   - *
1082   - * If barriers are not supported, then there's no much we can
1083   - * do, so just set ordering to NONE.
1084 1068 */
1085   - if (err)
1086   - info->feature_barrier = QUEUE_ORDERED_DRAIN;
1087   - else if (barrier)
1088   - info->feature_barrier = QUEUE_ORDERED_DRAIN_FLUSH;
1089   - else
1090   - info->feature_barrier = QUEUE_ORDERED_NONE;
  1069 + info->feature_flush = 0;
  1070 + if (!err && barrier)
  1071 + info->feature_flush = REQ_FLUSH;
1091 1072  
1092 1073 err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
1093 1074 if (err) {
drivers/ide/ide-disk.c
... ... @@ -516,10 +516,10 @@
516 516 return ide_no_data_taskfile(drive, &cmd);
517 517 }
518 518  
519   -static void update_ordered(ide_drive_t *drive)
  519 +static void update_flush(ide_drive_t *drive)
520 520 {
521 521 u16 *id = drive->id;
522   - unsigned ordered = QUEUE_ORDERED_NONE;
  522 + unsigned flush = 0;
523 523  
524 524 if (drive->dev_flags & IDE_DFLAG_WCACHE) {
525 525 unsigned long long capacity;
526 526  
527 527  
... ... @@ -543,13 +543,12 @@
543 543 drive->name, barrier ? "" : "not ");
544 544  
545 545 if (barrier) {
546   - ordered = QUEUE_ORDERED_DRAIN_FLUSH;
  546 + flush = REQ_FLUSH;
547 547 blk_queue_prep_rq(drive->queue, idedisk_prep_fn);
548 548 }
549   - } else
550   - ordered = QUEUE_ORDERED_DRAIN;
  549 + }
551 550  
552   - blk_queue_ordered(drive->queue, ordered);
  551 + blk_queue_flush(drive->queue, flush);
553 552 }
554 553  
555 554 ide_devset_get_flag(wcache, IDE_DFLAG_WCACHE);
... ... @@ -572,7 +571,7 @@
572 571 }
573 572 }
574 573  
575   - update_ordered(drive);
  574 + update_flush(drive);
576 575  
577 576 return err;
578 577 }
drivers/md/dm.c
... ... @@ -2245,7 +2245,7 @@
2245 2245 blk_queue_softirq_done(md->queue, dm_softirq_done);
2246 2246 blk_queue_prep_rq(md->queue, dm_prep_fn);
2247 2247 blk_queue_lld_busy(md->queue, dm_lld_busy);
2248   - blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH);
  2248 + blk_queue_flush(md->queue, REQ_FLUSH);
2249 2249  
2250 2250 elv_register_queue(md->queue);
2251 2251  
drivers/mmc/card/queue.c
... ... @@ -128,7 +128,6 @@
128 128 mq->req = NULL;
129 129  
130 130 blk_queue_prep_rq(mq->queue, mmc_prep_request);
131   - blk_queue_ordered(mq->queue, QUEUE_ORDERED_DRAIN);
132 131 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue);
133 132 if (mmc_can_erase(card)) {
134 133 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mq->queue);
drivers/s390/block/dasd.c
... ... @@ -2197,7 +2197,6 @@
2197 2197 */
2198 2198 blk_queue_max_segment_size(block->request_queue, PAGE_SIZE);
2199 2199 blk_queue_segment_boundary(block->request_queue, PAGE_SIZE - 1);
2200   - blk_queue_ordered(block->request_queue, QUEUE_ORDERED_DRAIN);
2201 2200 }
2202 2201  
2203 2202 /*
drivers/scsi/sd.c
... ... @@ -2109,7 +2109,7 @@
2109 2109 struct scsi_disk *sdkp = scsi_disk(disk);
2110 2110 struct scsi_device *sdp = sdkp->device;
2111 2111 unsigned char *buffer;
2112   - unsigned ordered;
  2112 + unsigned flush = 0;
2113 2113  
2114 2114 SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp,
2115 2115 "sd_revalidate_disk\n"));
2116 2116  
2117 2117  
... ... @@ -2151,15 +2151,15 @@
2151 2151  
2152 2152 /*
2153 2153 * We now have all cache related info, determine how we deal
2154   - * with ordered requests.
  2154 + * with flush requests.
2155 2155 */
2156   - if (sdkp->WCE)
2157   - ordered = sdkp->DPOFUA
2158   - ? QUEUE_ORDERED_DRAIN_FUA : QUEUE_ORDERED_DRAIN_FLUSH;
2159   - else
2160   - ordered = QUEUE_ORDERED_DRAIN;
  2156 + if (sdkp->WCE) {
  2157 + flush |= REQ_FLUSH;
  2158 + if (sdkp->DPOFUA)
  2159 + flush |= REQ_FUA;
  2160 + }
2161 2161  
2162   - blk_queue_ordered(sdkp->disk->queue, ordered);
  2162 + blk_queue_flush(sdkp->disk->queue, flush);
2163 2163  
2164 2164 set_capacity(disk, sdkp->capacity);
2165 2165 kfree(buffer);
include/linux/blkdev.h
... ... @@ -355,8 +355,10 @@
355 355 struct blk_trace *blk_trace;
356 356 #endif
357 357 /*
358   - * reserved for flush operations
  358 + * for flush operations
359 359 */
  360 + unsigned int flush_flags;
  361 +
360 362 unsigned int ordered, next_ordered, ordseq;
361 363 int orderr, ordcolor;
362 364 struct request pre_flush_rq, bar_rq, post_flush_rq;
363 365  
... ... @@ -865,8 +867,8 @@
865 867 extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
866 868 extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
867 869 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
  870 +extern void blk_queue_flush(struct request_queue *q, unsigned int flush);
868 871 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
869   -extern int blk_queue_ordered(struct request_queue *, unsigned);
870 872 extern bool blk_do_ordered(struct request_queue *, struct request **);
871 873 extern unsigned blk_ordered_cur_seq(struct request_queue *);
872 874 extern unsigned blk_ordered_req_seq(struct request *);