Commit 4913efe456c987057e5d36a3f0a55422a9072cae
Committed by: Jens Axboe
1 parent: 6958f14545
Exists in: master and 39 other branches
block: deprecate barrier and replace blk_queue_ordered() with blk_queue_flush()
Barrier is deemed too heavy and will soon be replaced by FLUSH/FUA
requests. Deprecate barrier. All REQ_HARDBARRIERs are failed with
-EOPNOTSUPP and blk_queue_ordered() is replaced with the simpler
blk_queue_flush().

blk_queue_flush() takes combinations of REQ_FLUSH and REQ_FUA. If a
device has a write cache and can flush it, it should set REQ_FLUSH. If
the device can also handle FUA writes, it should set REQ_FUA as well.

All blk_queue_ordered() users are converted:

* ORDERED_DRAIN is mapped to 0, which is the default value.
* ORDERED_DRAIN_FLUSH is mapped to REQ_FLUSH.
* ORDERED_DRAIN_FLUSH_FUA is mapped to REQ_FLUSH | REQ_FUA.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Boaz Harrosh <bharrosh@panasas.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Alasdair G Kergon <agk@redhat.com>
Cc: Pierre Ossman <drzeus@drzeus.cx>
Cc: Stefan Weinhuber <wein@de.ibm.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
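For driver authors the conversion is mechanical. A minimal sketch of a
converted probe path (hypothetical driver: my_dev, has_write_cache and
supports_fua are illustrative names, not part of this commit):

#include <linux/blkdev.h>

/* Assumed device state; a real driver reads this from the hardware. */
struct my_dev {
	bool has_write_cache;	/* device has a volatile write cache */
	bool supports_fua;	/* device honors FUA on individual writes */
};

static void my_setup_flush(struct my_dev *dev, struct request_queue *q)
{
	unsigned int flush = 0;

	if (dev->has_write_cache) {
		flush |= REQ_FLUSH;
		if (dev->supports_fua)
			flush |= REQ_FUA;
	}

	/* Replaces blk_queue_ordered(q, QUEUE_ORDERED_*); passing 0 is
	 * the default and corresponds to the old ORDERED_DRAIN. */
	blk_queue_flush(q, flush);
}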
Showing 15 changed files with 67 additions and 102 deletions
- block/blk-barrier.c
- block/blk-core.c
- block/blk-settings.c
- drivers/block/brd.c
- drivers/block/loop.c
- drivers/block/osdblk.c
- drivers/block/ps3disk.c
- drivers/block/virtio_blk.c
- drivers/block/xen-blkfront.c
- drivers/ide/ide-disk.c
- drivers/md/dm.c
- drivers/mmc/card/queue.c
- drivers/s390/block/dasd.c
- drivers/scsi/sd.c
- include/linux/blkdev.h
block/blk-barrier.c
@@ -9,35 +9,6 @@
 
 #include "blk.h"
 
-/**
- * blk_queue_ordered - does this queue support ordered writes
- * @q:       the request queue
- * @ordered: one of QUEUE_ORDERED_*
- *
- * Description:
- *   For journalled file systems, doing ordered writes on a commit
- *   block instead of explicitly doing wait_on_buffer (which is bad
- *   for performance) can be a big win. Block drivers supporting this
- *   feature should call this function and indicate so.
- *
- **/
-int blk_queue_ordered(struct request_queue *q, unsigned ordered)
-{
-	if (ordered != QUEUE_ORDERED_NONE &&
-	    ordered != QUEUE_ORDERED_DRAIN &&
-	    ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
-	    ordered != QUEUE_ORDERED_DRAIN_FUA) {
-		printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered);
-		return -EINVAL;
-	}
-
-	q->ordered = ordered;
-	q->next_ordered = ordered;
-
-	return 0;
-}
-EXPORT_SYMBOL(blk_queue_ordered);
-
 /*
  * Cache flushing for ordered writes handling
  */
block/blk-core.c
@@ -1203,11 +1203,13 @@
 	const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK;
 	int rw_flags;
 
-	if ((bio->bi_rw & REQ_HARDBARRIER) &&
-	    (q->next_ordered == QUEUE_ORDERED_NONE)) {
+	/* REQ_HARDBARRIER is no more */
+	if (WARN_ONCE(bio->bi_rw & REQ_HARDBARRIER,
+		      "block: HARDBARRIER is deprecated, use FLUSH/FUA instead\n")) {
 		bio_endio(bio, -EOPNOTSUPP);
 		return 0;
 	}
+
 	/*
 	 * low level driver can indicate that it wants pages above a
 	 * certain limit bounced to low memory (ie for highmem, or even
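With this hunk, any bio still tagged REQ_HARDBARRIER is completed with
-EOPNOTSUPP after a one-time warning, before it ever reaches a driver.
A hedged sketch of what a leftover caller now observes (hypothetical
helper; bdev and legacy_end_io are assumed names, not from this commit):

#include <linux/bio.h>
#include <linux/blkdev.h>

static void legacy_end_io(struct bio *bio, int error)
{
	/* After this commit, error here is -EOPNOTSUPP. */
	bio_put(bio);
}

static void send_legacy_barrier(struct block_device *bdev)
{
	struct bio *bio = bio_alloc(GFP_NOIO, 0);	/* empty barrier bio */

	bio->bi_bdev = bdev;
	bio->bi_end_io = legacy_end_io;
	submit_bio(WRITE | REQ_HARDBARRIER, bio);	/* now fails fast */
}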
block/blk-settings.c
@@ -794,6 +794,26 @@
 }
 EXPORT_SYMBOL(blk_queue_update_dma_alignment);
 
+/**
+ * blk_queue_flush - configure queue's cache flush capability
+ * @q:		the request queue for the device
+ * @flush:	0, REQ_FLUSH or REQ_FLUSH | REQ_FUA
+ *
+ * Tell block layer cache flush capability of @q.  If it supports
+ * flushing, REQ_FLUSH should be set.  If it supports bypassing
+ * write cache for individual writes, REQ_FUA should be set.
+ */
+void blk_queue_flush(struct request_queue *q, unsigned int flush)
+{
+	WARN_ON_ONCE(flush & ~(REQ_FLUSH | REQ_FUA));
+
+	if (WARN_ON_ONCE(!(flush & REQ_FLUSH) && (flush & REQ_FUA)))
+		flush &= ~REQ_FUA;
+
+	q->flush_flags = flush & (REQ_FLUSH | REQ_FUA);
+}
+EXPORT_SYMBOL_GPL(blk_queue_flush);
+
 static int __init blk_settings_init(void)
 {
 	blk_max_low_pfn = max_low_pfn - 1;
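The helper accepts only three meaningful values; the two WARN_ON_ONCE()
checks above catch driver bugs at registration time. A short usage
sketch (q is an assumed, already-allocated request queue):

blk_queue_flush(q, REQ_FLUSH | REQ_FUA);	/* volatile cache, FUA writes */
blk_queue_flush(q, REQ_FLUSH);			/* volatile cache, flush only */
blk_queue_flush(q, 0);				/* no volatile write cache */

/* REQ_FUA without REQ_FLUSH is rejected: the second warning fires and
 * FUA is masked off, leaving flush_flags == 0. */
blk_queue_flush(q, REQ_FUA);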
drivers/block/brd.c
@@ -482,7 +482,6 @@
 	if (!brd->brd_queue)
 		goto out_free_dev;
 	blk_queue_make_request(brd->brd_queue, brd_make_request);
-	blk_queue_ordered(brd->brd_queue, QUEUE_ORDERED_DRAIN);
 	blk_queue_max_hw_sectors(brd->brd_queue, 1024);
 	blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY);
 
drivers/block/loop.c
@@ -832,7 +832,7 @@
 	lo->lo_queue->unplug_fn = loop_unplug;
 
 	if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
-		blk_queue_ordered(lo->lo_queue, QUEUE_ORDERED_DRAIN_FLUSH);
+		blk_queue_flush(lo->lo_queue, REQ_FLUSH);
 
 	set_capacity(lo->lo_disk, size);
 	bd_set_size(bdev, size << 9);
drivers/block/osdblk.c
drivers/block/ps3disk.c
@@ -468,7 +468,7 @@
 	blk_queue_dma_alignment(queue, dev->blk_size-1);
 	blk_queue_logical_block_size(queue, dev->blk_size);
 
-	blk_queue_ordered(queue, QUEUE_ORDERED_DRAIN_FLUSH);
+	blk_queue_flush(queue, REQ_FLUSH);
 
 	blk_queue_max_segments(queue, -1);
 	blk_queue_max_segment_size(queue, dev->bounce_size);
drivers/block/virtio_blk.c
@@ -388,22 +388,15 @@
 	vblk->disk->driverfs_dev = &vdev->dev;
 	index++;
 
-	if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH)) {
-		/*
-		 * If the FLUSH feature is supported we do have support for
-		 * flushing a volatile write cache on the host.  Use that
-		 * to implement write barrier support.
-		 */
-		blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH);
-	} else {
-		/*
-		 * If the FLUSH feature is not supported we must assume that
-		 * the host does not perform any kind of volatile write
-		 * caching. We still need to drain the queue to provider
-		 * proper barrier semantics.
-		 */
-		blk_queue_ordered(q, QUEUE_ORDERED_DRAIN);
-	}
+	/*
+	 * If the FLUSH feature is supported we do have support for
+	 * flushing a volatile write cache on the host.  Use that to
+	 * implement write barrier support; otherwise, we must assume
+	 * that the host does not perform any kind of volatile write
+	 * caching.
+	 */
+	if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH))
+		blk_queue_flush(q, REQ_FLUSH);
 
 	/* If disk is read-only in the host, the guest should obey */
 	if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
drivers/block/xen-blkfront.c
@@ -95,7 +95,7 @@
 	struct gnttab_free_callback callback;
 	struct blk_shadow shadow[BLK_RING_SIZE];
 	unsigned long shadow_free;
-	int feature_barrier;
+	unsigned int feature_flush;
 	int is_ready;
 };
 
@@ -418,25 +418,12 @@
 }
 
 
-static int xlvbd_barrier(struct blkfront_info *info)
+static void xlvbd_flush(struct blkfront_info *info)
 {
-	int err;
-	const char *barrier;
-
-	switch (info->feature_barrier) {
-	case QUEUE_ORDERED_DRAIN:	barrier = "enabled";	break;
-	case QUEUE_ORDERED_NONE:	barrier = "disabled";	break;
-	default:			return -EINVAL;
-	}
-
-	err = blk_queue_ordered(info->rq, info->feature_barrier);
-
-	if (err)
-		return err;
-
+	blk_queue_flush(info->rq, info->feature_flush);
 	printk(KERN_INFO "blkfront: %s: barriers %s\n",
-	       info->gd->disk_name, barrier);
-	return 0;
+	       info->gd->disk_name,
+	       info->feature_flush ? "enabled" : "disabled");
 }
 
 
@@ -515,7 +502,7 @@
 	info->rq = gd->queue;
 	info->gd = gd;
 
-	xlvbd_barrier(info);
+	xlvbd_flush(info);
 
 	if (vdisk_info & VDISK_READONLY)
 		set_disk_ro(gd, 1);
@@ -661,8 +648,8 @@
 			printk(KERN_WARNING "blkfront: %s: write barrier op failed\n",
 			       info->gd->disk_name);
 			error = -EOPNOTSUPP;
-			info->feature_barrier = QUEUE_ORDERED_NONE;
-			xlvbd_barrier(info);
+			info->feature_flush = 0;
+			xlvbd_flush(info);
 		}
 		/* fall through */
 	case BLKIF_OP_READ:
@@ -1075,19 +1062,13 @@
 	/*
 	 * If there's no "feature-barrier" defined, then it means
 	 * we're dealing with a very old backend which writes
-	 * synchronously; draining will do what needs to get done.
+	 * synchronously; nothing to do.
 	 *
 	 * If there are barriers, then we use flush.
-	 *
-	 * If barriers are not supported, then there's no much we can
-	 * do, so just set ordering to NONE.
 	 */
-	if (err)
-		info->feature_barrier = QUEUE_ORDERED_DRAIN;
-	else if (barrier)
-		info->feature_barrier = QUEUE_ORDERED_DRAIN_FLUSH;
-	else
-		info->feature_barrier = QUEUE_ORDERED_NONE;
+	info->feature_flush = 0;
+	if (!err && barrier)
+		info->feature_flush = REQ_FLUSH;
 
 	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
 	if (err) {
drivers/ide/ide-disk.c
@@ -516,10 +516,10 @@
 	return ide_no_data_taskfile(drive, &cmd);
 }
 
-static void update_ordered(ide_drive_t *drive)
+static void update_flush(ide_drive_t *drive)
 {
 	u16 *id = drive->id;
-	unsigned ordered = QUEUE_ORDERED_NONE;
+	unsigned flush = 0;
 
 	if (drive->dev_flags & IDE_DFLAG_WCACHE) {
 		unsigned long long capacity;
@@ -543,13 +543,12 @@
 			drive->name, barrier ? "" : "not ");
 
 		if (barrier) {
-			ordered = QUEUE_ORDERED_DRAIN_FLUSH;
+			flush = REQ_FLUSH;
 			blk_queue_prep_rq(drive->queue, idedisk_prep_fn);
 		}
-	} else
-		ordered = QUEUE_ORDERED_DRAIN;
+	}
 
-	blk_queue_ordered(drive->queue, ordered);
+	blk_queue_flush(drive->queue, flush);
 }
 
 ide_devset_get_flag(wcache, IDE_DFLAG_WCACHE);
@@ -572,7 +571,7 @@
 		}
 	}
 
-	update_ordered(drive);
+	update_flush(drive);
 
 	return err;
 }
drivers/md/dm.c
@@ -2245,7 +2245,7 @@
 	blk_queue_softirq_done(md->queue, dm_softirq_done);
 	blk_queue_prep_rq(md->queue, dm_prep_fn);
 	blk_queue_lld_busy(md->queue, dm_lld_busy);
-	blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH);
+	blk_queue_flush(md->queue, REQ_FLUSH);
 
 	elv_register_queue(md->queue);
 
drivers/mmc/card/queue.c
@@ -128,7 +128,6 @@
 	mq->req = NULL;
 
 	blk_queue_prep_rq(mq->queue, mmc_prep_request);
-	blk_queue_ordered(mq->queue, QUEUE_ORDERED_DRAIN);
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue);
 	if (mmc_can_erase(card)) {
 		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mq->queue);
drivers/s390/block/dasd.c
drivers/scsi/sd.c
@@ -2109,7 +2109,7 @@
 	struct scsi_disk *sdkp = scsi_disk(disk);
 	struct scsi_device *sdp = sdkp->device;
 	unsigned char *buffer;
-	unsigned ordered;
+	unsigned flush = 0;
 
 	SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp,
 				      "sd_revalidate_disk\n"));
@@ -2151,15 +2151,15 @@
 
 	/*
 	 * We now have all cache related info, determine how we deal
-	 * with ordered requests.
+	 * with flush requests.
 	 */
-	if (sdkp->WCE)
-		ordered = sdkp->DPOFUA
-			? QUEUE_ORDERED_DRAIN_FUA : QUEUE_ORDERED_DRAIN_FLUSH;
-	else
-		ordered = QUEUE_ORDERED_DRAIN;
+	if (sdkp->WCE) {
+		flush |= REQ_FLUSH;
+		if (sdkp->DPOFUA)
+			flush |= REQ_FUA;
+	}
 
-	blk_queue_ordered(sdkp->disk->queue, ordered);
+	blk_queue_flush(sdkp->disk->queue, flush);
 
 	set_capacity(disk, sdkp->capacity);
 	kfree(buffer);
include/linux/blkdev.h
@@ -355,8 +355,10 @@
 	struct blk_trace *blk_trace;
 #endif
 	/*
-	 * reserved for flush operations
+	 * for flush operations
 	 */
+	unsigned int flush_flags;
+
 	unsigned int ordered, next_ordered, ordseq;
 	int orderr, ordcolor;
 	struct request pre_flush_rq, bar_rq, post_flush_rq;
@@ -865,8 +867,8 @@
 extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
 extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
+extern void blk_queue_flush(struct request_queue *q, unsigned int flush);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
-extern int blk_queue_ordered(struct request_queue *, unsigned);
 extern bool blk_do_ordered(struct request_queue *, struct request **);
 extern unsigned blk_ordered_cur_seq(struct request_queue *);
 extern unsigned blk_ordered_req_seq(struct request *);