Commit bf2de6f5a4faf0197268f18d08969b003b87b6e8
1 parent
c07e2b4129
Exists in
master
and in
7 other branches
block: Initial support for data-less (or empty) barrier support
This implements functionality to pass down or insert a barrier in a queue, without having data attached to it. The ->prepare_flush_fn() infrastructure from data barriers is reused to provide this functionality. Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Showing 5 changed files with 71 additions and 21 deletions Side-by-side Diff
block/elevator.c
... | ... | @@ -712,6 +712,14 @@ |
712 | 712 | int ret; |
713 | 713 | |
714 | 714 | while ((rq = __elv_next_request(q)) != NULL) { |
715 | + /* | |
716 | + * Kill the empty barrier place holder, the driver must | |
717 | + * not ever see it. | |
718 | + */ | |
719 | + if (blk_empty_barrier(rq)) { | |
720 | + end_queued_request(rq, 1); | |
721 | + continue; | |
722 | + } | |
715 | 723 | if (!(rq->cmd_flags & REQ_STARTED)) { |
716 | 724 | /* |
717 | 725 | * This is the first time the device driver |
block/ll_rw_blk.c
... | ... | @@ -458,9 +458,12 @@ |
458 | 458 | * Queue ordered sequence. As we stack them at the head, we |
459 | 459 | * need to queue in reverse order. Note that we rely on that |
460 | 460 | * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs |
461 | - * request gets inbetween ordered sequence. | |
461 | + * request gets inbetween ordered sequence. If this request is | |
462 | + * an empty barrier, we don't need to do a postflush ever since | |
463 | + * there will be no data written between the pre and post flush. | |
464 | + * Hence a single flush will suffice. | |
462 | 465 | */ |
463 | - if (q->ordered & QUEUE_ORDERED_POSTFLUSH) | |
466 | + if ((q->ordered & QUEUE_ORDERED_POSTFLUSH) && !blk_empty_barrier(rq)) | |
464 | 467 | queue_flush(q, QUEUE_ORDERED_POSTFLUSH); |
465 | 468 | else |
466 | 469 | q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH; |
... | ... | @@ -484,7 +487,7 @@ |
484 | 487 | int blk_do_ordered(struct request_queue *q, struct request **rqp) |
485 | 488 | { |
486 | 489 | struct request *rq = *rqp; |
487 | - int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq); | |
490 | + const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq); | |
488 | 491 | |
489 | 492 | if (!q->ordseq) { |
490 | 493 | if (!is_barrier) |
... | ... | @@ -3054,7 +3057,7 @@ |
3054 | 3057 | { |
3055 | 3058 | struct block_device *bdev = bio->bi_bdev; |
3056 | 3059 | |
3057 | - if (bdev != bdev->bd_contains) { | |
3060 | + if (bio_sectors(bio) && bdev != bdev->bd_contains) { | |
3058 | 3061 | struct hd_struct *p = bdev->bd_part; |
3059 | 3062 | const int rw = bio_data_dir(bio); |
3060 | 3063 | |
3061 | 3064 | |
3062 | 3065 | |
... | ... | @@ -3313,23 +3316,32 @@ |
3313 | 3316 | { |
3314 | 3317 | int count = bio_sectors(bio); |
3315 | 3318 | |
3316 | - BIO_BUG_ON(!bio->bi_size); | |
3317 | - BIO_BUG_ON(!bio->bi_io_vec); | |
3318 | 3319 | bio->bi_rw |= rw; |
3319 | - if (rw & WRITE) { | |
3320 | - count_vm_events(PGPGOUT, count); | |
3321 | - } else { | |
3322 | - task_io_account_read(bio->bi_size); | |
3323 | - count_vm_events(PGPGIN, count); | |
3324 | - } | |
3325 | 3320 | |
3326 | - if (unlikely(block_dump)) { | |
3327 | - char b[BDEVNAME_SIZE]; | |
3328 | - printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n", | |
3329 | - current->comm, current->pid, | |
3330 | - (rw & WRITE) ? "WRITE" : "READ", | |
3331 | - (unsigned long long)bio->bi_sector, | |
3332 | - bdevname(bio->bi_bdev,b)); | |
3321 | + /* | |
3322 | + * If it's a regular read/write or a barrier with data attached, | |
3323 | + * go through the normal accounting stuff before submission. | |
3324 | + */ | |
3325 | + if (!bio_empty_barrier(bio)) { | |
3326 | + | |
3327 | + BIO_BUG_ON(!bio->bi_size); | |
3328 | + BIO_BUG_ON(!bio->bi_io_vec); | |
3329 | + | |
3330 | + if (rw & WRITE) { | |
3331 | + count_vm_events(PGPGOUT, count); | |
3332 | + } else { | |
3333 | + task_io_account_read(bio->bi_size); | |
3334 | + count_vm_events(PGPGIN, count); | |
3335 | + } | |
3336 | + | |
3337 | + if (unlikely(block_dump)) { | |
3338 | + char b[BDEVNAME_SIZE]; | |
3339 | + printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n", | |
3340 | + current->comm, current->pid, | |
3341 | + (rw & WRITE) ? "WRITE" : "READ", | |
3342 | + (unsigned long long)bio->bi_sector, | |
3343 | + bdevname(bio->bi_bdev,b)); | |
3344 | + } | |
3333 | 3345 | } |
3334 | 3346 | |
3335 | 3347 | generic_make_request(bio); |
... | ... | @@ -3404,6 +3416,14 @@ |
3404 | 3416 | total_bytes = bio_nbytes = 0; |
3405 | 3417 | while ((bio = req->bio) != NULL) { |
3406 | 3418 | int nbytes; |
3419 | + | |
3420 | + /* | |
3421 | + * For an empty barrier request, the low level driver must | |
3422 | + * store a potential error location in ->sector. We pass | |
3423 | + * that back up in ->bi_sector. | |
3424 | + */ | |
3425 | + if (blk_empty_barrier(req)) | |
3426 | + bio->bi_sector = req->sector; | |
3407 | 3427 | |
3408 | 3428 | if (nr_bytes >= bio->bi_size) { |
3409 | 3429 | req->bio = bio->bi_next; |
include/linux/bio.h
... | ... | @@ -176,13 +176,28 @@ |
176 | 176 | #define bio_offset(bio) bio_iovec((bio))->bv_offset |
177 | 177 | #define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx) |
178 | 178 | #define bio_sectors(bio) ((bio)->bi_size >> 9) |
179 | -#define bio_cur_sectors(bio) (bio_iovec(bio)->bv_len >> 9) | |
180 | -#define bio_data(bio) (page_address(bio_page((bio))) + bio_offset((bio))) | |
181 | 179 | #define bio_barrier(bio) ((bio)->bi_rw & (1 << BIO_RW_BARRIER)) |
182 | 180 | #define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNC)) |
183 | 181 | #define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST)) |
184 | 182 | #define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD)) |
185 | 183 | #define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META)) |
184 | +#define bio_empty_barrier(bio) (bio_barrier(bio) && !(bio)->bi_size) | |
185 | + | |
186 | +static inline unsigned int bio_cur_sectors(struct bio *bio) | |
187 | +{ | |
188 | + if (bio->bi_vcnt) | |
189 | + return bio_iovec(bio)->bv_len >> 9; | |
190 | + | |
191 | + return 0; | |
192 | +} | |
193 | + | |
194 | +static inline void *bio_data(struct bio *bio) | |
195 | +{ | |
196 | + if (bio->bi_vcnt) | |
197 | + return page_address(bio_page(bio)) + bio_offset(bio); | |
198 | + | |
199 | + return NULL; | |
200 | +} | |
186 | 201 | |
187 | 202 | /* |
188 | 203 | * will die |
include/linux/blkdev.h
... | ... | @@ -540,6 +540,7 @@ |
540 | 540 | #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) |
541 | 541 | #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) |
542 | 542 | #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) |
543 | +#define blk_empty_barrier(rq) (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors) | |
543 | 544 | |
544 | 545 | #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) |
545 | 546 |
mm/bounce.c
... | ... | @@ -265,6 +265,12 @@ |
265 | 265 | mempool_t *pool; |
266 | 266 | |
267 | 267 | /* |
268 | + * Data-less bio, nothing to bounce | |
269 | + */ | |
270 | + if (bio_empty_barrier(*bio_orig)) | |
271 | + return; | |
272 | + | |
273 | + /* | |
268 | 274 | * for non-isa bounce case, just check if the bounce pfn is equal |
269 | 275 | * to or bigger than the highest pfn in the system -- in that case, |
270 | 276 | * don't waste time iterating over bio segments |