Commit bf2de6f5a4faf0197268f18d08969b003b87b6e8

Authored by Jens Axboe
1 parent c07e2b4129

block: Initial support for data-less (or empty) barriers

This implements functionality to pass down or insert a barrier
in a queue, without having data attached to it. The ->prepare_flush_fn()
infrastructure from data barriers is reused to provide this
functionality.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

Showing 5 changed files with 71 additions and 21 deletions Side-by-side Diff

... ... @@ -712,6 +712,14 @@
712 712 int ret;
713 713  
714 714 while ((rq = __elv_next_request(q)) != NULL) {
  715 + /*
  716 + * Kill the empty barrier place holder, the driver must
  717 + * not ever see it.
  718 + */
  719 + if (blk_empty_barrier(rq)) {
  720 + end_queued_request(rq, 1);
  721 + continue;
  722 + }
715 723 if (!(rq->cmd_flags & REQ_STARTED)) {
716 724 /*
717 725 * This is the first time the device driver
... ... @@ -458,9 +458,12 @@
458 458 * Queue ordered sequence. As we stack them at the head, we
459 459 * need to queue in reverse order. Note that we rely on that
460 460 * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
461   - * request gets inbetween ordered sequence.
  461 + * request gets inbetween ordered sequence. If this request is
  462 + * an empty barrier, we don't need to do a postflush ever since
  463 + * there will be no data written between the pre and post flush.
  464 + * Hence a single flush will suffice.
462 465 */
463   - if (q->ordered & QUEUE_ORDERED_POSTFLUSH)
  466 + if ((q->ordered & QUEUE_ORDERED_POSTFLUSH) && !blk_empty_barrier(rq))
464 467 queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
465 468 else
466 469 q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
... ... @@ -484,7 +487,7 @@
484 487 int blk_do_ordered(struct request_queue *q, struct request **rqp)
485 488 {
486 489 struct request *rq = *rqp;
487   - int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
  490 + const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
488 491  
489 492 if (!q->ordseq) {
490 493 if (!is_barrier)
... ... @@ -3054,7 +3057,7 @@
3054 3057 {
3055 3058 struct block_device *bdev = bio->bi_bdev;
3056 3059  
3057   - if (bdev != bdev->bd_contains) {
  3060 + if (bio_sectors(bio) && bdev != bdev->bd_contains) {
3058 3061 struct hd_struct *p = bdev->bd_part;
3059 3062 const int rw = bio_data_dir(bio);
3060 3063  
3061 3064  
3062 3065  
... ... @@ -3313,23 +3316,32 @@
3313 3316 {
3314 3317 int count = bio_sectors(bio);
3315 3318  
3316   - BIO_BUG_ON(!bio->bi_size);
3317   - BIO_BUG_ON(!bio->bi_io_vec);
3318 3319 bio->bi_rw |= rw;
3319   - if (rw & WRITE) {
3320   - count_vm_events(PGPGOUT, count);
3321   - } else {
3322   - task_io_account_read(bio->bi_size);
3323   - count_vm_events(PGPGIN, count);
3324   - }
3325 3320  
3326   - if (unlikely(block_dump)) {
3327   - char b[BDEVNAME_SIZE];
3328   - printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
3329   - current->comm, current->pid,
3330   - (rw & WRITE) ? "WRITE" : "READ",
3331   - (unsigned long long)bio->bi_sector,
3332   - bdevname(bio->bi_bdev,b));
  3321 + /*
  3322 + * If it's a regular read/write or a barrier with data attached,
  3323 + * go through the normal accounting stuff before submission.
  3324 + */
  3325 + if (!bio_empty_barrier(bio)) {
  3326 +
  3327 + BIO_BUG_ON(!bio->bi_size);
  3328 + BIO_BUG_ON(!bio->bi_io_vec);
  3329 +
  3330 + if (rw & WRITE) {
  3331 + count_vm_events(PGPGOUT, count);
  3332 + } else {
  3333 + task_io_account_read(bio->bi_size);
  3334 + count_vm_events(PGPGIN, count);
  3335 + }
  3336 +
  3337 + if (unlikely(block_dump)) {
  3338 + char b[BDEVNAME_SIZE];
  3339 + printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
  3340 + current->comm, current->pid,
  3341 + (rw & WRITE) ? "WRITE" : "READ",
  3342 + (unsigned long long)bio->bi_sector,
  3343 + bdevname(bio->bi_bdev,b));
  3344 + }
3333 3345 }
3334 3346  
3335 3347 generic_make_request(bio);
... ... @@ -3404,6 +3416,14 @@
3404 3416 total_bytes = bio_nbytes = 0;
3405 3417 while ((bio = req->bio) != NULL) {
3406 3418 int nbytes;
  3419 +
  3420 + /*
  3421 + * For an empty barrier request, the low level driver must
  3422 + * store a potential error location in ->sector. We pass
  3423 + * that back up in ->bi_sector.
  3424 + */
  3425 + if (blk_empty_barrier(req))
  3426 + bio->bi_sector = req->sector;
3407 3427  
3408 3428 if (nr_bytes >= bio->bi_size) {
3409 3429 req->bio = bio->bi_next;
... ... @@ -176,13 +176,28 @@
176 176 #define bio_offset(bio) bio_iovec((bio))->bv_offset
177 177 #define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx)
178 178 #define bio_sectors(bio) ((bio)->bi_size >> 9)
179   -#define bio_cur_sectors(bio) (bio_iovec(bio)->bv_len >> 9)
180   -#define bio_data(bio) (page_address(bio_page((bio))) + bio_offset((bio)))
181 179 #define bio_barrier(bio) ((bio)->bi_rw & (1 << BIO_RW_BARRIER))
182 180 #define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNC))
183 181 #define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST))
184 182 #define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD))
185 183 #define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META))
  184 +#define bio_empty_barrier(bio) (bio_barrier(bio) && !(bio)->bi_size)
  185 +
  186 +static inline unsigned int bio_cur_sectors(struct bio *bio)
  187 +{
  188 + if (bio->bi_vcnt)
  189 + return bio_iovec(bio)->bv_len >> 9;
  190 +
  191 + return 0;
  192 +}
  193 +
  194 +static inline void *bio_data(struct bio *bio)
  195 +{
  196 + if (bio->bi_vcnt)
  197 + return page_address(bio_page(bio)) + bio_offset(bio);
  198 +
  199 + return NULL;
  200 +}
186 201  
187 202 /*
188 203 * will die
include/linux/blkdev.h
... ... @@ -540,6 +540,7 @@
540 540 #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER)
541 541 #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA)
542 542 #define blk_bidi_rq(rq) ((rq)->next_rq != NULL)
  543 +#define blk_empty_barrier(rq) (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors)
543 544  
544 545 #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist)
545 546  
... ... @@ -265,6 +265,12 @@
265 265 mempool_t *pool;
266 266  
267 267 /*
  268 + * Data-less bio, nothing to bounce
  269 + */
  270 + if (bio_empty_barrier(*bio_orig))
  271 + return;
  272 +
  273 + /*
268 274 * for non-isa bounce case, just check if the bounce pfn is equal
269 275 * to or bigger than the highest pfn in the system -- in that case,
270 276 * don't waste time iterating over bio segments