Blame view
block/blk-barrier.c
10.6 KB
86db1e297 block: continue l... |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
/* * Functions related to barrier IO handling */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/bio.h> #include <linux/blkdev.h> #include "blk.h" /** * blk_queue_ordered - does this queue support ordered writes * @q: the request queue * @ordered: one of QUEUE_ORDERED_* * @prepare_flush_fn: rq setup helper for cache flush ordered writes * * Description: * For journalled file systems, doing ordered writes on a commit * block instead of explicitly doing wait_on_buffer (which is bad * for performance) can be a big win. Block drivers supporting this * feature should call this function and indicate so. * **/ int blk_queue_ordered(struct request_queue *q, unsigned ordered, prepare_flush_fn *prepare_flush_fn) { |
313e42999 block: reorganize... |
27 28 |
if (!prepare_flush_fn && (ordered & (QUEUE_ORDERED_DO_PREFLUSH | QUEUE_ORDERED_DO_POSTFLUSH))) { |
24c03d47d block: remove rem... |
29 30 |
printk(KERN_ERR "%s: prepare_flush_fn required ", __func__); |
86db1e297 block: continue l... |
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
return -EINVAL; } if (ordered != QUEUE_ORDERED_NONE && ordered != QUEUE_ORDERED_DRAIN && ordered != QUEUE_ORDERED_DRAIN_FLUSH && ordered != QUEUE_ORDERED_DRAIN_FUA && ordered != QUEUE_ORDERED_TAG && ordered != QUEUE_ORDERED_TAG_FLUSH && ordered != QUEUE_ORDERED_TAG_FUA) { printk(KERN_ERR "blk_queue_ordered: bad value %d ", ordered); return -EINVAL; } q->ordered = ordered; q->next_ordered = ordered; q->prepare_flush_fn = prepare_flush_fn; return 0; } |
86db1e297 block: continue l... |
52 53 54 55 56 |
EXPORT_SYMBOL(blk_queue_ordered); /* * Cache flushing for ordered writes handling */ |
6f6a036e6 block/blk-barrier... |
57 |
unsigned blk_ordered_cur_seq(struct request_queue *q) |
86db1e297 block: continue l... |
58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
{ if (!q->ordseq) return 0; return 1 << ffz(q->ordseq); } unsigned blk_ordered_req_seq(struct request *rq) { struct request_queue *q = rq->q; BUG_ON(q->ordseq == 0); if (rq == &q->pre_flush_rq) return QUEUE_ORDSEQ_PREFLUSH; if (rq == &q->bar_rq) return QUEUE_ORDSEQ_BAR; if (rq == &q->post_flush_rq) return QUEUE_ORDSEQ_POSTFLUSH; /* * !fs requests don't need to follow barrier ordering. Always * put them at the front. This fixes the following deadlock. * * http://thread.gmane.org/gmane.linux.kernel/537473 */ if (!blk_fs_request(rq)) return QUEUE_ORDSEQ_DRAIN; if ((rq->cmd_flags & REQ_ORDERED_COLOR) == (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR)) return QUEUE_ORDSEQ_DRAIN; else return QUEUE_ORDSEQ_DONE; } |
8f11b3e99 block: make barri... |
92 |
/*
 * Mark the stage bits in @seq as completed, folding @error into the
 * sticky per-sequence error (first error wins).  When all stages are
 * done, end the original barrier request and reset the sequence.
 *
 * Returns true if the whole ordered sequence has completed (the
 * original barrier request has been ended), false otherwise.
 */
bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
{
	struct request *rq;

	/* Remember only the first error seen during the sequence. */
	if (error && !q->orderr)
		q->orderr = error;

	/* Each stage may complete at most once. */
	BUG_ON(q->ordseq & seq);
	q->ordseq |= seq;

	if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
		return false;

	/*
	 * Okay, sequence complete.
	 */
	q->ordseq = 0;
	rq = q->orig_bar_rq;
	__blk_end_request_all(rq, q->orderr);
	return true;
}

/* Completion callback for the PREFLUSH proxy request. */
static void pre_flush_end_io(struct request *rq, int error)
{
	elv_completed_request(rq->q, rq);
	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
}

/* Completion callback for the BAR (barrier write) proxy request. */
static void bar_end_io(struct request *rq, int error)
{
	elv_completed_request(rq->q, rq);
	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
}

/* Completion callback for the POSTFLUSH proxy request. */
static void post_flush_end_io(struct request *rq, int error)
{
	elv_completed_request(rq->q, rq);
	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
}

/*
 * Initialize the queue-embedded pre- or post-flush proxy request
 * (selected by @which), let the driver prepare it via
 * q->prepare_flush_fn, and insert it at the front of the dispatch
 * queue so it runs ahead of already-queued requests.
 */
static void queue_flush(struct request_queue *q, unsigned which)
{
	struct request *rq;
	rq_end_io_fn *end_io;

	if (which == QUEUE_ORDERED_DO_PREFLUSH) {
		rq = &q->pre_flush_rq;
		end_io = pre_flush_end_io;
	} else {
		rq = &q->post_flush_rq;
		end_io = post_flush_end_io;
	}

	blk_rq_init(q, rq);
	rq->cmd_flags = REQ_HARDBARRIER;
	rq->rq_disk = q->bar_rq.rq_disk;
	rq->end_io = end_io;
	q->prepare_flush_fn(q, rq);

	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
}
8f11b3e99 block: make barri... |
151 |
/*
 * Begin an ordered sequence for the barrier request *@rqp: latch the
 * queue's ordered mode, stash the original request, and front-insert
 * the needed proxy requests (postflush, bar, preflush — in reverse
 * order, since each insert goes to the head).  Stages not required by
 * the current mode are accumulated in @skip and completed immediately.
 *
 * On return, *@rqp points at the first proxy request to dispatch (or
 * NULL when dispatch must wait, e.g. draining in-flight requests).
 * Returns false when the whole sequence completed right away (empty
 * barrier with nothing to do), telling the elevator the request is
 * gone; true otherwise.
 */
static inline bool start_ordered(struct request_queue *q, struct request **rqp)
{
	struct request *rq = *rqp;
	unsigned skip = 0;	/* stages not needed by this sequence */

	q->orderr = 0;
	q->ordered = q->next_ordered;
	q->ordseq |= QUEUE_ORDSEQ_STARTED;

	/*
	 * For an empty barrier, there's no actual BAR request, which
	 * in turn makes POSTFLUSH unnecessary.  Mask them off.
	 */
	if (!blk_rq_sectors(rq)) {
		q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
				QUEUE_ORDERED_DO_POSTFLUSH);
		/*
		 * Empty barrier on a write-through device w/ ordered
		 * tag has no command to issue and without any command
		 * to issue, ordering by tag can't be used.  Drain
		 * instead.
		 */
		if ((q->ordered & QUEUE_ORDERED_BY_TAG) &&
		    !(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) {
			q->ordered &= ~QUEUE_ORDERED_BY_TAG;
			q->ordered |= QUEUE_ORDERED_BY_DRAIN;
		}
	}

	/* stash away the original request */
	blk_dequeue_request(rq);
	q->orig_bar_rq = rq;
	rq = NULL;

	/*
	 * Queue ordered sequence.  As we stack them at the head, we
	 * need to queue in reverse order.  Note that we rely on that
	 * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
	 * request gets in between the ordered sequence.
	 */
	if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) {
		queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
		rq = &q->post_flush_rq;
	} else
		skip |= QUEUE_ORDSEQ_POSTFLUSH;

	if (q->ordered & QUEUE_ORDERED_DO_BAR) {
		rq = &q->bar_rq;

		/* initialize proxy request and queue it */
		blk_rq_init(q, rq);
		if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
			rq->cmd_flags |= REQ_RW;
		if (q->ordered & QUEUE_ORDERED_DO_FUA)
			rq->cmd_flags |= REQ_FUA;
		init_request_from_bio(rq, q->orig_bar_rq->bio);
		rq->end_io = bar_end_io;

		elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
	} else
		skip |= QUEUE_ORDSEQ_BAR;

	if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) {
		queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH);
		rq = &q->pre_flush_rq;
	} else
		skip |= QUEUE_ORDSEQ_PREFLUSH;

	/* Drain mode must wait until all in-flight requests finish. */
	if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && queue_in_flight(q))
		rq = NULL;
	else
		skip |= QUEUE_ORDSEQ_DRAIN;

	*rqp = rq;

	/*
	 * Complete skipped sequences.  If whole sequence is complete,
	 * return false to tell elevator that this request is gone.
	 */
	return !blk_ordered_complete_seq(q, skip, 0);
}
8f11b3e99 block: make barri... |
230 |
/*
 * Gatekeeper called at dispatch time: decide what to do with *@rqp
 * with respect to barrier ordering.  May start a new ordered sequence,
 * hold the request back (*@rqp set to NULL), or let it pass through.
 *
 * Returns false only when the request has been terminated here
 * (barrier on a queue without ordering support); true otherwise.
 */
bool blk_do_ordered(struct request_queue *q, struct request **rqp)
{
	struct request *rq = *rqp;
	const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);

	if (!q->ordseq) {
		/* No sequence in progress: plain requests pass through. */
		if (!is_barrier)
			return true;

		if (q->next_ordered != QUEUE_ORDERED_NONE)
			return start_ordered(q, rqp);
		else {
			/*
			 * Queue ordering not supported.  Terminate
			 * with prejudice.
			 */
			blk_dequeue_request(rq);
			__blk_end_request_all(rq, -EOPNOTSUPP);
			*rqp = NULL;
			return false;
		}
	}

	/*
	 * Ordered sequence in progress
	 */

	/* Special requests are not subject to ordering rules. */
	if (!blk_fs_request(rq) &&
	    rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
		return true;

	if (q->ordered & QUEUE_ORDERED_BY_TAG) {
		/* Ordered by tag.  Blocking the next barrier is enough. */
		if (is_barrier && rq != &q->bar_rq)
			*rqp = NULL;
	} else {
		/* Ordered by draining.  Wait for turn. */
		WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
		if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
			*rqp = NULL;
	}

	return true;
}

/*
 * bio completion for the zero-length barrier issued by
 * blkdev_issue_flush(): record failure/EOPNOTSUPP in the bio flags and
 * wake the waiter parked on the completion in ->bi_private.
 */
static void bio_end_empty_barrier(struct bio *bio, int err)
{
	if (err) {
		if (err == -EOPNOTSUPP)
			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
		clear_bit(BIO_UPTODATE, &bio->bi_flags);
	}

	complete(bio->bi_private);
}

/**
 * blkdev_issue_flush - queue a flush
 * @bdev:	blockdev to issue flush for
 * @error_sector:	error sector
 *
 * Description:
 *    Issue a flush for the block device in question. Caller can supply
 *    room for storing the error offset in case of a flush error, if they
 *    wish to.  Returns 0 on success, -ENXIO if the device/queue is gone,
 *    -EOPNOTSUPP if barriers are unsupported, -EIO on flush failure.
 */
int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
{
	DECLARE_COMPLETION_ONSTACK(wait);
	struct request_queue *q;
	struct bio *bio;
	int ret;

	if (bdev->bd_disk == NULL)
		return -ENXIO;

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;

	/* Zero-length barrier bio; completion wakes us via &wait. */
	bio = bio_alloc(GFP_KERNEL, 0);
	bio->bi_end_io = bio_end_empty_barrier;
	bio->bi_private = &wait;
	bio->bi_bdev = bdev;
	submit_bio(WRITE_BARRIER, bio);

	wait_for_completion(&wait);

	/*
	 * The driver must store the error location in ->bi_sector, if
	 * it supports it. For non-stacked drivers, this should be copied
	 * from blk_rq_pos(rq).
	 *
	 * NOTE(review): bi_sector is reported unconditionally; whether a
	 * given driver actually fills it in cannot be verified here.
	 */
	if (error_sector)
		*error_sector = bio->bi_sector;

	ret = 0;
	if (bio_flagged(bio, BIO_EOPNOTSUPP))
		ret = -EOPNOTSUPP;
	else if (!bio_flagged(bio, BIO_UPTODATE))
		ret = -EIO;

	bio_put(bio);
	return ret;
}
EXPORT_SYMBOL(blkdev_issue_flush);
fb2dce862 Add 'discard' req... |
336 337 338 339 340 341 342 343 |
static void blkdev_discard_end_io(struct bio *bio, int err) { if (err) { if (err == -EOPNOTSUPP) set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); clear_bit(BIO_UPTODATE, &bio->bi_flags); } |
746cd1e7e block: use blkdev... |
344 345 |
if (bio->bi_private) complete(bio->bi_private); |
c15227de1 block: use normal... |
346 |
__free_page(bio_page(bio)); |
746cd1e7e block: use blkdev... |
347 |
|
fb2dce862 Add 'discard' req... |
348 349 350 351 352 353 354 355 |
bio_put(bio); } /** * blkdev_issue_discard - queue a discard * @bdev: blockdev to issue discard for * @sector: start sector * @nr_sects: number of sectors to discard |
3e6053d76 block: adjust blk... |
356 |
* @gfp_mask: memory allocation flags (for bio_alloc) |
746cd1e7e block: use blkdev... |
357 |
* @flags: DISCARD_FL_* flags to control behaviour |
fb2dce862 Add 'discard' req... |
358 359 |
* * Description: |
746cd1e7e block: use blkdev... |
360 |
* Issue a discard request for the sectors in question. |
fb2dce862 Add 'discard' req... |
361 |
*/ |
746cd1e7e block: use blkdev... |
362 363 |
int blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, int flags) |
fb2dce862 Add 'discard' req... |
364 |
{ |
746cd1e7e block: use blkdev... |
365 366 367 368 |
DECLARE_COMPLETION_ONSTACK(wait); struct request_queue *q = bdev_get_queue(bdev); int type = flags & DISCARD_FL_BARRIER ? DISCARD_BARRIER : DISCARD_NOBARRIER; |
c15227de1 block: use normal... |
369 370 |
struct bio *bio; struct page *page; |
fb2dce862 Add 'discard' req... |
371 |
int ret = 0; |
fb2dce862 Add 'discard' req... |
372 373 |
if (!q) return -ENXIO; |
c15227de1 block: use normal... |
374 |
if (!blk_queue_discard(q)) |
fb2dce862 Add 'discard' req... |
375 376 377 |
return -EOPNOTSUPP; while (nr_sects && !ret) { |
c15227de1 block: use normal... |
378 |
unsigned int sector_size = q->limits.logical_block_size; |
67efc9258 block: allow larg... |
379 380 |
unsigned int max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9); |
fb2dce862 Add 'discard' req... |
381 |
|
c15227de1 block: use normal... |
382 383 384 385 |
bio = bio_alloc(gfp_mask, 1); if (!bio) goto out; bio->bi_sector = sector; |
fb2dce862 Add 'discard' req... |
386 387 |
bio->bi_end_io = blkdev_discard_end_io; bio->bi_bdev = bdev; |
746cd1e7e block: use blkdev... |
388 389 |
if (flags & DISCARD_FL_WAIT) bio->bi_private = &wait; |
fb2dce862 Add 'discard' req... |
390 |
|
c15227de1 block: use normal... |
391 392 393 394 395 396 397 398 399 400 |
/* * Add a zeroed one-sector payload as that's what * our current implementations need. If we'll ever need * more the interface will need revisiting. */ page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!page) goto out_free_bio; if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size) goto out_free_page; |
fb2dce862 Add 'discard' req... |
401 |
|
c15227de1 block: use normal... |
402 403 404 405 406 |
/* * And override the bio size - the way discard works we * touch many more blocks on disk than the actual payload * length. */ |
67efc9258 block: allow larg... |
407 408 409 410 |
if (nr_sects > max_discard_sectors) { bio->bi_size = max_discard_sectors << 9; nr_sects -= max_discard_sectors; sector += max_discard_sectors; |
fb2dce862 Add 'discard' req... |
411 412 413 414 |
} else { bio->bi_size = nr_sects << 9; nr_sects = 0; } |
746cd1e7e block: use blkdev... |
415 |
|
fb2dce862 Add 'discard' req... |
416 |
bio_get(bio); |
746cd1e7e block: use blkdev... |
417 418 419 420 |
submit_bio(type, bio); if (flags & DISCARD_FL_WAIT) wait_for_completion(&wait); |
fb2dce862 Add 'discard' req... |
421 |
|
fb2dce862 Add 'discard' req... |
422 423 424 425 426 427 428 |
if (bio_flagged(bio, BIO_EOPNOTSUPP)) ret = -EOPNOTSUPP; else if (!bio_flagged(bio, BIO_UPTODATE)) ret = -EIO; bio_put(bio); } return ret; |
c15227de1 block: use normal... |
429 430 431 432 433 434 |
out_free_page: __free_page(page); out_free_bio: bio_put(bio); out: return -ENOMEM; |
fb2dce862 Add 'discard' req... |
435 436 |
} EXPORT_SYMBOL(blkdev_issue_discard); |