Commit fb2dce862d9f9a68e6b9374579056ec9eca02a63
Committed by
Jens Axboe
1 parent
d628eaef31
Exists in
master
and in
7 other branches
Add 'discard' request handling
Some block devices benefit from a hint that they can forget the contents of certain sectors. Add basic support for this to the block core, along with a 'blkdev_issue_discard()' helper function which issues such requests. The caller doesn't get to provide an end_io function, since blkdev_issue_discard() will automatically split the request up into multiple bios if appropriate. Neither does the function wait for completion -- it's expected that callers won't care about when, or even _if_, the request completes. It's only a hint to the device anyway. By definition, the file system doesn't _care_ about these sectors any more. [With feedback from OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> and Jens Axboe <jens.axboe@oracle.com>] Signed-off-by: David Woodhouse <David.Woodhouse@intel.com> Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Showing 6 changed files with 130 additions and 11 deletions Side-by-side Diff
block/blk-barrier.c
... | ... | @@ -315,4 +315,73 @@ |
315 | 315 | return ret; |
316 | 316 | } |
317 | 317 | EXPORT_SYMBOL(blkdev_issue_flush); |
318 | + | |
319 | +static void blkdev_discard_end_io(struct bio *bio, int err) | |
320 | +{ | |
321 | + if (err) { | |
322 | + if (err == -EOPNOTSUPP) | |
323 | + set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); | |
324 | + clear_bit(BIO_UPTODATE, &bio->bi_flags); | |
325 | + } | |
326 | + | |
327 | + bio_put(bio); | |
328 | +} | |
329 | + | |
330 | +/** | |
331 | + * blkdev_issue_discard - queue a discard | |
332 | + * @bdev: blockdev to issue discard for | |
333 | + * @sector: start sector | |
334 | + * @nr_sects: number of sectors to discard | |
335 | + * | |
336 | + * Description: | |
337 | + * Issue a discard request for the sectors in question. Does not wait. | |
338 | + */ | |
339 | +int blkdev_issue_discard(struct block_device *bdev, sector_t sector, | |
340 | + unsigned nr_sects) | |
341 | +{ | |
342 | + struct request_queue *q; | |
343 | + struct bio *bio; | |
344 | + int ret = 0; | |
345 | + | |
346 | + if (bdev->bd_disk == NULL) | |
347 | + return -ENXIO; | |
348 | + | |
349 | + q = bdev_get_queue(bdev); | |
350 | + if (!q) | |
351 | + return -ENXIO; | |
352 | + | |
353 | + if (!q->prepare_discard_fn) | |
354 | + return -EOPNOTSUPP; | |
355 | + | |
356 | + while (nr_sects && !ret) { | |
357 | + bio = bio_alloc(GFP_KERNEL, 0); | |
358 | + if (!bio) | |
359 | + return -ENOMEM; | |
360 | + | |
361 | + bio->bi_end_io = blkdev_discard_end_io; | |
362 | + bio->bi_bdev = bdev; | |
363 | + | |
364 | + bio->bi_sector = sector; | |
365 | + | |
366 | + if (nr_sects > q->max_hw_sectors) { | |
367 | + bio->bi_size = q->max_hw_sectors << 9; | |
368 | + nr_sects -= q->max_hw_sectors; | |
369 | + sector += q->max_hw_sectors; | |
370 | + } else { | |
371 | + bio->bi_size = nr_sects << 9; | |
372 | + nr_sects = 0; | |
373 | + } | |
374 | + bio_get(bio); | |
375 | + submit_bio(WRITE_DISCARD, bio); | |
376 | + | |
377 | + /* Check if it failed immediately */ | |
378 | + if (bio_flagged(bio, BIO_EOPNOTSUPP)) | |
379 | + ret = -EOPNOTSUPP; | |
380 | + else if (!bio_flagged(bio, BIO_UPTODATE)) | |
381 | + ret = -EIO; | |
382 | + bio_put(bio); | |
383 | + } | |
384 | + return ret; | |
385 | +} | |
386 | +EXPORT_SYMBOL(blkdev_issue_discard); |
block/blk-core.c
... | ... | @@ -1079,6 +1079,10 @@ |
1079 | 1079 | */ |
1080 | 1080 | if (unlikely(bio_barrier(bio))) |
1081 | 1081 | req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE); |
1082 | + if (unlikely(bio_discard(bio))) { | |
1083 | + req->cmd_flags |= (REQ_SOFTBARRIER | REQ_DISCARD); | |
1084 | + req->q->prepare_discard_fn(req->q, req); | |
1085 | + } | |
1082 | 1086 | |
1083 | 1087 | if (bio_sync(bio)) |
1084 | 1088 | req->cmd_flags |= REQ_RW_SYNC; |
... | ... | @@ -1095,7 +1099,7 @@ |
1095 | 1099 | static int __make_request(struct request_queue *q, struct bio *bio) |
1096 | 1100 | { |
1097 | 1101 | struct request *req; |
1098 | - int el_ret, nr_sectors, barrier, err; | |
1102 | + int el_ret, nr_sectors, barrier, discard, err; | |
1099 | 1103 | const unsigned short prio = bio_prio(bio); |
1100 | 1104 | const int sync = bio_sync(bio); |
1101 | 1105 | int rw_flags; |
... | ... | @@ -1115,6 +1119,12 @@ |
1115 | 1119 | goto end_io; |
1116 | 1120 | } |
1117 | 1121 | |
1122 | + discard = bio_discard(bio); | |
1123 | + if (unlikely(discard) && !q->prepare_discard_fn) { | |
1124 | + err = -EOPNOTSUPP; | |
1125 | + goto end_io; | |
1126 | + } | |
1127 | + | |
1118 | 1128 | spin_lock_irq(q->queue_lock); |
1119 | 1129 | |
1120 | 1130 | if (unlikely(barrier) || elv_queue_empty(q)) |
... | ... | @@ -1405,7 +1415,8 @@ |
1405 | 1415 | |
1406 | 1416 | if (bio_check_eod(bio, nr_sectors)) |
1407 | 1417 | goto end_io; |
1408 | - if (bio_empty_barrier(bio) && !q->prepare_flush_fn) { | |
1418 | + if ((bio_empty_barrier(bio) && !q->prepare_flush_fn) || | |
1419 | + (bio_discard(bio) && !q->prepare_discard_fn)) { | |
1409 | 1420 | err = -EOPNOTSUPP; |
1410 | 1421 | goto end_io; |
1411 | 1422 | } |
... | ... | @@ -1487,7 +1498,6 @@ |
1487 | 1498 | * go through the normal accounting stuff before submission. |
1488 | 1499 | */ |
1489 | 1500 | if (bio_has_data(bio)) { |
1490 | - | |
1491 | 1501 | if (rw & WRITE) { |
1492 | 1502 | count_vm_events(PGPGOUT, count); |
1493 | 1503 | } else { |
... | ... | @@ -1881,7 +1891,7 @@ |
1881 | 1891 | struct request_queue *q = rq->q; |
1882 | 1892 | unsigned long flags = 0UL; |
1883 | 1893 | |
1884 | - if (bio_has_data(rq->bio)) { | |
1894 | + if (bio_has_data(rq->bio) || blk_discard_rq(rq)) { | |
1885 | 1895 | if (__end_that_request_first(rq, error, nr_bytes)) |
1886 | 1896 | return 1; |
1887 | 1897 | |
... | ... | @@ -1939,7 +1949,7 @@ |
1939 | 1949 | **/ |
1940 | 1950 | int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) |
1941 | 1951 | { |
1942 | - if (bio_has_data(rq->bio) && | |
1952 | + if ((bio_has_data(rq->bio) || blk_discard_rq(rq)) && | |
1943 | 1953 | __end_that_request_first(rq, error, nr_bytes)) |
1944 | 1954 | return 1; |
1945 | 1955 | |
1946 | 1956 | |
... | ... | @@ -2012,12 +2022,14 @@ |
2012 | 2022 | we want BIO_RW_AHEAD (bit 1) to imply REQ_FAILFAST (bit 1). */ |
2013 | 2023 | rq->cmd_flags |= (bio->bi_rw & 3); |
2014 | 2024 | |
2015 | - rq->nr_phys_segments = bio_phys_segments(q, bio); | |
2016 | - rq->nr_hw_segments = bio_hw_segments(q, bio); | |
2025 | + if (bio_has_data(bio)) { | |
2026 | + rq->nr_phys_segments = bio_phys_segments(q, bio); | |
2027 | + rq->nr_hw_segments = bio_hw_segments(q, bio); | |
2028 | + rq->buffer = bio_data(bio); | |
2029 | + } | |
2017 | 2030 | rq->current_nr_sectors = bio_cur_sectors(bio); |
2018 | 2031 | rq->hard_cur_sectors = rq->current_nr_sectors; |
2019 | 2032 | rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio); |
2020 | - rq->buffer = bio_data(bio); | |
2021 | 2033 | rq->data_len = bio->bi_size; |
2022 | 2034 | |
2023 | 2035 | rq->bio = rq->biotail = bio; |
block/blk-settings.c
... | ... | @@ -33,6 +33,23 @@ |
33 | 33 | EXPORT_SYMBOL(blk_queue_prep_rq); |
34 | 34 | |
35 | 35 | /** |
36 | + * blk_queue_set_discard - set a discard_sectors function for queue | |
37 | + * @q: queue | |
38 | + * @dfn: prepare_discard function | |
39 | + * | |
40 | + * It's possible for a queue to register a discard callback which is used | |
41 | + * to transform a discard request into the appropriate type for the | |
42 | + * hardware. If none is registered, then discard requests are failed | |
43 | + * with %EOPNOTSUPP. | |
44 | + * | |
45 | + */ | |
46 | +void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn) | |
47 | +{ | |
48 | + q->prepare_discard_fn = dfn; | |
49 | +} | |
50 | +EXPORT_SYMBOL(blk_queue_set_discard); | |
51 | + | |
52 | +/** | |
36 | 53 | * blk_queue_merge_bvec - set a merge_bvec function for queue |
37 | 54 | * @q: queue |
38 | 55 | * @mbfn: merge_bvec_fn |
include/linux/bio.h
... | ... | @@ -149,6 +149,8 @@ |
149 | 149 | * bit 2 -- barrier |
150 | 150 | * bit 3 -- fail fast, don't want low level driver retries |
151 | 151 | * bit 4 -- synchronous I/O hint: the block layer will unplug immediately |
152 | + * bit 5 -- metadata request | |
153 | + * bit 6 -- discard sectors | |
152 | 154 | */ |
153 | 155 | #define BIO_RW 0 /* Must match RW in req flags (blkdev.h) */ |
154 | 156 | #define BIO_RW_AHEAD 1 /* Must match FAILFAST in req flags */ |
... | ... | @@ -156,6 +158,7 @@ |
156 | 158 | #define BIO_RW_FAILFAST 3 |
157 | 159 | #define BIO_RW_SYNC 4 |
158 | 160 | #define BIO_RW_META 5 |
161 | +#define BIO_RW_DISCARD 6 | |
159 | 162 | |
160 | 163 | /* |
161 | 164 | * upper 16 bits of bi_rw define the io priority of this bio |
162 | 165 | |
... | ... | @@ -186,13 +189,14 @@ |
186 | 189 | #define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD)) |
187 | 190 | #define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META)) |
188 | 191 | #define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio)) |
192 | +#define bio_discard(bio) ((bio)->bi_rw & (1 << BIO_RW_DISCARD)) | |
189 | 193 | |
190 | 194 | static inline unsigned int bio_cur_sectors(struct bio *bio) |
191 | 195 | { |
192 | 196 | if (bio->bi_vcnt) |
193 | 197 | return bio_iovec(bio)->bv_len >> 9; |
194 | - | |
195 | - return 0; | |
198 | + else /* dataless requests such as discard */ | |
199 | + return bio->bi_size >> 9; | |
196 | 200 | } |
197 | 201 | |
198 | 202 | static inline void *bio_data(struct bio *bio) |
include/linux/blkdev.h
... | ... | @@ -89,6 +89,7 @@ |
89 | 89 | enum rq_flag_bits { |
90 | 90 | __REQ_RW, /* not set, read. set, write */ |
91 | 91 | __REQ_FAILFAST, /* no low level driver retries */ |
92 | + __REQ_DISCARD, /* request to discard sectors */ | |
92 | 93 | __REQ_SORTED, /* elevator knows about this request */ |
93 | 94 | __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ |
94 | 95 | __REQ_HARDBARRIER, /* may not be passed by drive either */ |
... | ... | @@ -111,6 +112,7 @@ |
111 | 112 | }; |
112 | 113 | |
113 | 114 | #define REQ_RW (1 << __REQ_RW) |
115 | +#define REQ_DISCARD (1 << __REQ_DISCARD) | |
114 | 116 | #define REQ_FAILFAST (1 << __REQ_FAILFAST) |
115 | 117 | #define REQ_SORTED (1 << __REQ_SORTED) |
116 | 118 | #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) |
... | ... | @@ -252,6 +254,7 @@ |
252 | 254 | typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); |
253 | 255 | typedef int (prep_rq_fn) (struct request_queue *, struct request *); |
254 | 256 | typedef void (unplug_fn) (struct request_queue *); |
257 | +typedef int (prepare_discard_fn) (struct request_queue *, struct request *); | |
255 | 258 | |
256 | 259 | struct bio_vec; |
257 | 260 | struct bvec_merge_data { |
... | ... | @@ -307,6 +310,7 @@ |
307 | 310 | make_request_fn *make_request_fn; |
308 | 311 | prep_rq_fn *prep_rq_fn; |
309 | 312 | unplug_fn *unplug_fn; |
313 | + prepare_discard_fn *prepare_discard_fn; | |
310 | 314 | merge_bvec_fn *merge_bvec_fn; |
311 | 315 | prepare_flush_fn *prepare_flush_fn; |
312 | 316 | softirq_done_fn *softirq_done_fn; |
... | ... | @@ -546,6 +550,7 @@ |
546 | 550 | #define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED) |
547 | 551 | #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) |
548 | 552 | #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) |
553 | +#define blk_discard_rq(rq) ((rq)->cmd_flags & REQ_DISCARD) | |
549 | 554 | #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) |
550 | 555 | #define blk_empty_barrier(rq) (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors) |
551 | 556 | /* rq->queuelist of dequeued request must be list_empty() */ |
... | ... | @@ -796,6 +801,7 @@ |
796 | 801 | extern void blk_queue_dma_alignment(struct request_queue *, int); |
797 | 802 | extern void blk_queue_update_dma_alignment(struct request_queue *, int); |
798 | 803 | extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); |
804 | +extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *); | |
799 | 805 | extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); |
800 | 806 | extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *); |
801 | 807 | extern int blk_do_ordered(struct request_queue *, struct request **); |
... | ... | @@ -837,6 +843,16 @@ |
837 | 843 | } |
838 | 844 | |
839 | 845 | extern int blkdev_issue_flush(struct block_device *, sector_t *); |
846 | +extern int blkdev_issue_discard(struct block_device *, sector_t sector, | |
847 | + unsigned nr_sects); | |
848 | + | |
849 | +static inline int sb_issue_discard(struct super_block *sb, | |
850 | + sector_t block, unsigned nr_blocks) | |
851 | +{ | |
852 | + block <<= (sb->s_blocksize_bits - 9); | |
853 | + nr_blocks <<= (sb->s_blocksize_bits - 9); | |
854 | + return blkdev_issue_discard(sb->s_bdev, block, nr_blocks); | |
855 | +} | |
840 | 856 | |
841 | 857 | /* |
842 | 858 | * command filter functions |
include/linux/fs.h
... | ... | @@ -86,7 +86,8 @@ |
86 | 86 | #define READ_META (READ | (1 << BIO_RW_META)) |
87 | 87 | #define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC)) |
88 | 88 | #define SWRITE_SYNC (SWRITE | (1 << BIO_RW_SYNC)) |
89 | -#define WRITE_BARRIER ((1 << BIO_RW) | (1 << BIO_RW_BARRIER)) | |
89 | +#define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER)) | |
90 | +#define WRITE_DISCARD (WRITE | (1 << BIO_RW_DISCARD)) | |
90 | 91 | |
91 | 92 | #define SEL_IN 1 |
92 | 93 | #define SEL_OUT 2 |