Commit 49cac01e1fa74174d72adb0e872504a7fefd7c01
1 parent a237c1c5bc
block: make unplug timer trace event correspond to the schedule() unplug
It's a pretty close match to what we had before: the timer triggering meant that nobody unplugged the plug in due time, and in the new scheme that corresponds closely to what the schedule() unplug now is. It's essentially the difference between an explicit unplug (IO unplug) and an implicit unplug (timer unplug, where we scheduled with pending IO still queued).

Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
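To make the explicit/implicit distinction concrete, here is a minimal sketch (not part of this commit; the submit_batch() helper and its bio array are purely illustrative) of how a submitter drives the on-stack plug that these unplug trace events describe:

#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/sched.h>

/* Illustrative only: shows where the two unplug variants fire. */
static void submit_batch(struct bio **bios, int nr)
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);	/* queue IO on the per-task plug list */

	for (i = 0; i < nr; i++)
		submit_bio(READ, bios[i]);

	/*
	 * Explicit unplug: the submitter finishes its own batch, so the
	 * plug list is flushed here and traced as an IO unplug.
	 */
	blk_finish_plug(&plug);
}

/*
 * Implicit unplug: if the task blocks (e.g. in io_schedule()) while its
 * plug list is still non-empty, the scheduler flushes the list on its
 * behalf. That case used to show up as the unplug timer event; it now
 * corresponds to the schedule() unplug.
 */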
Showing 3 changed files with 31 additions and 18 deletions
block/blk-core.c
1 | /* | 1 | /* |
2 | * Copyright (C) 1991, 1992 Linus Torvalds | 2 | * Copyright (C) 1991, 1992 Linus Torvalds |
3 | * Copyright (C) 1994, Karl Keyte: Added support for disk statistics | 3 | * Copyright (C) 1994, Karl Keyte: Added support for disk statistics |
4 | * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE | 4 | * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE |
5 | * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de> | 5 | * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de> |
6 | * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> | 6 | * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> |
7 | * - July2000 | 7 | * - July2000 |
8 | * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001 | 8 | * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001 |
9 | */ | 9 | */ |
10 | 10 | ||
11 | /* | 11 | /* |
12 | * This handles all read/write requests to block devices | 12 | * This handles all read/write requests to block devices |
13 | */ | 13 | */ |
14 | #include <linux/kernel.h> | 14 | #include <linux/kernel.h> |
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/backing-dev.h> | 16 | #include <linux/backing-dev.h> |
17 | #include <linux/bio.h> | 17 | #include <linux/bio.h> |
18 | #include <linux/blkdev.h> | 18 | #include <linux/blkdev.h> |
19 | #include <linux/highmem.h> | 19 | #include <linux/highmem.h> |
20 | #include <linux/mm.h> | 20 | #include <linux/mm.h> |
21 | #include <linux/kernel_stat.h> | 21 | #include <linux/kernel_stat.h> |
22 | #include <linux/string.h> | 22 | #include <linux/string.h> |
23 | #include <linux/init.h> | 23 | #include <linux/init.h> |
24 | #include <linux/completion.h> | 24 | #include <linux/completion.h> |
25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
26 | #include <linux/swap.h> | 26 | #include <linux/swap.h> |
27 | #include <linux/writeback.h> | 27 | #include <linux/writeback.h> |
28 | #include <linux/task_io_accounting_ops.h> | 28 | #include <linux/task_io_accounting_ops.h> |
29 | #include <linux/fault-inject.h> | 29 | #include <linux/fault-inject.h> |
30 | #include <linux/list_sort.h> | 30 | #include <linux/list_sort.h> |
31 | 31 | ||
32 | #define CREATE_TRACE_POINTS | 32 | #define CREATE_TRACE_POINTS |
33 | #include <trace/events/block.h> | 33 | #include <trace/events/block.h> |
34 | 34 | ||
35 | #include "blk.h" | 35 | #include "blk.h" |
36 | 36 | ||
37 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); | 37 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); |
38 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); | 38 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); |
39 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); | 39 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); |
40 | 40 | ||
41 | static int __make_request(struct request_queue *q, struct bio *bio); | 41 | static int __make_request(struct request_queue *q, struct bio *bio); |
42 | 42 | ||
43 | /* | 43 | /* |
44 | * For the allocated request tables | 44 | * For the allocated request tables |
45 | */ | 45 | */ |
46 | static struct kmem_cache *request_cachep; | 46 | static struct kmem_cache *request_cachep; |
47 | 47 | ||
48 | /* | 48 | /* |
49 | * For queue allocation | 49 | * For queue allocation |
50 | */ | 50 | */ |
51 | struct kmem_cache *blk_requestq_cachep; | 51 | struct kmem_cache *blk_requestq_cachep; |
52 | 52 | ||
53 | /* | 53 | /* |
54 | * Controlling structure to kblockd | 54 | * Controlling structure to kblockd |
55 | */ | 55 | */ |
56 | static struct workqueue_struct *kblockd_workqueue; | 56 | static struct workqueue_struct *kblockd_workqueue; |
57 | 57 | ||
58 | static void drive_stat_acct(struct request *rq, int new_io) | 58 | static void drive_stat_acct(struct request *rq, int new_io) |
59 | { | 59 | { |
60 | struct hd_struct *part; | 60 | struct hd_struct *part; |
61 | int rw = rq_data_dir(rq); | 61 | int rw = rq_data_dir(rq); |
62 | int cpu; | 62 | int cpu; |
63 | 63 | ||
64 | if (!blk_do_io_stat(rq)) | 64 | if (!blk_do_io_stat(rq)) |
65 | return; | 65 | return; |
66 | 66 | ||
67 | cpu = part_stat_lock(); | 67 | cpu = part_stat_lock(); |
68 | 68 | ||
69 | if (!new_io) { | 69 | if (!new_io) { |
70 | part = rq->part; | 70 | part = rq->part; |
71 | part_stat_inc(cpu, part, merges[rw]); | 71 | part_stat_inc(cpu, part, merges[rw]); |
72 | } else { | 72 | } else { |
73 | part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq)); | 73 | part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq)); |
74 | if (!hd_struct_try_get(part)) { | 74 | if (!hd_struct_try_get(part)) { |
75 | /* | 75 | /* |
76 | * The partition is already being removed, | 76 | * The partition is already being removed, |
77 | * the request will be accounted on the disk only | 77 | * the request will be accounted on the disk only |
78 | * | 78 | * |
79 | * We take a reference on disk->part0 although that | 79 | * We take a reference on disk->part0 although that |
80 | * partition will never be deleted, so we can treat | 80 | * partition will never be deleted, so we can treat |
81 | * it as any other partition. | 81 | * it as any other partition. |
82 | */ | 82 | */ |
83 | part = &rq->rq_disk->part0; | 83 | part = &rq->rq_disk->part0; |
84 | hd_struct_get(part); | 84 | hd_struct_get(part); |
85 | } | 85 | } |
86 | part_round_stats(cpu, part); | 86 | part_round_stats(cpu, part); |
87 | part_inc_in_flight(part, rw); | 87 | part_inc_in_flight(part, rw); |
88 | rq->part = part; | 88 | rq->part = part; |
89 | } | 89 | } |
90 | 90 | ||
91 | part_stat_unlock(); | 91 | part_stat_unlock(); |
92 | } | 92 | } |
93 | 93 | ||
94 | void blk_queue_congestion_threshold(struct request_queue *q) | 94 | void blk_queue_congestion_threshold(struct request_queue *q) |
95 | { | 95 | { |
96 | int nr; | 96 | int nr; |
97 | 97 | ||
98 | nr = q->nr_requests - (q->nr_requests / 8) + 1; | 98 | nr = q->nr_requests - (q->nr_requests / 8) + 1; |
99 | if (nr > q->nr_requests) | 99 | if (nr > q->nr_requests) |
100 | nr = q->nr_requests; | 100 | nr = q->nr_requests; |
101 | q->nr_congestion_on = nr; | 101 | q->nr_congestion_on = nr; |
102 | 102 | ||
103 | nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1; | 103 | nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1; |
104 | if (nr < 1) | 104 | if (nr < 1) |
105 | nr = 1; | 105 | nr = 1; |
106 | q->nr_congestion_off = nr; | 106 | q->nr_congestion_off = nr; |
107 | } | 107 | } |
108 | 108 | ||
109 | /** | 109 | /** |
110 | * blk_get_backing_dev_info - get the address of a queue's backing_dev_info | 110 | * blk_get_backing_dev_info - get the address of a queue's backing_dev_info |
111 | * @bdev: device | 111 | * @bdev: device |
112 | * | 112 | * |
113 | * Locates the passed device's request queue and returns the address of its | 113 | * Locates the passed device's request queue and returns the address of its |
114 | * backing_dev_info | 114 | * backing_dev_info |
115 | * | 115 | * |
116 | * Will return NULL if the request queue cannot be located. | 116 | * Will return NULL if the request queue cannot be located. |
117 | */ | 117 | */ |
118 | struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) | 118 | struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) |
119 | { | 119 | { |
120 | struct backing_dev_info *ret = NULL; | 120 | struct backing_dev_info *ret = NULL; |
121 | struct request_queue *q = bdev_get_queue(bdev); | 121 | struct request_queue *q = bdev_get_queue(bdev); |
122 | 122 | ||
123 | if (q) | 123 | if (q) |
124 | ret = &q->backing_dev_info; | 124 | ret = &q->backing_dev_info; |
125 | return ret; | 125 | return ret; |
126 | } | 126 | } |
127 | EXPORT_SYMBOL(blk_get_backing_dev_info); | 127 | EXPORT_SYMBOL(blk_get_backing_dev_info); |
128 | 128 | ||
129 | void blk_rq_init(struct request_queue *q, struct request *rq) | 129 | void blk_rq_init(struct request_queue *q, struct request *rq) |
130 | { | 130 | { |
131 | memset(rq, 0, sizeof(*rq)); | 131 | memset(rq, 0, sizeof(*rq)); |
132 | 132 | ||
133 | INIT_LIST_HEAD(&rq->queuelist); | 133 | INIT_LIST_HEAD(&rq->queuelist); |
134 | INIT_LIST_HEAD(&rq->timeout_list); | 134 | INIT_LIST_HEAD(&rq->timeout_list); |
135 | rq->cpu = -1; | 135 | rq->cpu = -1; |
136 | rq->q = q; | 136 | rq->q = q; |
137 | rq->__sector = (sector_t) -1; | 137 | rq->__sector = (sector_t) -1; |
138 | INIT_HLIST_NODE(&rq->hash); | 138 | INIT_HLIST_NODE(&rq->hash); |
139 | RB_CLEAR_NODE(&rq->rb_node); | 139 | RB_CLEAR_NODE(&rq->rb_node); |
140 | rq->cmd = rq->__cmd; | 140 | rq->cmd = rq->__cmd; |
141 | rq->cmd_len = BLK_MAX_CDB; | 141 | rq->cmd_len = BLK_MAX_CDB; |
142 | rq->tag = -1; | 142 | rq->tag = -1; |
143 | rq->ref_count = 1; | 143 | rq->ref_count = 1; |
144 | rq->start_time = jiffies; | 144 | rq->start_time = jiffies; |
145 | set_start_time_ns(rq); | 145 | set_start_time_ns(rq); |
146 | rq->part = NULL; | 146 | rq->part = NULL; |
147 | } | 147 | } |
148 | EXPORT_SYMBOL(blk_rq_init); | 148 | EXPORT_SYMBOL(blk_rq_init); |
149 | 149 | ||
150 | static void req_bio_endio(struct request *rq, struct bio *bio, | 150 | static void req_bio_endio(struct request *rq, struct bio *bio, |
151 | unsigned int nbytes, int error) | 151 | unsigned int nbytes, int error) |
152 | { | 152 | { |
153 | if (error) | 153 | if (error) |
154 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | 154 | clear_bit(BIO_UPTODATE, &bio->bi_flags); |
155 | else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) | 155 | else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) |
156 | error = -EIO; | 156 | error = -EIO; |
157 | 157 | ||
158 | if (unlikely(nbytes > bio->bi_size)) { | 158 | if (unlikely(nbytes > bio->bi_size)) { |
159 | printk(KERN_ERR "%s: want %u bytes done, %u left\n", | 159 | printk(KERN_ERR "%s: want %u bytes done, %u left\n", |
160 | __func__, nbytes, bio->bi_size); | 160 | __func__, nbytes, bio->bi_size); |
161 | nbytes = bio->bi_size; | 161 | nbytes = bio->bi_size; |
162 | } | 162 | } |
163 | 163 | ||
164 | if (unlikely(rq->cmd_flags & REQ_QUIET)) | 164 | if (unlikely(rq->cmd_flags & REQ_QUIET)) |
165 | set_bit(BIO_QUIET, &bio->bi_flags); | 165 | set_bit(BIO_QUIET, &bio->bi_flags); |
166 | 166 | ||
167 | bio->bi_size -= nbytes; | 167 | bio->bi_size -= nbytes; |
168 | bio->bi_sector += (nbytes >> 9); | 168 | bio->bi_sector += (nbytes >> 9); |
169 | 169 | ||
170 | if (bio_integrity(bio)) | 170 | if (bio_integrity(bio)) |
171 | bio_integrity_advance(bio, nbytes); | 171 | bio_integrity_advance(bio, nbytes); |
172 | 172 | ||
173 | /* don't actually finish bio if it's part of flush sequence */ | 173 | /* don't actually finish bio if it's part of flush sequence */ |
174 | if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ)) | 174 | if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ)) |
175 | bio_endio(bio, error); | 175 | bio_endio(bio, error); |
176 | } | 176 | } |
177 | 177 | ||
178 | void blk_dump_rq_flags(struct request *rq, char *msg) | 178 | void blk_dump_rq_flags(struct request *rq, char *msg) |
179 | { | 179 | { |
180 | int bit; | 180 | int bit; |
181 | 181 | ||
182 | printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg, | 182 | printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg, |
183 | rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, | 183 | rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, |
184 | rq->cmd_flags); | 184 | rq->cmd_flags); |
185 | 185 | ||
186 | printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n", | 186 | printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n", |
187 | (unsigned long long)blk_rq_pos(rq), | 187 | (unsigned long long)blk_rq_pos(rq), |
188 | blk_rq_sectors(rq), blk_rq_cur_sectors(rq)); | 188 | blk_rq_sectors(rq), blk_rq_cur_sectors(rq)); |
189 | printk(KERN_INFO " bio %p, biotail %p, buffer %p, len %u\n", | 189 | printk(KERN_INFO " bio %p, biotail %p, buffer %p, len %u\n", |
190 | rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq)); | 190 | rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq)); |
191 | 191 | ||
192 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { | 192 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { |
193 | printk(KERN_INFO " cdb: "); | 193 | printk(KERN_INFO " cdb: "); |
194 | for (bit = 0; bit < BLK_MAX_CDB; bit++) | 194 | for (bit = 0; bit < BLK_MAX_CDB; bit++) |
195 | printk("%02x ", rq->cmd[bit]); | 195 | printk("%02x ", rq->cmd[bit]); |
196 | printk("\n"); | 196 | printk("\n"); |
197 | } | 197 | } |
198 | } | 198 | } |
199 | EXPORT_SYMBOL(blk_dump_rq_flags); | 199 | EXPORT_SYMBOL(blk_dump_rq_flags); |
200 | 200 | ||
201 | static void blk_delay_work(struct work_struct *work) | 201 | static void blk_delay_work(struct work_struct *work) |
202 | { | 202 | { |
203 | struct request_queue *q; | 203 | struct request_queue *q; |
204 | 204 | ||
205 | q = container_of(work, struct request_queue, delay_work.work); | 205 | q = container_of(work, struct request_queue, delay_work.work); |
206 | spin_lock_irq(q->queue_lock); | 206 | spin_lock_irq(q->queue_lock); |
207 | __blk_run_queue(q, false); | 207 | __blk_run_queue(q, false); |
208 | spin_unlock_irq(q->queue_lock); | 208 | spin_unlock_irq(q->queue_lock); |
209 | } | 209 | } |
210 | 210 | ||
211 | /** | 211 | /** |
212 | * blk_delay_queue - restart queueing after defined interval | 212 | * blk_delay_queue - restart queueing after defined interval |
213 | * @q: The &struct request_queue in question | 213 | * @q: The &struct request_queue in question |
214 | * @msecs: Delay in msecs | 214 | * @msecs: Delay in msecs |
215 | * | 215 | * |
216 | * Description: | 216 | * Description: |
217 | * Sometimes queueing needs to be postponed for a little while, to allow | 217 | * Sometimes queueing needs to be postponed for a little while, to allow |
218 | * resources to come back. This function will make sure that queueing is | 218 | * resources to come back. This function will make sure that queueing is |
219 | * restarted around the specified time. | 219 | * restarted around the specified time. |
220 | */ | 220 | */ |
221 | void blk_delay_queue(struct request_queue *q, unsigned long msecs) | 221 | void blk_delay_queue(struct request_queue *q, unsigned long msecs) |
222 | { | 222 | { |
223 | schedule_delayed_work(&q->delay_work, msecs_to_jiffies(msecs)); | 223 | schedule_delayed_work(&q->delay_work, msecs_to_jiffies(msecs)); |
224 | } | 224 | } |
225 | EXPORT_SYMBOL(blk_delay_queue); | 225 | EXPORT_SYMBOL(blk_delay_queue); |
226 | 226 | ||
227 | /** | 227 | /** |
228 | * blk_start_queue - restart a previously stopped queue | 228 | * blk_start_queue - restart a previously stopped queue |
229 | * @q: The &struct request_queue in question | 229 | * @q: The &struct request_queue in question |
230 | * | 230 | * |
231 | * Description: | 231 | * Description: |
232 | * blk_start_queue() will clear the stop flag on the queue, and call | 232 | * blk_start_queue() will clear the stop flag on the queue, and call |
233 | * the request_fn for the queue if it was in a stopped state when | 233 | * the request_fn for the queue if it was in a stopped state when |
234 | * entered. Also see blk_stop_queue(). Queue lock must be held. | 234 | * entered. Also see blk_stop_queue(). Queue lock must be held. |
235 | **/ | 235 | **/ |
236 | void blk_start_queue(struct request_queue *q) | 236 | void blk_start_queue(struct request_queue *q) |
237 | { | 237 | { |
238 | WARN_ON(!irqs_disabled()); | 238 | WARN_ON(!irqs_disabled()); |
239 | 239 | ||
240 | queue_flag_clear(QUEUE_FLAG_STOPPED, q); | 240 | queue_flag_clear(QUEUE_FLAG_STOPPED, q); |
241 | __blk_run_queue(q, false); | 241 | __blk_run_queue(q, false); |
242 | } | 242 | } |
243 | EXPORT_SYMBOL(blk_start_queue); | 243 | EXPORT_SYMBOL(blk_start_queue); |
244 | 244 | ||
245 | /** | 245 | /** |
246 | * blk_stop_queue - stop a queue | 246 | * blk_stop_queue - stop a queue |
247 | * @q: The &struct request_queue in question | 247 | * @q: The &struct request_queue in question |
248 | * | 248 | * |
249 | * Description: | 249 | * Description: |
250 | * The Linux block layer assumes that a block driver will consume all | 250 | * The Linux block layer assumes that a block driver will consume all |
251 | * entries on the request queue when the request_fn strategy is called. | 251 | * entries on the request queue when the request_fn strategy is called. |
252 | * Often this will not happen, because of hardware limitations (queue | 252 | * Often this will not happen, because of hardware limitations (queue |
253 | * depth settings). If a device driver gets a 'queue full' response, | 253 | * depth settings). If a device driver gets a 'queue full' response, |
254 | * or if it simply chooses not to queue more I/O at one point, it can | 254 | * or if it simply chooses not to queue more I/O at one point, it can |
255 | * call this function to prevent the request_fn from being called until | 255 | * call this function to prevent the request_fn from being called until |
256 | * the driver has signalled it's ready to go again. This happens by calling | 256 | * the driver has signalled it's ready to go again. This happens by calling |
257 | * blk_start_queue() to restart queue operations. Queue lock must be held. | 257 | * blk_start_queue() to restart queue operations. Queue lock must be held. |
258 | **/ | 258 | **/ |
259 | void blk_stop_queue(struct request_queue *q) | 259 | void blk_stop_queue(struct request_queue *q) |
260 | { | 260 | { |
261 | __cancel_delayed_work(&q->delay_work); | 261 | __cancel_delayed_work(&q->delay_work); |
262 | queue_flag_set(QUEUE_FLAG_STOPPED, q); | 262 | queue_flag_set(QUEUE_FLAG_STOPPED, q); |
263 | } | 263 | } |
264 | EXPORT_SYMBOL(blk_stop_queue); | 264 | EXPORT_SYMBOL(blk_stop_queue); |
265 | 265 | ||
266 | /** | 266 | /** |
267 | * blk_sync_queue - cancel any pending callbacks on a queue | 267 | * blk_sync_queue - cancel any pending callbacks on a queue |
268 | * @q: the queue | 268 | * @q: the queue |
269 | * | 269 | * |
270 | * Description: | 270 | * Description: |
271 | * The block layer may perform asynchronous callback activity | 271 | * The block layer may perform asynchronous callback activity |
272 | * on a queue, such as calling the unplug function after a timeout. | 272 | * on a queue, such as calling the unplug function after a timeout. |
273 | * A block device may call blk_sync_queue to ensure that any | 273 | * A block device may call blk_sync_queue to ensure that any |
274 | * such activity is cancelled, thus allowing it to release resources | 274 | * such activity is cancelled, thus allowing it to release resources |
275 | * that the callbacks might use. The caller must already have made sure | 275 | * that the callbacks might use. The caller must already have made sure |
276 | * that its ->make_request_fn will not re-add plugging prior to calling | 276 | * that its ->make_request_fn will not re-add plugging prior to calling |
277 | * this function. | 277 | * this function. |
278 | * | 278 | * |
279 | * This function does not cancel any asynchronous activity arising | 279 | * This function does not cancel any asynchronous activity arising |
280 | * out of elevator or throttling code. That would require elevator_exit() | 280 | * out of elevator or throttling code. That would require elevator_exit() |
281 | * and blk_throtl_exit() to be called with queue lock initialized. | 281 | * and blk_throtl_exit() to be called with queue lock initialized. |
282 | * | 282 | * |
283 | */ | 283 | */ |
284 | void blk_sync_queue(struct request_queue *q) | 284 | void blk_sync_queue(struct request_queue *q) |
285 | { | 285 | { |
286 | del_timer_sync(&q->timeout); | 286 | del_timer_sync(&q->timeout); |
287 | cancel_delayed_work_sync(&q->delay_work); | 287 | cancel_delayed_work_sync(&q->delay_work); |
288 | } | 288 | } |
289 | EXPORT_SYMBOL(blk_sync_queue); | 289 | EXPORT_SYMBOL(blk_sync_queue); |
290 | 290 | ||
291 | /** | 291 | /** |
292 | * __blk_run_queue - run a single device queue | 292 | * __blk_run_queue - run a single device queue |
293 | * @q: The queue to run | 293 | * @q: The queue to run |
294 | * @force_kblockd: Don't run @q->request_fn directly. Use kblockd. | 294 | * @force_kblockd: Don't run @q->request_fn directly. Use kblockd. |
295 | * | 295 | * |
296 | * Description: | 296 | * Description: |
297 | * See @blk_run_queue. This variant must be called with the queue lock | 297 | * See @blk_run_queue. This variant must be called with the queue lock |
298 | * held and interrupts disabled. | 298 | * held and interrupts disabled. |
299 | * | 299 | * |
300 | */ | 300 | */ |
301 | void __blk_run_queue(struct request_queue *q, bool force_kblockd) | 301 | void __blk_run_queue(struct request_queue *q, bool force_kblockd) |
302 | { | 302 | { |
303 | if (unlikely(blk_queue_stopped(q))) | 303 | if (unlikely(blk_queue_stopped(q))) |
304 | return; | 304 | return; |
305 | 305 | ||
306 | /* | 306 | /* |
307 | * Only recurse once to avoid overrunning the stack, let the unplug | 307 | * Only recurse once to avoid overrunning the stack, let the unplug |
308 | * handling reinvoke the handler shortly if we already got there. | 308 | * handling reinvoke the handler shortly if we already got there. |
309 | */ | 309 | */ |
310 | if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { | 310 | if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { |
311 | q->request_fn(q); | 311 | q->request_fn(q); |
312 | queue_flag_clear(QUEUE_FLAG_REENTER, q); | 312 | queue_flag_clear(QUEUE_FLAG_REENTER, q); |
313 | } else | 313 | } else |
314 | queue_delayed_work(kblockd_workqueue, &q->delay_work, 0); | 314 | queue_delayed_work(kblockd_workqueue, &q->delay_work, 0); |
315 | } | 315 | } |
316 | EXPORT_SYMBOL(__blk_run_queue); | 316 | EXPORT_SYMBOL(__blk_run_queue); |
317 | 317 | ||
318 | /** | 318 | /** |
319 | * blk_run_queue - run a single device queue | 319 | * blk_run_queue - run a single device queue |
320 | * @q: The queue to run | 320 | * @q: The queue to run |
321 | * | 321 | * |
322 | * Description: | 322 | * Description: |
323 | * Invoke request handling on this queue, if it has pending work to do. | 323 | * Invoke request handling on this queue, if it has pending work to do. |
324 | * May be used to restart queueing when a request has completed. | 324 | * May be used to restart queueing when a request has completed. |
325 | */ | 325 | */ |
326 | void blk_run_queue(struct request_queue *q) | 326 | void blk_run_queue(struct request_queue *q) |
327 | { | 327 | { |
328 | unsigned long flags; | 328 | unsigned long flags; |
329 | 329 | ||
330 | spin_lock_irqsave(q->queue_lock, flags); | 330 | spin_lock_irqsave(q->queue_lock, flags); |
331 | __blk_run_queue(q, false); | 331 | __blk_run_queue(q, false); |
332 | spin_unlock_irqrestore(q->queue_lock, flags); | 332 | spin_unlock_irqrestore(q->queue_lock, flags); |
333 | } | 333 | } |
334 | EXPORT_SYMBOL(blk_run_queue); | 334 | EXPORT_SYMBOL(blk_run_queue); |
335 | 335 | ||
336 | void blk_put_queue(struct request_queue *q) | 336 | void blk_put_queue(struct request_queue *q) |
337 | { | 337 | { |
338 | kobject_put(&q->kobj); | 338 | kobject_put(&q->kobj); |
339 | } | 339 | } |
340 | 340 | ||
341 | /* | 341 | /* |
342 | * Note: If a driver supplied the queue lock, it should not zap that lock | 342 | * Note: If a driver supplied the queue lock, it should not zap that lock |
343 | * unexpectedly as some queue cleanup components like elevator_exit() and | 343 | * unexpectedly as some queue cleanup components like elevator_exit() and |
344 | * blk_throtl_exit() need queue lock. | 344 | * blk_throtl_exit() need queue lock. |
345 | */ | 345 | */ |
346 | void blk_cleanup_queue(struct request_queue *q) | 346 | void blk_cleanup_queue(struct request_queue *q) |
347 | { | 347 | { |
348 | /* | 348 | /* |
349 | * We know we have process context here, so we can be a little | 349 | * We know we have process context here, so we can be a little |
350 | * cautious and ensure that pending block actions on this device | 350 | * cautious and ensure that pending block actions on this device |
351 | * are done before moving on. Going into this function, we should | 351 | * are done before moving on. Going into this function, we should |
352 | * not have processes doing IO to this device. | 352 | * not have processes doing IO to this device. |
353 | */ | 353 | */ |
354 | blk_sync_queue(q); | 354 | blk_sync_queue(q); |
355 | 355 | ||
356 | del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer); | 356 | del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer); |
357 | mutex_lock(&q->sysfs_lock); | 357 | mutex_lock(&q->sysfs_lock); |
358 | queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); | 358 | queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); |
359 | mutex_unlock(&q->sysfs_lock); | 359 | mutex_unlock(&q->sysfs_lock); |
360 | 360 | ||
361 | if (q->elevator) | 361 | if (q->elevator) |
362 | elevator_exit(q->elevator); | 362 | elevator_exit(q->elevator); |
363 | 363 | ||
364 | blk_throtl_exit(q); | 364 | blk_throtl_exit(q); |
365 | 365 | ||
366 | blk_put_queue(q); | 366 | blk_put_queue(q); |
367 | } | 367 | } |
368 | EXPORT_SYMBOL(blk_cleanup_queue); | 368 | EXPORT_SYMBOL(blk_cleanup_queue); |
369 | 369 | ||
370 | static int blk_init_free_list(struct request_queue *q) | 370 | static int blk_init_free_list(struct request_queue *q) |
371 | { | 371 | { |
372 | struct request_list *rl = &q->rq; | 372 | struct request_list *rl = &q->rq; |
373 | 373 | ||
374 | if (unlikely(rl->rq_pool)) | 374 | if (unlikely(rl->rq_pool)) |
375 | return 0; | 375 | return 0; |
376 | 376 | ||
377 | rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0; | 377 | rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0; |
378 | rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0; | 378 | rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0; |
379 | rl->elvpriv = 0; | 379 | rl->elvpriv = 0; |
380 | init_waitqueue_head(&rl->wait[BLK_RW_SYNC]); | 380 | init_waitqueue_head(&rl->wait[BLK_RW_SYNC]); |
381 | init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]); | 381 | init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]); |
382 | 382 | ||
383 | rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, | 383 | rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, |
384 | mempool_free_slab, request_cachep, q->node); | 384 | mempool_free_slab, request_cachep, q->node); |
385 | 385 | ||
386 | if (!rl->rq_pool) | 386 | if (!rl->rq_pool) |
387 | return -ENOMEM; | 387 | return -ENOMEM; |
388 | 388 | ||
389 | return 0; | 389 | return 0; |
390 | } | 390 | } |
391 | 391 | ||
392 | struct request_queue *blk_alloc_queue(gfp_t gfp_mask) | 392 | struct request_queue *blk_alloc_queue(gfp_t gfp_mask) |
393 | { | 393 | { |
394 | return blk_alloc_queue_node(gfp_mask, -1); | 394 | return blk_alloc_queue_node(gfp_mask, -1); |
395 | } | 395 | } |
396 | EXPORT_SYMBOL(blk_alloc_queue); | 396 | EXPORT_SYMBOL(blk_alloc_queue); |
397 | 397 | ||
398 | struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) | 398 | struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) |
399 | { | 399 | { |
400 | struct request_queue *q; | 400 | struct request_queue *q; |
401 | int err; | 401 | int err; |
402 | 402 | ||
403 | q = kmem_cache_alloc_node(blk_requestq_cachep, | 403 | q = kmem_cache_alloc_node(blk_requestq_cachep, |
404 | gfp_mask | __GFP_ZERO, node_id); | 404 | gfp_mask | __GFP_ZERO, node_id); |
405 | if (!q) | 405 | if (!q) |
406 | return NULL; | 406 | return NULL; |
407 | 407 | ||
408 | q->backing_dev_info.ra_pages = | 408 | q->backing_dev_info.ra_pages = |
409 | (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; | 409 | (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; |
410 | q->backing_dev_info.state = 0; | 410 | q->backing_dev_info.state = 0; |
411 | q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; | 411 | q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; |
412 | q->backing_dev_info.name = "block"; | 412 | q->backing_dev_info.name = "block"; |
413 | 413 | ||
414 | err = bdi_init(&q->backing_dev_info); | 414 | err = bdi_init(&q->backing_dev_info); |
415 | if (err) { | 415 | if (err) { |
416 | kmem_cache_free(blk_requestq_cachep, q); | 416 | kmem_cache_free(blk_requestq_cachep, q); |
417 | return NULL; | 417 | return NULL; |
418 | } | 418 | } |
419 | 419 | ||
420 | if (blk_throtl_init(q)) { | 420 | if (blk_throtl_init(q)) { |
421 | kmem_cache_free(blk_requestq_cachep, q); | 421 | kmem_cache_free(blk_requestq_cachep, q); |
422 | return NULL; | 422 | return NULL; |
423 | } | 423 | } |
424 | 424 | ||
425 | setup_timer(&q->backing_dev_info.laptop_mode_wb_timer, | 425 | setup_timer(&q->backing_dev_info.laptop_mode_wb_timer, |
426 | laptop_mode_timer_fn, (unsigned long) q); | 426 | laptop_mode_timer_fn, (unsigned long) q); |
427 | setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); | 427 | setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); |
428 | INIT_LIST_HEAD(&q->timeout_list); | 428 | INIT_LIST_HEAD(&q->timeout_list); |
429 | INIT_LIST_HEAD(&q->flush_queue[0]); | 429 | INIT_LIST_HEAD(&q->flush_queue[0]); |
430 | INIT_LIST_HEAD(&q->flush_queue[1]); | 430 | INIT_LIST_HEAD(&q->flush_queue[1]); |
431 | INIT_LIST_HEAD(&q->flush_data_in_flight); | 431 | INIT_LIST_HEAD(&q->flush_data_in_flight); |
432 | INIT_DELAYED_WORK(&q->delay_work, blk_delay_work); | 432 | INIT_DELAYED_WORK(&q->delay_work, blk_delay_work); |
433 | 433 | ||
434 | kobject_init(&q->kobj, &blk_queue_ktype); | 434 | kobject_init(&q->kobj, &blk_queue_ktype); |
435 | 435 | ||
436 | mutex_init(&q->sysfs_lock); | 436 | mutex_init(&q->sysfs_lock); |
437 | spin_lock_init(&q->__queue_lock); | 437 | spin_lock_init(&q->__queue_lock); |
438 | 438 | ||
439 | /* | 439 | /* |
440 | * By default initialize queue_lock to internal lock and driver can | 440 | * By default initialize queue_lock to internal lock and driver can |
441 | * override it later if need be. | 441 | * override it later if need be. |
442 | */ | 442 | */ |
443 | q->queue_lock = &q->__queue_lock; | 443 | q->queue_lock = &q->__queue_lock; |
444 | 444 | ||
445 | return q; | 445 | return q; |
446 | } | 446 | } |
447 | EXPORT_SYMBOL(blk_alloc_queue_node); | 447 | EXPORT_SYMBOL(blk_alloc_queue_node); |
448 | 448 | ||
449 | /** | 449 | /** |
450 | * blk_init_queue - prepare a request queue for use with a block device | 450 | * blk_init_queue - prepare a request queue for use with a block device |
451 | * @rfn: The function to be called to process requests that have been | 451 | * @rfn: The function to be called to process requests that have been |
452 | * placed on the queue. | 452 | * placed on the queue. |
453 | * @lock: Request queue spin lock | 453 | * @lock: Request queue spin lock |
454 | * | 454 | * |
455 | * Description: | 455 | * Description: |
456 | * If a block device wishes to use the standard request handling procedures, | 456 | * If a block device wishes to use the standard request handling procedures, |
457 | * which sorts requests and coalesces adjacent requests, then it must | 457 | * which sorts requests and coalesces adjacent requests, then it must |
458 | * call blk_init_queue(). The function @rfn will be called when there | 458 | * call blk_init_queue(). The function @rfn will be called when there |
459 | * are requests on the queue that need to be processed. If the device | 459 | * are requests on the queue that need to be processed. If the device |
460 | * supports plugging, then @rfn may not be called immediately when requests | 460 | * supports plugging, then @rfn may not be called immediately when requests |
461 | * are available on the queue, but may be called at some time later instead. | 461 | * are available on the queue, but may be called at some time later instead. |
462 | * Plugged queues are generally unplugged when a buffer belonging to one | 462 | * Plugged queues are generally unplugged when a buffer belonging to one |
463 | * of the requests on the queue is needed, or due to memory pressure. | 463 | * of the requests on the queue is needed, or due to memory pressure. |
464 | * | 464 | * |
465 | * @rfn is not required, or even expected, to remove all requests off the | 465 | * @rfn is not required, or even expected, to remove all requests off the |
466 | * queue, but only as many as it can handle at a time. If it does leave | 466 | * queue, but only as many as it can handle at a time. If it does leave |
467 | * requests on the queue, it is responsible for arranging that the requests | 467 | * requests on the queue, it is responsible for arranging that the requests |
468 | * get dealt with eventually. | 468 | * get dealt with eventually. |
469 | * | 469 | * |
470 | * The queue spin lock must be held while manipulating the requests on the | 470 | * The queue spin lock must be held while manipulating the requests on the |
471 | * request queue; this lock will be taken also from interrupt context, so irq | 471 | * request queue; this lock will be taken also from interrupt context, so irq |
472 | * disabling is needed for it. | 472 | * disabling is needed for it. |
473 | * | 473 | * |
474 | * Function returns a pointer to the initialized request queue, or %NULL if | 474 | * Function returns a pointer to the initialized request queue, or %NULL if |
475 | * it didn't succeed. | 475 | * it didn't succeed. |
476 | * | 476 | * |
477 | * Note: | 477 | * Note: |
478 | * blk_init_queue() must be paired with a blk_cleanup_queue() call | 478 | * blk_init_queue() must be paired with a blk_cleanup_queue() call |
479 | * when the block device is deactivated (such as at module unload). | 479 | * when the block device is deactivated (such as at module unload). |
480 | **/ | 480 | **/ |
481 | 481 | ||
482 | struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) | 482 | struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) |
483 | { | 483 | { |
484 | return blk_init_queue_node(rfn, lock, -1); | 484 | return blk_init_queue_node(rfn, lock, -1); |
485 | } | 485 | } |
486 | EXPORT_SYMBOL(blk_init_queue); | 486 | EXPORT_SYMBOL(blk_init_queue); |
487 | 487 | ||
488 | struct request_queue * | 488 | struct request_queue * |
489 | blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) | 489 | blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) |
490 | { | 490 | { |
491 | struct request_queue *uninit_q, *q; | 491 | struct request_queue *uninit_q, *q; |
492 | 492 | ||
493 | uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id); | 493 | uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id); |
494 | if (!uninit_q) | 494 | if (!uninit_q) |
495 | return NULL; | 495 | return NULL; |
496 | 496 | ||
497 | q = blk_init_allocated_queue_node(uninit_q, rfn, lock, node_id); | 497 | q = blk_init_allocated_queue_node(uninit_q, rfn, lock, node_id); |
498 | if (!q) | 498 | if (!q) |
499 | blk_cleanup_queue(uninit_q); | 499 | blk_cleanup_queue(uninit_q); |
500 | 500 | ||
501 | return q; | 501 | return q; |
502 | } | 502 | } |
503 | EXPORT_SYMBOL(blk_init_queue_node); | 503 | EXPORT_SYMBOL(blk_init_queue_node); |
504 | 504 | ||
505 | struct request_queue * | 505 | struct request_queue * |
506 | blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, | 506 | blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, |
507 | spinlock_t *lock) | 507 | spinlock_t *lock) |
508 | { | 508 | { |
509 | return blk_init_allocated_queue_node(q, rfn, lock, -1); | 509 | return blk_init_allocated_queue_node(q, rfn, lock, -1); |
510 | } | 510 | } |
511 | EXPORT_SYMBOL(blk_init_allocated_queue); | 511 | EXPORT_SYMBOL(blk_init_allocated_queue); |
512 | 512 | ||
513 | struct request_queue * | 513 | struct request_queue * |
514 | blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn, | 514 | blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn, |
515 | spinlock_t *lock, int node_id) | 515 | spinlock_t *lock, int node_id) |
516 | { | 516 | { |
517 | if (!q) | 517 | if (!q) |
518 | return NULL; | 518 | return NULL; |
519 | 519 | ||
520 | q->node = node_id; | 520 | q->node = node_id; |
521 | if (blk_init_free_list(q)) | 521 | if (blk_init_free_list(q)) |
522 | return NULL; | 522 | return NULL; |
523 | 523 | ||
524 | q->request_fn = rfn; | 524 | q->request_fn = rfn; |
525 | q->prep_rq_fn = NULL; | 525 | q->prep_rq_fn = NULL; |
526 | q->unprep_rq_fn = NULL; | 526 | q->unprep_rq_fn = NULL; |
527 | q->queue_flags = QUEUE_FLAG_DEFAULT; | 527 | q->queue_flags = QUEUE_FLAG_DEFAULT; |
528 | 528 | ||
529 | /* Override internal queue lock with supplied lock pointer */ | 529 | /* Override internal queue lock with supplied lock pointer */ |
530 | if (lock) | 530 | if (lock) |
531 | q->queue_lock = lock; | 531 | q->queue_lock = lock; |
532 | 532 | ||
533 | /* | 533 | /* |
534 | * This also sets hw/phys segments, boundary and size | 534 | * This also sets hw/phys segments, boundary and size |
535 | */ | 535 | */ |
536 | blk_queue_make_request(q, __make_request); | 536 | blk_queue_make_request(q, __make_request); |
537 | 537 | ||
538 | q->sg_reserved_size = INT_MAX; | 538 | q->sg_reserved_size = INT_MAX; |
539 | 539 | ||
540 | /* | 540 | /* |
541 | * all done | 541 | * all done |
542 | */ | 542 | */ |
543 | if (!elevator_init(q, NULL)) { | 543 | if (!elevator_init(q, NULL)) { |
544 | blk_queue_congestion_threshold(q); | 544 | blk_queue_congestion_threshold(q); |
545 | return q; | 545 | return q; |
546 | } | 546 | } |
547 | 547 | ||
548 | return NULL; | 548 | return NULL; |
549 | } | 549 | } |
550 | EXPORT_SYMBOL(blk_init_allocated_queue_node); | 550 | EXPORT_SYMBOL(blk_init_allocated_queue_node); |
551 | 551 | ||
552 | int blk_get_queue(struct request_queue *q) | 552 | int blk_get_queue(struct request_queue *q) |
553 | { | 553 | { |
554 | if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { | 554 | if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { |
555 | kobject_get(&q->kobj); | 555 | kobject_get(&q->kobj); |
556 | return 0; | 556 | return 0; |
557 | } | 557 | } |
558 | 558 | ||
559 | return 1; | 559 | return 1; |
560 | } | 560 | } |
561 | 561 | ||
562 | static inline void blk_free_request(struct request_queue *q, struct request *rq) | 562 | static inline void blk_free_request(struct request_queue *q, struct request *rq) |
563 | { | 563 | { |
564 | BUG_ON(rq->cmd_flags & REQ_ON_PLUG); | 564 | BUG_ON(rq->cmd_flags & REQ_ON_PLUG); |
565 | 565 | ||
566 | if (rq->cmd_flags & REQ_ELVPRIV) | 566 | if (rq->cmd_flags & REQ_ELVPRIV) |
567 | elv_put_request(q, rq); | 567 | elv_put_request(q, rq); |
568 | mempool_free(rq, q->rq.rq_pool); | 568 | mempool_free(rq, q->rq.rq_pool); |
569 | } | 569 | } |
570 | 570 | ||
571 | static struct request * | 571 | static struct request * |
572 | blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask) | 572 | blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask) |
573 | { | 573 | { |
574 | struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); | 574 | struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); |
575 | 575 | ||
576 | if (!rq) | 576 | if (!rq) |
577 | return NULL; | 577 | return NULL; |
578 | 578 | ||
579 | blk_rq_init(q, rq); | 579 | blk_rq_init(q, rq); |
580 | 580 | ||
581 | rq->cmd_flags = flags | REQ_ALLOCED; | 581 | rq->cmd_flags = flags | REQ_ALLOCED; |
582 | 582 | ||
583 | if (priv) { | 583 | if (priv) { |
584 | if (unlikely(elv_set_request(q, rq, gfp_mask))) { | 584 | if (unlikely(elv_set_request(q, rq, gfp_mask))) { |
585 | mempool_free(rq, q->rq.rq_pool); | 585 | mempool_free(rq, q->rq.rq_pool); |
586 | return NULL; | 586 | return NULL; |
587 | } | 587 | } |
588 | rq->cmd_flags |= REQ_ELVPRIV; | 588 | rq->cmd_flags |= REQ_ELVPRIV; |
589 | } | 589 | } |
590 | 590 | ||
591 | return rq; | 591 | return rq; |
592 | } | 592 | } |
593 | 593 | ||
594 | /* | 594 | /* |
595 | * ioc_batching returns true if the ioc is a valid batching request and | 595 | * ioc_batching returns true if the ioc is a valid batching request and |
596 | * should be given priority access to a request. | 596 | * should be given priority access to a request. |
597 | */ | 597 | */ |
598 | static inline int ioc_batching(struct request_queue *q, struct io_context *ioc) | 598 | static inline int ioc_batching(struct request_queue *q, struct io_context *ioc) |
599 | { | 599 | { |
600 | if (!ioc) | 600 | if (!ioc) |
601 | return 0; | 601 | return 0; |
602 | 602 | ||
603 | /* | 603 | /* |
604 | * Make sure the process is able to allocate at least 1 request | 604 | * Make sure the process is able to allocate at least 1 request |
605 | * even if the batch times out, otherwise we could theoretically | 605 | * even if the batch times out, otherwise we could theoretically |
606 | * lose wakeups. | 606 | * lose wakeups. |
607 | */ | 607 | */ |
608 | return ioc->nr_batch_requests == q->nr_batching || | 608 | return ioc->nr_batch_requests == q->nr_batching || |
609 | (ioc->nr_batch_requests > 0 | 609 | (ioc->nr_batch_requests > 0 |
610 | && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME)); | 610 | && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME)); |
611 | } | 611 | } |
612 | 612 | ||
613 | /* | 613 | /* |
614 | * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This | 614 | * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This |
615 | * will cause the process to be a "batcher" on all queues in the system. This | 615 | * will cause the process to be a "batcher" on all queues in the system. This |
616 | * is the behaviour we want though - once it gets a wakeup it should be given | 616 | * is the behaviour we want though - once it gets a wakeup it should be given |
617 | * a nice run. | 617 | * a nice run. |
618 | */ | 618 | */ |
619 | static void ioc_set_batching(struct request_queue *q, struct io_context *ioc) | 619 | static void ioc_set_batching(struct request_queue *q, struct io_context *ioc) |
620 | { | 620 | { |
621 | if (!ioc || ioc_batching(q, ioc)) | 621 | if (!ioc || ioc_batching(q, ioc)) |
622 | return; | 622 | return; |
623 | 623 | ||
624 | ioc->nr_batch_requests = q->nr_batching; | 624 | ioc->nr_batch_requests = q->nr_batching; |
625 | ioc->last_waited = jiffies; | 625 | ioc->last_waited = jiffies; |
626 | } | 626 | } |
627 | 627 | ||
628 | static void __freed_request(struct request_queue *q, int sync) | 628 | static void __freed_request(struct request_queue *q, int sync) |
629 | { | 629 | { |
630 | struct request_list *rl = &q->rq; | 630 | struct request_list *rl = &q->rq; |
631 | 631 | ||
632 | if (rl->count[sync] < queue_congestion_off_threshold(q)) | 632 | if (rl->count[sync] < queue_congestion_off_threshold(q)) |
633 | blk_clear_queue_congested(q, sync); | 633 | blk_clear_queue_congested(q, sync); |
634 | 634 | ||
635 | if (rl->count[sync] + 1 <= q->nr_requests) { | 635 | if (rl->count[sync] + 1 <= q->nr_requests) { |
636 | if (waitqueue_active(&rl->wait[sync])) | 636 | if (waitqueue_active(&rl->wait[sync])) |
637 | wake_up(&rl->wait[sync]); | 637 | wake_up(&rl->wait[sync]); |
638 | 638 | ||
639 | blk_clear_queue_full(q, sync); | 639 | blk_clear_queue_full(q, sync); |
640 | } | 640 | } |
641 | } | 641 | } |
642 | 642 | ||
643 | /* | 643 | /* |
644 | * A request has just been released. Account for it, update the full and | 644 | * A request has just been released. Account for it, update the full and |
645 | * congestion status, wake up any waiters. Called under q->queue_lock. | 645 | * congestion status, wake up any waiters. Called under q->queue_lock. |
646 | */ | 646 | */ |
647 | static void freed_request(struct request_queue *q, int sync, int priv) | 647 | static void freed_request(struct request_queue *q, int sync, int priv) |
648 | { | 648 | { |
649 | struct request_list *rl = &q->rq; | 649 | struct request_list *rl = &q->rq; |
650 | 650 | ||
651 | rl->count[sync]--; | 651 | rl->count[sync]--; |
652 | if (priv) | 652 | if (priv) |
653 | rl->elvpriv--; | 653 | rl->elvpriv--; |
654 | 654 | ||
655 | __freed_request(q, sync); | 655 | __freed_request(q, sync); |
656 | 656 | ||
657 | if (unlikely(rl->starved[sync ^ 1])) | 657 | if (unlikely(rl->starved[sync ^ 1])) |
658 | __freed_request(q, sync ^ 1); | 658 | __freed_request(q, sync ^ 1); |
659 | } | 659 | } |
660 | 660 | ||
661 | /* | 661 | /* |
662 | * Determine if elevator data should be initialized when allocating the | 662 | * Determine if elevator data should be initialized when allocating the |
663 | * request associated with @bio. | 663 | * request associated with @bio. |
664 | */ | 664 | */ |
665 | static bool blk_rq_should_init_elevator(struct bio *bio) | 665 | static bool blk_rq_should_init_elevator(struct bio *bio) |
666 | { | 666 | { |
667 | if (!bio) | 667 | if (!bio) |
668 | return true; | 668 | return true; |
669 | 669 | ||
670 | /* | 670 | /* |
671 | * Flush requests do not use the elevator so skip initialization. | 671 | * Flush requests do not use the elevator so skip initialization. |
672 | * This allows a request to share the flush and elevator data. | 672 | * This allows a request to share the flush and elevator data. |
673 | */ | 673 | */ |
674 | if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) | 674 | if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) |
675 | return false; | 675 | return false; |
676 | 676 | ||
677 | return true; | 677 | return true; |
678 | } | 678 | } |
679 | 679 | ||
680 | /* | 680 | /* |
681 | * Get a free request, queue_lock must be held. | 681 | * Get a free request, queue_lock must be held. |
682 | * Returns NULL on failure, with queue_lock held. | 682 | * Returns NULL on failure, with queue_lock held. |
683 | * Returns !NULL on success, with queue_lock *not held*. | 683 | * Returns !NULL on success, with queue_lock *not held*. |
684 | */ | 684 | */ |
685 | static struct request *get_request(struct request_queue *q, int rw_flags, | 685 | static struct request *get_request(struct request_queue *q, int rw_flags, |
686 | struct bio *bio, gfp_t gfp_mask) | 686 | struct bio *bio, gfp_t gfp_mask) |
687 | { | 687 | { |
688 | struct request *rq = NULL; | 688 | struct request *rq = NULL; |
689 | struct request_list *rl = &q->rq; | 689 | struct request_list *rl = &q->rq; |
690 | struct io_context *ioc = NULL; | 690 | struct io_context *ioc = NULL; |
691 | const bool is_sync = rw_is_sync(rw_flags) != 0; | 691 | const bool is_sync = rw_is_sync(rw_flags) != 0; |
692 | int may_queue, priv = 0; | 692 | int may_queue, priv = 0; |
693 | 693 | ||
694 | may_queue = elv_may_queue(q, rw_flags); | 694 | may_queue = elv_may_queue(q, rw_flags); |
695 | if (may_queue == ELV_MQUEUE_NO) | 695 | if (may_queue == ELV_MQUEUE_NO) |
696 | goto rq_starved; | 696 | goto rq_starved; |
697 | 697 | ||
698 | if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) { | 698 | if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) { |
699 | if (rl->count[is_sync]+1 >= q->nr_requests) { | 699 | if (rl->count[is_sync]+1 >= q->nr_requests) { |
700 | ioc = current_io_context(GFP_ATOMIC, q->node); | 700 | ioc = current_io_context(GFP_ATOMIC, q->node); |
701 | /* | 701 | /* |
702 | * The queue will fill after this allocation, so set | 702 | * The queue will fill after this allocation, so set |
703 | * it as full, and mark this process as "batching". | 703 | * it as full, and mark this process as "batching". |
704 | * This process will be allowed to complete a batch of | 704 | * This process will be allowed to complete a batch of |
705 | * requests, others will be blocked. | 705 | * requests, others will be blocked. |
706 | */ | 706 | */ |
707 | if (!blk_queue_full(q, is_sync)) { | 707 | if (!blk_queue_full(q, is_sync)) { |
708 | ioc_set_batching(q, ioc); | 708 | ioc_set_batching(q, ioc); |
709 | blk_set_queue_full(q, is_sync); | 709 | blk_set_queue_full(q, is_sync); |
710 | } else { | 710 | } else { |
711 | if (may_queue != ELV_MQUEUE_MUST | 711 | if (may_queue != ELV_MQUEUE_MUST |
712 | && !ioc_batching(q, ioc)) { | 712 | && !ioc_batching(q, ioc)) { |
713 | /* | 713 | /* |
714 | * The queue is full and the allocating | 714 | * The queue is full and the allocating |
715 | * process is not a "batcher", and not | 715 | * process is not a "batcher", and not |
716 | * exempted by the IO scheduler | 716 | * exempted by the IO scheduler |
717 | */ | 717 | */ |
718 | goto out; | 718 | goto out; |
719 | } | 719 | } |
720 | } | 720 | } |
721 | } | 721 | } |
722 | blk_set_queue_congested(q, is_sync); | 722 | blk_set_queue_congested(q, is_sync); |
723 | } | 723 | } |
724 | 724 | ||
725 | /* | 725 | /* |
726 | * Only allow batching queuers to allocate up to 50% over the defined | 726 | * Only allow batching queuers to allocate up to 50% over the defined |
727 | * limit of requests, otherwise we could have thousands of requests | 727 | * limit of requests, otherwise we could have thousands of requests |
728 | * allocated with any setting of ->nr_requests | 728 | * allocated with any setting of ->nr_requests |
729 | */ | 729 | */ |
730 | if (rl->count[is_sync] >= (3 * q->nr_requests / 2)) | 730 | if (rl->count[is_sync] >= (3 * q->nr_requests / 2)) |
731 | goto out; | 731 | goto out; |
732 | 732 | ||
733 | rl->count[is_sync]++; | 733 | rl->count[is_sync]++; |
734 | rl->starved[is_sync] = 0; | 734 | rl->starved[is_sync] = 0; |
735 | 735 | ||
736 | if (blk_rq_should_init_elevator(bio)) { | 736 | if (blk_rq_should_init_elevator(bio)) { |
737 | priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); | 737 | priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); |
738 | if (priv) | 738 | if (priv) |
739 | rl->elvpriv++; | 739 | rl->elvpriv++; |
740 | } | 740 | } |
741 | 741 | ||
742 | if (blk_queue_io_stat(q)) | 742 | if (blk_queue_io_stat(q)) |
743 | rw_flags |= REQ_IO_STAT; | 743 | rw_flags |= REQ_IO_STAT; |
744 | spin_unlock_irq(q->queue_lock); | 744 | spin_unlock_irq(q->queue_lock); |
745 | 745 | ||
746 | rq = blk_alloc_request(q, rw_flags, priv, gfp_mask); | 746 | rq = blk_alloc_request(q, rw_flags, priv, gfp_mask); |
747 | if (unlikely(!rq)) { | 747 | if (unlikely(!rq)) { |
748 | /* | 748 | /* |
749 | * Allocation failed presumably due to memory. Undo anything | 749 | * Allocation failed presumably due to memory. Undo anything |
750 | * we might have messed up. | 750 | * we might have messed up. |
751 | * | 751 | * |
752 | * Allocating task should really be put onto the front of the | 752 | * Allocating task should really be put onto the front of the |
753 | * wait queue, but this is pretty rare. | 753 | * wait queue, but this is pretty rare. |
754 | */ | 754 | */ |
755 | spin_lock_irq(q->queue_lock); | 755 | spin_lock_irq(q->queue_lock); |
756 | freed_request(q, is_sync, priv); | 756 | freed_request(q, is_sync, priv); |
757 | 757 | ||
758 | /* | 758 | /* |
759 | * in the very unlikely event that allocation failed and no | 759 | * in the very unlikely event that allocation failed and no |
760 | * requests for this direction was pending, mark us starved | 760 | * requests for this direction was pending, mark us starved |
761 | * so that freeing of a request in the other direction will | 761 | * so that freeing of a request in the other direction will |
762 | * notice us. another possible fix would be to split the | 762 | * notice us. another possible fix would be to split the |
763 | * rq mempool into READ and WRITE | 763 | * rq mempool into READ and WRITE |
764 | */ | 764 | */ |
765 | rq_starved: | 765 | rq_starved: |
766 | if (unlikely(rl->count[is_sync] == 0)) | 766 | if (unlikely(rl->count[is_sync] == 0)) |
767 | rl->starved[is_sync] = 1; | 767 | rl->starved[is_sync] = 1; |
768 | 768 | ||
769 | goto out; | 769 | goto out; |
770 | } | 770 | } |
771 | 771 | ||
772 | /* | 772 | /* |
773 | * ioc may be NULL here, and ioc_batching will be false. That's | 773 | * ioc may be NULL here, and ioc_batching will be false. That's |
774 | * OK, if the queue is under the request limit then requests need | 774 | * OK, if the queue is under the request limit then requests need |
775 | * not count toward the nr_batch_requests limit. There will always | 775 | * not count toward the nr_batch_requests limit. There will always |
776 | * be some limit enforced by BLK_BATCH_TIME. | 776 | * be some limit enforced by BLK_BATCH_TIME. |
777 | */ | 777 | */ |
778 | if (ioc_batching(q, ioc)) | 778 | if (ioc_batching(q, ioc)) |
779 | ioc->nr_batch_requests--; | 779 | ioc->nr_batch_requests--; |
780 | 780 | ||
781 | trace_block_getrq(q, bio, rw_flags & 1); | 781 | trace_block_getrq(q, bio, rw_flags & 1); |
782 | out: | 782 | out: |
783 | return rq; | 783 | return rq; |
784 | } | 784 | } |
785 | 785 | ||
786 | /* | 786 | /* |
787 | * No available requests for this queue, wait for some requests to become | 787 | * No available requests for this queue, wait for some requests to become |
788 | * available. | 788 | * available. |
789 | * | 789 | * |
790 | * Called with q->queue_lock held, and returns with it unlocked. | 790 | * Called with q->queue_lock held, and returns with it unlocked. |
791 | */ | 791 | */ |
792 | static struct request *get_request_wait(struct request_queue *q, int rw_flags, | 792 | static struct request *get_request_wait(struct request_queue *q, int rw_flags, |
793 | struct bio *bio) | 793 | struct bio *bio) |
794 | { | 794 | { |
795 | const bool is_sync = rw_is_sync(rw_flags) != 0; | 795 | const bool is_sync = rw_is_sync(rw_flags) != 0; |
796 | struct request *rq; | 796 | struct request *rq; |
797 | 797 | ||
798 | rq = get_request(q, rw_flags, bio, GFP_NOIO); | 798 | rq = get_request(q, rw_flags, bio, GFP_NOIO); |
799 | while (!rq) { | 799 | while (!rq) { |
800 | DEFINE_WAIT(wait); | 800 | DEFINE_WAIT(wait); |
801 | struct io_context *ioc; | 801 | struct io_context *ioc; |
802 | struct request_list *rl = &q->rq; | 802 | struct request_list *rl = &q->rq; |
803 | 803 | ||
804 | prepare_to_wait_exclusive(&rl->wait[is_sync], &wait, | 804 | prepare_to_wait_exclusive(&rl->wait[is_sync], &wait, |
805 | TASK_UNINTERRUPTIBLE); | 805 | TASK_UNINTERRUPTIBLE); |
806 | 806 | ||
807 | trace_block_sleeprq(q, bio, rw_flags & 1); | 807 | trace_block_sleeprq(q, bio, rw_flags & 1); |
808 | 808 | ||
809 | spin_unlock_irq(q->queue_lock); | 809 | spin_unlock_irq(q->queue_lock); |
810 | io_schedule(); | 810 | io_schedule(); |
811 | 811 | ||
812 | /* | 812 | /* |
813 | * After sleeping, we become a "batching" process and | 813 | * After sleeping, we become a "batching" process and |
814 | * will be able to allocate at least one request, and | 814 | * will be able to allocate at least one request, and |
815 | * up to a big batch of them for a small period time. | 815 | * up to a big batch of them for a small period time. |
816 | * See ioc_batching, ioc_set_batching | 816 | * See ioc_batching, ioc_set_batching |
817 | */ | 817 | */ |
818 | ioc = current_io_context(GFP_NOIO, q->node); | 818 | ioc = current_io_context(GFP_NOIO, q->node); |
819 | ioc_set_batching(q, ioc); | 819 | ioc_set_batching(q, ioc); |
820 | 820 | ||
821 | spin_lock_irq(q->queue_lock); | 821 | spin_lock_irq(q->queue_lock); |
822 | finish_wait(&rl->wait[is_sync], &wait); | 822 | finish_wait(&rl->wait[is_sync], &wait); |
823 | 823 | ||
824 | rq = get_request(q, rw_flags, bio, GFP_NOIO); | 824 | rq = get_request(q, rw_flags, bio, GFP_NOIO); |
825 | }; | 825 | }; |
826 | 826 | ||
827 | return rq; | 827 | return rq; |
828 | } | 828 | } |
829 | 829 | ||
830 | struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) | 830 | struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) |
831 | { | 831 | { |
832 | struct request *rq; | 832 | struct request *rq; |
833 | 833 | ||
834 | BUG_ON(rw != READ && rw != WRITE); | 834 | BUG_ON(rw != READ && rw != WRITE); |
835 | 835 | ||
836 | spin_lock_irq(q->queue_lock); | 836 | spin_lock_irq(q->queue_lock); |
837 | if (gfp_mask & __GFP_WAIT) { | 837 | if (gfp_mask & __GFP_WAIT) { |
838 | rq = get_request_wait(q, rw, NULL); | 838 | rq = get_request_wait(q, rw, NULL); |
839 | } else { | 839 | } else { |
840 | rq = get_request(q, rw, NULL, gfp_mask); | 840 | rq = get_request(q, rw, NULL, gfp_mask); |
841 | if (!rq) | 841 | if (!rq) |
842 | spin_unlock_irq(q->queue_lock); | 842 | spin_unlock_irq(q->queue_lock); |
843 | } | 843 | } |
844 | /* q->queue_lock is unlocked at this point */ | 844 | /* q->queue_lock is unlocked at this point */ |
845 | 845 | ||
846 | return rq; | 846 | return rq; |
847 | } | 847 | } |
848 | EXPORT_SYMBOL(blk_get_request); | 848 | EXPORT_SYMBOL(blk_get_request); |
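For context, a minimal usage sketch (not from this patch) of blk_get_request() from process context, assuming a request_queue pointer q obtained elsewhere; with a __GFP_WAIT mask the allocation goes through get_request_wait() above and sleeps rather than fail, while GFP_ATOMIC callers must check for NULL:

	struct request *rq;

	rq = blk_get_request(q, WRITE, GFP_KERNEL);	/* may sleep until a request is free */
	/* ... fill in the command, timeout, etc., and queue or execute it ... */
	blk_put_request(rq);				/* drop the reference when done */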
849 | 849 | ||
850 | /** | 850 | /** |
851 | * blk_make_request - given a bio, allocate a corresponding struct request. | 851 | * blk_make_request - given a bio, allocate a corresponding struct request. |
852 | * @q: target request queue | 852 | * @q: target request queue |
853 | * @bio: The bio describing the memory mappings that will be submitted for IO. | 853 | * @bio: The bio describing the memory mappings that will be submitted for IO. |
854 | * It may be a chained-bio properly constructed by block/bio layer. | 854 | * It may be a chained-bio properly constructed by block/bio layer. |
855 | * @gfp_mask: gfp flags to be used for memory allocation | 855 | * @gfp_mask: gfp flags to be used for memory allocation |
856 | * | 856 | * |
857 | * blk_make_request is the parallel of generic_make_request for BLOCK_PC | 857 | * blk_make_request is the parallel of generic_make_request for BLOCK_PC |
858 | * type commands, where the struct request needs to be further initialized by | 858 | * type commands, where the struct request needs to be further initialized by |
859 | * the caller. It is passed a &struct bio, which describes the memory info of | 859 | * the caller. It is passed a &struct bio, which describes the memory info of |
860 | * the I/O transfer. | 860 | * the I/O transfer. |
861 | * | 861 | * |
862 | * The caller of blk_make_request must make sure that bi_io_vec | 862 | * The caller of blk_make_request must make sure that bi_io_vec |
863 | * are set to describe the memory buffers, so that bio_data_dir() will return | 863 | * are set to describe the memory buffers, so that bio_data_dir() will return |
864 | * the needed direction of the request. (And all bio's in the passed bio-chain | 864 | * the needed direction of the request. (And all bio's in the passed bio-chain |
865 | * are properly set accordingly) | 865 | * are properly set accordingly) |
866 | * | 866 | * |
867 | * If called under non-sleepable conditions, mapped bio buffers must not | 867 | * If called under non-sleepable conditions, mapped bio buffers must not |
868 | * need bouncing, by calling the appropriate masked or flagged allocator, | 868 | * need bouncing, by calling the appropriate masked or flagged allocator, |
869 | * suitable for the target device. Otherwise the call to blk_queue_bounce will | 869 | * suitable for the target device. Otherwise the call to blk_queue_bounce will |
870 | * BUG. | 870 | * BUG. |
871 | * | 871 | * |
872 | * WARNING: When allocating/cloning a bio-chain, careful consideration should be | 872 | * WARNING: When allocating/cloning a bio-chain, careful consideration should be |
873 | * given to how you allocate bios. In particular, you cannot use __GFP_WAIT for | 873 | * given to how you allocate bios. In particular, you cannot use __GFP_WAIT for |
874 | * anything but the first bio in the chain. Otherwise you risk waiting for IO | 874 | * anything but the first bio in the chain. Otherwise you risk waiting for IO |
875 | * completion of a bio that hasn't been submitted yet, thus resulting in a | 875 | * completion of a bio that hasn't been submitted yet, thus resulting in a |
876 | * deadlock. Alternatively bios should be allocated using bio_kmalloc() instead | 876 | * deadlock. Alternatively bios should be allocated using bio_kmalloc() instead |
877 | * of bio_alloc(), as that avoids the mempool deadlock. | 877 | * of bio_alloc(), as that avoids the mempool deadlock. |
878 | * If possible a big IO should be split into smaller parts when allocation | 878 | * If possible a big IO should be split into smaller parts when allocation |
879 | * fails. Partial allocation should not be an error, or you risk a live-lock. | 879 | * fails. Partial allocation should not be an error, or you risk a live-lock. |
880 | */ | 880 | */ |
881 | struct request *blk_make_request(struct request_queue *q, struct bio *bio, | 881 | struct request *blk_make_request(struct request_queue *q, struct bio *bio, |
882 | gfp_t gfp_mask) | 882 | gfp_t gfp_mask) |
883 | { | 883 | { |
884 | struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask); | 884 | struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask); |
885 | 885 | ||
886 | if (unlikely(!rq)) | 886 | if (unlikely(!rq)) |
887 | return ERR_PTR(-ENOMEM); | 887 | return ERR_PTR(-ENOMEM); |
888 | 888 | ||
889 | for_each_bio(bio) { | 889 | for_each_bio(bio) { |
890 | struct bio *bounce_bio = bio; | 890 | struct bio *bounce_bio = bio; |
891 | int ret; | 891 | int ret; |
892 | 892 | ||
893 | blk_queue_bounce(q, &bounce_bio); | 893 | blk_queue_bounce(q, &bounce_bio); |
894 | ret = blk_rq_append_bio(q, rq, bounce_bio); | 894 | ret = blk_rq_append_bio(q, rq, bounce_bio); |
895 | if (unlikely(ret)) { | 895 | if (unlikely(ret)) { |
896 | blk_put_request(rq); | 896 | blk_put_request(rq); |
897 | return ERR_PTR(ret); | 897 | return ERR_PTR(ret); |
898 | } | 898 | } |
899 | } | 899 | } |
900 | 900 | ||
901 | return rq; | 901 | return rq; |
902 | } | 902 | } |
903 | EXPORT_SYMBOL(blk_make_request); | 903 | EXPORT_SYMBOL(blk_make_request); |
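A hedged sketch of the BLOCK_PC pattern the kernel-doc above describes, assuming the caller has already built a bio (or bio chain, e.g. via bio_map_kern()) and fills in the command bytes itself:

	struct request *rq;

	rq = blk_make_request(q, bio, GFP_KERNEL);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	rq->cmd_type = REQ_TYPE_BLOCK_PC;	/* caller-side initialization, as noted above */
	rq->timeout  = 60 * HZ;
	/* fill in rq->cmd[], rq->cmd_len, sense buffer, ... */
	blk_execute_rq(q, NULL, rq, 0);		/* synchronous execution, insert at tail */
	blk_put_request(rq);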
904 | 904 | ||
905 | /** | 905 | /** |
906 | * blk_requeue_request - put a request back on queue | 906 | * blk_requeue_request - put a request back on queue |
907 | * @q: request queue where request should be inserted | 907 | * @q: request queue where request should be inserted |
908 | * @rq: request to be inserted | 908 | * @rq: request to be inserted |
909 | * | 909 | * |
910 | * Description: | 910 | * Description: |
911 | * Drivers often keep queueing requests until the hardware cannot accept | 911 | * Drivers often keep queueing requests until the hardware cannot accept |
912 | * more. When that condition happens we need to put the request back | 912 | * more. When that condition happens we need to put the request back |
913 | * on the queue. Must be called with queue lock held. | 913 | * on the queue. Must be called with queue lock held. |
914 | */ | 914 | */ |
915 | void blk_requeue_request(struct request_queue *q, struct request *rq) | 915 | void blk_requeue_request(struct request_queue *q, struct request *rq) |
916 | { | 916 | { |
917 | blk_delete_timer(rq); | 917 | blk_delete_timer(rq); |
918 | blk_clear_rq_complete(rq); | 918 | blk_clear_rq_complete(rq); |
919 | trace_block_rq_requeue(q, rq); | 919 | trace_block_rq_requeue(q, rq); |
920 | 920 | ||
921 | if (blk_rq_tagged(rq)) | 921 | if (blk_rq_tagged(rq)) |
922 | blk_queue_end_tag(q, rq); | 922 | blk_queue_end_tag(q, rq); |
923 | 923 | ||
924 | BUG_ON(blk_queued_rq(rq)); | 924 | BUG_ON(blk_queued_rq(rq)); |
925 | 925 | ||
926 | elv_requeue_request(q, rq); | 926 | elv_requeue_request(q, rq); |
927 | } | 927 | } |
928 | EXPORT_SYMBOL(blk_requeue_request); | 928 | EXPORT_SYMBOL(blk_requeue_request); |
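To illustrate the "hardware cannot accept more" case, a minimal request_fn sketch for a hypothetical driver (device_busy() is an assumed hardware check, not a real API) that pushes a fetched request back with blk_requeue_request(); request_fn runs with q->queue_lock held, which satisfies the locking rule above:

	static void example_request_fn(struct request_queue *q)
	{
		struct request *rq;

		while ((rq = blk_fetch_request(q)) != NULL) {
			if (device_busy()) {			/* hypothetical hardware check */
				blk_requeue_request(q, rq);	/* queue_lock is held here */
				break;
			}
			/* ... hand rq to the hardware ... */
		}
	}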
929 | 929 | ||
930 | static void add_acct_request(struct request_queue *q, struct request *rq, | 930 | static void add_acct_request(struct request_queue *q, struct request *rq, |
931 | int where) | 931 | int where) |
932 | { | 932 | { |
933 | drive_stat_acct(rq, 1); | 933 | drive_stat_acct(rq, 1); |
934 | __elv_add_request(q, rq, where); | 934 | __elv_add_request(q, rq, where); |
935 | } | 935 | } |
936 | 936 | ||
937 | /** | 937 | /** |
938 | * blk_insert_request - insert a special request into a request queue | 938 | * blk_insert_request - insert a special request into a request queue |
939 | * @q: request queue where request should be inserted | 939 | * @q: request queue where request should be inserted |
940 | * @rq: request to be inserted | 940 | * @rq: request to be inserted |
941 | * @at_head: insert request at head or tail of queue | 941 | * @at_head: insert request at head or tail of queue |
942 | * @data: private data | 942 | * @data: private data |
943 | * | 943 | * |
944 | * Description: | 944 | * Description: |
945 | * Many block devices need to execute commands asynchronously, so they don't | 945 | * Many block devices need to execute commands asynchronously, so they don't |
946 | * block the whole kernel from preemption during request execution. This is | 946 | * block the whole kernel from preemption during request execution. This is |
947 | * accomplished normally by inserting artificial requests tagged as | 947 | * accomplished normally by inserting artificial requests tagged as |
948 | * REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them | 948 | * REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them |
949 | * be scheduled for actual execution by the request queue. | 949 | * be scheduled for actual execution by the request queue. |
950 | * | 950 | * |
951 | * We have the option of inserting at the head or the tail of the queue. | 951 | * We have the option of inserting at the head or the tail of the queue. |
952 | * Typically we use the tail for new ioctls and so forth. We use the head | 952 | * Typically we use the tail for new ioctls and so forth. We use the head |
953 | * of the queue for things like a QUEUE_FULL message from a device, or a | 953 | * of the queue for things like a QUEUE_FULL message from a device, or a |
954 | * host that is unable to accept a particular command. | 954 | * host that is unable to accept a particular command. |
955 | */ | 955 | */ |
956 | void blk_insert_request(struct request_queue *q, struct request *rq, | 956 | void blk_insert_request(struct request_queue *q, struct request *rq, |
957 | int at_head, void *data) | 957 | int at_head, void *data) |
958 | { | 958 | { |
959 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; | 959 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; |
960 | unsigned long flags; | 960 | unsigned long flags; |
961 | 961 | ||
962 | /* | 962 | /* |
963 | * tell I/O scheduler that this isn't a regular read/write (ie it | 963 | * tell I/O scheduler that this isn't a regular read/write (ie it |
964 | * must not attempt merges on this) and that it acts as a soft | 964 | * must not attempt merges on this) and that it acts as a soft |
965 | * barrier | 965 | * barrier |
966 | */ | 966 | */ |
967 | rq->cmd_type = REQ_TYPE_SPECIAL; | 967 | rq->cmd_type = REQ_TYPE_SPECIAL; |
968 | 968 | ||
969 | rq->special = data; | 969 | rq->special = data; |
970 | 970 | ||
971 | spin_lock_irqsave(q->queue_lock, flags); | 971 | spin_lock_irqsave(q->queue_lock, flags); |
972 | 972 | ||
973 | /* | 973 | /* |
974 | * If command is tagged, release the tag | 974 | * If command is tagged, release the tag |
975 | */ | 975 | */ |
976 | if (blk_rq_tagged(rq)) | 976 | if (blk_rq_tagged(rq)) |
977 | blk_queue_end_tag(q, rq); | 977 | blk_queue_end_tag(q, rq); |
978 | 978 | ||
979 | add_acct_request(q, rq, where); | 979 | add_acct_request(q, rq, where); |
980 | __blk_run_queue(q, false); | 980 | __blk_run_queue(q, false); |
981 | spin_unlock_irqrestore(q->queue_lock, flags); | 981 | spin_unlock_irqrestore(q->queue_lock, flags); |
982 | } | 982 | } |
983 | EXPORT_SYMBOL(blk_insert_request); | 983 | EXPORT_SYMBOL(blk_insert_request); |
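A short sketch of the pattern described in the kernel-doc, with my_cookie as a hypothetical driver-private pointer that the driver later reads back from rq->special when the request reaches its request_fn:

	struct request *rq = blk_get_request(q, READ, GFP_KERNEL);

	blk_insert_request(q, rq, 1, my_cookie);	/* 1 = insert at the head of the queue */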
984 | 984 | ||
985 | static void part_round_stats_single(int cpu, struct hd_struct *part, | 985 | static void part_round_stats_single(int cpu, struct hd_struct *part, |
986 | unsigned long now) | 986 | unsigned long now) |
987 | { | 987 | { |
988 | if (now == part->stamp) | 988 | if (now == part->stamp) |
989 | return; | 989 | return; |
990 | 990 | ||
991 | if (part_in_flight(part)) { | 991 | if (part_in_flight(part)) { |
992 | __part_stat_add(cpu, part, time_in_queue, | 992 | __part_stat_add(cpu, part, time_in_queue, |
993 | part_in_flight(part) * (now - part->stamp)); | 993 | part_in_flight(part) * (now - part->stamp)); |
994 | __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); | 994 | __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); |
995 | } | 995 | } |
996 | part->stamp = now; | 996 | part->stamp = now; |
997 | } | 997 | } |
998 | 998 | ||
999 | /** | 999 | /** |
1000 | * part_round_stats() - Round off the performance stats on a struct disk_stats. | 1000 | * part_round_stats() - Round off the performance stats on a struct disk_stats. |
1001 | * @cpu: cpu number for stats access | 1001 | * @cpu: cpu number for stats access |
1002 | * @part: target partition | 1002 | * @part: target partition |
1003 | * | 1003 | * |
1004 | * The average IO queue length and utilisation statistics are maintained | 1004 | * The average IO queue length and utilisation statistics are maintained |
1005 | * by observing the current state of the queue length and the amount of | 1005 | * by observing the current state of the queue length and the amount of |
1006 | * time it has been in this state for. | 1006 | * time it has been in this state for. |
1007 | * | 1007 | * |
1008 | * Normally, that accounting is done on IO completion, but that can result | 1008 | * Normally, that accounting is done on IO completion, but that can result |
1009 | * in more than a second's worth of IO being accounted for within any one | 1009 | * in more than a second's worth of IO being accounted for within any one |
1010 | * second, leading to >100% utilisation. To deal with that, we call this | 1010 | * second, leading to >100% utilisation. To deal with that, we call this |
1011 | * function to do a round-off before returning the results when reading | 1011 | * function to do a round-off before returning the results when reading |
1012 | * /proc/diskstats. This accounts immediately for all queue usage up to | 1012 | * /proc/diskstats. This accounts immediately for all queue usage up to |
1013 | * the current jiffies and restarts the counters again. | 1013 | * the current jiffies and restarts the counters again. |
1014 | */ | 1014 | */ |
1015 | void part_round_stats(int cpu, struct hd_struct *part) | 1015 | void part_round_stats(int cpu, struct hd_struct *part) |
1016 | { | 1016 | { |
1017 | unsigned long now = jiffies; | 1017 | unsigned long now = jiffies; |
1018 | 1018 | ||
1019 | if (part->partno) | 1019 | if (part->partno) |
1020 | part_round_stats_single(cpu, &part_to_disk(part)->part0, now); | 1020 | part_round_stats_single(cpu, &part_to_disk(part)->part0, now); |
1021 | part_round_stats_single(cpu, part, now); | 1021 | part_round_stats_single(cpu, part, now); |
1022 | } | 1022 | } |
1023 | EXPORT_SYMBOL_GPL(part_round_stats); | 1023 | EXPORT_SYMBOL_GPL(part_round_stats); |
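As a worked example of the accounting done in part_round_stats_single() above: with 3 requests in flight and 40 jiffies elapsed since part->stamp, the call adds 3 * 40 = 120 to time_in_queue and 40 to io_ticks, so the average queue depth reported for that interval works out to 120 / 40 = 3, without ever attributing more than the elapsed wall time to any one interval.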
1024 | 1024 | ||
1025 | /* | 1025 | /* |
1026 | * queue lock must be held | 1026 | * queue lock must be held |
1027 | */ | 1027 | */ |
1028 | void __blk_put_request(struct request_queue *q, struct request *req) | 1028 | void __blk_put_request(struct request_queue *q, struct request *req) |
1029 | { | 1029 | { |
1030 | if (unlikely(!q)) | 1030 | if (unlikely(!q)) |
1031 | return; | 1031 | return; |
1032 | if (unlikely(--req->ref_count)) | 1032 | if (unlikely(--req->ref_count)) |
1033 | return; | 1033 | return; |
1034 | 1034 | ||
1035 | elv_completed_request(q, req); | 1035 | elv_completed_request(q, req); |
1036 | 1036 | ||
1037 | /* this is a bio leak */ | 1037 | /* this is a bio leak */ |
1038 | WARN_ON(req->bio != NULL); | 1038 | WARN_ON(req->bio != NULL); |
1039 | 1039 | ||
1040 | /* | 1040 | /* |
1041 | * Request may not have originated from ll_rw_blk. if not, | 1041 | * Request may not have originated from ll_rw_blk. if not, |
1042 | * it didn't come out of our reserved rq pools | 1042 | * it didn't come out of our reserved rq pools |
1043 | */ | 1043 | */ |
1044 | if (req->cmd_flags & REQ_ALLOCED) { | 1044 | if (req->cmd_flags & REQ_ALLOCED) { |
1045 | int is_sync = rq_is_sync(req) != 0; | 1045 | int is_sync = rq_is_sync(req) != 0; |
1046 | int priv = req->cmd_flags & REQ_ELVPRIV; | 1046 | int priv = req->cmd_flags & REQ_ELVPRIV; |
1047 | 1047 | ||
1048 | BUG_ON(!list_empty(&req->queuelist)); | 1048 | BUG_ON(!list_empty(&req->queuelist)); |
1049 | BUG_ON(!hlist_unhashed(&req->hash)); | 1049 | BUG_ON(!hlist_unhashed(&req->hash)); |
1050 | 1050 | ||
1051 | blk_free_request(q, req); | 1051 | blk_free_request(q, req); |
1052 | freed_request(q, is_sync, priv); | 1052 | freed_request(q, is_sync, priv); |
1053 | } | 1053 | } |
1054 | } | 1054 | } |
1055 | EXPORT_SYMBOL_GPL(__blk_put_request); | 1055 | EXPORT_SYMBOL_GPL(__blk_put_request); |
1056 | 1056 | ||
1057 | void blk_put_request(struct request *req) | 1057 | void blk_put_request(struct request *req) |
1058 | { | 1058 | { |
1059 | unsigned long flags; | 1059 | unsigned long flags; |
1060 | struct request_queue *q = req->q; | 1060 | struct request_queue *q = req->q; |
1061 | 1061 | ||
1062 | spin_lock_irqsave(q->queue_lock, flags); | 1062 | spin_lock_irqsave(q->queue_lock, flags); |
1063 | __blk_put_request(q, req); | 1063 | __blk_put_request(q, req); |
1064 | spin_unlock_irqrestore(q->queue_lock, flags); | 1064 | spin_unlock_irqrestore(q->queue_lock, flags); |
1065 | } | 1065 | } |
1066 | EXPORT_SYMBOL(blk_put_request); | 1066 | EXPORT_SYMBOL(blk_put_request); |
1067 | 1067 | ||
1068 | /** | 1068 | /** |
1069 | * blk_add_request_payload - add a payload to a request | 1069 | * blk_add_request_payload - add a payload to a request |
1070 | * @rq: request to update | 1070 | * @rq: request to update |
1071 | * @page: page backing the payload | 1071 | * @page: page backing the payload |
1072 | * @len: length of the payload. | 1072 | * @len: length of the payload. |
1073 | * | 1073 | * |
1074 | * This allows a block driver to later add a payload to an already | 1074 | * This allows a block driver to later add a payload to an already |
1075 | * submitted request. The driver needs to take care of freeing the payload | 1075 | * submitted request. The driver needs to take care of freeing the payload |
1076 | * itself. | 1076 | * itself. |
1077 | * | 1077 | * |
1078 | * Note that this is a quite horrible hack and nothing but handling of | 1078 | * Note that this is a quite horrible hack and nothing but handling of |
1079 | * discard requests should ever use it. | 1079 | * discard requests should ever use it. |
1080 | */ | 1080 | */ |
1081 | void blk_add_request_payload(struct request *rq, struct page *page, | 1081 | void blk_add_request_payload(struct request *rq, struct page *page, |
1082 | unsigned int len) | 1082 | unsigned int len) |
1083 | { | 1083 | { |
1084 | struct bio *bio = rq->bio; | 1084 | struct bio *bio = rq->bio; |
1085 | 1085 | ||
1086 | bio->bi_io_vec->bv_page = page; | 1086 | bio->bi_io_vec->bv_page = page; |
1087 | bio->bi_io_vec->bv_offset = 0; | 1087 | bio->bi_io_vec->bv_offset = 0; |
1088 | bio->bi_io_vec->bv_len = len; | 1088 | bio->bi_io_vec->bv_len = len; |
1089 | 1089 | ||
1090 | bio->bi_size = len; | 1090 | bio->bi_size = len; |
1091 | bio->bi_vcnt = 1; | 1091 | bio->bi_vcnt = 1; |
1092 | bio->bi_phys_segments = 1; | 1092 | bio->bi_phys_segments = 1; |
1093 | 1093 | ||
1094 | rq->__data_len = rq->resid_len = len; | 1094 | rq->__data_len = rq->resid_len = len; |
1095 | rq->nr_phys_segments = 1; | 1095 | rq->nr_phys_segments = 1; |
1096 | rq->buffer = bio_data(bio); | 1096 | rq->buffer = bio_data(bio); |
1097 | } | 1097 | } |
1098 | EXPORT_SYMBOL_GPL(blk_add_request_payload); | 1098 | EXPORT_SYMBOL_GPL(blk_add_request_payload); |
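As a hedged illustration of the discard-only use the comment warns about, roughly what a SCSI-style prepare routine does: attach a single zeroed page as the payload of an already prepared discard request (rq and len are assumed to come from the caller, and freeing the page stays the driver's responsibility):

	struct page *page;

	page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
	if (!page)
		return BLKPREP_DEFER;		/* assumes a SCSI-style prep function */

	/* the driver builds its parameter list (e.g. an UNMAP descriptor) in the page */
	blk_add_request_payload(rq, page, len);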
1099 | 1099 | ||
1100 | static bool bio_attempt_back_merge(struct request_queue *q, struct request *req, | 1100 | static bool bio_attempt_back_merge(struct request_queue *q, struct request *req, |
1101 | struct bio *bio) | 1101 | struct bio *bio) |
1102 | { | 1102 | { |
1103 | const int ff = bio->bi_rw & REQ_FAILFAST_MASK; | 1103 | const int ff = bio->bi_rw & REQ_FAILFAST_MASK; |
1104 | 1104 | ||
1105 | /* | 1105 | /* |
1106 | * Debug stuff, kill later | 1106 | * Debug stuff, kill later |
1107 | */ | 1107 | */ |
1108 | if (!rq_mergeable(req)) { | 1108 | if (!rq_mergeable(req)) { |
1109 | blk_dump_rq_flags(req, "back"); | 1109 | blk_dump_rq_flags(req, "back"); |
1110 | return false; | 1110 | return false; |
1111 | } | 1111 | } |
1112 | 1112 | ||
1113 | if (!ll_back_merge_fn(q, req, bio)) | 1113 | if (!ll_back_merge_fn(q, req, bio)) |
1114 | return false; | 1114 | return false; |
1115 | 1115 | ||
1116 | trace_block_bio_backmerge(q, bio); | 1116 | trace_block_bio_backmerge(q, bio); |
1117 | 1117 | ||
1118 | if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) | 1118 | if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) |
1119 | blk_rq_set_mixed_merge(req); | 1119 | blk_rq_set_mixed_merge(req); |
1120 | 1120 | ||
1121 | req->biotail->bi_next = bio; | 1121 | req->biotail->bi_next = bio; |
1122 | req->biotail = bio; | 1122 | req->biotail = bio; |
1123 | req->__data_len += bio->bi_size; | 1123 | req->__data_len += bio->bi_size; |
1124 | req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); | 1124 | req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); |
1125 | 1125 | ||
1126 | drive_stat_acct(req, 0); | 1126 | drive_stat_acct(req, 0); |
1127 | return true; | 1127 | return true; |
1128 | } | 1128 | } |
1129 | 1129 | ||
1130 | static bool bio_attempt_front_merge(struct request_queue *q, | 1130 | static bool bio_attempt_front_merge(struct request_queue *q, |
1131 | struct request *req, struct bio *bio) | 1131 | struct request *req, struct bio *bio) |
1132 | { | 1132 | { |
1133 | const int ff = bio->bi_rw & REQ_FAILFAST_MASK; | 1133 | const int ff = bio->bi_rw & REQ_FAILFAST_MASK; |
1134 | sector_t sector; | 1134 | sector_t sector; |
1135 | 1135 | ||
1136 | /* | 1136 | /* |
1137 | * Debug stuff, kill later | 1137 | * Debug stuff, kill later |
1138 | */ | 1138 | */ |
1139 | if (!rq_mergeable(req)) { | 1139 | if (!rq_mergeable(req)) { |
1140 | blk_dump_rq_flags(req, "front"); | 1140 | blk_dump_rq_flags(req, "front"); |
1141 | return false; | 1141 | return false; |
1142 | } | 1142 | } |
1143 | 1143 | ||
1144 | if (!ll_front_merge_fn(q, req, bio)) | 1144 | if (!ll_front_merge_fn(q, req, bio)) |
1145 | return false; | 1145 | return false; |
1146 | 1146 | ||
1147 | trace_block_bio_frontmerge(q, bio); | 1147 | trace_block_bio_frontmerge(q, bio); |
1148 | 1148 | ||
1149 | if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) | 1149 | if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) |
1150 | blk_rq_set_mixed_merge(req); | 1150 | blk_rq_set_mixed_merge(req); |
1151 | 1151 | ||
1152 | sector = bio->bi_sector; | 1152 | sector = bio->bi_sector; |
1153 | 1153 | ||
1154 | bio->bi_next = req->bio; | 1154 | bio->bi_next = req->bio; |
1155 | req->bio = bio; | 1155 | req->bio = bio; |
1156 | 1156 | ||
1157 | /* | 1157 | /* |
1158 | * may not be valid. if the low level driver said | 1158 | * may not be valid. if the low level driver said |
1159 | * it didn't need a bounce buffer then it better | 1159 | * it didn't need a bounce buffer then it better |
1160 | * not touch req->buffer either... | 1160 | * not touch req->buffer either... |
1161 | */ | 1161 | */ |
1162 | req->buffer = bio_data(bio); | 1162 | req->buffer = bio_data(bio); |
1163 | req->__sector = bio->bi_sector; | 1163 | req->__sector = bio->bi_sector; |
1164 | req->__data_len += bio->bi_size; | 1164 | req->__data_len += bio->bi_size; |
1165 | req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); | 1165 | req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); |
1166 | 1166 | ||
1167 | drive_stat_acct(req, 0); | 1167 | drive_stat_acct(req, 0); |
1168 | return true; | 1168 | return true; |
1169 | } | 1169 | } |
1170 | 1170 | ||
1171 | /* | 1171 | /* |
1172 | * Attempts to merge with the plugged list in the current process. Returns | 1172 | * Attempts to merge with the plugged list in the current process. Returns |
1173 | * true if merge was successful, otherwise false. | 1173 | * true if merge was successful, otherwise false. |
1174 | */ | 1174 | */ |
1175 | static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q, | 1175 | static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q, |
1176 | struct bio *bio) | 1176 | struct bio *bio) |
1177 | { | 1177 | { |
1178 | struct blk_plug *plug; | 1178 | struct blk_plug *plug; |
1179 | struct request *rq; | 1179 | struct request *rq; |
1180 | bool ret = false; | 1180 | bool ret = false; |
1181 | 1181 | ||
1182 | plug = tsk->plug; | 1182 | plug = tsk->plug; |
1183 | if (!plug) | 1183 | if (!plug) |
1184 | goto out; | 1184 | goto out; |
1185 | 1185 | ||
1186 | list_for_each_entry_reverse(rq, &plug->list, queuelist) { | 1186 | list_for_each_entry_reverse(rq, &plug->list, queuelist) { |
1187 | int el_ret; | 1187 | int el_ret; |
1188 | 1188 | ||
1189 | if (rq->q != q) | 1189 | if (rq->q != q) |
1190 | continue; | 1190 | continue; |
1191 | 1191 | ||
1192 | el_ret = elv_try_merge(rq, bio); | 1192 | el_ret = elv_try_merge(rq, bio); |
1193 | if (el_ret == ELEVATOR_BACK_MERGE) { | 1193 | if (el_ret == ELEVATOR_BACK_MERGE) { |
1194 | ret = bio_attempt_back_merge(q, rq, bio); | 1194 | ret = bio_attempt_back_merge(q, rq, bio); |
1195 | if (ret) | 1195 | if (ret) |
1196 | break; | 1196 | break; |
1197 | } else if (el_ret == ELEVATOR_FRONT_MERGE) { | 1197 | } else if (el_ret == ELEVATOR_FRONT_MERGE) { |
1198 | ret = bio_attempt_front_merge(q, rq, bio); | 1198 | ret = bio_attempt_front_merge(q, rq, bio); |
1199 | if (ret) | 1199 | if (ret) |
1200 | break; | 1200 | break; |
1201 | } | 1201 | } |
1202 | } | 1202 | } |
1203 | out: | 1203 | out: |
1204 | return ret; | 1204 | return ret; |
1205 | } | 1205 | } |
1206 | 1206 | ||
1207 | void init_request_from_bio(struct request *req, struct bio *bio) | 1207 | void init_request_from_bio(struct request *req, struct bio *bio) |
1208 | { | 1208 | { |
1209 | req->cpu = bio->bi_comp_cpu; | 1209 | req->cpu = bio->bi_comp_cpu; |
1210 | req->cmd_type = REQ_TYPE_FS; | 1210 | req->cmd_type = REQ_TYPE_FS; |
1211 | 1211 | ||
1212 | req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK; | 1212 | req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK; |
1213 | if (bio->bi_rw & REQ_RAHEAD) | 1213 | if (bio->bi_rw & REQ_RAHEAD) |
1214 | req->cmd_flags |= REQ_FAILFAST_MASK; | 1214 | req->cmd_flags |= REQ_FAILFAST_MASK; |
1215 | 1215 | ||
1216 | req->errors = 0; | 1216 | req->errors = 0; |
1217 | req->__sector = bio->bi_sector; | 1217 | req->__sector = bio->bi_sector; |
1218 | req->ioprio = bio_prio(bio); | 1218 | req->ioprio = bio_prio(bio); |
1219 | blk_rq_bio_prep(req->q, req, bio); | 1219 | blk_rq_bio_prep(req->q, req, bio); |
1220 | } | 1220 | } |
1221 | 1221 | ||
1222 | static int __make_request(struct request_queue *q, struct bio *bio) | 1222 | static int __make_request(struct request_queue *q, struct bio *bio) |
1223 | { | 1223 | { |
1224 | const bool sync = !!(bio->bi_rw & REQ_SYNC); | 1224 | const bool sync = !!(bio->bi_rw & REQ_SYNC); |
1225 | struct blk_plug *plug; | 1225 | struct blk_plug *plug; |
1226 | int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT; | 1226 | int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT; |
1227 | struct request *req; | 1227 | struct request *req; |
1228 | 1228 | ||
1229 | /* | 1229 | /* |
1230 | * low level driver can indicate that it wants pages above a | 1230 | * low level driver can indicate that it wants pages above a |
1231 | * certain limit bounced to low memory (ie for highmem, or even | 1231 | * certain limit bounced to low memory (ie for highmem, or even |
1232 | * ISA dma in theory) | 1232 | * ISA dma in theory) |
1233 | */ | 1233 | */ |
1234 | blk_queue_bounce(q, &bio); | 1234 | blk_queue_bounce(q, &bio); |
1235 | 1235 | ||
1236 | if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { | 1236 | if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { |
1237 | spin_lock_irq(q->queue_lock); | 1237 | spin_lock_irq(q->queue_lock); |
1238 | where = ELEVATOR_INSERT_FLUSH; | 1238 | where = ELEVATOR_INSERT_FLUSH; |
1239 | goto get_rq; | 1239 | goto get_rq; |
1240 | } | 1240 | } |
1241 | 1241 | ||
1242 | /* | 1242 | /* |
1243 | * Check if we can merge with the plugged list before grabbing | 1243 | * Check if we can merge with the plugged list before grabbing |
1244 | * any locks. | 1244 | * any locks. |
1245 | */ | 1245 | */ |
1246 | if (attempt_plug_merge(current, q, bio)) | 1246 | if (attempt_plug_merge(current, q, bio)) |
1247 | goto out; | 1247 | goto out; |
1248 | 1248 | ||
1249 | spin_lock_irq(q->queue_lock); | 1249 | spin_lock_irq(q->queue_lock); |
1250 | 1250 | ||
1251 | el_ret = elv_merge(q, &req, bio); | 1251 | el_ret = elv_merge(q, &req, bio); |
1252 | if (el_ret == ELEVATOR_BACK_MERGE) { | 1252 | if (el_ret == ELEVATOR_BACK_MERGE) { |
1253 | BUG_ON(req->cmd_flags & REQ_ON_PLUG); | 1253 | BUG_ON(req->cmd_flags & REQ_ON_PLUG); |
1254 | if (bio_attempt_back_merge(q, req, bio)) { | 1254 | if (bio_attempt_back_merge(q, req, bio)) { |
1255 | if (!attempt_back_merge(q, req)) | 1255 | if (!attempt_back_merge(q, req)) |
1256 | elv_merged_request(q, req, el_ret); | 1256 | elv_merged_request(q, req, el_ret); |
1257 | goto out_unlock; | 1257 | goto out_unlock; |
1258 | } | 1258 | } |
1259 | } else if (el_ret == ELEVATOR_FRONT_MERGE) { | 1259 | } else if (el_ret == ELEVATOR_FRONT_MERGE) { |
1260 | BUG_ON(req->cmd_flags & REQ_ON_PLUG); | 1260 | BUG_ON(req->cmd_flags & REQ_ON_PLUG); |
1261 | if (bio_attempt_front_merge(q, req, bio)) { | 1261 | if (bio_attempt_front_merge(q, req, bio)) { |
1262 | if (!attempt_front_merge(q, req)) | 1262 | if (!attempt_front_merge(q, req)) |
1263 | elv_merged_request(q, req, el_ret); | 1263 | elv_merged_request(q, req, el_ret); |
1264 | goto out_unlock; | 1264 | goto out_unlock; |
1265 | } | 1265 | } |
1266 | } | 1266 | } |
1267 | 1267 | ||
1268 | get_rq: | 1268 | get_rq: |
1269 | /* | 1269 | /* |
1270 | * This sync check and mask will be re-done in init_request_from_bio(), | 1270 | * This sync check and mask will be re-done in init_request_from_bio(), |
1271 | * but we need to set it earlier to expose the sync flag to the | 1271 | * but we need to set it earlier to expose the sync flag to the |
1272 | * rq allocator and io schedulers. | 1272 | * rq allocator and io schedulers. |
1273 | */ | 1273 | */ |
1274 | rw_flags = bio_data_dir(bio); | 1274 | rw_flags = bio_data_dir(bio); |
1275 | if (sync) | 1275 | if (sync) |
1276 | rw_flags |= REQ_SYNC; | 1276 | rw_flags |= REQ_SYNC; |
1277 | 1277 | ||
1278 | /* | 1278 | /* |
1279 | * Grab a free request. This might sleep but cannot fail. | 1279 | * Grab a free request. This might sleep but cannot fail. |
1280 | * Returns with the queue unlocked. | 1280 | * Returns with the queue unlocked. |
1281 | */ | 1281 | */ |
1282 | req = get_request_wait(q, rw_flags, bio); | 1282 | req = get_request_wait(q, rw_flags, bio); |
1283 | 1283 | ||
1284 | /* | 1284 | /* |
1285 | * After dropping the lock and possibly sleeping here, our request | 1285 | * After dropping the lock and possibly sleeping here, our request |
1286 | * may now be mergeable after it had proven unmergeable (above). | 1286 | * may now be mergeable after it had proven unmergeable (above). |
1287 | * We don't worry about that case for efficiency. It won't happen | 1287 | * We don't worry about that case for efficiency. It won't happen |
1288 | * often, and the elevators are able to handle it. | 1288 | * often, and the elevators are able to handle it. |
1289 | */ | 1289 | */ |
1290 | init_request_from_bio(req, bio); | 1290 | init_request_from_bio(req, bio); |
1291 | 1291 | ||
1292 | if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || | 1292 | if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || |
1293 | bio_flagged(bio, BIO_CPU_AFFINE)) { | 1293 | bio_flagged(bio, BIO_CPU_AFFINE)) { |
1294 | req->cpu = blk_cpu_to_group(get_cpu()); | 1294 | req->cpu = blk_cpu_to_group(get_cpu()); |
1295 | put_cpu(); | 1295 | put_cpu(); |
1296 | } | 1296 | } |
1297 | 1297 | ||
1298 | plug = current->plug; | 1298 | plug = current->plug; |
1299 | if (plug) { | 1299 | if (plug) { |
1300 | /* | 1300 | /* |
1301 | * If this is the first request added after a plug, fire | 1301 | * If this is the first request added after a plug, fire |
1302 | * off a plug trace. If others have been added before, check | 1302 | * off a plug trace. If others have been added before, check |
1303 | * if we have multiple devices in this plug. If so, make a | 1303 | * if we have multiple devices in this plug. If so, make a |
1304 | * note to sort the list before dispatch. | 1304 | * note to sort the list before dispatch. |
1305 | */ | 1305 | */ |
1306 | if (list_empty(&plug->list)) | 1306 | if (list_empty(&plug->list)) |
1307 | trace_block_plug(q); | 1307 | trace_block_plug(q); |
1308 | else if (!plug->should_sort) { | 1308 | else if (!plug->should_sort) { |
1309 | struct request *__rq; | 1309 | struct request *__rq; |
1310 | 1310 | ||
1311 | __rq = list_entry_rq(plug->list.prev); | 1311 | __rq = list_entry_rq(plug->list.prev); |
1312 | if (__rq->q != q) | 1312 | if (__rq->q != q) |
1313 | plug->should_sort = 1; | 1313 | plug->should_sort = 1; |
1314 | } | 1314 | } |
1315 | /* | 1315 | /* |
1316 | * Debug flag, kill later | 1316 | * Debug flag, kill later |
1317 | */ | 1317 | */ |
1318 | req->cmd_flags |= REQ_ON_PLUG; | 1318 | req->cmd_flags |= REQ_ON_PLUG; |
1319 | list_add_tail(&req->queuelist, &plug->list); | 1319 | list_add_tail(&req->queuelist, &plug->list); |
1320 | drive_stat_acct(req, 1); | 1320 | drive_stat_acct(req, 1); |
1321 | } else { | 1321 | } else { |
1322 | spin_lock_irq(q->queue_lock); | 1322 | spin_lock_irq(q->queue_lock); |
1323 | add_acct_request(q, req, where); | 1323 | add_acct_request(q, req, where); |
1324 | __blk_run_queue(q, false); | 1324 | __blk_run_queue(q, false); |
1325 | out_unlock: | 1325 | out_unlock: |
1326 | spin_unlock_irq(q->queue_lock); | 1326 | spin_unlock_irq(q->queue_lock); |
1327 | } | 1327 | } |
1328 | out: | 1328 | out: |
1329 | return 0; | 1329 | return 0; |
1330 | } | 1330 | } |
1331 | 1331 | ||
1332 | /* | 1332 | /* |
1333 | * If bio->bi_dev is a partition, remap the location | 1333 | * If bio->bi_dev is a partition, remap the location |
1334 | */ | 1334 | */ |
1335 | static inline void blk_partition_remap(struct bio *bio) | 1335 | static inline void blk_partition_remap(struct bio *bio) |
1336 | { | 1336 | { |
1337 | struct block_device *bdev = bio->bi_bdev; | 1337 | struct block_device *bdev = bio->bi_bdev; |
1338 | 1338 | ||
1339 | if (bio_sectors(bio) && bdev != bdev->bd_contains) { | 1339 | if (bio_sectors(bio) && bdev != bdev->bd_contains) { |
1340 | struct hd_struct *p = bdev->bd_part; | 1340 | struct hd_struct *p = bdev->bd_part; |
1341 | 1341 | ||
1342 | bio->bi_sector += p->start_sect; | 1342 | bio->bi_sector += p->start_sect; |
1343 | bio->bi_bdev = bdev->bd_contains; | 1343 | bio->bi_bdev = bdev->bd_contains; |
1344 | 1344 | ||
1345 | trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio, | 1345 | trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio, |
1346 | bdev->bd_dev, | 1346 | bdev->bd_dev, |
1347 | bio->bi_sector - p->start_sect); | 1347 | bio->bi_sector - p->start_sect); |
1348 | } | 1348 | } |
1349 | } | 1349 | } |
1350 | 1350 | ||
1351 | static void handle_bad_sector(struct bio *bio) | 1351 | static void handle_bad_sector(struct bio *bio) |
1352 | { | 1352 | { |
1353 | char b[BDEVNAME_SIZE]; | 1353 | char b[BDEVNAME_SIZE]; |
1354 | 1354 | ||
1355 | printk(KERN_INFO "attempt to access beyond end of device\n"); | 1355 | printk(KERN_INFO "attempt to access beyond end of device\n"); |
1356 | printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", | 1356 | printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", |
1357 | bdevname(bio->bi_bdev, b), | 1357 | bdevname(bio->bi_bdev, b), |
1358 | bio->bi_rw, | 1358 | bio->bi_rw, |
1359 | (unsigned long long)bio->bi_sector + bio_sectors(bio), | 1359 | (unsigned long long)bio->bi_sector + bio_sectors(bio), |
1360 | (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9)); | 1360 | (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9)); |
1361 | 1361 | ||
1362 | set_bit(BIO_EOF, &bio->bi_flags); | 1362 | set_bit(BIO_EOF, &bio->bi_flags); |
1363 | } | 1363 | } |
1364 | 1364 | ||
1365 | #ifdef CONFIG_FAIL_MAKE_REQUEST | 1365 | #ifdef CONFIG_FAIL_MAKE_REQUEST |
1366 | 1366 | ||
1367 | static DECLARE_FAULT_ATTR(fail_make_request); | 1367 | static DECLARE_FAULT_ATTR(fail_make_request); |
1368 | 1368 | ||
1369 | static int __init setup_fail_make_request(char *str) | 1369 | static int __init setup_fail_make_request(char *str) |
1370 | { | 1370 | { |
1371 | return setup_fault_attr(&fail_make_request, str); | 1371 | return setup_fault_attr(&fail_make_request, str); |
1372 | } | 1372 | } |
1373 | __setup("fail_make_request=", setup_fail_make_request); | 1373 | __setup("fail_make_request=", setup_fail_make_request); |
1374 | 1374 | ||
1375 | static int should_fail_request(struct bio *bio) | 1375 | static int should_fail_request(struct bio *bio) |
1376 | { | 1376 | { |
1377 | struct hd_struct *part = bio->bi_bdev->bd_part; | 1377 | struct hd_struct *part = bio->bi_bdev->bd_part; |
1378 | 1378 | ||
1379 | if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail) | 1379 | if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail) |
1380 | return should_fail(&fail_make_request, bio->bi_size); | 1380 | return should_fail(&fail_make_request, bio->bi_size); |
1381 | 1381 | ||
1382 | return 0; | 1382 | return 0; |
1383 | } | 1383 | } |
1384 | 1384 | ||
1385 | static int __init fail_make_request_debugfs(void) | 1385 | static int __init fail_make_request_debugfs(void) |
1386 | { | 1386 | { |
1387 | return init_fault_attr_dentries(&fail_make_request, | 1387 | return init_fault_attr_dentries(&fail_make_request, |
1388 | "fail_make_request"); | 1388 | "fail_make_request"); |
1389 | } | 1389 | } |
1390 | 1390 | ||
1391 | late_initcall(fail_make_request_debugfs); | 1391 | late_initcall(fail_make_request_debugfs); |
1392 | 1392 | ||
1393 | #else /* CONFIG_FAIL_MAKE_REQUEST */ | 1393 | #else /* CONFIG_FAIL_MAKE_REQUEST */ |
1394 | 1394 | ||
1395 | static inline int should_fail_request(struct bio *bio) | 1395 | static inline int should_fail_request(struct bio *bio) |
1396 | { | 1396 | { |
1397 | return 0; | 1397 | return 0; |
1398 | } | 1398 | } |
1399 | 1399 | ||
1400 | #endif /* CONFIG_FAIL_MAKE_REQUEST */ | 1400 | #endif /* CONFIG_FAIL_MAKE_REQUEST */ |
1401 | 1401 | ||
1402 | /* | 1402 | /* |
1403 | * Check whether this bio extends beyond the end of the device. | 1403 | * Check whether this bio extends beyond the end of the device. |
1404 | */ | 1404 | */ |
1405 | static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors) | 1405 | static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors) |
1406 | { | 1406 | { |
1407 | sector_t maxsector; | 1407 | sector_t maxsector; |
1408 | 1408 | ||
1409 | if (!nr_sectors) | 1409 | if (!nr_sectors) |
1410 | return 0; | 1410 | return 0; |
1411 | 1411 | ||
1412 | /* Test device or partition size, when known. */ | 1412 | /* Test device or partition size, when known. */ |
1413 | maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9; | 1413 | maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9; |
1414 | if (maxsector) { | 1414 | if (maxsector) { |
1415 | sector_t sector = bio->bi_sector; | 1415 | sector_t sector = bio->bi_sector; |
1416 | 1416 | ||
1417 | if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { | 1417 | if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { |
1418 | /* | 1418 | /* |
1419 | * This may well happen - the kernel calls bread() | 1419 | * This may well happen - the kernel calls bread() |
1420 | * without checking the size of the device, e.g., when | 1420 | * without checking the size of the device, e.g., when |
1421 | * mounting a device. | 1421 | * mounting a device. |
1422 | */ | 1422 | */ |
1423 | handle_bad_sector(bio); | 1423 | handle_bad_sector(bio); |
1424 | return 1; | 1424 | return 1; |
1425 | } | 1425 | } |
1426 | } | 1426 | } |
1427 | 1427 | ||
1428 | return 0; | 1428 | return 0; |
1429 | } | 1429 | } |
1430 | 1430 | ||
1431 | /** | 1431 | /** |
1432 | * generic_make_request - hand a buffer to its device driver for I/O | 1432 | * generic_make_request - hand a buffer to its device driver for I/O |
1433 | * @bio: The bio describing the location in memory and on the device. | 1433 | * @bio: The bio describing the location in memory and on the device. |
1434 | * | 1434 | * |
1435 | * generic_make_request() is used to make I/O requests of block | 1435 | * generic_make_request() is used to make I/O requests of block |
1436 | * devices. It is passed a &struct bio, which describes the I/O that needs | 1436 | * devices. It is passed a &struct bio, which describes the I/O that needs |
1437 | * to be done. | 1437 | * to be done. |
1438 | * | 1438 | * |
1439 | * generic_make_request() does not return any status. The | 1439 | * generic_make_request() does not return any status. The |
1440 | * success/failure status of the request, along with notification of | 1440 | * success/failure status of the request, along with notification of |
1441 | * completion, is delivered asynchronously through the bio->bi_end_io | 1441 | * completion, is delivered asynchronously through the bio->bi_end_io |
1442 | * function described (one day) elsewhere. | 1442 | * function described (one day) elsewhere. |
1443 | * | 1443 | * |
1444 | * The caller of generic_make_request must make sure that bi_io_vec | 1444 | * The caller of generic_make_request must make sure that bi_io_vec |
1445 | * are set to describe the memory buffer, and that bi_dev and bi_sector are | 1445 | * are set to describe the memory buffer, and that bi_dev and bi_sector are |
1446 | * set to describe the device address, and the | 1446 | * set to describe the device address, and the |
1447 | * bi_end_io and optionally bi_private are set to describe how | 1447 | * bi_end_io and optionally bi_private are set to describe how |
1448 | * completion notification should be signaled. | 1448 | * completion notification should be signaled. |
1449 | * | 1449 | * |
1450 | * generic_make_request and the drivers it calls may use bi_next if this | 1450 | * generic_make_request and the drivers it calls may use bi_next if this |
1451 | * bio happens to be merged with someone else, and may change bi_dev and | 1451 | * bio happens to be merged with someone else, and may change bi_dev and |
1452 | * bi_sector for remaps as it sees fit. So the values of these fields | 1452 | * bi_sector for remaps as it sees fit. So the values of these fields |
1453 | * should NOT be depended on after the call to generic_make_request. | 1453 | * should NOT be depended on after the call to generic_make_request. |
1454 | */ | 1454 | */ |
1455 | static inline void __generic_make_request(struct bio *bio) | 1455 | static inline void __generic_make_request(struct bio *bio) |
1456 | { | 1456 | { |
1457 | struct request_queue *q; | 1457 | struct request_queue *q; |
1458 | sector_t old_sector; | 1458 | sector_t old_sector; |
1459 | int ret, nr_sectors = bio_sectors(bio); | 1459 | int ret, nr_sectors = bio_sectors(bio); |
1460 | dev_t old_dev; | 1460 | dev_t old_dev; |
1461 | int err = -EIO; | 1461 | int err = -EIO; |
1462 | 1462 | ||
1463 | might_sleep(); | 1463 | might_sleep(); |
1464 | 1464 | ||
1465 | if (bio_check_eod(bio, nr_sectors)) | 1465 | if (bio_check_eod(bio, nr_sectors)) |
1466 | goto end_io; | 1466 | goto end_io; |
1467 | 1467 | ||
1468 | /* | 1468 | /* |
1469 | * Resolve the mapping until finished. (drivers are | 1469 | * Resolve the mapping until finished. (drivers are |
1470 | * still free to implement/resolve their own stacking | 1470 | * still free to implement/resolve their own stacking |
1471 | * by explicitly returning 0) | 1471 | * by explicitly returning 0) |
1472 | * | 1472 | * |
1473 | * NOTE: we don't repeat the blk_size check for each new device. | 1473 | * NOTE: we don't repeat the blk_size check for each new device. |
1474 | * Stacking drivers are expected to know what they are doing. | 1474 | * Stacking drivers are expected to know what they are doing. |
1475 | */ | 1475 | */ |
1476 | old_sector = -1; | 1476 | old_sector = -1; |
1477 | old_dev = 0; | 1477 | old_dev = 0; |
1478 | do { | 1478 | do { |
1479 | char b[BDEVNAME_SIZE]; | 1479 | char b[BDEVNAME_SIZE]; |
1480 | 1480 | ||
1481 | q = bdev_get_queue(bio->bi_bdev); | 1481 | q = bdev_get_queue(bio->bi_bdev); |
1482 | if (unlikely(!q)) { | 1482 | if (unlikely(!q)) { |
1483 | printk(KERN_ERR | 1483 | printk(KERN_ERR |
1484 | "generic_make_request: Trying to access " | 1484 | "generic_make_request: Trying to access " |
1485 | "nonexistent block-device %s (%Lu)\n", | 1485 | "nonexistent block-device %s (%Lu)\n", |
1486 | bdevname(bio->bi_bdev, b), | 1486 | bdevname(bio->bi_bdev, b), |
1487 | (long long) bio->bi_sector); | 1487 | (long long) bio->bi_sector); |
1488 | goto end_io; | 1488 | goto end_io; |
1489 | } | 1489 | } |
1490 | 1490 | ||
1491 | if (unlikely(!(bio->bi_rw & REQ_DISCARD) && | 1491 | if (unlikely(!(bio->bi_rw & REQ_DISCARD) && |
1492 | nr_sectors > queue_max_hw_sectors(q))) { | 1492 | nr_sectors > queue_max_hw_sectors(q))) { |
1493 | printk(KERN_ERR "bio too big device %s (%u > %u)\n", | 1493 | printk(KERN_ERR "bio too big device %s (%u > %u)\n", |
1494 | bdevname(bio->bi_bdev, b), | 1494 | bdevname(bio->bi_bdev, b), |
1495 | bio_sectors(bio), | 1495 | bio_sectors(bio), |
1496 | queue_max_hw_sectors(q)); | 1496 | queue_max_hw_sectors(q)); |
1497 | goto end_io; | 1497 | goto end_io; |
1498 | } | 1498 | } |
1499 | 1499 | ||
1500 | if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) | 1500 | if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) |
1501 | goto end_io; | 1501 | goto end_io; |
1502 | 1502 | ||
1503 | if (should_fail_request(bio)) | 1503 | if (should_fail_request(bio)) |
1504 | goto end_io; | 1504 | goto end_io; |
1505 | 1505 | ||
1506 | /* | 1506 | /* |
1507 | * If this device has partitions, remap block n | 1507 | * If this device has partitions, remap block n |
1508 | * of partition p to block n+start(p) of the disk. | 1508 | * of partition p to block n+start(p) of the disk. |
1509 | */ | 1509 | */ |
1510 | blk_partition_remap(bio); | 1510 | blk_partition_remap(bio); |
1511 | 1511 | ||
1512 | if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) | 1512 | if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) |
1513 | goto end_io; | 1513 | goto end_io; |
1514 | 1514 | ||
1515 | if (old_sector != -1) | 1515 | if (old_sector != -1) |
1516 | trace_block_bio_remap(q, bio, old_dev, old_sector); | 1516 | trace_block_bio_remap(q, bio, old_dev, old_sector); |
1517 | 1517 | ||
1518 | old_sector = bio->bi_sector; | 1518 | old_sector = bio->bi_sector; |
1519 | old_dev = bio->bi_bdev->bd_dev; | 1519 | old_dev = bio->bi_bdev->bd_dev; |
1520 | 1520 | ||
1521 | if (bio_check_eod(bio, nr_sectors)) | 1521 | if (bio_check_eod(bio, nr_sectors)) |
1522 | goto end_io; | 1522 | goto end_io; |
1523 | 1523 | ||
1524 | /* | 1524 | /* |
1525 | * Filter flush bio's early so that make_request based | 1525 | * Filter flush bio's early so that make_request based |
1526 | * drivers without flush support don't have to worry | 1526 | * drivers without flush support don't have to worry |
1527 | * about them. | 1527 | * about them. |
1528 | */ | 1528 | */ |
1529 | if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) { | 1529 | if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) { |
1530 | bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA); | 1530 | bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA); |
1531 | if (!nr_sectors) { | 1531 | if (!nr_sectors) { |
1532 | err = 0; | 1532 | err = 0; |
1533 | goto end_io; | 1533 | goto end_io; |
1534 | } | 1534 | } |
1535 | } | 1535 | } |
1536 | 1536 | ||
1537 | if ((bio->bi_rw & REQ_DISCARD) && | 1537 | if ((bio->bi_rw & REQ_DISCARD) && |
1538 | (!blk_queue_discard(q) || | 1538 | (!blk_queue_discard(q) || |
1539 | ((bio->bi_rw & REQ_SECURE) && | 1539 | ((bio->bi_rw & REQ_SECURE) && |
1540 | !blk_queue_secdiscard(q)))) { | 1540 | !blk_queue_secdiscard(q)))) { |
1541 | err = -EOPNOTSUPP; | 1541 | err = -EOPNOTSUPP; |
1542 | goto end_io; | 1542 | goto end_io; |
1543 | } | 1543 | } |
1544 | 1544 | ||
1545 | blk_throtl_bio(q, &bio); | 1545 | blk_throtl_bio(q, &bio); |
1546 | 1546 | ||
1547 | /* | 1547 | /* |
1548 | * If bio = NULL, bio has been throttled and will be submitted | 1548 | * If bio = NULL, bio has been throttled and will be submitted |
1549 | * later. | 1549 | * later. |
1550 | */ | 1550 | */ |
1551 | if (!bio) | 1551 | if (!bio) |
1552 | break; | 1552 | break; |
1553 | 1553 | ||
1554 | trace_block_bio_queue(q, bio); | 1554 | trace_block_bio_queue(q, bio); |
1555 | 1555 | ||
1556 | ret = q->make_request_fn(q, bio); | 1556 | ret = q->make_request_fn(q, bio); |
1557 | } while (ret); | 1557 | } while (ret); |
1558 | 1558 | ||
1559 | return; | 1559 | return; |
1560 | 1560 | ||
1561 | end_io: | 1561 | end_io: |
1562 | bio_endio(bio, err); | 1562 | bio_endio(bio, err); |
1563 | } | 1563 | } |
1564 | 1564 | ||
1565 | /* | 1565 | /* |
1566 | * We only want one ->make_request_fn to be active at a time, | 1566 | * We only want one ->make_request_fn to be active at a time, |
1567 | * else stack usage with stacked devices could be a problem. | 1567 | * else stack usage with stacked devices could be a problem. |
1568 | * So use current->bio_list to keep a list of requests | 1568 | * So use current->bio_list to keep a list of requests |
1569 | * submitted by a make_request_fn function. | 1569 | * submitted by a make_request_fn function. |
1570 | * current->bio_list is also used as a flag to say if | 1570 | * current->bio_list is also used as a flag to say if |
1571 | * generic_make_request is currently active in this task or not. | 1571 | * generic_make_request is currently active in this task or not. |
1572 | * If it is NULL, then no make_request is active. If it is non-NULL, | 1572 | * If it is NULL, then no make_request is active. If it is non-NULL, |
1573 | * then a make_request is active, and new requests should be added | 1573 | * then a make_request is active, and new requests should be added |
1574 | * at the tail | 1574 | * at the tail |
1575 | */ | 1575 | */ |
1576 | void generic_make_request(struct bio *bio) | 1576 | void generic_make_request(struct bio *bio) |
1577 | { | 1577 | { |
1578 | struct bio_list bio_list_on_stack; | 1578 | struct bio_list bio_list_on_stack; |
1579 | 1579 | ||
1580 | if (current->bio_list) { | 1580 | if (current->bio_list) { |
1581 | /* make_request is active */ | 1581 | /* make_request is active */ |
1582 | bio_list_add(current->bio_list, bio); | 1582 | bio_list_add(current->bio_list, bio); |
1583 | return; | 1583 | return; |
1584 | } | 1584 | } |
1585 | /* following loop may be a bit non-obvious, and so deserves some | 1585 | /* following loop may be a bit non-obvious, and so deserves some |
1586 | * explanation. | 1586 | * explanation. |
1587 | * Before entering the loop, bio->bi_next is NULL (as all callers | 1587 | * Before entering the loop, bio->bi_next is NULL (as all callers |
1588 | * ensure that) so we have a list with a single bio. | 1588 | * ensure that) so we have a list with a single bio. |
1589 | * We pretend that we have just taken it off a longer list, so | 1589 | * We pretend that we have just taken it off a longer list, so |
1590 | * we assign bio_list to a pointer to the bio_list_on_stack, | 1590 | * we assign bio_list to a pointer to the bio_list_on_stack, |
1591 | * thus initialising the bio_list of new bios to be | 1591 | * thus initialising the bio_list of new bios to be |
1592 | * added. __generic_make_request may indeed add some more bios | 1592 | * added. __generic_make_request may indeed add some more bios |
1593 | * through a recursive call to generic_make_request. If it | 1593 | * through a recursive call to generic_make_request. If it |
1594 | * did, we find a non-NULL value in bio_list and re-enter the loop | 1594 | * did, we find a non-NULL value in bio_list and re-enter the loop |
1595 | * from the top. In this case we really did just take the bio | 1595 | * from the top. In this case we really did just take the bio |
1596 | * of the top of the list (no pretending) and so remove it from | 1596 | * of the top of the list (no pretending) and so remove it from |
1597 | * bio_list, and call into __generic_make_request again. | 1597 | * bio_list, and call into __generic_make_request again. |
1598 | * | 1598 | * |
1599 | * The loop was structured like this to make only one call to | 1599 | * The loop was structured like this to make only one call to |
1600 | * __generic_make_request (which is important as it is large and | 1600 | * __generic_make_request (which is important as it is large and |
1601 | * inlined) and to keep the structure simple. | 1601 | * inlined) and to keep the structure simple. |
1602 | */ | 1602 | */ |
1603 | BUG_ON(bio->bi_next); | 1603 | BUG_ON(bio->bi_next); |
1604 | bio_list_init(&bio_list_on_stack); | 1604 | bio_list_init(&bio_list_on_stack); |
1605 | current->bio_list = &bio_list_on_stack; | 1605 | current->bio_list = &bio_list_on_stack; |
1606 | do { | 1606 | do { |
1607 | __generic_make_request(bio); | 1607 | __generic_make_request(bio); |
1608 | bio = bio_list_pop(current->bio_list); | 1608 | bio = bio_list_pop(current->bio_list); |
1609 | } while (bio); | 1609 | } while (bio); |
1610 | current->bio_list = NULL; /* deactivate */ | 1610 | current->bio_list = NULL; /* deactivate */ |
1611 | } | 1611 | } |
1612 | EXPORT_SYMBOL(generic_make_request); | 1612 | EXPORT_SYMBOL(generic_make_request); |
1613 | 1613 | ||
1614 | /** | 1614 | /** |
1615 | * submit_bio - submit a bio to the block device layer for I/O | 1615 | * submit_bio - submit a bio to the block device layer for I/O |
1616 | * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) | 1616 | * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) |
1617 | * @bio: The &struct bio which describes the I/O | 1617 | * @bio: The &struct bio which describes the I/O |
1618 | * | 1618 | * |
1619 | * submit_bio() is very similar in purpose to generic_make_request(), and | 1619 | * submit_bio() is very similar in purpose to generic_make_request(), and |
1620 | * uses that function to do most of the work. Both are fairly rough | 1620 | * uses that function to do most of the work. Both are fairly rough |
1621 | * interfaces; @bio must be presetup and ready for I/O. | 1621 | * interfaces; @bio must be presetup and ready for I/O. |
1622 | * | 1622 | * |
1623 | */ | 1623 | */ |
1624 | void submit_bio(int rw, struct bio *bio) | 1624 | void submit_bio(int rw, struct bio *bio) |
1625 | { | 1625 | { |
1626 | int count = bio_sectors(bio); | 1626 | int count = bio_sectors(bio); |
1627 | 1627 | ||
1628 | bio->bi_rw |= rw; | 1628 | bio->bi_rw |= rw; |
1629 | 1629 | ||
1630 | /* | 1630 | /* |
1631 | * If it's a regular read/write or a barrier with data attached, | 1631 | * If it's a regular read/write or a barrier with data attached, |
1632 | * go through the normal accounting stuff before submission. | 1632 | * go through the normal accounting stuff before submission. |
1633 | */ | 1633 | */ |
1634 | if (bio_has_data(bio) && !(rw & REQ_DISCARD)) { | 1634 | if (bio_has_data(bio) && !(rw & REQ_DISCARD)) { |
1635 | if (rw & WRITE) { | 1635 | if (rw & WRITE) { |
1636 | count_vm_events(PGPGOUT, count); | 1636 | count_vm_events(PGPGOUT, count); |
1637 | } else { | 1637 | } else { |
1638 | task_io_account_read(bio->bi_size); | 1638 | task_io_account_read(bio->bi_size); |
1639 | count_vm_events(PGPGIN, count); | 1639 | count_vm_events(PGPGIN, count); |
1640 | } | 1640 | } |
1641 | 1641 | ||
1642 | if (unlikely(block_dump)) { | 1642 | if (unlikely(block_dump)) { |
1643 | char b[BDEVNAME_SIZE]; | 1643 | char b[BDEVNAME_SIZE]; |
1644 | printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n", | 1644 | printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n", |
1645 | current->comm, task_pid_nr(current), | 1645 | current->comm, task_pid_nr(current), |
1646 | (rw & WRITE) ? "WRITE" : "READ", | 1646 | (rw & WRITE) ? "WRITE" : "READ", |
1647 | (unsigned long long)bio->bi_sector, | 1647 | (unsigned long long)bio->bi_sector, |
1648 | bdevname(bio->bi_bdev, b), | 1648 | bdevname(bio->bi_bdev, b), |
1649 | count); | 1649 | count); |
1650 | } | 1650 | } |
1651 | } | 1651 | } |
1652 | 1652 | ||
1653 | generic_make_request(bio); | 1653 | generic_make_request(bio); |
1654 | } | 1654 | } |
1655 | EXPORT_SYMBOL(submit_bio); | 1655 | EXPORT_SYMBOL(submit_bio); |
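Tying the two entry points together, a minimal sketch of building and submitting a bio per the caller contract in the generic_make_request() kernel-doc; bdev, sector and page are assumed to come from the caller, and example_end_io is a hypothetical completion handler:

	static void example_end_io(struct bio *bio, int error)
	{
		/* runs asynchronously, possibly in interrupt context */
		bio_put(bio);
	}

	struct bio *bio = bio_alloc(GFP_NOIO, 1);

	bio->bi_bdev   = bdev;			/* assumed target block_device */
	bio->bi_sector = sector;		/* assumed start sector */
	bio->bi_end_io = example_end_io;
	bio_add_page(bio, page, PAGE_SIZE, 0);	/* assumed data page */
	submit_bio(READ, bio);			/* adds rw flags and accounting, then calls generic_make_request() */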
1656 | 1656 | ||
1657 | /** | 1657 | /** |
1658 | * blk_rq_check_limits - Helper function to check a request for the queue limit | 1658 | * blk_rq_check_limits - Helper function to check a request for the queue limit |
1659 | * @q: the queue | 1659 | * @q: the queue |
1660 | * @rq: the request being checked | 1660 | * @rq: the request being checked |
1661 | * | 1661 | * |
1662 | * Description: | 1662 | * Description: |
1663 | * @rq may have been made based on weaker limitations of upper-level queues | 1663 | * @rq may have been made based on weaker limitations of upper-level queues |
1664 | * in request stacking drivers, and it may violate the limitation of @q. | 1664 | * in request stacking drivers, and it may violate the limitation of @q. |
1665 | * Since the block layer and the underlying device driver trust @rq | 1665 | * Since the block layer and the underlying device driver trust @rq |
1666 | * after it is inserted to @q, it should be checked against @q before | 1666 | * after it is inserted to @q, it should be checked against @q before |
1667 | * the insertion using this generic function. | 1667 | * the insertion using this generic function. |
1668 | * | 1668 | * |
1669 | * This function should also be useful for request stacking drivers | 1669 | * This function should also be useful for request stacking drivers |
1670 | * in some cases below, so export this function. | 1670 | * in some cases below, so export this function. |
1671 | * Request stacking drivers like request-based dm may change the queue | 1671 | * Request stacking drivers like request-based dm may change the queue |
1672 | * limits while requests are in the queue (e.g. dm's table swapping). | 1672 | * limits while requests are in the queue (e.g. dm's table swapping). |
1673 | * Such request stacking drivers should check those requests against | 1673 | * Such request stacking drivers should check those requests against |
1674 | * the new queue limits again when they dispatch those requests, | 1674 | * the new queue limits again when they dispatch those requests, |
1675 | * although such checks are also done against the old queue limits | 1675 | * although such checks are also done against the old queue limits |
1676 | * when submitting requests. | 1676 | * when submitting requests. |
1677 | */ | 1677 | */ |
1678 | int blk_rq_check_limits(struct request_queue *q, struct request *rq) | 1678 | int blk_rq_check_limits(struct request_queue *q, struct request *rq) |
1679 | { | 1679 | { |
1680 | if (rq->cmd_flags & REQ_DISCARD) | 1680 | if (rq->cmd_flags & REQ_DISCARD) |
1681 | return 0; | 1681 | return 0; |
1682 | 1682 | ||
1683 | if (blk_rq_sectors(rq) > queue_max_sectors(q) || | 1683 | if (blk_rq_sectors(rq) > queue_max_sectors(q) || |
1684 | blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) { | 1684 | blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) { |
1685 | printk(KERN_ERR "%s: over max size limit.\n", __func__); | 1685 | printk(KERN_ERR "%s: over max size limit.\n", __func__); |
1686 | return -EIO; | 1686 | return -EIO; |
1687 | } | 1687 | } |
1688 | 1688 | ||
1689 | /* | 1689 | /* |
1690 | * queue's settings related to segment counting like q->bounce_pfn | 1690 | * queue's settings related to segment counting like q->bounce_pfn |
1691 | * may differ from that of other stacking queues. | 1691 | * may differ from that of other stacking queues. |
1692 | * Recalculate it to check the request correctly on this queue's | 1692 | * Recalculate it to check the request correctly on this queue's |
1693 | * limitation. | 1693 | * limitation. |
1694 | */ | 1694 | */ |
1695 | blk_recalc_rq_segments(rq); | 1695 | blk_recalc_rq_segments(rq); |
1696 | if (rq->nr_phys_segments > queue_max_segments(q)) { | 1696 | if (rq->nr_phys_segments > queue_max_segments(q)) { |
1697 | printk(KERN_ERR "%s: over max segments limit.\n", __func__); | 1697 | printk(KERN_ERR "%s: over max segments limit.\n", __func__); |
1698 | return -EIO; | 1698 | return -EIO; |
1699 | } | 1699 | } |
1700 | 1700 | ||
1701 | return 0; | 1701 | return 0; |
1702 | } | 1702 | } |
1703 | EXPORT_SYMBOL_GPL(blk_rq_check_limits); | 1703 | EXPORT_SYMBOL_GPL(blk_rq_check_limits); |
1704 | 1704 | ||
1705 | /** | 1705 | /** |
1706 | * blk_insert_cloned_request - Helper for stacking drivers to submit a request | 1706 | * blk_insert_cloned_request - Helper for stacking drivers to submit a request |
1707 | * @q: the queue to submit the request | 1707 | * @q: the queue to submit the request |
1708 | * @rq: the request being queued | 1708 | * @rq: the request being queued |
1709 | */ | 1709 | */ |
1710 | int blk_insert_cloned_request(struct request_queue *q, struct request *rq) | 1710 | int blk_insert_cloned_request(struct request_queue *q, struct request *rq) |
1711 | { | 1711 | { |
1712 | unsigned long flags; | 1712 | unsigned long flags; |
1713 | 1713 | ||
1714 | if (blk_rq_check_limits(q, rq)) | 1714 | if (blk_rq_check_limits(q, rq)) |
1715 | return -EIO; | 1715 | return -EIO; |
1716 | 1716 | ||
1717 | #ifdef CONFIG_FAIL_MAKE_REQUEST | 1717 | #ifdef CONFIG_FAIL_MAKE_REQUEST |
1718 | if (rq->rq_disk && rq->rq_disk->part0.make_it_fail && | 1718 | if (rq->rq_disk && rq->rq_disk->part0.make_it_fail && |
1719 | should_fail(&fail_make_request, blk_rq_bytes(rq))) | 1719 | should_fail(&fail_make_request, blk_rq_bytes(rq))) |
1720 | return -EIO; | 1720 | return -EIO; |
1721 | #endif | 1721 | #endif |
1722 | 1722 | ||
1723 | spin_lock_irqsave(q->queue_lock, flags); | 1723 | spin_lock_irqsave(q->queue_lock, flags); |
1724 | 1724 | ||
1725 | /* | 1725 | /* |
1726 | * The submitting request must be dequeued before calling this function | 1726 | * The submitting request must be dequeued before calling this function |
1727 | * because it will be linked to another request_queue | 1727 | * because it will be linked to another request_queue |
1728 | */ | 1728 | */ |
1729 | BUG_ON(blk_queued_rq(rq)); | 1729 | BUG_ON(blk_queued_rq(rq)); |
1730 | 1730 | ||
1731 | add_acct_request(q, rq, ELEVATOR_INSERT_BACK); | 1731 | add_acct_request(q, rq, ELEVATOR_INSERT_BACK); |
1732 | spin_unlock_irqrestore(q->queue_lock, flags); | 1732 | spin_unlock_irqrestore(q->queue_lock, flags); |
1733 | 1733 | ||
1734 | return 0; | 1734 | return 0; |
1735 | } | 1735 | } |
1736 | EXPORT_SYMBOL_GPL(blk_insert_cloned_request); | 1736 | EXPORT_SYMBOL_GPL(blk_insert_cloned_request); |
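
A hedged sketch of how a request stacking driver (request-based dm is the in-tree example) might use the two helpers above when pushing a clone to a lower device; dispatch_clone(), lower_q and clone are illustrative names only.

	static int dispatch_clone(struct request_queue *lower_q, struct request *clone)
	{
		/*
		 * The lower queue's limits may have changed since the clone
		 * was built (e.g. a dm table swap), so re-check them ...
		 */
		if (blk_rq_check_limits(lower_q, clone))
			return -EIO;

		/*
		 * ... and queue the clone at the back of the lower queue's
		 * elevator; blk_insert_cloned_request() repeats the limit
		 * check internally before inserting.
		 */
		return blk_insert_cloned_request(lower_q, clone);
	}
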
1737 | 1737 | ||
1738 | /** | 1738 | /** |
1739 | * blk_rq_err_bytes - determine number of bytes till the next failure boundary | 1739 | * blk_rq_err_bytes - determine number of bytes till the next failure boundary |
1740 | * @rq: request to examine | 1740 | * @rq: request to examine |
1741 | * | 1741 | * |
1742 | * Description: | 1742 | * Description: |
1743 | * A request could be a merge of IOs which require different failure | 1743 | * A request could be a merge of IOs which require different failure |
1744 | * handling. This function determines the number of bytes which | 1744 | * handling. This function determines the number of bytes which |
1745 | * can be failed from the beginning of the request without | 1745 | * can be failed from the beginning of the request without |
1746 | * crossing into an area which needs to be retried further. | 1746 | * crossing into an area which needs to be retried further. |
1747 | * | 1747 | * |
1748 | * Return: | 1748 | * Return: |
1749 | * The number of bytes to fail. | 1749 | * The number of bytes to fail. |
1750 | * | 1750 | * |
1751 | * Context: | 1751 | * Context: |
1752 | * queue_lock must be held. | 1752 | * queue_lock must be held. |
1753 | */ | 1753 | */ |
1754 | unsigned int blk_rq_err_bytes(const struct request *rq) | 1754 | unsigned int blk_rq_err_bytes(const struct request *rq) |
1755 | { | 1755 | { |
1756 | unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK; | 1756 | unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK; |
1757 | unsigned int bytes = 0; | 1757 | unsigned int bytes = 0; |
1758 | struct bio *bio; | 1758 | struct bio *bio; |
1759 | 1759 | ||
1760 | if (!(rq->cmd_flags & REQ_MIXED_MERGE)) | 1760 | if (!(rq->cmd_flags & REQ_MIXED_MERGE)) |
1761 | return blk_rq_bytes(rq); | 1761 | return blk_rq_bytes(rq); |
1762 | 1762 | ||
1763 | /* | 1763 | /* |
1764 | * Currently the only 'mixing' which can happen is between | 1764 | * Currently the only 'mixing' which can happen is between |
1765 | * different failfast types. We can safely fail portions | 1765 | * different failfast types. We can safely fail portions |
1766 | * which have all the failfast bits that the first one has - | 1766 | * which have all the failfast bits that the first one has - |
1767 | * the ones which are at least as eager to fail as the first | 1767 | * the ones which are at least as eager to fail as the first |
1768 | * one. | 1768 | * one. |
1769 | */ | 1769 | */ |
1770 | for (bio = rq->bio; bio; bio = bio->bi_next) { | 1770 | for (bio = rq->bio; bio; bio = bio->bi_next) { |
1771 | if ((bio->bi_rw & ff) != ff) | 1771 | if ((bio->bi_rw & ff) != ff) |
1772 | break; | 1772 | break; |
1773 | bytes += bio->bi_size; | 1773 | bytes += bio->bi_size; |
1774 | } | 1774 | } |
1775 | 1775 | ||
1776 | /* this could lead to an infinite loop */ | 1776 | /* this could lead to an infinite loop */ |
1777 | BUG_ON(blk_rq_bytes(rq) && !bytes); | 1777 | BUG_ON(blk_rq_bytes(rq) && !bytes); |
1778 | return bytes; | 1778 | return bytes; |
1779 | } | 1779 | } |
1780 | EXPORT_SYMBOL_GPL(blk_rq_err_bytes); | 1780 | EXPORT_SYMBOL_GPL(blk_rq_err_bytes); |
1781 | 1781 | ||
1782 | static void blk_account_io_completion(struct request *req, unsigned int bytes) | 1782 | static void blk_account_io_completion(struct request *req, unsigned int bytes) |
1783 | { | 1783 | { |
1784 | if (blk_do_io_stat(req)) { | 1784 | if (blk_do_io_stat(req)) { |
1785 | const int rw = rq_data_dir(req); | 1785 | const int rw = rq_data_dir(req); |
1786 | struct hd_struct *part; | 1786 | struct hd_struct *part; |
1787 | int cpu; | 1787 | int cpu; |
1788 | 1788 | ||
1789 | cpu = part_stat_lock(); | 1789 | cpu = part_stat_lock(); |
1790 | part = req->part; | 1790 | part = req->part; |
1791 | part_stat_add(cpu, part, sectors[rw], bytes >> 9); | 1791 | part_stat_add(cpu, part, sectors[rw], bytes >> 9); |
1792 | part_stat_unlock(); | 1792 | part_stat_unlock(); |
1793 | } | 1793 | } |
1794 | } | 1794 | } |
1795 | 1795 | ||
1796 | static void blk_account_io_done(struct request *req) | 1796 | static void blk_account_io_done(struct request *req) |
1797 | { | 1797 | { |
1798 | /* | 1798 | /* |
1799 | * Account IO completion. flush_rq isn't accounted as a | 1799 | * Account IO completion. flush_rq isn't accounted as a |
1800 | * normal IO on queueing nor completion. Accounting the | 1800 | * normal IO on queueing nor completion. Accounting the |
1801 | * containing request is enough. | 1801 | * containing request is enough. |
1802 | */ | 1802 | */ |
1803 | if (blk_do_io_stat(req) && !(req->cmd_flags & REQ_FLUSH_SEQ)) { | 1803 | if (blk_do_io_stat(req) && !(req->cmd_flags & REQ_FLUSH_SEQ)) { |
1804 | unsigned long duration = jiffies - req->start_time; | 1804 | unsigned long duration = jiffies - req->start_time; |
1805 | const int rw = rq_data_dir(req); | 1805 | const int rw = rq_data_dir(req); |
1806 | struct hd_struct *part; | 1806 | struct hd_struct *part; |
1807 | int cpu; | 1807 | int cpu; |
1808 | 1808 | ||
1809 | cpu = part_stat_lock(); | 1809 | cpu = part_stat_lock(); |
1810 | part = req->part; | 1810 | part = req->part; |
1811 | 1811 | ||
1812 | part_stat_inc(cpu, part, ios[rw]); | 1812 | part_stat_inc(cpu, part, ios[rw]); |
1813 | part_stat_add(cpu, part, ticks[rw], duration); | 1813 | part_stat_add(cpu, part, ticks[rw], duration); |
1814 | part_round_stats(cpu, part); | 1814 | part_round_stats(cpu, part); |
1815 | part_dec_in_flight(part, rw); | 1815 | part_dec_in_flight(part, rw); |
1816 | 1816 | ||
1817 | hd_struct_put(part); | 1817 | hd_struct_put(part); |
1818 | part_stat_unlock(); | 1818 | part_stat_unlock(); |
1819 | } | 1819 | } |
1820 | } | 1820 | } |
1821 | 1821 | ||
1822 | /** | 1822 | /** |
1823 | * blk_peek_request - peek at the top of a request queue | 1823 | * blk_peek_request - peek at the top of a request queue |
1824 | * @q: request queue to peek at | 1824 | * @q: request queue to peek at |
1825 | * | 1825 | * |
1826 | * Description: | 1826 | * Description: |
1827 | * Return the request at the top of @q. The returned request | 1827 | * Return the request at the top of @q. The returned request |
1828 | * should be started using blk_start_request() before LLD starts | 1828 | * should be started using blk_start_request() before LLD starts |
1829 | * processing it. | 1829 | * processing it. |
1830 | * | 1830 | * |
1831 | * Return: | 1831 | * Return: |
1832 | * Pointer to the request at the top of @q if available. Null | 1832 | * Pointer to the request at the top of @q if available. Null |
1833 | * otherwise. | 1833 | * otherwise. |
1834 | * | 1834 | * |
1835 | * Context: | 1835 | * Context: |
1836 | * queue_lock must be held. | 1836 | * queue_lock must be held. |
1837 | */ | 1837 | */ |
1838 | struct request *blk_peek_request(struct request_queue *q) | 1838 | struct request *blk_peek_request(struct request_queue *q) |
1839 | { | 1839 | { |
1840 | struct request *rq; | 1840 | struct request *rq; |
1841 | int ret; | 1841 | int ret; |
1842 | 1842 | ||
1843 | while ((rq = __elv_next_request(q)) != NULL) { | 1843 | while ((rq = __elv_next_request(q)) != NULL) { |
1844 | if (!(rq->cmd_flags & REQ_STARTED)) { | 1844 | if (!(rq->cmd_flags & REQ_STARTED)) { |
1845 | /* | 1845 | /* |
1846 | * This is the first time the device driver | 1846 | * This is the first time the device driver |
1847 | * sees this request (possibly after | 1847 | * sees this request (possibly after |
1848 | * requeueing). Notify IO scheduler. | 1848 | * requeueing). Notify IO scheduler. |
1849 | */ | 1849 | */ |
1850 | if (rq->cmd_flags & REQ_SORTED) | 1850 | if (rq->cmd_flags & REQ_SORTED) |
1851 | elv_activate_rq(q, rq); | 1851 | elv_activate_rq(q, rq); |
1852 | 1852 | ||
1853 | /* | 1853 | /* |
1854 | * just mark as started even if we don't start | 1854 | * just mark as started even if we don't start |
1855 | * it; a request that has been delayed should | 1855 | * it; a request that has been delayed should |
1856 | * not be passed by new incoming requests | 1856 | * not be passed by new incoming requests |
1857 | */ | 1857 | */ |
1858 | rq->cmd_flags |= REQ_STARTED; | 1858 | rq->cmd_flags |= REQ_STARTED; |
1859 | trace_block_rq_issue(q, rq); | 1859 | trace_block_rq_issue(q, rq); |
1860 | } | 1860 | } |
1861 | 1861 | ||
1862 | if (!q->boundary_rq || q->boundary_rq == rq) { | 1862 | if (!q->boundary_rq || q->boundary_rq == rq) { |
1863 | q->end_sector = rq_end_sector(rq); | 1863 | q->end_sector = rq_end_sector(rq); |
1864 | q->boundary_rq = NULL; | 1864 | q->boundary_rq = NULL; |
1865 | } | 1865 | } |
1866 | 1866 | ||
1867 | if (rq->cmd_flags & REQ_DONTPREP) | 1867 | if (rq->cmd_flags & REQ_DONTPREP) |
1868 | break; | 1868 | break; |
1869 | 1869 | ||
1870 | if (q->dma_drain_size && blk_rq_bytes(rq)) { | 1870 | if (q->dma_drain_size && blk_rq_bytes(rq)) { |
1871 | /* | 1871 | /* |
1872 | * make sure space for the drain appears; we | 1872 | * make sure space for the drain appears; we |
1873 | * know we can do this because max_hw_segments | 1873 | * know we can do this because max_hw_segments |
1874 | * has been adjusted to be one fewer than the | 1874 | * has been adjusted to be one fewer than the |
1875 | * device can handle | 1875 | * device can handle |
1876 | */ | 1876 | */ |
1877 | rq->nr_phys_segments++; | 1877 | rq->nr_phys_segments++; |
1878 | } | 1878 | } |
1879 | 1879 | ||
1880 | if (!q->prep_rq_fn) | 1880 | if (!q->prep_rq_fn) |
1881 | break; | 1881 | break; |
1882 | 1882 | ||
1883 | ret = q->prep_rq_fn(q, rq); | 1883 | ret = q->prep_rq_fn(q, rq); |
1884 | if (ret == BLKPREP_OK) { | 1884 | if (ret == BLKPREP_OK) { |
1885 | break; | 1885 | break; |
1886 | } else if (ret == BLKPREP_DEFER) { | 1886 | } else if (ret == BLKPREP_DEFER) { |
1887 | /* | 1887 | /* |
1888 | * the request may have been (partially) prepped. | 1888 | * the request may have been (partially) prepped. |
1889 | * we need to keep this request in the front to | 1889 | * we need to keep this request in the front to |
1890 | * avoid resource deadlock. REQ_STARTED will | 1890 | * avoid resource deadlock. REQ_STARTED will |
1891 | * prevent other fs requests from passing this one. | 1891 | * prevent other fs requests from passing this one. |
1892 | */ | 1892 | */ |
1893 | if (q->dma_drain_size && blk_rq_bytes(rq) && | 1893 | if (q->dma_drain_size && blk_rq_bytes(rq) && |
1894 | !(rq->cmd_flags & REQ_DONTPREP)) { | 1894 | !(rq->cmd_flags & REQ_DONTPREP)) { |
1895 | /* | 1895 | /* |
1896 | * remove the space for the drain we added | 1896 | * remove the space for the drain we added |
1897 | * so that we don't add it again | 1897 | * so that we don't add it again |
1898 | */ | 1898 | */ |
1899 | --rq->nr_phys_segments; | 1899 | --rq->nr_phys_segments; |
1900 | } | 1900 | } |
1901 | 1901 | ||
1902 | rq = NULL; | 1902 | rq = NULL; |
1903 | break; | 1903 | break; |
1904 | } else if (ret == BLKPREP_KILL) { | 1904 | } else if (ret == BLKPREP_KILL) { |
1905 | rq->cmd_flags |= REQ_QUIET; | 1905 | rq->cmd_flags |= REQ_QUIET; |
1906 | /* | 1906 | /* |
1907 | * Mark this request as started so we don't trigger | 1907 | * Mark this request as started so we don't trigger |
1908 | * any debug logic in the end I/O path. | 1908 | * any debug logic in the end I/O path. |
1909 | */ | 1909 | */ |
1910 | blk_start_request(rq); | 1910 | blk_start_request(rq); |
1911 | __blk_end_request_all(rq, -EIO); | 1911 | __blk_end_request_all(rq, -EIO); |
1912 | } else { | 1912 | } else { |
1913 | printk(KERN_ERR "%s: bad return=%d\n", __func__, ret); | 1913 | printk(KERN_ERR "%s: bad return=%d\n", __func__, ret); |
1914 | break; | 1914 | break; |
1915 | } | 1915 | } |
1916 | } | 1916 | } |
1917 | 1917 | ||
1918 | return rq; | 1918 | return rq; |
1919 | } | 1919 | } |
1920 | EXPORT_SYMBOL(blk_peek_request); | 1920 | EXPORT_SYMBOL(blk_peek_request); |
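
To make the BLKPREP_* handling above concrete, a rough sketch of a prep_rq_fn a driver could install with blk_queue_prep_rq(); struct my_cmd and the GFP_ATOMIC allocation are illustrative assumptions (prep runs under the queue lock, hence the atomic allocation).

	struct my_cmd {
		struct request *rq;
		/* per-command driver state would live here */
	};

	static int my_prep_rq_fn(struct request_queue *q, struct request *rq)
	{
		struct my_cmd *cmd;

		if (rq->special)
			return BLKPREP_OK;	/* already prepared earlier */

		cmd = kmalloc(sizeof(*cmd), GFP_ATOMIC);
		if (!cmd)
			return BLKPREP_DEFER;	/* rq stays at the head, retried later */

		cmd->rq = rq;
		rq->special = cmd;
		rq->cmd_flags |= REQ_DONTPREP;	/* don't prep again on requeue */
		return BLKPREP_OK;
	}

	/* installed at init time: blk_queue_prep_rq(q, my_prep_rq_fn); */
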
1921 | 1921 | ||
1922 | void blk_dequeue_request(struct request *rq) | 1922 | void blk_dequeue_request(struct request *rq) |
1923 | { | 1923 | { |
1924 | struct request_queue *q = rq->q; | 1924 | struct request_queue *q = rq->q; |
1925 | 1925 | ||
1926 | BUG_ON(list_empty(&rq->queuelist)); | 1926 | BUG_ON(list_empty(&rq->queuelist)); |
1927 | BUG_ON(ELV_ON_HASH(rq)); | 1927 | BUG_ON(ELV_ON_HASH(rq)); |
1928 | 1928 | ||
1929 | list_del_init(&rq->queuelist); | 1929 | list_del_init(&rq->queuelist); |
1930 | 1930 | ||
1931 | /* | 1931 | /* |
1932 | * the time frame between a request being removed from the lists | 1932 | * the time frame between a request being removed from the lists |
1933 | * and when it is freed is accounted as io that is in progress at | 1933 | * and when it is freed is accounted as io that is in progress at |
1934 | * the driver side. | 1934 | * the driver side. |
1935 | */ | 1935 | */ |
1936 | if (blk_account_rq(rq)) { | 1936 | if (blk_account_rq(rq)) { |
1937 | q->in_flight[rq_is_sync(rq)]++; | 1937 | q->in_flight[rq_is_sync(rq)]++; |
1938 | set_io_start_time_ns(rq); | 1938 | set_io_start_time_ns(rq); |
1939 | } | 1939 | } |
1940 | } | 1940 | } |
1941 | 1941 | ||
1942 | /** | 1942 | /** |
1943 | * blk_start_request - start request processing on the driver | 1943 | * blk_start_request - start request processing on the driver |
1944 | * @req: request to dequeue | 1944 | * @req: request to dequeue |
1945 | * | 1945 | * |
1946 | * Description: | 1946 | * Description: |
1947 | * Dequeue @req and start timeout timer on it. This hands off the | 1947 | * Dequeue @req and start timeout timer on it. This hands off the |
1948 | * request to the driver. | 1948 | * request to the driver. |
1949 | * | 1949 | * |
1950 | * Block internal functions which don't want to start timer should | 1950 | * Block internal functions which don't want to start timer should |
1951 | * call blk_dequeue_request(). | 1951 | * call blk_dequeue_request(). |
1952 | * | 1952 | * |
1953 | * Context: | 1953 | * Context: |
1954 | * queue_lock must be held. | 1954 | * queue_lock must be held. |
1955 | */ | 1955 | */ |
1956 | void blk_start_request(struct request *req) | 1956 | void blk_start_request(struct request *req) |
1957 | { | 1957 | { |
1958 | blk_dequeue_request(req); | 1958 | blk_dequeue_request(req); |
1959 | 1959 | ||
1960 | /* | 1960 | /* |
1961 | * We are now handing the request to the hardware, initialize | 1961 | * We are now handing the request to the hardware, initialize |
1962 | * resid_len to full count and add the timeout handler. | 1962 | * resid_len to full count and add the timeout handler. |
1963 | */ | 1963 | */ |
1964 | req->resid_len = blk_rq_bytes(req); | 1964 | req->resid_len = blk_rq_bytes(req); |
1965 | if (unlikely(blk_bidi_rq(req))) | 1965 | if (unlikely(blk_bidi_rq(req))) |
1966 | req->next_rq->resid_len = blk_rq_bytes(req->next_rq); | 1966 | req->next_rq->resid_len = blk_rq_bytes(req->next_rq); |
1967 | 1967 | ||
1968 | blk_add_timer(req); | 1968 | blk_add_timer(req); |
1969 | } | 1969 | } |
1970 | EXPORT_SYMBOL(blk_start_request); | 1970 | EXPORT_SYMBOL(blk_start_request); |
1971 | 1971 | ||
1972 | /** | 1972 | /** |
1973 | * blk_fetch_request - fetch a request from a request queue | 1973 | * blk_fetch_request - fetch a request from a request queue |
1974 | * @q: request queue to fetch a request from | 1974 | * @q: request queue to fetch a request from |
1975 | * | 1975 | * |
1976 | * Description: | 1976 | * Description: |
1977 | * Return the request at the top of @q. The request is started on | 1977 | * Return the request at the top of @q. The request is started on |
1978 | * return and LLD can start processing it immediately. | 1978 | * return and LLD can start processing it immediately. |
1979 | * | 1979 | * |
1980 | * Return: | 1980 | * Return: |
1981 | * Pointer to the request at the top of @q if available. Null | 1981 | * Pointer to the request at the top of @q if available. Null |
1982 | * otherwise. | 1982 | * otherwise. |
1983 | * | 1983 | * |
1984 | * Context: | 1984 | * Context: |
1985 | * queue_lock must be held. | 1985 | * queue_lock must be held. |
1986 | */ | 1986 | */ |
1987 | struct request *blk_fetch_request(struct request_queue *q) | 1987 | struct request *blk_fetch_request(struct request_queue *q) |
1988 | { | 1988 | { |
1989 | struct request *rq; | 1989 | struct request *rq; |
1990 | 1990 | ||
1991 | rq = blk_peek_request(q); | 1991 | rq = blk_peek_request(q); |
1992 | if (rq) | 1992 | if (rq) |
1993 | blk_start_request(rq); | 1993 | blk_start_request(rq); |
1994 | return rq; | 1994 | return rq; |
1995 | } | 1995 | } |
1996 | EXPORT_SYMBOL(blk_fetch_request); | 1996 | EXPORT_SYMBOL(blk_fetch_request); |
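
As a usage sketch, the classic ->request_fn loop built on blk_fetch_request(); my_handle_rq() is a hypothetical synchronous transfer helper. The queue lock is held on entry and across the loop, which is why the __-prefixed completion helper is used.

	static void my_request_fn(struct request_queue *q)
	{
		struct request *rq;

		while ((rq = blk_fetch_request(q)) != NULL) {
			int err = 0;

			if (rq->cmd_type != REQ_TYPE_FS)
				err = -EIO;		/* only fs requests handled here */
			else
				err = my_handle_rq(rq);	/* hypothetical transfer */

			/* queue_lock still held, so use the locked variant */
			__blk_end_request_all(rq, err);
		}
	}
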
1997 | 1997 | ||
1998 | /** | 1998 | /** |
1999 | * blk_update_request - Special helper function for request stacking drivers | 1999 | * blk_update_request - Special helper function for request stacking drivers |
2000 | * @req: the request being processed | 2000 | * @req: the request being processed |
2001 | * @error: %0 for success, < %0 for error | 2001 | * @error: %0 for success, < %0 for error |
2002 | * @nr_bytes: number of bytes to complete @req | 2002 | * @nr_bytes: number of bytes to complete @req |
2003 | * | 2003 | * |
2004 | * Description: | 2004 | * Description: |
2005 | * Ends I/O on a number of bytes attached to @req, but doesn't complete | 2005 | * Ends I/O on a number of bytes attached to @req, but doesn't complete |
2006 | * the request structure even if @req doesn't have leftover. | 2006 | * the request structure even if @req doesn't have leftover. |
2007 | * If @req has leftover, sets it up for the next range of segments. | 2007 | * If @req has leftover, sets it up for the next range of segments. |
2008 | * | 2008 | * |
2009 | * This special helper function is only for request stacking drivers | 2009 | * This special helper function is only for request stacking drivers |
2010 | * (e.g. request-based dm) so that they can handle partial completion. | 2010 | * (e.g. request-based dm) so that they can handle partial completion. |
2011 | * Actual device drivers should use blk_end_request instead. | 2011 | * Actual device drivers should use blk_end_request instead. |
2012 | * | 2012 | * |
2013 | * Passing the result of blk_rq_bytes() as @nr_bytes guarantees | 2013 | * Passing the result of blk_rq_bytes() as @nr_bytes guarantees |
2014 | * %false return from this function. | 2014 | * %false return from this function. |
2015 | * | 2015 | * |
2016 | * Return: | 2016 | * Return: |
2017 | * %false - this request doesn't have any more data | 2017 | * %false - this request doesn't have any more data |
2018 | * %true - this request has more data | 2018 | * %true - this request has more data |
2019 | **/ | 2019 | **/ |
2020 | bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) | 2020 | bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) |
2021 | { | 2021 | { |
2022 | int total_bytes, bio_nbytes, next_idx = 0; | 2022 | int total_bytes, bio_nbytes, next_idx = 0; |
2023 | struct bio *bio; | 2023 | struct bio *bio; |
2024 | 2024 | ||
2025 | if (!req->bio) | 2025 | if (!req->bio) |
2026 | return false; | 2026 | return false; |
2027 | 2027 | ||
2028 | trace_block_rq_complete(req->q, req); | 2028 | trace_block_rq_complete(req->q, req); |
2029 | 2029 | ||
2030 | /* | 2030 | /* |
2031 | * For fs requests, rq is just a carrier of independent bios | 2031 | * For fs requests, rq is just a carrier of independent bios |
2032 | * and each partial completion should be handled separately. | 2032 | * and each partial completion should be handled separately. |
2033 | * Reset per-request error on each partial completion. | 2033 | * Reset per-request error on each partial completion. |
2034 | * | 2034 | * |
2035 | * TODO: tj: This is too subtle. It would be better to let | 2035 | * TODO: tj: This is too subtle. It would be better to let |
2036 | * low level drivers do what they see fit. | 2036 | * low level drivers do what they see fit. |
2037 | */ | 2037 | */ |
2038 | if (req->cmd_type == REQ_TYPE_FS) | 2038 | if (req->cmd_type == REQ_TYPE_FS) |
2039 | req->errors = 0; | 2039 | req->errors = 0; |
2040 | 2040 | ||
2041 | if (error && req->cmd_type == REQ_TYPE_FS && | 2041 | if (error && req->cmd_type == REQ_TYPE_FS && |
2042 | !(req->cmd_flags & REQ_QUIET)) { | 2042 | !(req->cmd_flags & REQ_QUIET)) { |
2043 | char *error_type; | 2043 | char *error_type; |
2044 | 2044 | ||
2045 | switch (error) { | 2045 | switch (error) { |
2046 | case -ENOLINK: | 2046 | case -ENOLINK: |
2047 | error_type = "recoverable transport"; | 2047 | error_type = "recoverable transport"; |
2048 | break; | 2048 | break; |
2049 | case -EREMOTEIO: | 2049 | case -EREMOTEIO: |
2050 | error_type = "critical target"; | 2050 | error_type = "critical target"; |
2051 | break; | 2051 | break; |
2052 | case -EBADE: | 2052 | case -EBADE: |
2053 | error_type = "critical nexus"; | 2053 | error_type = "critical nexus"; |
2054 | break; | 2054 | break; |
2055 | case -EIO: | 2055 | case -EIO: |
2056 | default: | 2056 | default: |
2057 | error_type = "I/O"; | 2057 | error_type = "I/O"; |
2058 | break; | 2058 | break; |
2059 | } | 2059 | } |
2060 | printk(KERN_ERR "end_request: %s error, dev %s, sector %llu\n", | 2060 | printk(KERN_ERR "end_request: %s error, dev %s, sector %llu\n", |
2061 | error_type, req->rq_disk ? req->rq_disk->disk_name : "?", | 2061 | error_type, req->rq_disk ? req->rq_disk->disk_name : "?", |
2062 | (unsigned long long)blk_rq_pos(req)); | 2062 | (unsigned long long)blk_rq_pos(req)); |
2063 | } | 2063 | } |
2064 | 2064 | ||
2065 | blk_account_io_completion(req, nr_bytes); | 2065 | blk_account_io_completion(req, nr_bytes); |
2066 | 2066 | ||
2067 | total_bytes = bio_nbytes = 0; | 2067 | total_bytes = bio_nbytes = 0; |
2068 | while ((bio = req->bio) != NULL) { | 2068 | while ((bio = req->bio) != NULL) { |
2069 | int nbytes; | 2069 | int nbytes; |
2070 | 2070 | ||
2071 | if (nr_bytes >= bio->bi_size) { | 2071 | if (nr_bytes >= bio->bi_size) { |
2072 | req->bio = bio->bi_next; | 2072 | req->bio = bio->bi_next; |
2073 | nbytes = bio->bi_size; | 2073 | nbytes = bio->bi_size; |
2074 | req_bio_endio(req, bio, nbytes, error); | 2074 | req_bio_endio(req, bio, nbytes, error); |
2075 | next_idx = 0; | 2075 | next_idx = 0; |
2076 | bio_nbytes = 0; | 2076 | bio_nbytes = 0; |
2077 | } else { | 2077 | } else { |
2078 | int idx = bio->bi_idx + next_idx; | 2078 | int idx = bio->bi_idx + next_idx; |
2079 | 2079 | ||
2080 | if (unlikely(idx >= bio->bi_vcnt)) { | 2080 | if (unlikely(idx >= bio->bi_vcnt)) { |
2081 | blk_dump_rq_flags(req, "__end_that"); | 2081 | blk_dump_rq_flags(req, "__end_that"); |
2082 | printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n", | 2082 | printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n", |
2083 | __func__, idx, bio->bi_vcnt); | 2083 | __func__, idx, bio->bi_vcnt); |
2084 | break; | 2084 | break; |
2085 | } | 2085 | } |
2086 | 2086 | ||
2087 | nbytes = bio_iovec_idx(bio, idx)->bv_len; | 2087 | nbytes = bio_iovec_idx(bio, idx)->bv_len; |
2088 | BIO_BUG_ON(nbytes > bio->bi_size); | 2088 | BIO_BUG_ON(nbytes > bio->bi_size); |
2089 | 2089 | ||
2090 | /* | 2090 | /* |
2091 | * not a complete bvec done | 2091 | * not a complete bvec done |
2092 | */ | 2092 | */ |
2093 | if (unlikely(nbytes > nr_bytes)) { | 2093 | if (unlikely(nbytes > nr_bytes)) { |
2094 | bio_nbytes += nr_bytes; | 2094 | bio_nbytes += nr_bytes; |
2095 | total_bytes += nr_bytes; | 2095 | total_bytes += nr_bytes; |
2096 | break; | 2096 | break; |
2097 | } | 2097 | } |
2098 | 2098 | ||
2099 | /* | 2099 | /* |
2100 | * advance to the next vector | 2100 | * advance to the next vector |
2101 | */ | 2101 | */ |
2102 | next_idx++; | 2102 | next_idx++; |
2103 | bio_nbytes += nbytes; | 2103 | bio_nbytes += nbytes; |
2104 | } | 2104 | } |
2105 | 2105 | ||
2106 | total_bytes += nbytes; | 2106 | total_bytes += nbytes; |
2107 | nr_bytes -= nbytes; | 2107 | nr_bytes -= nbytes; |
2108 | 2108 | ||
2109 | bio = req->bio; | 2109 | bio = req->bio; |
2110 | if (bio) { | 2110 | if (bio) { |
2111 | /* | 2111 | /* |
2112 | * end more in this run, or just return 'not-done' | 2112 | * end more in this run, or just return 'not-done' |
2113 | */ | 2113 | */ |
2114 | if (unlikely(nr_bytes <= 0)) | 2114 | if (unlikely(nr_bytes <= 0)) |
2115 | break; | 2115 | break; |
2116 | } | 2116 | } |
2117 | } | 2117 | } |
2118 | 2118 | ||
2119 | /* | 2119 | /* |
2120 | * completely done | 2120 | * completely done |
2121 | */ | 2121 | */ |
2122 | if (!req->bio) { | 2122 | if (!req->bio) { |
2123 | /* | 2123 | /* |
2124 | * Reset counters so that the request stacking driver | 2124 | * Reset counters so that the request stacking driver |
2125 | * can find how many bytes remain in the request | 2125 | * can find how many bytes remain in the request |
2126 | * later. | 2126 | * later. |
2127 | */ | 2127 | */ |
2128 | req->__data_len = 0; | 2128 | req->__data_len = 0; |
2129 | return false; | 2129 | return false; |
2130 | } | 2130 | } |
2131 | 2131 | ||
2132 | /* | 2132 | /* |
2133 | * if the request wasn't completed, update state | 2133 | * if the request wasn't completed, update state |
2134 | */ | 2134 | */ |
2135 | if (bio_nbytes) { | 2135 | if (bio_nbytes) { |
2136 | req_bio_endio(req, bio, bio_nbytes, error); | 2136 | req_bio_endio(req, bio, bio_nbytes, error); |
2137 | bio->bi_idx += next_idx; | 2137 | bio->bi_idx += next_idx; |
2138 | bio_iovec(bio)->bv_offset += nr_bytes; | 2138 | bio_iovec(bio)->bv_offset += nr_bytes; |
2139 | bio_iovec(bio)->bv_len -= nr_bytes; | 2139 | bio_iovec(bio)->bv_len -= nr_bytes; |
2140 | } | 2140 | } |
2141 | 2141 | ||
2142 | req->__data_len -= total_bytes; | 2142 | req->__data_len -= total_bytes; |
2143 | req->buffer = bio_data(req->bio); | 2143 | req->buffer = bio_data(req->bio); |
2144 | 2144 | ||
2145 | /* update sector only for requests with clear definition of sector */ | 2145 | /* update sector only for requests with clear definition of sector */ |
2146 | if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD)) | 2146 | if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD)) |
2147 | req->__sector += total_bytes >> 9; | 2147 | req->__sector += total_bytes >> 9; |
2148 | 2148 | ||
2149 | /* mixed attributes always follow the first bio */ | 2149 | /* mixed attributes always follow the first bio */ |
2150 | if (req->cmd_flags & REQ_MIXED_MERGE) { | 2150 | if (req->cmd_flags & REQ_MIXED_MERGE) { |
2151 | req->cmd_flags &= ~REQ_FAILFAST_MASK; | 2151 | req->cmd_flags &= ~REQ_FAILFAST_MASK; |
2152 | req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK; | 2152 | req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK; |
2153 | } | 2153 | } |
2154 | 2154 | ||
2155 | /* | 2155 | /* |
2156 | * If total number of sectors is less than the first segment | 2156 | * If total number of sectors is less than the first segment |
2157 | * size, something has gone terribly wrong. | 2157 | * size, something has gone terribly wrong. |
2158 | */ | 2158 | */ |
2159 | if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) { | 2159 | if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) { |
2160 | blk_dump_rq_flags(req, "request botched"); | 2160 | blk_dump_rq_flags(req, "request botched"); |
2161 | req->__data_len = blk_rq_cur_bytes(req); | 2161 | req->__data_len = blk_rq_cur_bytes(req); |
2162 | } | 2162 | } |
2163 | 2163 | ||
2164 | /* recalculate the number of segments */ | 2164 | /* recalculate the number of segments */ |
2165 | blk_recalc_rq_segments(req); | 2165 | blk_recalc_rq_segments(req); |
2166 | 2166 | ||
2167 | return true; | 2167 | return true; |
2168 | } | 2168 | } |
2169 | EXPORT_SYMBOL_GPL(blk_update_request); | 2169 | EXPORT_SYMBOL_GPL(blk_update_request); |
2170 | 2170 | ||
2171 | static bool blk_update_bidi_request(struct request *rq, int error, | 2171 | static bool blk_update_bidi_request(struct request *rq, int error, |
2172 | unsigned int nr_bytes, | 2172 | unsigned int nr_bytes, |
2173 | unsigned int bidi_bytes) | 2173 | unsigned int bidi_bytes) |
2174 | { | 2174 | { |
2175 | if (blk_update_request(rq, error, nr_bytes)) | 2175 | if (blk_update_request(rq, error, nr_bytes)) |
2176 | return true; | 2176 | return true; |
2177 | 2177 | ||
2178 | /* Bidi request must be completed as a whole */ | 2178 | /* Bidi request must be completed as a whole */ |
2179 | if (unlikely(blk_bidi_rq(rq)) && | 2179 | if (unlikely(blk_bidi_rq(rq)) && |
2180 | blk_update_request(rq->next_rq, error, bidi_bytes)) | 2180 | blk_update_request(rq->next_rq, error, bidi_bytes)) |
2181 | return true; | 2181 | return true; |
2182 | 2182 | ||
2183 | if (blk_queue_add_random(rq->q)) | 2183 | if (blk_queue_add_random(rq->q)) |
2184 | add_disk_randomness(rq->rq_disk); | 2184 | add_disk_randomness(rq->rq_disk); |
2185 | 2185 | ||
2186 | return false; | 2186 | return false; |
2187 | } | 2187 | } |
2188 | 2188 | ||
2189 | /** | 2189 | /** |
2190 | * blk_unprep_request - unprepare a request | 2190 | * blk_unprep_request - unprepare a request |
2191 | * @req: the request | 2191 | * @req: the request |
2192 | * | 2192 | * |
2193 | * This function makes a request ready for complete resubmission (or | 2193 | * This function makes a request ready for complete resubmission (or |
2194 | * completion). It happens only after all error handling is complete, | 2194 | * completion). It happens only after all error handling is complete, |
2195 | * so represents the appropriate moment to deallocate any resources | 2195 | * so represents the appropriate moment to deallocate any resources |
2196 | * that were allocated to the request in the prep_rq_fn. The queue | 2196 | * that were allocated to the request in the prep_rq_fn. The queue |
2197 | * lock is held when calling this. | 2197 | * lock is held when calling this. |
2198 | */ | 2198 | */ |
2199 | void blk_unprep_request(struct request *req) | 2199 | void blk_unprep_request(struct request *req) |
2200 | { | 2200 | { |
2201 | struct request_queue *q = req->q; | 2201 | struct request_queue *q = req->q; |
2202 | 2202 | ||
2203 | req->cmd_flags &= ~REQ_DONTPREP; | 2203 | req->cmd_flags &= ~REQ_DONTPREP; |
2204 | if (q->unprep_rq_fn) | 2204 | if (q->unprep_rq_fn) |
2205 | q->unprep_rq_fn(q, req); | 2205 | q->unprep_rq_fn(q, req); |
2206 | } | 2206 | } |
2207 | EXPORT_SYMBOL_GPL(blk_unprep_request); | 2207 | EXPORT_SYMBOL_GPL(blk_unprep_request); |
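
Complementing the earlier prep_rq_fn sketch, a possible unprep_rq_fn that releases what that (hypothetical) prep handler allocated; blk_unprep_request() above is what ends up calling it once REQ_DONTPREP is set.

	static void my_unprep_rq_fn(struct request_queue *q, struct request *rq)
	{
		/* free the per-command state set up in my_prep_rq_fn() */
		kfree(rq->special);
		rq->special = NULL;
	}

	/* paired at init time: blk_queue_unprep_rq(q, my_unprep_rq_fn); */
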
2208 | 2208 | ||
2209 | /* | 2209 | /* |
2210 | * queue lock must be held | 2210 | * queue lock must be held |
2211 | */ | 2211 | */ |
2212 | static void blk_finish_request(struct request *req, int error) | 2212 | static void blk_finish_request(struct request *req, int error) |
2213 | { | 2213 | { |
2214 | if (blk_rq_tagged(req)) | 2214 | if (blk_rq_tagged(req)) |
2215 | blk_queue_end_tag(req->q, req); | 2215 | blk_queue_end_tag(req->q, req); |
2216 | 2216 | ||
2217 | BUG_ON(blk_queued_rq(req)); | 2217 | BUG_ON(blk_queued_rq(req)); |
2218 | 2218 | ||
2219 | if (unlikely(laptop_mode) && req->cmd_type == REQ_TYPE_FS) | 2219 | if (unlikely(laptop_mode) && req->cmd_type == REQ_TYPE_FS) |
2220 | laptop_io_completion(&req->q->backing_dev_info); | 2220 | laptop_io_completion(&req->q->backing_dev_info); |
2221 | 2221 | ||
2222 | blk_delete_timer(req); | 2222 | blk_delete_timer(req); |
2223 | 2223 | ||
2224 | if (req->cmd_flags & REQ_DONTPREP) | 2224 | if (req->cmd_flags & REQ_DONTPREP) |
2225 | blk_unprep_request(req); | 2225 | blk_unprep_request(req); |
2226 | 2226 | ||
2227 | 2227 | ||
2228 | blk_account_io_done(req); | 2228 | blk_account_io_done(req); |
2229 | 2229 | ||
2230 | if (req->end_io) | 2230 | if (req->end_io) |
2231 | req->end_io(req, error); | 2231 | req->end_io(req, error); |
2232 | else { | 2232 | else { |
2233 | if (blk_bidi_rq(req)) | 2233 | if (blk_bidi_rq(req)) |
2234 | __blk_put_request(req->next_rq->q, req->next_rq); | 2234 | __blk_put_request(req->next_rq->q, req->next_rq); |
2235 | 2235 | ||
2236 | __blk_put_request(req->q, req); | 2236 | __blk_put_request(req->q, req); |
2237 | } | 2237 | } |
2238 | } | 2238 | } |
2239 | 2239 | ||
2240 | /** | 2240 | /** |
2241 | * blk_end_bidi_request - Complete a bidi request | 2241 | * blk_end_bidi_request - Complete a bidi request |
2242 | * @rq: the request to complete | 2242 | * @rq: the request to complete |
2243 | * @error: %0 for success, < %0 for error | 2243 | * @error: %0 for success, < %0 for error |
2244 | * @nr_bytes: number of bytes to complete @rq | 2244 | * @nr_bytes: number of bytes to complete @rq |
2245 | * @bidi_bytes: number of bytes to complete @rq->next_rq | 2245 | * @bidi_bytes: number of bytes to complete @rq->next_rq |
2246 | * | 2246 | * |
2247 | * Description: | 2247 | * Description: |
2248 | * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. | 2248 | * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. |
2249 | * Drivers that support bidi can safely call this member for any | 2249 | * Drivers that support bidi can safely call this member for any |
2250 | * type of request, bidi or uni. In the latter case @bidi_bytes is | 2250 | * type of request, bidi or uni. In the latter case @bidi_bytes is |
2251 | * just ignored. | 2251 | * just ignored. |
2252 | * | 2252 | * |
2253 | * Return: | 2253 | * Return: |
2254 | * %false - we are done with this request | 2254 | * %false - we are done with this request |
2255 | * %true - still buffers pending for this request | 2255 | * %true - still buffers pending for this request |
2256 | **/ | 2256 | **/ |
2257 | static bool blk_end_bidi_request(struct request *rq, int error, | 2257 | static bool blk_end_bidi_request(struct request *rq, int error, |
2258 | unsigned int nr_bytes, unsigned int bidi_bytes) | 2258 | unsigned int nr_bytes, unsigned int bidi_bytes) |
2259 | { | 2259 | { |
2260 | struct request_queue *q = rq->q; | 2260 | struct request_queue *q = rq->q; |
2261 | unsigned long flags; | 2261 | unsigned long flags; |
2262 | 2262 | ||
2263 | if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) | 2263 | if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) |
2264 | return true; | 2264 | return true; |
2265 | 2265 | ||
2266 | spin_lock_irqsave(q->queue_lock, flags); | 2266 | spin_lock_irqsave(q->queue_lock, flags); |
2267 | blk_finish_request(rq, error); | 2267 | blk_finish_request(rq, error); |
2268 | spin_unlock_irqrestore(q->queue_lock, flags); | 2268 | spin_unlock_irqrestore(q->queue_lock, flags); |
2269 | 2269 | ||
2270 | return false; | 2270 | return false; |
2271 | } | 2271 | } |
2272 | 2272 | ||
2273 | /** | 2273 | /** |
2274 | * __blk_end_bidi_request - Complete a bidi request with queue lock held | 2274 | * __blk_end_bidi_request - Complete a bidi request with queue lock held |
2275 | * @rq: the request to complete | 2275 | * @rq: the request to complete |
2276 | * @error: %0 for success, < %0 for error | 2276 | * @error: %0 for success, < %0 for error |
2277 | * @nr_bytes: number of bytes to complete @rq | 2277 | * @nr_bytes: number of bytes to complete @rq |
2278 | * @bidi_bytes: number of bytes to complete @rq->next_rq | 2278 | * @bidi_bytes: number of bytes to complete @rq->next_rq |
2279 | * | 2279 | * |
2280 | * Description: | 2280 | * Description: |
2281 | * Identical to blk_end_bidi_request() except that queue lock is | 2281 | * Identical to blk_end_bidi_request() except that queue lock is |
2282 | * assumed to be locked on entry and remains so on return. | 2282 | * assumed to be locked on entry and remains so on return. |
2283 | * | 2283 | * |
2284 | * Return: | 2284 | * Return: |
2285 | * %false - we are done with this request | 2285 | * %false - we are done with this request |
2286 | * %true - still buffers pending for this request | 2286 | * %true - still buffers pending for this request |
2287 | **/ | 2287 | **/ |
2288 | static bool __blk_end_bidi_request(struct request *rq, int error, | 2288 | static bool __blk_end_bidi_request(struct request *rq, int error, |
2289 | unsigned int nr_bytes, unsigned int bidi_bytes) | 2289 | unsigned int nr_bytes, unsigned int bidi_bytes) |
2290 | { | 2290 | { |
2291 | if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) | 2291 | if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) |
2292 | return true; | 2292 | return true; |
2293 | 2293 | ||
2294 | blk_finish_request(rq, error); | 2294 | blk_finish_request(rq, error); |
2295 | 2295 | ||
2296 | return false; | 2296 | return false; |
2297 | } | 2297 | } |
2298 | 2298 | ||
2299 | /** | 2299 | /** |
2300 | * blk_end_request - Helper function for drivers to complete the request. | 2300 | * blk_end_request - Helper function for drivers to complete the request. |
2301 | * @rq: the request being processed | 2301 | * @rq: the request being processed |
2302 | * @error: %0 for success, < %0 for error | 2302 | * @error: %0 for success, < %0 for error |
2303 | * @nr_bytes: number of bytes to complete | 2303 | * @nr_bytes: number of bytes to complete |
2304 | * | 2304 | * |
2305 | * Description: | 2305 | * Description: |
2306 | * Ends I/O on a number of bytes attached to @rq. | 2306 | * Ends I/O on a number of bytes attached to @rq. |
2307 | * If @rq has leftover, sets it up for the next range of segments. | 2307 | * If @rq has leftover, sets it up for the next range of segments. |
2308 | * | 2308 | * |
2309 | * Return: | 2309 | * Return: |
2310 | * %false - we are done with this request | 2310 | * %false - we are done with this request |
2311 | * %true - still buffers pending for this request | 2311 | * %true - still buffers pending for this request |
2312 | **/ | 2312 | **/ |
2313 | bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes) | 2313 | bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes) |
2314 | { | 2314 | { |
2315 | return blk_end_bidi_request(rq, error, nr_bytes, 0); | 2315 | return blk_end_bidi_request(rq, error, nr_bytes, 0); |
2316 | } | 2316 | } |
2317 | EXPORT_SYMBOL(blk_end_request); | 2317 | EXPORT_SYMBOL(blk_end_request); |
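
A sketch of chunk-at-a-time completion from a (hypothetical) transfer-done path: struct my_device, current_rq and my_start_xfer() are illustrative only. The queue lock is not held here, so the plain blk_end_request() variant applies.

	struct my_device {
		struct request *current_rq;	/* chunk currently on the wire */
		/* ... */
	};

	static void my_xfer_done(struct my_device *dev, int error)
	{
		struct request *rq = dev->current_rq;

		/* retire the bytes of the chunk that just finished */
		if (blk_end_request(rq, error, blk_rq_cur_bytes(rq))) {
			/* buffers still pending: start the next chunk */
			my_start_xfer(dev, rq);	/* hypothetical: issue next chunk */
		} else {
			/* request fully completed and handed back to the block layer */
			dev->current_rq = NULL;
		}
	}
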
2318 | 2318 | ||
2319 | /** | 2319 | /** |
2320 | * blk_end_request_all - Helper function for drivers to finish the request. | 2320 | * blk_end_request_all - Helper function for drivers to finish the request. |
2321 | * @rq: the request to finish | 2321 | * @rq: the request to finish |
2322 | * @error: %0 for success, < %0 for error | 2322 | * @error: %0 for success, < %0 for error |
2323 | * | 2323 | * |
2324 | * Description: | 2324 | * Description: |
2325 | * Completely finish @rq. | 2325 | * Completely finish @rq. |
2326 | */ | 2326 | */ |
2327 | void blk_end_request_all(struct request *rq, int error) | 2327 | void blk_end_request_all(struct request *rq, int error) |
2328 | { | 2328 | { |
2329 | bool pending; | 2329 | bool pending; |
2330 | unsigned int bidi_bytes = 0; | 2330 | unsigned int bidi_bytes = 0; |
2331 | 2331 | ||
2332 | if (unlikely(blk_bidi_rq(rq))) | 2332 | if (unlikely(blk_bidi_rq(rq))) |
2333 | bidi_bytes = blk_rq_bytes(rq->next_rq); | 2333 | bidi_bytes = blk_rq_bytes(rq->next_rq); |
2334 | 2334 | ||
2335 | pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); | 2335 | pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); |
2336 | BUG_ON(pending); | 2336 | BUG_ON(pending); |
2337 | } | 2337 | } |
2338 | EXPORT_SYMBOL(blk_end_request_all); | 2338 | EXPORT_SYMBOL(blk_end_request_all); |
2339 | 2339 | ||
2340 | /** | 2340 | /** |
2341 | * blk_end_request_cur - Helper function to finish the current request chunk. | 2341 | * blk_end_request_cur - Helper function to finish the current request chunk. |
2342 | * @rq: the request to finish the current chunk for | 2342 | * @rq: the request to finish the current chunk for |
2343 | * @error: %0 for success, < %0 for error | 2343 | * @error: %0 for success, < %0 for error |
2344 | * | 2344 | * |
2345 | * Description: | 2345 | * Description: |
2346 | * Complete the current consecutively mapped chunk from @rq. | 2346 | * Complete the current consecutively mapped chunk from @rq. |
2347 | * | 2347 | * |
2348 | * Return: | 2348 | * Return: |
2349 | * %false - we are done with this request | 2349 | * %false - we are done with this request |
2350 | * %true - still buffers pending for this request | 2350 | * %true - still buffers pending for this request |
2351 | */ | 2351 | */ |
2352 | bool blk_end_request_cur(struct request *rq, int error) | 2352 | bool blk_end_request_cur(struct request *rq, int error) |
2353 | { | 2353 | { |
2354 | return blk_end_request(rq, error, blk_rq_cur_bytes(rq)); | 2354 | return blk_end_request(rq, error, blk_rq_cur_bytes(rq)); |
2355 | } | 2355 | } |
2356 | EXPORT_SYMBOL(blk_end_request_cur); | 2356 | EXPORT_SYMBOL(blk_end_request_cur); |
2357 | 2357 | ||
2358 | /** | 2358 | /** |
2359 | * blk_end_request_err - Finish a request till the next failure boundary. | 2359 | * blk_end_request_err - Finish a request till the next failure boundary. |
2360 | * @rq: the request to finish till the next failure boundary for | 2360 | * @rq: the request to finish till the next failure boundary for |
2361 | * @error: must be negative errno | 2361 | * @error: must be negative errno |
2362 | * | 2362 | * |
2363 | * Description: | 2363 | * Description: |
2364 | * Complete @rq till the next failure boundary. | 2364 | * Complete @rq till the next failure boundary. |
2365 | * | 2365 | * |
2366 | * Return: | 2366 | * Return: |
2367 | * %false - we are done with this request | 2367 | * %false - we are done with this request |
2368 | * %true - still buffers pending for this request | 2368 | * %true - still buffers pending for this request |
2369 | */ | 2369 | */ |
2370 | bool blk_end_request_err(struct request *rq, int error) | 2370 | bool blk_end_request_err(struct request *rq, int error) |
2371 | { | 2371 | { |
2372 | WARN_ON(error >= 0); | 2372 | WARN_ON(error >= 0); |
2373 | return blk_end_request(rq, error, blk_rq_err_bytes(rq)); | 2373 | return blk_end_request(rq, error, blk_rq_err_bytes(rq)); |
2374 | } | 2374 | } |
2375 | EXPORT_SYMBOL_GPL(blk_end_request_err); | 2375 | EXPORT_SYMBOL_GPL(blk_end_request_err); |
2376 | 2376 | ||
2377 | /** | 2377 | /** |
2378 | * __blk_end_request - Helper function for drivers to complete the request. | 2378 | * __blk_end_request - Helper function for drivers to complete the request. |
2379 | * @rq: the request being processed | 2379 | * @rq: the request being processed |
2380 | * @error: %0 for success, < %0 for error | 2380 | * @error: %0 for success, < %0 for error |
2381 | * @nr_bytes: number of bytes to complete | 2381 | * @nr_bytes: number of bytes to complete |
2382 | * | 2382 | * |
2383 | * Description: | 2383 | * Description: |
2384 | * Must be called with queue lock held unlike blk_end_request(). | 2384 | * Must be called with queue lock held unlike blk_end_request(). |
2385 | * | 2385 | * |
2386 | * Return: | 2386 | * Return: |
2387 | * %false - we are done with this request | 2387 | * %false - we are done with this request |
2388 | * %true - still buffers pending for this request | 2388 | * %true - still buffers pending for this request |
2389 | **/ | 2389 | **/ |
2390 | bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) | 2390 | bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) |
2391 | { | 2391 | { |
2392 | return __blk_end_bidi_request(rq, error, nr_bytes, 0); | 2392 | return __blk_end_bidi_request(rq, error, nr_bytes, 0); |
2393 | } | 2393 | } |
2394 | EXPORT_SYMBOL(__blk_end_request); | 2394 | EXPORT_SYMBOL(__blk_end_request); |
2395 | 2395 | ||
2396 | /** | 2396 | /** |
2397 | * __blk_end_request_all - Helper function for drivers to finish the request. | 2397 | * __blk_end_request_all - Helper function for drivers to finish the request. |
2398 | * @rq: the request to finish | 2398 | * @rq: the request to finish |
2399 | * @error: %0 for success, < %0 for error | 2399 | * @error: %0 for success, < %0 for error |
2400 | * | 2400 | * |
2401 | * Description: | 2401 | * Description: |
2402 | * Completely finish @rq. Must be called with queue lock held. | 2402 | * Completely finish @rq. Must be called with queue lock held. |
2403 | */ | 2403 | */ |
2404 | void __blk_end_request_all(struct request *rq, int error) | 2404 | void __blk_end_request_all(struct request *rq, int error) |
2405 | { | 2405 | { |
2406 | bool pending; | 2406 | bool pending; |
2407 | unsigned int bidi_bytes = 0; | 2407 | unsigned int bidi_bytes = 0; |
2408 | 2408 | ||
2409 | if (unlikely(blk_bidi_rq(rq))) | 2409 | if (unlikely(blk_bidi_rq(rq))) |
2410 | bidi_bytes = blk_rq_bytes(rq->next_rq); | 2410 | bidi_bytes = blk_rq_bytes(rq->next_rq); |
2411 | 2411 | ||
2412 | pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); | 2412 | pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); |
2413 | BUG_ON(pending); | 2413 | BUG_ON(pending); |
2414 | } | 2414 | } |
2415 | EXPORT_SYMBOL(__blk_end_request_all); | 2415 | EXPORT_SYMBOL(__blk_end_request_all); |
2416 | 2416 | ||
2417 | /** | 2417 | /** |
2418 | * __blk_end_request_cur - Helper function to finish the current request chunk. | 2418 | * __blk_end_request_cur - Helper function to finish the current request chunk. |
2419 | * @rq: the request to finish the current chunk for | 2419 | * @rq: the request to finish the current chunk for |
2420 | * @error: %0 for success, < %0 for error | 2420 | * @error: %0 for success, < %0 for error |
2421 | * | 2421 | * |
2422 | * Description: | 2422 | * Description: |
2423 | * Complete the current consecutively mapped chunk from @rq. Must | 2423 | * Complete the current consecutively mapped chunk from @rq. Must |
2424 | * be called with queue lock held. | 2424 | * be called with queue lock held. |
2425 | * | 2425 | * |
2426 | * Return: | 2426 | * Return: |
2427 | * %false - we are done with this request | 2427 | * %false - we are done with this request |
2428 | * %true - still buffers pending for this request | 2428 | * %true - still buffers pending for this request |
2429 | */ | 2429 | */ |
2430 | bool __blk_end_request_cur(struct request *rq, int error) | 2430 | bool __blk_end_request_cur(struct request *rq, int error) |
2431 | { | 2431 | { |
2432 | return __blk_end_request(rq, error, blk_rq_cur_bytes(rq)); | 2432 | return __blk_end_request(rq, error, blk_rq_cur_bytes(rq)); |
2433 | } | 2433 | } |
2434 | EXPORT_SYMBOL(__blk_end_request_cur); | 2434 | EXPORT_SYMBOL(__blk_end_request_cur); |
2435 | 2435 | ||
2436 | /** | 2436 | /** |
2437 | * __blk_end_request_err - Finish a request till the next failure boundary. | 2437 | * __blk_end_request_err - Finish a request till the next failure boundary. |
2438 | * @rq: the request to finish till the next failure boundary for | 2438 | * @rq: the request to finish till the next failure boundary for |
2439 | * @error: must be negative errno | 2439 | * @error: must be negative errno |
2440 | * | 2440 | * |
2441 | * Description: | 2441 | * Description: |
2442 | * Complete @rq till the next failure boundary. Must be called | 2442 | * Complete @rq till the next failure boundary. Must be called |
2443 | * with queue lock held. | 2443 | * with queue lock held. |
2444 | * | 2444 | * |
2445 | * Return: | 2445 | * Return: |
2446 | * %false - we are done with this request | 2446 | * %false - we are done with this request |
2447 | * %true - still buffers pending for this request | 2447 | * %true - still buffers pending for this request |
2448 | */ | 2448 | */ |
2449 | bool __blk_end_request_err(struct request *rq, int error) | 2449 | bool __blk_end_request_err(struct request *rq, int error) |
2450 | { | 2450 | { |
2451 | WARN_ON(error >= 0); | 2451 | WARN_ON(error >= 0); |
2452 | return __blk_end_request(rq, error, blk_rq_err_bytes(rq)); | 2452 | return __blk_end_request(rq, error, blk_rq_err_bytes(rq)); |
2453 | } | 2453 | } |
2454 | EXPORT_SYMBOL_GPL(__blk_end_request_err); | 2454 | EXPORT_SYMBOL_GPL(__blk_end_request_err); |
2455 | 2455 | ||
2456 | void blk_rq_bio_prep(struct request_queue *q, struct request *rq, | 2456 | void blk_rq_bio_prep(struct request_queue *q, struct request *rq, |
2457 | struct bio *bio) | 2457 | struct bio *bio) |
2458 | { | 2458 | { |
2459 | /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */ | 2459 | /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */ |
2460 | rq->cmd_flags |= bio->bi_rw & REQ_WRITE; | 2460 | rq->cmd_flags |= bio->bi_rw & REQ_WRITE; |
2461 | 2461 | ||
2462 | if (bio_has_data(bio)) { | 2462 | if (bio_has_data(bio)) { |
2463 | rq->nr_phys_segments = bio_phys_segments(q, bio); | 2463 | rq->nr_phys_segments = bio_phys_segments(q, bio); |
2464 | rq->buffer = bio_data(bio); | 2464 | rq->buffer = bio_data(bio); |
2465 | } | 2465 | } |
2466 | rq->__data_len = bio->bi_size; | 2466 | rq->__data_len = bio->bi_size; |
2467 | rq->bio = rq->biotail = bio; | 2467 | rq->bio = rq->biotail = bio; |
2468 | 2468 | ||
2469 | if (bio->bi_bdev) | 2469 | if (bio->bi_bdev) |
2470 | rq->rq_disk = bio->bi_bdev->bd_disk; | 2470 | rq->rq_disk = bio->bi_bdev->bd_disk; |
2471 | } | 2471 | } |
2472 | 2472 | ||
2473 | #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE | 2473 | #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE |
2474 | /** | 2474 | /** |
2475 | * rq_flush_dcache_pages - Helper function to flush all pages in a request | 2475 | * rq_flush_dcache_pages - Helper function to flush all pages in a request |
2476 | * @rq: the request to be flushed | 2476 | * @rq: the request to be flushed |
2477 | * | 2477 | * |
2478 | * Description: | 2478 | * Description: |
2479 | * Flush all pages in @rq. | 2479 | * Flush all pages in @rq. |
2480 | */ | 2480 | */ |
2481 | void rq_flush_dcache_pages(struct request *rq) | 2481 | void rq_flush_dcache_pages(struct request *rq) |
2482 | { | 2482 | { |
2483 | struct req_iterator iter; | 2483 | struct req_iterator iter; |
2484 | struct bio_vec *bvec; | 2484 | struct bio_vec *bvec; |
2485 | 2485 | ||
2486 | rq_for_each_segment(bvec, rq, iter) | 2486 | rq_for_each_segment(bvec, rq, iter) |
2487 | flush_dcache_page(bvec->bv_page); | 2487 | flush_dcache_page(bvec->bv_page); |
2488 | } | 2488 | } |
2489 | EXPORT_SYMBOL_GPL(rq_flush_dcache_pages); | 2489 | EXPORT_SYMBOL_GPL(rq_flush_dcache_pages); |
2490 | #endif | 2490 | #endif |
2491 | 2491 | ||
2492 | /** | 2492 | /** |
2493 | * blk_lld_busy - Check if underlying low-level drivers of a device are busy | 2493 | * blk_lld_busy - Check if underlying low-level drivers of a device are busy |
2494 | * @q : the queue of the device being checked | 2494 | * @q : the queue of the device being checked |
2495 | * | 2495 | * |
2496 | * Description: | 2496 | * Description: |
2497 | * Check if underlying low-level drivers of a device are busy. | 2497 | * Check if underlying low-level drivers of a device are busy. |
2498 | * If the drivers want to export their busy state, they must set their own | 2498 | * If the drivers want to export their busy state, they must set their own |
2499 | * exporting function using blk_queue_lld_busy() first. | 2499 | * exporting function using blk_queue_lld_busy() first. |
2500 | * | 2500 | * |
2501 | * Basically, this function is used only by request stacking drivers | 2501 | * Basically, this function is used only by request stacking drivers |
2502 | * to stop dispatching requests to underlying devices when underlying | 2502 | * to stop dispatching requests to underlying devices when underlying |
2503 | * devices are busy. This behavior helps more I/O merging on the queue | 2503 | * devices are busy. This behavior helps more I/O merging on the queue |
2504 | * of the request stacking driver and prevents I/O throughput regression | 2504 | * of the request stacking driver and prevents I/O throughput regression |
2505 | * on burst I/O load. | 2505 | * on burst I/O load. |
2506 | * | 2506 | * |
2507 | * Return: | 2507 | * Return: |
2508 | * 0 - Not busy (The request stacking driver should dispatch request) | 2508 | * 0 - Not busy (The request stacking driver should dispatch request) |
2509 | * 1 - Busy (The request stacking driver should stop dispatching request) | 2509 | * 1 - Busy (The request stacking driver should stop dispatching request) |
2510 | */ | 2510 | */ |
2511 | int blk_lld_busy(struct request_queue *q) | 2511 | int blk_lld_busy(struct request_queue *q) |
2512 | { | 2512 | { |
2513 | if (q->lld_busy_fn) | 2513 | if (q->lld_busy_fn) |
2514 | return q->lld_busy_fn(q); | 2514 | return q->lld_busy_fn(q); |
2515 | 2515 | ||
2516 | return 0; | 2516 | return 0; |
2517 | } | 2517 | } |
2518 | EXPORT_SYMBOL_GPL(blk_lld_busy); | 2518 | EXPORT_SYMBOL_GPL(blk_lld_busy); |
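A rough illustration of the intended caller (sketch only; example_dispatch and lower_q are hypothetical names, not part of this change): a request-stacking driver such as request-based dm can poll blk_lld_busy() on the lower queue and hold back its own dispatch while the low-level device reports busy, so merging keeps happening on the stacking queue.

	static int example_dispatch(struct request_queue *lower_q,
				    struct request *clone)
	{
		/* lower device busy: stop dispatching, keep the clone queued */
		if (blk_lld_busy(lower_q))
			return 1;

		/* otherwise hand 'clone' down (driver-specific insertion) */
		return 0;
	}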
2519 | 2519 | ||
2520 | /** | 2520 | /** |
2521 | * blk_rq_unprep_clone - Helper function to free all bios in a cloned request | 2521 | * blk_rq_unprep_clone - Helper function to free all bios in a cloned request |
2522 | * @rq: the clone request to be cleaned up | 2522 | * @rq: the clone request to be cleaned up |
2523 | * | 2523 | * |
2524 | * Description: | 2524 | * Description: |
2525 | * Free all bios in @rq for a cloned request. | 2525 | * Free all bios in @rq for a cloned request. |
2526 | */ | 2526 | */ |
2527 | void blk_rq_unprep_clone(struct request *rq) | 2527 | void blk_rq_unprep_clone(struct request *rq) |
2528 | { | 2528 | { |
2529 | struct bio *bio; | 2529 | struct bio *bio; |
2530 | 2530 | ||
2531 | while ((bio = rq->bio) != NULL) { | 2531 | while ((bio = rq->bio) != NULL) { |
2532 | rq->bio = bio->bi_next; | 2532 | rq->bio = bio->bi_next; |
2533 | 2533 | ||
2534 | bio_put(bio); | 2534 | bio_put(bio); |
2535 | } | 2535 | } |
2536 | } | 2536 | } |
2537 | EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); | 2537 | EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); |
2538 | 2538 | ||
2539 | /* | 2539 | /* |
2540 | * Copy attributes of the original request to the clone request. | 2540 | * Copy attributes of the original request to the clone request. |
2541 | * The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied. | 2541 | * The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied. |
2542 | */ | 2542 | */ |
2543 | static void __blk_rq_prep_clone(struct request *dst, struct request *src) | 2543 | static void __blk_rq_prep_clone(struct request *dst, struct request *src) |
2544 | { | 2544 | { |
2545 | dst->cpu = src->cpu; | 2545 | dst->cpu = src->cpu; |
2546 | dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE; | 2546 | dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE; |
2547 | dst->cmd_type = src->cmd_type; | 2547 | dst->cmd_type = src->cmd_type; |
2548 | dst->__sector = blk_rq_pos(src); | 2548 | dst->__sector = blk_rq_pos(src); |
2549 | dst->__data_len = blk_rq_bytes(src); | 2549 | dst->__data_len = blk_rq_bytes(src); |
2550 | dst->nr_phys_segments = src->nr_phys_segments; | 2550 | dst->nr_phys_segments = src->nr_phys_segments; |
2551 | dst->ioprio = src->ioprio; | 2551 | dst->ioprio = src->ioprio; |
2552 | dst->extra_len = src->extra_len; | 2552 | dst->extra_len = src->extra_len; |
2553 | } | 2553 | } |
2554 | 2554 | ||
2555 | /** | 2555 | /** |
2556 | * blk_rq_prep_clone - Helper function to setup clone request | 2556 | * blk_rq_prep_clone - Helper function to setup clone request |
2557 | * @rq: the request to be setup | 2557 | * @rq: the request to be setup |
2558 | * @rq_src: original request to be cloned | 2558 | * @rq_src: original request to be cloned |
2559 | * @bs: bio_set that bios for clone are allocated from | 2559 | * @bs: bio_set that bios for clone are allocated from |
2560 | * @gfp_mask: memory allocation mask for bio | 2560 | * @gfp_mask: memory allocation mask for bio |
2561 | * @bio_ctr: setup function to be called for each clone bio. | 2561 | * @bio_ctr: setup function to be called for each clone bio. |
2562 | * Returns %0 for success, non %0 for failure. | 2562 | * Returns %0 for success, non %0 for failure. |
2563 | * @data: private data to be passed to @bio_ctr | 2563 | * @data: private data to be passed to @bio_ctr |
2564 | * | 2564 | * |
2565 | * Description: | 2565 | * Description: |
2566 | * Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq. | 2566 | * Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq. |
2567 | * The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense) | 2567 | * The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense) |
2568 | * are not copied, and copying such parts is the caller's responsibility. | 2568 | * are not copied, and copying such parts is the caller's responsibility. |
2569 | * Also, pages which the original bios are pointing to are not copied | 2569 | * Also, pages which the original bios are pointing to are not copied |
2570 | * and the cloned bios just point to the same pages. | 2570 | * and the cloned bios just point to the same pages. |
2571 | * So cloned bios must be completed before original bios, which means | 2571 | * So cloned bios must be completed before original bios, which means |
2572 | * the caller must complete @rq before @rq_src. | 2572 | * the caller must complete @rq before @rq_src. |
2573 | */ | 2573 | */ |
2574 | int blk_rq_prep_clone(struct request *rq, struct request *rq_src, | 2574 | int blk_rq_prep_clone(struct request *rq, struct request *rq_src, |
2575 | struct bio_set *bs, gfp_t gfp_mask, | 2575 | struct bio_set *bs, gfp_t gfp_mask, |
2576 | int (*bio_ctr)(struct bio *, struct bio *, void *), | 2576 | int (*bio_ctr)(struct bio *, struct bio *, void *), |
2577 | void *data) | 2577 | void *data) |
2578 | { | 2578 | { |
2579 | struct bio *bio, *bio_src; | 2579 | struct bio *bio, *bio_src; |
2580 | 2580 | ||
2581 | if (!bs) | 2581 | if (!bs) |
2582 | bs = fs_bio_set; | 2582 | bs = fs_bio_set; |
2583 | 2583 | ||
2584 | blk_rq_init(NULL, rq); | 2584 | blk_rq_init(NULL, rq); |
2585 | 2585 | ||
2586 | __rq_for_each_bio(bio_src, rq_src) { | 2586 | __rq_for_each_bio(bio_src, rq_src) { |
2587 | bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs); | 2587 | bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs); |
2588 | if (!bio) | 2588 | if (!bio) |
2589 | goto free_and_out; | 2589 | goto free_and_out; |
2590 | 2590 | ||
2591 | __bio_clone(bio, bio_src); | 2591 | __bio_clone(bio, bio_src); |
2592 | 2592 | ||
2593 | if (bio_integrity(bio_src) && | 2593 | if (bio_integrity(bio_src) && |
2594 | bio_integrity_clone(bio, bio_src, gfp_mask, bs)) | 2594 | bio_integrity_clone(bio, bio_src, gfp_mask, bs)) |
2595 | goto free_and_out; | 2595 | goto free_and_out; |
2596 | 2596 | ||
2597 | if (bio_ctr && bio_ctr(bio, bio_src, data)) | 2597 | if (bio_ctr && bio_ctr(bio, bio_src, data)) |
2598 | goto free_and_out; | 2598 | goto free_and_out; |
2599 | 2599 | ||
2600 | if (rq->bio) { | 2600 | if (rq->bio) { |
2601 | rq->biotail->bi_next = bio; | 2601 | rq->biotail->bi_next = bio; |
2602 | rq->biotail = bio; | 2602 | rq->biotail = bio; |
2603 | } else | 2603 | } else |
2604 | rq->bio = rq->biotail = bio; | 2604 | rq->bio = rq->biotail = bio; |
2605 | } | 2605 | } |
2606 | 2606 | ||
2607 | __blk_rq_prep_clone(rq, rq_src); | 2607 | __blk_rq_prep_clone(rq, rq_src); |
2608 | 2608 | ||
2609 | return 0; | 2609 | return 0; |
2610 | 2610 | ||
2611 | free_and_out: | 2611 | free_and_out: |
2612 | if (bio) | 2612 | if (bio) |
2613 | bio_free(bio, bs); | 2613 | bio_free(bio, bs); |
2614 | blk_rq_unprep_clone(rq); | 2614 | blk_rq_unprep_clone(rq); |
2615 | 2615 | ||
2616 | return -ENOMEM; | 2616 | return -ENOMEM; |
2617 | } | 2617 | } |
2618 | EXPORT_SYMBOL_GPL(blk_rq_prep_clone); | 2618 | EXPORT_SYMBOL_GPL(blk_rq_prep_clone); |
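A rough usage sketch (example_bio_ctr and example_clone are hypothetical names; request-based dm follows a similar pattern): the stacking driver clones the original request, retargets the clone for the lower device, and drops the cloned bios again with blk_rq_unprep_clone() on its teardown path.

	static int example_bio_ctr(struct bio *clone_bio, struct bio *orig_bio,
				   void *data)
	{
		/* per-clone-bio setup, e.g. stash 'data' for the endio path */
		return 0;
	}

	static int example_clone(struct request *clone, struct request *orig)
	{
		/* a NULL bio_set means the clone bios come from fs_bio_set */
		if (blk_rq_prep_clone(clone, orig, NULL, GFP_ATOMIC,
				      example_bio_ctr, NULL))
			return -ENOMEM;

		/*
		 * Retarget 'clone' for the lower device and insert it there.
		 * On a later error, blk_rq_unprep_clone(clone) frees the bios.
		 */
		return 0;
	}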
2619 | 2619 | ||
2620 | int kblockd_schedule_work(struct request_queue *q, struct work_struct *work) | 2620 | int kblockd_schedule_work(struct request_queue *q, struct work_struct *work) |
2621 | { | 2621 | { |
2622 | return queue_work(kblockd_workqueue, work); | 2622 | return queue_work(kblockd_workqueue, work); |
2623 | } | 2623 | } |
2624 | EXPORT_SYMBOL(kblockd_schedule_work); | 2624 | EXPORT_SYMBOL(kblockd_schedule_work); |
2625 | 2625 | ||
2626 | int kblockd_schedule_delayed_work(struct request_queue *q, | 2626 | int kblockd_schedule_delayed_work(struct request_queue *q, |
2627 | struct delayed_work *dwork, unsigned long delay) | 2627 | struct delayed_work *dwork, unsigned long delay) |
2628 | { | 2628 | { |
2629 | return queue_delayed_work(kblockd_workqueue, dwork, delay); | 2629 | return queue_delayed_work(kblockd_workqueue, dwork, delay); |
2630 | } | 2630 | } |
2631 | EXPORT_SYMBOL(kblockd_schedule_delayed_work); | 2631 | EXPORT_SYMBOL(kblockd_schedule_delayed_work); |
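For reference, a minimal (hypothetical) caller; drivers use these helpers to run or kick a queue from the kblockd workqueue rather than from a possibly deep call stack:

	/* sketch only: 'struct my_dev' and its work item are made up */
	static void example_defer_run(struct my_dev *mydev)
	{
		/* run the queue from kblockd, not from this call chain */
		kblockd_schedule_work(mydev->queue, &mydev->run_work);
	}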
2632 | 2632 | ||
2633 | #define PLUG_MAGIC 0x91827364 | 2633 | #define PLUG_MAGIC 0x91827364 |
2634 | 2634 | ||
2635 | void blk_start_plug(struct blk_plug *plug) | 2635 | void blk_start_plug(struct blk_plug *plug) |
2636 | { | 2636 | { |
2637 | struct task_struct *tsk = current; | 2637 | struct task_struct *tsk = current; |
2638 | 2638 | ||
2639 | plug->magic = PLUG_MAGIC; | 2639 | plug->magic = PLUG_MAGIC; |
2640 | INIT_LIST_HEAD(&plug->list); | 2640 | INIT_LIST_HEAD(&plug->list); |
2641 | plug->should_sort = 0; | 2641 | plug->should_sort = 0; |
2642 | 2642 | ||
2643 | /* | 2643 | /* |
2644 | * If this is a nested plug, don't actually assign it. It will be | 2644 | * If this is a nested plug, don't actually assign it. It will be |
2645 | * flushed on its own. | 2645 | * flushed on its own. |
2646 | */ | 2646 | */ |
2647 | if (!tsk->plug) { | 2647 | if (!tsk->plug) { |
2648 | /* | 2648 | /* |
2649 | * Store ordering should not be needed here, since a potential | 2649 | * Store ordering should not be needed here, since a potential |
2650 | * preempt will imply a full memory barrier | 2650 | * preempt will imply a full memory barrier |
2651 | */ | 2651 | */ |
2652 | tsk->plug = plug; | 2652 | tsk->plug = plug; |
2653 | } | 2653 | } |
2654 | } | 2654 | } |
2655 | EXPORT_SYMBOL(blk_start_plug); | 2655 | EXPORT_SYMBOL(blk_start_plug); |
2656 | 2656 | ||
2657 | static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b) | 2657 | static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b) |
2658 | { | 2658 | { |
2659 | struct request *rqa = container_of(a, struct request, queuelist); | 2659 | struct request *rqa = container_of(a, struct request, queuelist); |
2660 | struct request *rqb = container_of(b, struct request, queuelist); | 2660 | struct request *rqb = container_of(b, struct request, queuelist); |
2661 | 2661 | ||
2662 | return !(rqa->q <= rqb->q); | 2662 | return !(rqa->q <= rqb->q); |
2663 | } | 2663 | } |
2664 | 2664 | ||
2665 | /* | ||
2666 | * If 'from_schedule' is true, then postpone the dispatch of requests | ||
2667 | * until a safe kblockd context. We do this to avoid accidental big | ||
2668 | * additional stack usage in driver dispatch, in places where the original | ||
2669 | * plugger did not intend it. | ||
2670 | */ | ||
2665 | static void queue_unplugged(struct request_queue *q, unsigned int depth, | 2671 | static void queue_unplugged(struct request_queue *q, unsigned int depth, |
2666 | bool force_kblockd) | 2672 | bool from_schedule) |
2667 | { | 2673 | { |
2668 | trace_block_unplug_io(q, depth); | 2674 | trace_block_unplug(q, depth, !from_schedule); |
2669 | __blk_run_queue(q, force_kblockd); | 2675 | __blk_run_queue(q, from_schedule); |
2670 | 2676 | ||
2671 | if (q->unplugged_fn) | 2677 | if (q->unplugged_fn) |
2672 | q->unplugged_fn(q); | 2678 | q->unplugged_fn(q); |
2673 | } | 2679 | } |
2674 | 2680 | ||
2675 | void blk_flush_plug_list(struct blk_plug *plug, bool force_kblockd) | 2681 | void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) |
2676 | { | 2682 | { |
2677 | struct request_queue *q; | 2683 | struct request_queue *q; |
2678 | unsigned long flags; | 2684 | unsigned long flags; |
2679 | struct request *rq; | 2685 | struct request *rq; |
2680 | LIST_HEAD(list); | 2686 | LIST_HEAD(list); |
2681 | unsigned int depth; | 2687 | unsigned int depth; |
2682 | 2688 | ||
2683 | BUG_ON(plug->magic != PLUG_MAGIC); | 2689 | BUG_ON(plug->magic != PLUG_MAGIC); |
2684 | 2690 | ||
2685 | if (list_empty(&plug->list)) | 2691 | if (list_empty(&plug->list)) |
2686 | return; | 2692 | return; |
2687 | 2693 | ||
2688 | list_splice_init(&plug->list, &list); | 2694 | list_splice_init(&plug->list, &list); |
2689 | 2695 | ||
2690 | if (plug->should_sort) { | 2696 | if (plug->should_sort) { |
2691 | list_sort(NULL, &list, plug_rq_cmp); | 2697 | list_sort(NULL, &list, plug_rq_cmp); |
2692 | plug->should_sort = 0; | 2698 | plug->should_sort = 0; |
2693 | } | 2699 | } |
2694 | 2700 | ||
2695 | q = NULL; | 2701 | q = NULL; |
2696 | depth = 0; | 2702 | depth = 0; |
2697 | 2703 | ||
2698 | /* | 2704 | /* |
2699 | * Save and disable interrupts here, to avoid doing it for every | 2705 | * Save and disable interrupts here, to avoid doing it for every |
2700 | * queue lock we have to take. | 2706 | * queue lock we have to take. |
2701 | */ | 2707 | */ |
2702 | local_irq_save(flags); | 2708 | local_irq_save(flags); |
2703 | while (!list_empty(&list)) { | 2709 | while (!list_empty(&list)) { |
2704 | rq = list_entry_rq(list.next); | 2710 | rq = list_entry_rq(list.next); |
2705 | list_del_init(&rq->queuelist); | 2711 | list_del_init(&rq->queuelist); |
2706 | BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG)); | 2712 | BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG)); |
2707 | BUG_ON(!rq->q); | 2713 | BUG_ON(!rq->q); |
2708 | if (rq->q != q) { | 2714 | if (rq->q != q) { |
2709 | if (q) { | 2715 | if (q) { |
2710 | queue_unplugged(q, depth, force_kblockd); | 2716 | queue_unplugged(q, depth, from_schedule); |
2711 | spin_unlock(q->queue_lock); | 2717 | spin_unlock(q->queue_lock); |
2712 | } | 2718 | } |
2713 | q = rq->q; | 2719 | q = rq->q; |
2714 | depth = 0; | 2720 | depth = 0; |
2715 | spin_lock(q->queue_lock); | 2721 | spin_lock(q->queue_lock); |
2716 | } | 2722 | } |
2717 | rq->cmd_flags &= ~REQ_ON_PLUG; | 2723 | rq->cmd_flags &= ~REQ_ON_PLUG; |
2718 | 2724 | ||
2719 | /* | 2725 | /* |
2720 | * rq is already accounted, so use raw insert | 2726 | * rq is already accounted, so use raw insert |
2721 | */ | 2727 | */ |
2722 | if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) | 2728 | if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) |
2723 | __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH); | 2729 | __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH); |
2724 | else | 2730 | else |
2725 | __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE); | 2731 | __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE); |
2726 | 2732 | ||
2727 | depth++; | 2733 | depth++; |
2728 | } | 2734 | } |
2729 | 2735 | ||
2730 | if (q) { | 2736 | if (q) { |
2731 | queue_unplugged(q, depth, force_kblockd); | 2737 | queue_unplugged(q, depth, from_schedule); |
2732 | spin_unlock(q->queue_lock); | 2738 | spin_unlock(q->queue_lock); |
2733 | } | 2739 | } |
2734 | 2740 | ||
2735 | local_irq_restore(flags); | 2741 | local_irq_restore(flags); |
2736 | } | 2742 | } |
2737 | EXPORT_SYMBOL(blk_flush_plug_list); | 2743 | EXPORT_SYMBOL(blk_flush_plug_list); |
2738 | 2744 | ||
2739 | void blk_finish_plug(struct blk_plug *plug) | 2745 | void blk_finish_plug(struct blk_plug *plug) |
2740 | { | 2746 | { |
2741 | blk_flush_plug_list(plug, false); | 2747 | blk_flush_plug_list(plug, false); |
2742 | 2748 | ||
2743 | if (plug == current->plug) | 2749 | if (plug == current->plug) |
2744 | current->plug = NULL; | 2750 | current->plug = NULL; |
2745 | } | 2751 | } |
2746 | EXPORT_SYMBOL(blk_finish_plug); | 2752 | EXPORT_SYMBOL(blk_finish_plug); |
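The common calling pattern, and how it maps onto the two unplug flavours, looks roughly like this (sketch; submit_more_io() stands in for whatever generates the bios). Requests pile up on the on-stack plug; an explicit blk_finish_plug() flushes them with from_schedule == false, while blocking in schedule() first flushes them with from_schedule == true, which trace_block_unplug() then reports as the implicit variant.

	struct blk_plug plug;

	blk_start_plug(&plug);
	submit_more_io();	/* hypothetical: bios queued, requests gather on the plug */
	/*
	 * If the task sleeps before finishing, the scheduler flushes the
	 * plug with from_schedule == true (implicit/timer-style unplug).
	 */
	blk_finish_plug(&plug);	/* explicit unplug: trace_block_unplug(q, depth, true) */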
2747 | 2753 | ||
2748 | int __init blk_dev_init(void) | 2754 | int __init blk_dev_init(void) |
2749 | { | 2755 | { |
2750 | BUILD_BUG_ON(__REQ_NR_BITS > 8 * | 2756 | BUILD_BUG_ON(__REQ_NR_BITS > 8 * |
2751 | sizeof(((struct request *)0)->cmd_flags)); | 2757 | sizeof(((struct request *)0)->cmd_flags)); |
2752 | 2758 | ||
2753 | /* used for unplugging and affects IO latency/throughput - HIGHPRI */ | 2759 | /* used for unplugging and affects IO latency/throughput - HIGHPRI */ |
2754 | kblockd_workqueue = alloc_workqueue("kblockd", | 2760 | kblockd_workqueue = alloc_workqueue("kblockd", |
2755 | WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); | 2761 | WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); |
2756 | if (!kblockd_workqueue) | 2762 | if (!kblockd_workqueue) |
2757 | panic("Failed to create kblockd\n"); | 2763 | panic("Failed to create kblockd\n"); |
2758 | 2764 | ||
2759 | request_cachep = kmem_cache_create("blkdev_requests", | 2765 | request_cachep = kmem_cache_create("blkdev_requests", |
2760 | sizeof(struct request), 0, SLAB_PANIC, NULL); | 2766 | sizeof(struct request), 0, SLAB_PANIC, NULL); |
2761 | 2767 | ||
2762 | blk_requestq_cachep = kmem_cache_create("blkdev_queue", | 2768 | blk_requestq_cachep = kmem_cache_create("blkdev_queue", |
2763 | sizeof(struct request_queue), 0, SLAB_PANIC, NULL); | 2769 | sizeof(struct request_queue), 0, SLAB_PANIC, NULL); |
2764 | 2770 | ||
2765 | return 0; | 2771 | return 0; |
2766 | } | 2772 | } |
2767 | 2773 |
include/trace/events/block.h
1 | #undef TRACE_SYSTEM | 1 | #undef TRACE_SYSTEM |
2 | #define TRACE_SYSTEM block | 2 | #define TRACE_SYSTEM block |
3 | 3 | ||
4 | #if !defined(_TRACE_BLOCK_H) || defined(TRACE_HEADER_MULTI_READ) | 4 | #if !defined(_TRACE_BLOCK_H) || defined(TRACE_HEADER_MULTI_READ) |
5 | #define _TRACE_BLOCK_H | 5 | #define _TRACE_BLOCK_H |
6 | 6 | ||
7 | #include <linux/blktrace_api.h> | 7 | #include <linux/blktrace_api.h> |
8 | #include <linux/blkdev.h> | 8 | #include <linux/blkdev.h> |
9 | #include <linux/tracepoint.h> | 9 | #include <linux/tracepoint.h> |
10 | 10 | ||
11 | DECLARE_EVENT_CLASS(block_rq_with_error, | 11 | DECLARE_EVENT_CLASS(block_rq_with_error, |
12 | 12 | ||
13 | TP_PROTO(struct request_queue *q, struct request *rq), | 13 | TP_PROTO(struct request_queue *q, struct request *rq), |
14 | 14 | ||
15 | TP_ARGS(q, rq), | 15 | TP_ARGS(q, rq), |
16 | 16 | ||
17 | TP_STRUCT__entry( | 17 | TP_STRUCT__entry( |
18 | __field( dev_t, dev ) | 18 | __field( dev_t, dev ) |
19 | __field( sector_t, sector ) | 19 | __field( sector_t, sector ) |
20 | __field( unsigned int, nr_sector ) | 20 | __field( unsigned int, nr_sector ) |
21 | __field( int, errors ) | 21 | __field( int, errors ) |
22 | __array( char, rwbs, 6 ) | 22 | __array( char, rwbs, 6 ) |
23 | __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) | 23 | __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) |
24 | ), | 24 | ), |
25 | 25 | ||
26 | TP_fast_assign( | 26 | TP_fast_assign( |
27 | __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; | 27 | __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; |
28 | __entry->sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? | 28 | __entry->sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? |
29 | 0 : blk_rq_pos(rq); | 29 | 0 : blk_rq_pos(rq); |
30 | __entry->nr_sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? | 30 | __entry->nr_sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? |
31 | 0 : blk_rq_sectors(rq); | 31 | 0 : blk_rq_sectors(rq); |
32 | __entry->errors = rq->errors; | 32 | __entry->errors = rq->errors; |
33 | 33 | ||
34 | blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq)); | 34 | blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq)); |
35 | blk_dump_cmd(__get_str(cmd), rq); | 35 | blk_dump_cmd(__get_str(cmd), rq); |
36 | ), | 36 | ), |
37 | 37 | ||
38 | TP_printk("%d,%d %s (%s) %llu + %u [%d]", | 38 | TP_printk("%d,%d %s (%s) %llu + %u [%d]", |
39 | MAJOR(__entry->dev), MINOR(__entry->dev), | 39 | MAJOR(__entry->dev), MINOR(__entry->dev), |
40 | __entry->rwbs, __get_str(cmd), | 40 | __entry->rwbs, __get_str(cmd), |
41 | (unsigned long long)__entry->sector, | 41 | (unsigned long long)__entry->sector, |
42 | __entry->nr_sector, __entry->errors) | 42 | __entry->nr_sector, __entry->errors) |
43 | ); | 43 | ); |
44 | 44 | ||
45 | /** | 45 | /** |
46 | * block_rq_abort - abort block operation request | 46 | * block_rq_abort - abort block operation request |
47 | * @q: queue containing the block operation request | 47 | * @q: queue containing the block operation request |
48 | * @rq: block IO operation request | 48 | * @rq: block IO operation request |
49 | * | 49 | * |
50 | * Called immediately after pending block IO operation request @rq in | 50 | * Called immediately after pending block IO operation request @rq in |
51 | * queue @q is aborted. The fields in the operation request @rq | 51 | * queue @q is aborted. The fields in the operation request @rq |
52 | * can be examined to determine which device and sectors the pending | 52 | * can be examined to determine which device and sectors the pending |
53 | * operation would access. | 53 | * operation would access. |
54 | */ | 54 | */ |
55 | DEFINE_EVENT(block_rq_with_error, block_rq_abort, | 55 | DEFINE_EVENT(block_rq_with_error, block_rq_abort, |
56 | 56 | ||
57 | TP_PROTO(struct request_queue *q, struct request *rq), | 57 | TP_PROTO(struct request_queue *q, struct request *rq), |
58 | 58 | ||
59 | TP_ARGS(q, rq) | 59 | TP_ARGS(q, rq) |
60 | ); | 60 | ); |
61 | 61 | ||
62 | /** | 62 | /** |
63 | * block_rq_requeue - place block IO request back on a queue | 63 | * block_rq_requeue - place block IO request back on a queue |
64 | * @q: queue holding operation | 64 | * @q: queue holding operation |
65 | * @rq: block IO operation request | 65 | * @rq: block IO operation request |
66 | * | 66 | * |
67 | * The block operation request @rq is being placed back into queue | 67 | * The block operation request @rq is being placed back into queue |
68 | * @q. For some reason the request was not completed and needs to be | 68 | * @q. For some reason the request was not completed and needs to be |
69 | * put back in the queue. | 69 | * put back in the queue. |
70 | */ | 70 | */ |
71 | DEFINE_EVENT(block_rq_with_error, block_rq_requeue, | 71 | DEFINE_EVENT(block_rq_with_error, block_rq_requeue, |
72 | 72 | ||
73 | TP_PROTO(struct request_queue *q, struct request *rq), | 73 | TP_PROTO(struct request_queue *q, struct request *rq), |
74 | 74 | ||
75 | TP_ARGS(q, rq) | 75 | TP_ARGS(q, rq) |
76 | ); | 76 | ); |
77 | 77 | ||
78 | /** | 78 | /** |
79 | * block_rq_complete - block IO operation completed by device driver | 79 | * block_rq_complete - block IO operation completed by device driver |
80 | * @q: queue containing the block operation request | 80 | * @q: queue containing the block operation request |
81 | * @rq: block operations request | 81 | * @rq: block operations request |
82 | * | 82 | * |
83 | * The block_rq_complete tracepoint event indicates that some portion | 83 | * The block_rq_complete tracepoint event indicates that some portion |
84 | * of operation request has been completed by the device driver. If | 84 | * of operation request has been completed by the device driver. If |
85 | * the @rq->bio is %NULL, then there is absolutely no additional work to | 85 | * the @rq->bio is %NULL, then there is absolutely no additional work to |
86 | * do for the request. If @rq->bio is non-NULL then there is | 86 | * do for the request. If @rq->bio is non-NULL then there is |
87 | * additional work required to complete the request. | 87 | * additional work required to complete the request. |
88 | */ | 88 | */ |
89 | DEFINE_EVENT(block_rq_with_error, block_rq_complete, | 89 | DEFINE_EVENT(block_rq_with_error, block_rq_complete, |
90 | 90 | ||
91 | TP_PROTO(struct request_queue *q, struct request *rq), | 91 | TP_PROTO(struct request_queue *q, struct request *rq), |
92 | 92 | ||
93 | TP_ARGS(q, rq) | 93 | TP_ARGS(q, rq) |
94 | ); | 94 | ); |
95 | 95 | ||
96 | DECLARE_EVENT_CLASS(block_rq, | 96 | DECLARE_EVENT_CLASS(block_rq, |
97 | 97 | ||
98 | TP_PROTO(struct request_queue *q, struct request *rq), | 98 | TP_PROTO(struct request_queue *q, struct request *rq), |
99 | 99 | ||
100 | TP_ARGS(q, rq), | 100 | TP_ARGS(q, rq), |
101 | 101 | ||
102 | TP_STRUCT__entry( | 102 | TP_STRUCT__entry( |
103 | __field( dev_t, dev ) | 103 | __field( dev_t, dev ) |
104 | __field( sector_t, sector ) | 104 | __field( sector_t, sector ) |
105 | __field( unsigned int, nr_sector ) | 105 | __field( unsigned int, nr_sector ) |
106 | __field( unsigned int, bytes ) | 106 | __field( unsigned int, bytes ) |
107 | __array( char, rwbs, 6 ) | 107 | __array( char, rwbs, 6 ) |
108 | __array( char, comm, TASK_COMM_LEN ) | 108 | __array( char, comm, TASK_COMM_LEN ) |
109 | __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) | 109 | __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) |
110 | ), | 110 | ), |
111 | 111 | ||
112 | TP_fast_assign( | 112 | TP_fast_assign( |
113 | __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; | 113 | __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; |
114 | __entry->sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? | 114 | __entry->sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? |
115 | 0 : blk_rq_pos(rq); | 115 | 0 : blk_rq_pos(rq); |
116 | __entry->nr_sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? | 116 | __entry->nr_sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? |
117 | 0 : blk_rq_sectors(rq); | 117 | 0 : blk_rq_sectors(rq); |
118 | __entry->bytes = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? | 118 | __entry->bytes = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? |
119 | blk_rq_bytes(rq) : 0; | 119 | blk_rq_bytes(rq) : 0; |
120 | 120 | ||
121 | blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq)); | 121 | blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq)); |
122 | blk_dump_cmd(__get_str(cmd), rq); | 122 | blk_dump_cmd(__get_str(cmd), rq); |
123 | memcpy(__entry->comm, current->comm, TASK_COMM_LEN); | 123 | memcpy(__entry->comm, current->comm, TASK_COMM_LEN); |
124 | ), | 124 | ), |
125 | 125 | ||
126 | TP_printk("%d,%d %s %u (%s) %llu + %u [%s]", | 126 | TP_printk("%d,%d %s %u (%s) %llu + %u [%s]", |
127 | MAJOR(__entry->dev), MINOR(__entry->dev), | 127 | MAJOR(__entry->dev), MINOR(__entry->dev), |
128 | __entry->rwbs, __entry->bytes, __get_str(cmd), | 128 | __entry->rwbs, __entry->bytes, __get_str(cmd), |
129 | (unsigned long long)__entry->sector, | 129 | (unsigned long long)__entry->sector, |
130 | __entry->nr_sector, __entry->comm) | 130 | __entry->nr_sector, __entry->comm) |
131 | ); | 131 | ); |
132 | 132 | ||
133 | /** | 133 | /** |
134 | * block_rq_insert - insert block operation request into queue | 134 | * block_rq_insert - insert block operation request into queue |
135 | * @q: target queue | 135 | * @q: target queue |
136 | * @rq: block IO operation request | 136 | * @rq: block IO operation request |
137 | * | 137 | * |
138 | * Called immediately before block operation request @rq is inserted | 138 | * Called immediately before block operation request @rq is inserted |
139 | * into queue @q. The fields in the operation request @rq struct can | 139 | * into queue @q. The fields in the operation request @rq struct can |
140 | * be examined to determine which device and sectors the pending | 140 | * be examined to determine which device and sectors the pending |
141 | * operation would access. | 141 | * operation would access. |
142 | */ | 142 | */ |
143 | DEFINE_EVENT(block_rq, block_rq_insert, | 143 | DEFINE_EVENT(block_rq, block_rq_insert, |
144 | 144 | ||
145 | TP_PROTO(struct request_queue *q, struct request *rq), | 145 | TP_PROTO(struct request_queue *q, struct request *rq), |
146 | 146 | ||
147 | TP_ARGS(q, rq) | 147 | TP_ARGS(q, rq) |
148 | ); | 148 | ); |
149 | 149 | ||
150 | /** | 150 | /** |
151 | * block_rq_issue - issue pending block IO request operation to device driver | 151 | * block_rq_issue - issue pending block IO request operation to device driver |
152 | * @q: queue holding operation | 152 | * @q: queue holding operation |
153 | * @rq: block IO operation request | 153 | * @rq: block IO operation request |
154 | * | 154 | * |
155 | * Called when block operation request @rq from queue @q is sent to a | 155 | * Called when block operation request @rq from queue @q is sent to a |
156 | * device driver for processing. | 156 | * device driver for processing. |
157 | */ | 157 | */ |
158 | DEFINE_EVENT(block_rq, block_rq_issue, | 158 | DEFINE_EVENT(block_rq, block_rq_issue, |
159 | 159 | ||
160 | TP_PROTO(struct request_queue *q, struct request *rq), | 160 | TP_PROTO(struct request_queue *q, struct request *rq), |
161 | 161 | ||
162 | TP_ARGS(q, rq) | 162 | TP_ARGS(q, rq) |
163 | ); | 163 | ); |
164 | 164 | ||
165 | /** | 165 | /** |
166 | * block_bio_bounce - used bounce buffer when processing block operation | 166 | * block_bio_bounce - used bounce buffer when processing block operation |
167 | * @q: queue holding the block operation | 167 | * @q: queue holding the block operation |
168 | * @bio: block operation | 168 | * @bio: block operation |
169 | * | 169 | * |
170 | * A bounce buffer was used to handle the block operation @bio in @q. | 170 | * A bounce buffer was used to handle the block operation @bio in @q. |
171 | * This occurs when hardware limitations prevent a direct transfer of | 171 | * This occurs when hardware limitations prevent a direct transfer of |
172 | * data between the @bio data memory area and the IO device. Use of a | 172 | * data between the @bio data memory area and the IO device. Use of a |
173 | * bounce buffer requires extra copying of data and decreases | 173 | * bounce buffer requires extra copying of data and decreases |
174 | * performance. | 174 | * performance. |
175 | */ | 175 | */ |
176 | TRACE_EVENT(block_bio_bounce, | 176 | TRACE_EVENT(block_bio_bounce, |
177 | 177 | ||
178 | TP_PROTO(struct request_queue *q, struct bio *bio), | 178 | TP_PROTO(struct request_queue *q, struct bio *bio), |
179 | 179 | ||
180 | TP_ARGS(q, bio), | 180 | TP_ARGS(q, bio), |
181 | 181 | ||
182 | TP_STRUCT__entry( | 182 | TP_STRUCT__entry( |
183 | __field( dev_t, dev ) | 183 | __field( dev_t, dev ) |
184 | __field( sector_t, sector ) | 184 | __field( sector_t, sector ) |
185 | __field( unsigned int, nr_sector ) | 185 | __field( unsigned int, nr_sector ) |
186 | __array( char, rwbs, 6 ) | 186 | __array( char, rwbs, 6 ) |
187 | __array( char, comm, TASK_COMM_LEN ) | 187 | __array( char, comm, TASK_COMM_LEN ) |
188 | ), | 188 | ), |
189 | 189 | ||
190 | TP_fast_assign( | 190 | TP_fast_assign( |
191 | __entry->dev = bio->bi_bdev ? | 191 | __entry->dev = bio->bi_bdev ? |
192 | bio->bi_bdev->bd_dev : 0; | 192 | bio->bi_bdev->bd_dev : 0; |
193 | __entry->sector = bio->bi_sector; | 193 | __entry->sector = bio->bi_sector; |
194 | __entry->nr_sector = bio->bi_size >> 9; | 194 | __entry->nr_sector = bio->bi_size >> 9; |
195 | blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); | 195 | blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); |
196 | memcpy(__entry->comm, current->comm, TASK_COMM_LEN); | 196 | memcpy(__entry->comm, current->comm, TASK_COMM_LEN); |
197 | ), | 197 | ), |
198 | 198 | ||
199 | TP_printk("%d,%d %s %llu + %u [%s]", | 199 | TP_printk("%d,%d %s %llu + %u [%s]", |
200 | MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, | 200 | MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, |
201 | (unsigned long long)__entry->sector, | 201 | (unsigned long long)__entry->sector, |
202 | __entry->nr_sector, __entry->comm) | 202 | __entry->nr_sector, __entry->comm) |
203 | ); | 203 | ); |
204 | 204 | ||
205 | /** | 205 | /** |
206 | * block_bio_complete - completed all work on the block operation | 206 | * block_bio_complete - completed all work on the block operation |
207 | * @q: queue holding the block operation | 207 | * @q: queue holding the block operation |
208 | * @bio: block operation completed | 208 | * @bio: block operation completed |
209 | * @error: io error value | 209 | * @error: io error value |
210 | * | 210 | * |
211 | * This tracepoint indicates there is no further work to do on this | 211 | * This tracepoint indicates there is no further work to do on this |
212 | * block IO operation @bio. | 212 | * block IO operation @bio. |
213 | */ | 213 | */ |
214 | TRACE_EVENT(block_bio_complete, | 214 | TRACE_EVENT(block_bio_complete, |
215 | 215 | ||
216 | TP_PROTO(struct request_queue *q, struct bio *bio, int error), | 216 | TP_PROTO(struct request_queue *q, struct bio *bio, int error), |
217 | 217 | ||
218 | TP_ARGS(q, bio, error), | 218 | TP_ARGS(q, bio, error), |
219 | 219 | ||
220 | TP_STRUCT__entry( | 220 | TP_STRUCT__entry( |
221 | __field( dev_t, dev ) | 221 | __field( dev_t, dev ) |
222 | __field( sector_t, sector ) | 222 | __field( sector_t, sector ) |
223 | __field( unsigned, nr_sector ) | 223 | __field( unsigned, nr_sector ) |
224 | __field( int, error ) | 224 | __field( int, error ) |
225 | __array( char, rwbs, 6 ) | 225 | __array( char, rwbs, 6 ) |
226 | ), | 226 | ), |
227 | 227 | ||
228 | TP_fast_assign( | 228 | TP_fast_assign( |
229 | __entry->dev = bio->bi_bdev->bd_dev; | 229 | __entry->dev = bio->bi_bdev->bd_dev; |
230 | __entry->sector = bio->bi_sector; | 230 | __entry->sector = bio->bi_sector; |
231 | __entry->nr_sector = bio->bi_size >> 9; | 231 | __entry->nr_sector = bio->bi_size >> 9; |
232 | __entry->error = error; | 232 | __entry->error = error; |
233 | blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); | 233 | blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); |
234 | ), | 234 | ), |
235 | 235 | ||
236 | TP_printk("%d,%d %s %llu + %u [%d]", | 236 | TP_printk("%d,%d %s %llu + %u [%d]", |
237 | MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, | 237 | MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, |
238 | (unsigned long long)__entry->sector, | 238 | (unsigned long long)__entry->sector, |
239 | __entry->nr_sector, __entry->error) | 239 | __entry->nr_sector, __entry->error) |
240 | ); | 240 | ); |
241 | 241 | ||
242 | DECLARE_EVENT_CLASS(block_bio, | 242 | DECLARE_EVENT_CLASS(block_bio, |
243 | 243 | ||
244 | TP_PROTO(struct request_queue *q, struct bio *bio), | 244 | TP_PROTO(struct request_queue *q, struct bio *bio), |
245 | 245 | ||
246 | TP_ARGS(q, bio), | 246 | TP_ARGS(q, bio), |
247 | 247 | ||
248 | TP_STRUCT__entry( | 248 | TP_STRUCT__entry( |
249 | __field( dev_t, dev ) | 249 | __field( dev_t, dev ) |
250 | __field( sector_t, sector ) | 250 | __field( sector_t, sector ) |
251 | __field( unsigned int, nr_sector ) | 251 | __field( unsigned int, nr_sector ) |
252 | __array( char, rwbs, 6 ) | 252 | __array( char, rwbs, 6 ) |
253 | __array( char, comm, TASK_COMM_LEN ) | 253 | __array( char, comm, TASK_COMM_LEN ) |
254 | ), | 254 | ), |
255 | 255 | ||
256 | TP_fast_assign( | 256 | TP_fast_assign( |
257 | __entry->dev = bio->bi_bdev->bd_dev; | 257 | __entry->dev = bio->bi_bdev->bd_dev; |
258 | __entry->sector = bio->bi_sector; | 258 | __entry->sector = bio->bi_sector; |
259 | __entry->nr_sector = bio->bi_size >> 9; | 259 | __entry->nr_sector = bio->bi_size >> 9; |
260 | blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); | 260 | blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); |
261 | memcpy(__entry->comm, current->comm, TASK_COMM_LEN); | 261 | memcpy(__entry->comm, current->comm, TASK_COMM_LEN); |
262 | ), | 262 | ), |
263 | 263 | ||
264 | TP_printk("%d,%d %s %llu + %u [%s]", | 264 | TP_printk("%d,%d %s %llu + %u [%s]", |
265 | MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, | 265 | MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, |
266 | (unsigned long long)__entry->sector, | 266 | (unsigned long long)__entry->sector, |
267 | __entry->nr_sector, __entry->comm) | 267 | __entry->nr_sector, __entry->comm) |
268 | ); | 268 | ); |
269 | 269 | ||
270 | /** | 270 | /** |
271 | * block_bio_backmerge - merging block operation to the end of an existing operation | 271 | * block_bio_backmerge - merging block operation to the end of an existing operation |
272 | * @q: queue holding operation | 272 | * @q: queue holding operation |
273 | * @bio: new block operation to merge | 273 | * @bio: new block operation to merge |
274 | * | 274 | * |
275 | * Merging block request @bio to the end of an existing block request | 275 | * Merging block request @bio to the end of an existing block request |
276 | * in queue @q. | 276 | * in queue @q. |
277 | */ | 277 | */ |
278 | DEFINE_EVENT(block_bio, block_bio_backmerge, | 278 | DEFINE_EVENT(block_bio, block_bio_backmerge, |
279 | 279 | ||
280 | TP_PROTO(struct request_queue *q, struct bio *bio), | 280 | TP_PROTO(struct request_queue *q, struct bio *bio), |
281 | 281 | ||
282 | TP_ARGS(q, bio) | 282 | TP_ARGS(q, bio) |
283 | ); | 283 | ); |
284 | 284 | ||
285 | /** | 285 | /** |
286 | * block_bio_frontmerge - merging block operation to the beginning of an existing operation | 286 | * block_bio_frontmerge - merging block operation to the beginning of an existing operation |
287 | * @q: queue holding operation | 287 | * @q: queue holding operation |
288 | * @bio: new block operation to merge | 288 | * @bio: new block operation to merge |
289 | * | 289 | * |
290 | * Merging block IO operation @bio to the beginning of an existing block | 290 | * Merging block IO operation @bio to the beginning of an existing block |
291 | * operation in queue @q. | 291 | * operation in queue @q. |
292 | */ | 292 | */ |
293 | DEFINE_EVENT(block_bio, block_bio_frontmerge, | 293 | DEFINE_EVENT(block_bio, block_bio_frontmerge, |
294 | 294 | ||
295 | TP_PROTO(struct request_queue *q, struct bio *bio), | 295 | TP_PROTO(struct request_queue *q, struct bio *bio), |
296 | 296 | ||
297 | TP_ARGS(q, bio) | 297 | TP_ARGS(q, bio) |
298 | ); | 298 | ); |
299 | 299 | ||
300 | /** | 300 | /** |
301 | * block_bio_queue - putting new block IO operation in queue | 301 | * block_bio_queue - putting new block IO operation in queue |
302 | * @q: queue holding operation | 302 | * @q: queue holding operation |
303 | * @bio: new block operation | 303 | * @bio: new block operation |
304 | * | 304 | * |
305 | * About to place the block IO operation @bio into queue @q. | 305 | * About to place the block IO operation @bio into queue @q. |
306 | */ | 306 | */ |
307 | DEFINE_EVENT(block_bio, block_bio_queue, | 307 | DEFINE_EVENT(block_bio, block_bio_queue, |
308 | 308 | ||
309 | TP_PROTO(struct request_queue *q, struct bio *bio), | 309 | TP_PROTO(struct request_queue *q, struct bio *bio), |
310 | 310 | ||
311 | TP_ARGS(q, bio) | 311 | TP_ARGS(q, bio) |
312 | ); | 312 | ); |
313 | 313 | ||
314 | DECLARE_EVENT_CLASS(block_get_rq, | 314 | DECLARE_EVENT_CLASS(block_get_rq, |
315 | 315 | ||
316 | TP_PROTO(struct request_queue *q, struct bio *bio, int rw), | 316 | TP_PROTO(struct request_queue *q, struct bio *bio, int rw), |
317 | 317 | ||
318 | TP_ARGS(q, bio, rw), | 318 | TP_ARGS(q, bio, rw), |
319 | 319 | ||
320 | TP_STRUCT__entry( | 320 | TP_STRUCT__entry( |
321 | __field( dev_t, dev ) | 321 | __field( dev_t, dev ) |
322 | __field( sector_t, sector ) | 322 | __field( sector_t, sector ) |
323 | __field( unsigned int, nr_sector ) | 323 | __field( unsigned int, nr_sector ) |
324 | __array( char, rwbs, 6 ) | 324 | __array( char, rwbs, 6 ) |
325 | __array( char, comm, TASK_COMM_LEN ) | 325 | __array( char, comm, TASK_COMM_LEN ) |
326 | ), | 326 | ), |
327 | 327 | ||
328 | TP_fast_assign( | 328 | TP_fast_assign( |
329 | __entry->dev = bio ? bio->bi_bdev->bd_dev : 0; | 329 | __entry->dev = bio ? bio->bi_bdev->bd_dev : 0; |
330 | __entry->sector = bio ? bio->bi_sector : 0; | 330 | __entry->sector = bio ? bio->bi_sector : 0; |
331 | __entry->nr_sector = bio ? bio->bi_size >> 9 : 0; | 331 | __entry->nr_sector = bio ? bio->bi_size >> 9 : 0; |
332 | blk_fill_rwbs(__entry->rwbs, | 332 | blk_fill_rwbs(__entry->rwbs, |
333 | bio ? bio->bi_rw : 0, __entry->nr_sector); | 333 | bio ? bio->bi_rw : 0, __entry->nr_sector); |
334 | memcpy(__entry->comm, current->comm, TASK_COMM_LEN); | 334 | memcpy(__entry->comm, current->comm, TASK_COMM_LEN); |
335 | ), | 335 | ), |
336 | 336 | ||
337 | TP_printk("%d,%d %s %llu + %u [%s]", | 337 | TP_printk("%d,%d %s %llu + %u [%s]", |
338 | MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, | 338 | MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, |
339 | (unsigned long long)__entry->sector, | 339 | (unsigned long long)__entry->sector, |
340 | __entry->nr_sector, __entry->comm) | 340 | __entry->nr_sector, __entry->comm) |
341 | ); | 341 | ); |
342 | 342 | ||
343 | /** | 343 | /** |
344 | * block_getrq - get a free request entry in queue for block IO operations | 344 | * block_getrq - get a free request entry in queue for block IO operations |
345 | * @q: queue for operations | 345 | * @q: queue for operations |
346 | * @bio: pending block IO operation | 346 | * @bio: pending block IO operation |
347 | * @rw: low bit indicates a read (%0) or a write (%1) | 347 | * @rw: low bit indicates a read (%0) or a write (%1) |
348 | * | 348 | * |
349 | * A request struct for queue @q has been allocated to handle the | 349 | * A request struct for queue @q has been allocated to handle the |
350 | * block IO operation @bio. | 350 | * block IO operation @bio. |
351 | */ | 351 | */ |
352 | DEFINE_EVENT(block_get_rq, block_getrq, | 352 | DEFINE_EVENT(block_get_rq, block_getrq, |
353 | 353 | ||
354 | TP_PROTO(struct request_queue *q, struct bio *bio, int rw), | 354 | TP_PROTO(struct request_queue *q, struct bio *bio, int rw), |
355 | 355 | ||
356 | TP_ARGS(q, bio, rw) | 356 | TP_ARGS(q, bio, rw) |
357 | ); | 357 | ); |
358 | 358 | ||
359 | /** | 359 | /** |
360 | * block_sleeprq - waiting to get a free request entry in queue for block IO operation | 360 | * block_sleeprq - waiting to get a free request entry in queue for block IO operation |
361 | * @q: queue for operation | 361 | * @q: queue for operation |
362 | * @bio: pending block IO operation | 362 | * @bio: pending block IO operation |
363 | * @rw: low bit indicates a read (%0) or a write (%1) | 363 | * @rw: low bit indicates a read (%0) or a write (%1) |
364 | * | 364 | * |
365 | * In the case where a request struct cannot be provided for queue @q | 365 | * In the case where a request struct cannot be provided for queue @q |
366 | * the process needs to wait for a request struct to become | 366 | * the process needs to wait for a request struct to become |
367 | * available. This tracepoint event is generated each time the | 367 | * available. This tracepoint event is generated each time the |
368 | * process goes to sleep waiting for a request struct to become available. | 368 | * process goes to sleep waiting for a request struct to become available. |
369 | */ | 369 | */ |
370 | DEFINE_EVENT(block_get_rq, block_sleeprq, | 370 | DEFINE_EVENT(block_get_rq, block_sleeprq, |
371 | 371 | ||
372 | TP_PROTO(struct request_queue *q, struct bio *bio, int rw), | 372 | TP_PROTO(struct request_queue *q, struct bio *bio, int rw), |
373 | 373 | ||
374 | TP_ARGS(q, bio, rw) | 374 | TP_ARGS(q, bio, rw) |
375 | ); | 375 | ); |
376 | 376 | ||
377 | /** | 377 | /** |
378 | * block_plug - keep operations requests in request queue | 378 | * block_plug - keep operations requests in request queue |
379 | * @q: request queue to plug | 379 | * @q: request queue to plug |
380 | * | 380 | * |
381 | * Plug the request queue @q. Do not allow block operation requests | 381 | * Plug the request queue @q. Do not allow block operation requests |
382 | * to be sent to the device driver. Instead, accumulate requests in | 382 | * to be sent to the device driver. Instead, accumulate requests in |
383 | * the queue to improve throughput performance of the block device. | 383 | * the queue to improve throughput performance of the block device. |
384 | */ | 384 | */ |
385 | TRACE_EVENT(block_plug, | 385 | TRACE_EVENT(block_plug, |
386 | 386 | ||
387 | TP_PROTO(struct request_queue *q), | 387 | TP_PROTO(struct request_queue *q), |
388 | 388 | ||
389 | TP_ARGS(q), | 389 | TP_ARGS(q), |
390 | 390 | ||
391 | TP_STRUCT__entry( | 391 | TP_STRUCT__entry( |
392 | __array( char, comm, TASK_COMM_LEN ) | 392 | __array( char, comm, TASK_COMM_LEN ) |
393 | ), | 393 | ), |
394 | 394 | ||
395 | TP_fast_assign( | 395 | TP_fast_assign( |
396 | memcpy(__entry->comm, current->comm, TASK_COMM_LEN); | 396 | memcpy(__entry->comm, current->comm, TASK_COMM_LEN); |
397 | ), | 397 | ), |
398 | 398 | ||
399 | TP_printk("[%s]", __entry->comm) | 399 | TP_printk("[%s]", __entry->comm) |
400 | ); | 400 | ); |
401 | 401 | ||
402 | DECLARE_EVENT_CLASS(block_unplug, | 402 | DECLARE_EVENT_CLASS(block_unplug, |
403 | 403 | ||
404 | TP_PROTO(struct request_queue *q, unsigned int depth), | 404 | TP_PROTO(struct request_queue *q, unsigned int depth, bool explicit), |
405 | 405 | ||
406 | TP_ARGS(q, depth), | 406 | TP_ARGS(q, depth, explicit), |
407 | 407 | ||
408 | TP_STRUCT__entry( | 408 | TP_STRUCT__entry( |
409 | __field( int, nr_rq ) | 409 | __field( int, nr_rq ) |
410 | __array( char, comm, TASK_COMM_LEN ) | 410 | __array( char, comm, TASK_COMM_LEN ) |
411 | ), | 411 | ), |
412 | 412 | ||
413 | TP_fast_assign( | 413 | TP_fast_assign( |
414 | __entry->nr_rq = depth; | 414 | __entry->nr_rq = depth; |
415 | memcpy(__entry->comm, current->comm, TASK_COMM_LEN); | 415 | memcpy(__entry->comm, current->comm, TASK_COMM_LEN); |
416 | ), | 416 | ), |
417 | 417 | ||
418 | TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) | 418 | TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) |
419 | ); | 419 | ); |
420 | 420 | ||
421 | /** | 421 | /** |
422 | * block_unplug_io - release of operations requests in request queue | 422 | * block_unplug - release of operations requests in request queue |
423 | * @q: request queue to unplug | 423 | * @q: request queue to unplug |
424 | * @depth: number of requests just added to the queue | 424 | * @depth: number of requests just added to the queue |
425 | * @explicit: whether this was an explicit unplug, or one from schedule() | ||
425 | * | 426 | * |
426 | * Unplug request queue @q because the device driver is scheduled to work | 427 | * Unplug request queue @q because the device driver is scheduled to work |
427 | * on elements in the request queue. | 428 | * on elements in the request queue. |
428 | */ | 429 | */ |
429 | DEFINE_EVENT(block_unplug, block_unplug_io, | 430 | DEFINE_EVENT(block_unplug, block_unplug, |
430 | 431 | ||
431 | TP_PROTO(struct request_queue *q, unsigned int depth), | 432 | TP_PROTO(struct request_queue *q, unsigned int depth, bool explicit), |
432 | 433 | ||
433 | TP_ARGS(q, depth) | 434 | TP_ARGS(q, depth, explicit) |
434 | ); | 435 | ); |
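Both the explicit and the schedule() variants share the event class above, so a captured event renders per the TP_printk as, for example, "[dd] 4" (illustrative output only): the comm of the task that flushed the plug, followed by the number of requests just added to the queue in that flush.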
435 | 436 | ||
436 | /** | 437 | /** |
437 | * block_split - split a single bio struct into two bio structs | 438 | * block_split - split a single bio struct into two bio structs |
438 | * @q: queue containing the bio | 439 | * @q: queue containing the bio |
439 | * @bio: block operation being split | 440 | * @bio: block operation being split |
440 | * @new_sector: The starting sector for the new bio | 441 | * @new_sector: The starting sector for the new bio |
441 | * | 442 | * |
442 | * The bio request @bio in request queue @q needs to be split into two | 443 | * The bio request @bio in request queue @q needs to be split into two |
443 | * bio requests. The newly created @bio request starts at | 444 | * bio requests. The newly created @bio request starts at |
444 | * @new_sector. This split may be required due to hardware limitation | 445 | * @new_sector. This split may be required due to hardware limitation |
445 | * such as operation crossing device boundaries in a RAID system. | 446 | * such as operation crossing device boundaries in a RAID system. |
446 | */ | 447 | */ |
447 | TRACE_EVENT(block_split, | 448 | TRACE_EVENT(block_split, |
448 | 449 | ||
449 | TP_PROTO(struct request_queue *q, struct bio *bio, | 450 | TP_PROTO(struct request_queue *q, struct bio *bio, |
450 | unsigned int new_sector), | 451 | unsigned int new_sector), |
451 | 452 | ||
452 | TP_ARGS(q, bio, new_sector), | 453 | TP_ARGS(q, bio, new_sector), |
453 | 454 | ||
454 | TP_STRUCT__entry( | 455 | TP_STRUCT__entry( |
455 | __field( dev_t, dev ) | 456 | __field( dev_t, dev ) |
456 | __field( sector_t, sector ) | 457 | __field( sector_t, sector ) |
457 | __field( sector_t, new_sector ) | 458 | __field( sector_t, new_sector ) |
458 | __array( char, rwbs, 6 ) | 459 | __array( char, rwbs, 6 ) |
459 | __array( char, comm, TASK_COMM_LEN ) | 460 | __array( char, comm, TASK_COMM_LEN ) |
460 | ), | 461 | ), |
461 | 462 | ||
462 | TP_fast_assign( | 463 | TP_fast_assign( |
463 | __entry->dev = bio->bi_bdev->bd_dev; | 464 | __entry->dev = bio->bi_bdev->bd_dev; |
464 | __entry->sector = bio->bi_sector; | 465 | __entry->sector = bio->bi_sector; |
465 | __entry->new_sector = new_sector; | 466 | __entry->new_sector = new_sector; |
466 | blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); | 467 | blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); |
467 | memcpy(__entry->comm, current->comm, TASK_COMM_LEN); | 468 | memcpy(__entry->comm, current->comm, TASK_COMM_LEN); |
468 | ), | 469 | ), |
469 | 470 | ||
470 | TP_printk("%d,%d %s %llu / %llu [%s]", | 471 | TP_printk("%d,%d %s %llu / %llu [%s]", |
471 | MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, | 472 | MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, |
472 | (unsigned long long)__entry->sector, | 473 | (unsigned long long)__entry->sector, |
473 | (unsigned long long)__entry->new_sector, | 474 | (unsigned long long)__entry->new_sector, |
474 | __entry->comm) | 475 | __entry->comm) |
475 | ); | 476 | ); |
476 | 477 | ||
477 | /** | 478 | /** |
478 | * block_bio_remap - map request for a logical device to the raw device | 479 | * block_bio_remap - map request for a logical device to the raw device |
479 | * @q: queue holding the operation | 480 | * @q: queue holding the operation |
480 | * @bio: revised operation | 481 | * @bio: revised operation |
481 | * @dev: device for the operation | 482 | * @dev: device for the operation |
482 | * @from: original sector for the operation | 483 | * @from: original sector for the operation |
483 | * | 484 | * |
484 | * An operation for a logical device has been mapped to the | 485 | * An operation for a logical device has been mapped to the |
485 | * raw block device. | 486 | * raw block device. |
486 | */ | 487 | */ |
487 | TRACE_EVENT(block_bio_remap, | 488 | TRACE_EVENT(block_bio_remap, |
488 | 489 | ||
489 | TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, | 490 | TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, |
490 | sector_t from), | 491 | sector_t from), |
491 | 492 | ||
492 | TP_ARGS(q, bio, dev, from), | 493 | TP_ARGS(q, bio, dev, from), |
493 | 494 | ||
494 | TP_STRUCT__entry( | 495 | TP_STRUCT__entry( |
495 | __field( dev_t, dev ) | 496 | __field( dev_t, dev ) |
496 | __field( sector_t, sector ) | 497 | __field( sector_t, sector ) |
497 | __field( unsigned int, nr_sector ) | 498 | __field( unsigned int, nr_sector ) |
498 | __field( dev_t, old_dev ) | 499 | __field( dev_t, old_dev ) |
499 | __field( sector_t, old_sector ) | 500 | __field( sector_t, old_sector ) |
500 | __array( char, rwbs, 6 ) | 501 | __array( char, rwbs, 6 ) |
501 | ), | 502 | ), |
502 | 503 | ||
503 | TP_fast_assign( | 504 | TP_fast_assign( |
504 | __entry->dev = bio->bi_bdev->bd_dev; | 505 | __entry->dev = bio->bi_bdev->bd_dev; |
505 | __entry->sector = bio->bi_sector; | 506 | __entry->sector = bio->bi_sector; |
506 | __entry->nr_sector = bio->bi_size >> 9; | 507 | __entry->nr_sector = bio->bi_size >> 9; |
507 | __entry->old_dev = dev; | 508 | __entry->old_dev = dev; |
508 | __entry->old_sector = from; | 509 | __entry->old_sector = from; |
509 | blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); | 510 | blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); |
510 | ), | 511 | ), |
511 | 512 | ||
512 | TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu", | 513 | TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu", |
513 | MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, | 514 | MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, |
514 | (unsigned long long)__entry->sector, | 515 | (unsigned long long)__entry->sector, |
515 | __entry->nr_sector, | 516 | __entry->nr_sector, |
516 | MAJOR(__entry->old_dev), MINOR(__entry->old_dev), | 517 | MAJOR(__entry->old_dev), MINOR(__entry->old_dev), |
517 | (unsigned long long)__entry->old_sector) | 518 | (unsigned long long)__entry->old_sector) |
518 | ); | 519 | ); |
519 | 520 | ||
520 | /** | 521 | /** |
521 | * block_rq_remap - map request for a block operation request | 522 | * block_rq_remap - map request for a block operation request |
522 | * @q: queue holding the operation | 523 | * @q: queue holding the operation |
523 | * @rq: block IO operation request | 524 | * @rq: block IO operation request |
524 | * @dev: device for the operation | 525 | * @dev: device for the operation |
525 | * @from: original sector for the operation | 526 | * @from: original sector for the operation |
526 | * | 527 | * |
527 | * The block operation request @rq in @q has been remapped. The block | 528 | * The block operation request @rq in @q has been remapped. The block |
528 | * operation request @rq holds the current information and @from holds | 529 | * operation request @rq holds the current information and @from holds |
529 | * the original sector. | 530 | * the original sector. |
530 | */ | 531 | */ |
531 | TRACE_EVENT(block_rq_remap, | 532 | TRACE_EVENT(block_rq_remap, |
532 | 533 | ||
533 | TP_PROTO(struct request_queue *q, struct request *rq, dev_t dev, | 534 | TP_PROTO(struct request_queue *q, struct request *rq, dev_t dev, |
534 | sector_t from), | 535 | sector_t from), |
535 | 536 | ||
536 | TP_ARGS(q, rq, dev, from), | 537 | TP_ARGS(q, rq, dev, from), |
537 | 538 | ||
538 | TP_STRUCT__entry( | 539 | TP_STRUCT__entry( |
539 | __field( dev_t, dev ) | 540 | __field( dev_t, dev ) |
540 | __field( sector_t, sector ) | 541 | __field( sector_t, sector ) |
541 | __field( unsigned int, nr_sector ) | 542 | __field( unsigned int, nr_sector ) |
542 | __field( dev_t, old_dev ) | 543 | __field( dev_t, old_dev ) |
543 | __field( sector_t, old_sector ) | 544 | __field( sector_t, old_sector ) |
544 | __array( char, rwbs, 6 ) | 545 | __array( char, rwbs, 6 ) |
545 | ), | 546 | ), |
546 | 547 | ||
547 | TP_fast_assign( | 548 | TP_fast_assign( |
548 | __entry->dev = disk_devt(rq->rq_disk); | 549 | __entry->dev = disk_devt(rq->rq_disk); |
549 | __entry->sector = blk_rq_pos(rq); | 550 | __entry->sector = blk_rq_pos(rq); |
550 | __entry->nr_sector = blk_rq_sectors(rq); | 551 | __entry->nr_sector = blk_rq_sectors(rq); |
551 | __entry->old_dev = dev; | 552 | __entry->old_dev = dev; |
552 | __entry->old_sector = from; | 553 | __entry->old_sector = from; |
553 | blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq)); | 554 | blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq)); |
554 | ), | 555 | ), |
555 | 556 | ||
556 | TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu", | 557 | TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu", |
557 | MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, | 558 | MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, |
558 | (unsigned long long)__entry->sector, | 559 | (unsigned long long)__entry->sector, |
559 | __entry->nr_sector, | 560 | __entry->nr_sector, |
560 | MAJOR(__entry->old_dev), MINOR(__entry->old_dev), | 561 | MAJOR(__entry->old_dev), MINOR(__entry->old_dev), |
561 | (unsigned long long)__entry->old_sector) | 562 | (unsigned long long)__entry->old_sector) |
562 | ); | 563 | ); |
563 | 564 | ||
564 | #endif /* _TRACE_BLOCK_H */ | 565 | #endif /* _TRACE_BLOCK_H */ |
565 | 566 | ||
566 | /* This part must be outside protection */ | 567 | /* This part must be outside protection */ |
567 | #include <trace/define_trace.h> | 568 | #include <trace/define_trace.h> |
568 | 569 | ||
569 | 570 |
kernel/trace/blktrace.c
1 | /* | 1 | /* |
2 | * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk> | 2 | * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk> |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License version 2 as | 5 | * it under the terms of the GNU General Public License version 2 as |
6 | * published by the Free Software Foundation. | 6 | * published by the Free Software Foundation. |
7 | * | 7 | * |
8 | * This program is distributed in the hope that it will be useful, | 8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. | 11 | * GNU General Public License for more details. |
12 | * | 12 | * |
13 | * You should have received a copy of the GNU General Public License | 13 | * You should have received a copy of the GNU General Public License |
14 | * along with this program; if not, write to the Free Software | 14 | * along with this program; if not, write to the Free Software |
15 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 15 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
16 | * | 16 | * |
17 | */ | 17 | */ |
18 | #include <linux/kernel.h> | 18 | #include <linux/kernel.h> |
19 | #include <linux/blkdev.h> | 19 | #include <linux/blkdev.h> |
20 | #include <linux/blktrace_api.h> | 20 | #include <linux/blktrace_api.h> |
21 | #include <linux/percpu.h> | 21 | #include <linux/percpu.h> |
22 | #include <linux/init.h> | 22 | #include <linux/init.h> |
23 | #include <linux/mutex.h> | 23 | #include <linux/mutex.h> |
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/debugfs.h> | 25 | #include <linux/debugfs.h> |
26 | #include <linux/time.h> | 26 | #include <linux/time.h> |
27 | #include <linux/uaccess.h> | 27 | #include <linux/uaccess.h> |
28 | 28 | ||
29 | #include <trace/events/block.h> | 29 | #include <trace/events/block.h> |
30 | 30 | ||
31 | #include "trace_output.h" | 31 | #include "trace_output.h" |
32 | 32 | ||
33 | #ifdef CONFIG_BLK_DEV_IO_TRACE | 33 | #ifdef CONFIG_BLK_DEV_IO_TRACE |
34 | 34 | ||
35 | static unsigned int blktrace_seq __read_mostly = 1; | 35 | static unsigned int blktrace_seq __read_mostly = 1; |
36 | 36 | ||
37 | static struct trace_array *blk_tr; | 37 | static struct trace_array *blk_tr; |
38 | static bool blk_tracer_enabled __read_mostly; | 38 | static bool blk_tracer_enabled __read_mostly; |
39 | 39 | ||
40 | /* Select an alternative, minimalistic output rather than the original one */ | 40 | /* Select an alternative, minimalistic output rather than the original one */ |
41 | #define TRACE_BLK_OPT_CLASSIC 0x1 | 41 | #define TRACE_BLK_OPT_CLASSIC 0x1 |
42 | 42 | ||
43 | static struct tracer_opt blk_tracer_opts[] = { | 43 | static struct tracer_opt blk_tracer_opts[] = { |
44 | /* Disable the minimalistic output by default */ | 44 | /* Disable the minimalistic output by default */ |
45 | { TRACER_OPT(blk_classic, TRACE_BLK_OPT_CLASSIC) }, | 45 | { TRACER_OPT(blk_classic, TRACE_BLK_OPT_CLASSIC) }, |
46 | { } | 46 | { } |
47 | }; | 47 | }; |
48 | 48 | ||
49 | static struct tracer_flags blk_tracer_flags = { | 49 | static struct tracer_flags blk_tracer_flags = { |
50 | .val = 0, | 50 | .val = 0, |
51 | .opts = blk_tracer_opts, | 51 | .opts = blk_tracer_opts, |
52 | }; | 52 | }; |
53 | 53 | ||
54 | /* Global reference count of probes */ | 54 | /* Global reference count of probes */ |
55 | static atomic_t blk_probes_ref = ATOMIC_INIT(0); | 55 | static atomic_t blk_probes_ref = ATOMIC_INIT(0); |
56 | 56 | ||
57 | static void blk_register_tracepoints(void); | 57 | static void blk_register_tracepoints(void); |
58 | static void blk_unregister_tracepoints(void); | 58 | static void blk_unregister_tracepoints(void); |
59 | 59 | ||
60 | /* | 60 | /* |
61 | * Send out a notify message. | 61 | * Send out a notify message. |
62 | */ | 62 | */ |
63 | static void trace_note(struct blk_trace *bt, pid_t pid, int action, | 63 | static void trace_note(struct blk_trace *bt, pid_t pid, int action, |
64 | const void *data, size_t len) | 64 | const void *data, size_t len) |
65 | { | 65 | { |
66 | struct blk_io_trace *t; | 66 | struct blk_io_trace *t; |
67 | struct ring_buffer_event *event = NULL; | 67 | struct ring_buffer_event *event = NULL; |
68 | struct ring_buffer *buffer = NULL; | 68 | struct ring_buffer *buffer = NULL; |
69 | int pc = 0; | 69 | int pc = 0; |
70 | int cpu = smp_processor_id(); | 70 | int cpu = smp_processor_id(); |
71 | bool blk_tracer = blk_tracer_enabled; | 71 | bool blk_tracer = blk_tracer_enabled; |
72 | 72 | ||
73 | if (blk_tracer) { | 73 | if (blk_tracer) { |
74 | buffer = blk_tr->buffer; | 74 | buffer = blk_tr->buffer; |
75 | pc = preempt_count(); | 75 | pc = preempt_count(); |
76 | event = trace_buffer_lock_reserve(buffer, TRACE_BLK, | 76 | event = trace_buffer_lock_reserve(buffer, TRACE_BLK, |
77 | sizeof(*t) + len, | 77 | sizeof(*t) + len, |
78 | 0, pc); | 78 | 0, pc); |
79 | if (!event) | 79 | if (!event) |
80 | return; | 80 | return; |
81 | t = ring_buffer_event_data(event); | 81 | t = ring_buffer_event_data(event); |
82 | goto record_it; | 82 | goto record_it; |
83 | } | 83 | } |
84 | 84 | ||
85 | if (!bt->rchan) | 85 | if (!bt->rchan) |
86 | return; | 86 | return; |
87 | 87 | ||
88 | t = relay_reserve(bt->rchan, sizeof(*t) + len); | 88 | t = relay_reserve(bt->rchan, sizeof(*t) + len); |
89 | if (t) { | 89 | if (t) { |
90 | t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION; | 90 | t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION; |
91 | t->time = ktime_to_ns(ktime_get()); | 91 | t->time = ktime_to_ns(ktime_get()); |
92 | record_it: | 92 | record_it: |
93 | t->device = bt->dev; | 93 | t->device = bt->dev; |
94 | t->action = action; | 94 | t->action = action; |
95 | t->pid = pid; | 95 | t->pid = pid; |
96 | t->cpu = cpu; | 96 | t->cpu = cpu; |
97 | t->pdu_len = len; | 97 | t->pdu_len = len; |
98 | memcpy((void *) t + sizeof(*t), data, len); | 98 | memcpy((void *) t + sizeof(*t), data, len); |
99 | 99 | ||
100 | if (blk_tracer) | 100 | if (blk_tracer) |
101 | trace_buffer_unlock_commit(buffer, event, 0, pc); | 101 | trace_buffer_unlock_commit(buffer, event, 0, pc); |
102 | } | 102 | } |
103 | } | 103 | } |
104 | 104 | ||
105 | /* | 105 | /* |
106 | * Send out a notify for this process, if we haven't done so since a trace | 106 | * Send out a notify for this process, if we haven't done so since a trace |
107 | * started | 107 | * started |
108 | */ | 108 | */ |
109 | static void trace_note_tsk(struct blk_trace *bt, struct task_struct *tsk) | 109 | static void trace_note_tsk(struct blk_trace *bt, struct task_struct *tsk) |
110 | { | 110 | { |
111 | tsk->btrace_seq = blktrace_seq; | 111 | tsk->btrace_seq = blktrace_seq; |
112 | trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm, sizeof(tsk->comm)); | 112 | trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm, sizeof(tsk->comm)); |
113 | } | 113 | } |
114 | 114 | ||
115 | static void trace_note_time(struct blk_trace *bt) | 115 | static void trace_note_time(struct blk_trace *bt) |
116 | { | 116 | { |
117 | struct timespec now; | 117 | struct timespec now; |
118 | unsigned long flags; | 118 | unsigned long flags; |
119 | u32 words[2]; | 119 | u32 words[2]; |
120 | 120 | ||
121 | getnstimeofday(&now); | 121 | getnstimeofday(&now); |
122 | words[0] = now.tv_sec; | 122 | words[0] = now.tv_sec; |
123 | words[1] = now.tv_nsec; | 123 | words[1] = now.tv_nsec; |
124 | 124 | ||
125 | local_irq_save(flags); | 125 | local_irq_save(flags); |
126 | trace_note(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words)); | 126 | trace_note(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words)); |
127 | local_irq_restore(flags); | 127 | local_irq_restore(flags); |
128 | } | 128 | } |
129 | 129 | ||
130 | void __trace_note_message(struct blk_trace *bt, const char *fmt, ...) | 130 | void __trace_note_message(struct blk_trace *bt, const char *fmt, ...) |
131 | { | 131 | { |
132 | int n; | 132 | int n; |
133 | va_list args; | 133 | va_list args; |
134 | unsigned long flags; | 134 | unsigned long flags; |
135 | char *buf; | 135 | char *buf; |
136 | 136 | ||
137 | if (unlikely(bt->trace_state != Blktrace_running && | 137 | if (unlikely(bt->trace_state != Blktrace_running && |
138 | !blk_tracer_enabled)) | 138 | !blk_tracer_enabled)) |
139 | return; | 139 | return; |
140 | 140 | ||
141 | /* | 141 | /* |
142 | * If the BLK_TC_NOTIFY action mask isn't set, don't send any note | 142 | * If the BLK_TC_NOTIFY action mask isn't set, don't send any note |
143 | * message to the trace. | 143 | * message to the trace. |
144 | */ | 144 | */ |
145 | if (!(bt->act_mask & BLK_TC_NOTIFY)) | 145 | if (!(bt->act_mask & BLK_TC_NOTIFY)) |
146 | return; | 146 | return; |
147 | 147 | ||
148 | local_irq_save(flags); | 148 | local_irq_save(flags); |
149 | buf = per_cpu_ptr(bt->msg_data, smp_processor_id()); | 149 | buf = per_cpu_ptr(bt->msg_data, smp_processor_id()); |
150 | va_start(args, fmt); | 150 | va_start(args, fmt); |
151 | n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args); | 151 | n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args); |
152 | va_end(args); | 152 | va_end(args); |
153 | 153 | ||
154 | trace_note(bt, 0, BLK_TN_MESSAGE, buf, n); | 154 | trace_note(bt, 0, BLK_TN_MESSAGE, buf, n); |
155 | local_irq_restore(flags); | 155 | local_irq_restore(flags); |
156 | } | 156 | } |
157 | EXPORT_SYMBOL_GPL(__trace_note_message); | 157 | EXPORT_SYMBOL_GPL(__trace_note_message); |
158 | 158 | ||
159 | static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, | 159 | static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, |
160 | pid_t pid) | 160 | pid_t pid) |
161 | { | 161 | { |
162 | if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0) | 162 | if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0) |
163 | return 1; | 163 | return 1; |
164 | if (sector && (sector < bt->start_lba || sector > bt->end_lba)) | 164 | if (sector && (sector < bt->start_lba || sector > bt->end_lba)) |
165 | return 1; | 165 | return 1; |
166 | if (bt->pid && pid != bt->pid) | 166 | if (bt->pid && pid != bt->pid) |
167 | return 1; | 167 | return 1; |
168 | 168 | ||
169 | return 0; | 169 | return 0; |
170 | } | 170 | } |
171 | 171 | ||
172 | /* | 172 | /* |
173 | * Data direction bit lookup | 173 | * Data direction bit lookup |
174 | */ | 174 | */ |
175 | static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), | 175 | static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), |
176 | BLK_TC_ACT(BLK_TC_WRITE) }; | 176 | BLK_TC_ACT(BLK_TC_WRITE) }; |
177 | 177 | ||
178 | #define BLK_TC_RAHEAD BLK_TC_AHEAD | 178 | #define BLK_TC_RAHEAD BLK_TC_AHEAD |
179 | 179 | ||
180 | /* The ilog2() calls fall out because they're constant */ | 180 | /* The ilog2() calls fall out because they're constant */ |
181 | #define MASK_TC_BIT(rw, __name) ((rw & REQ_ ## __name) << \ | 181 | #define MASK_TC_BIT(rw, __name) ((rw & REQ_ ## __name) << \ |
182 | (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - __REQ_ ## __name)) | 182 | (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - __REQ_ ## __name)) |
183 | 183 | ||
184 | /* | 184 | /* |
185 | * The worker for the various blk_add_trace*() types. Fills out a | 185 | * The worker for the various blk_add_trace*() types. Fills out a |
186 | * blk_io_trace structure and places it in a per-cpu subbuffer. | 186 | * blk_io_trace structure and places it in a per-cpu subbuffer. |
187 | */ | 187 | */ |
188 | static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, | 188 | static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, |
189 | int rw, u32 what, int error, int pdu_len, void *pdu_data) | 189 | int rw, u32 what, int error, int pdu_len, void *pdu_data) |
190 | { | 190 | { |
191 | struct task_struct *tsk = current; | 191 | struct task_struct *tsk = current; |
192 | struct ring_buffer_event *event = NULL; | 192 | struct ring_buffer_event *event = NULL; |
193 | struct ring_buffer *buffer = NULL; | 193 | struct ring_buffer *buffer = NULL; |
194 | struct blk_io_trace *t; | 194 | struct blk_io_trace *t; |
195 | unsigned long flags = 0; | 195 | unsigned long flags = 0; |
196 | unsigned long *sequence; | 196 | unsigned long *sequence; |
197 | pid_t pid; | 197 | pid_t pid; |
198 | int cpu, pc = 0; | 198 | int cpu, pc = 0; |
199 | bool blk_tracer = blk_tracer_enabled; | 199 | bool blk_tracer = blk_tracer_enabled; |
200 | 200 | ||
201 | if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer)) | 201 | if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer)) |
202 | return; | 202 | return; |
203 | 203 | ||
204 | what |= ddir_act[rw & WRITE]; | 204 | what |= ddir_act[rw & WRITE]; |
205 | what |= MASK_TC_BIT(rw, SYNC); | 205 | what |= MASK_TC_BIT(rw, SYNC); |
206 | what |= MASK_TC_BIT(rw, RAHEAD); | 206 | what |= MASK_TC_BIT(rw, RAHEAD); |
207 | what |= MASK_TC_BIT(rw, META); | 207 | what |= MASK_TC_BIT(rw, META); |
208 | what |= MASK_TC_BIT(rw, DISCARD); | 208 | what |= MASK_TC_BIT(rw, DISCARD); |
209 | 209 | ||
210 | pid = tsk->pid; | 210 | pid = tsk->pid; |
211 | if (act_log_check(bt, what, sector, pid)) | 211 | if (act_log_check(bt, what, sector, pid)) |
212 | return; | 212 | return; |
213 | cpu = raw_smp_processor_id(); | 213 | cpu = raw_smp_processor_id(); |
214 | 214 | ||
215 | if (blk_tracer) { | 215 | if (blk_tracer) { |
216 | tracing_record_cmdline(current); | 216 | tracing_record_cmdline(current); |
217 | 217 | ||
218 | buffer = blk_tr->buffer; | 218 | buffer = blk_tr->buffer; |
219 | pc = preempt_count(); | 219 | pc = preempt_count(); |
220 | event = trace_buffer_lock_reserve(buffer, TRACE_BLK, | 220 | event = trace_buffer_lock_reserve(buffer, TRACE_BLK, |
221 | sizeof(*t) + pdu_len, | 221 | sizeof(*t) + pdu_len, |
222 | 0, pc); | 222 | 0, pc); |
223 | if (!event) | 223 | if (!event) |
224 | return; | 224 | return; |
225 | t = ring_buffer_event_data(event); | 225 | t = ring_buffer_event_data(event); |
226 | goto record_it; | 226 | goto record_it; |
227 | } | 227 | } |
228 | 228 | ||
229 | /* | 229 | /* |
230 | * A word about the locking here - we disable interrupts to reserve | 230 | * A word about the locking here - we disable interrupts to reserve |
231 | * some space in the relay per-cpu buffer, to prevent an irq | 231 | * some space in the relay per-cpu buffer, to prevent an irq |
232 | * from coming in and stepping on our toes. | 232 | * from coming in and stepping on our toes. |
233 | */ | 233 | */ |
234 | local_irq_save(flags); | 234 | local_irq_save(flags); |
235 | 235 | ||
236 | if (unlikely(tsk->btrace_seq != blktrace_seq)) | 236 | if (unlikely(tsk->btrace_seq != blktrace_seq)) |
237 | trace_note_tsk(bt, tsk); | 237 | trace_note_tsk(bt, tsk); |
238 | 238 | ||
239 | t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len); | 239 | t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len); |
240 | if (t) { | 240 | if (t) { |
241 | sequence = per_cpu_ptr(bt->sequence, cpu); | 241 | sequence = per_cpu_ptr(bt->sequence, cpu); |
242 | 242 | ||
243 | t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION; | 243 | t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION; |
244 | t->sequence = ++(*sequence); | 244 | t->sequence = ++(*sequence); |
245 | t->time = ktime_to_ns(ktime_get()); | 245 | t->time = ktime_to_ns(ktime_get()); |
246 | record_it: | 246 | record_it: |
247 | /* | 247 | /* |
248 | * These two are not needed in ftrace as they are in the | 248 | * These two are not needed in ftrace as they are in the |
249 | * generic trace_entry, filled by tracing_generic_entry_update, | 249 | * generic trace_entry, filled by tracing_generic_entry_update, |
250 | * but for the trace_event->bin() synthesizer benefit we do it | 250 | * but for the trace_event->bin() synthesizer benefit we do it |
251 | * here too. | 251 | * here too. |
252 | */ | 252 | */ |
253 | t->cpu = cpu; | 253 | t->cpu = cpu; |
254 | t->pid = pid; | 254 | t->pid = pid; |
255 | 255 | ||
256 | t->sector = sector; | 256 | t->sector = sector; |
257 | t->bytes = bytes; | 257 | t->bytes = bytes; |
258 | t->action = what; | 258 | t->action = what; |
259 | t->device = bt->dev; | 259 | t->device = bt->dev; |
260 | t->error = error; | 260 | t->error = error; |
261 | t->pdu_len = pdu_len; | 261 | t->pdu_len = pdu_len; |
262 | 262 | ||
263 | if (pdu_len) | 263 | if (pdu_len) |
264 | memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); | 264 | memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); |
265 | 265 | ||
266 | if (blk_tracer) { | 266 | if (blk_tracer) { |
267 | trace_buffer_unlock_commit(buffer, event, 0, pc); | 267 | trace_buffer_unlock_commit(buffer, event, 0, pc); |
268 | return; | 268 | return; |
269 | } | 269 | } |
270 | } | 270 | } |
271 | 271 | ||
272 | local_irq_restore(flags); | 272 | local_irq_restore(flags); |
273 | } | 273 | } |
274 | 274 | ||
275 | static struct dentry *blk_tree_root; | 275 | static struct dentry *blk_tree_root; |
276 | static DEFINE_MUTEX(blk_tree_mutex); | 276 | static DEFINE_MUTEX(blk_tree_mutex); |
277 | 277 | ||
278 | static void blk_trace_free(struct blk_trace *bt) | 278 | static void blk_trace_free(struct blk_trace *bt) |
279 | { | 279 | { |
280 | debugfs_remove(bt->msg_file); | 280 | debugfs_remove(bt->msg_file); |
281 | debugfs_remove(bt->dropped_file); | 281 | debugfs_remove(bt->dropped_file); |
282 | relay_close(bt->rchan); | 282 | relay_close(bt->rchan); |
283 | debugfs_remove(bt->dir); | 283 | debugfs_remove(bt->dir); |
284 | free_percpu(bt->sequence); | 284 | free_percpu(bt->sequence); |
285 | free_percpu(bt->msg_data); | 285 | free_percpu(bt->msg_data); |
286 | kfree(bt); | 286 | kfree(bt); |
287 | } | 287 | } |
288 | 288 | ||
289 | static void blk_trace_cleanup(struct blk_trace *bt) | 289 | static void blk_trace_cleanup(struct blk_trace *bt) |
290 | { | 290 | { |
291 | blk_trace_free(bt); | 291 | blk_trace_free(bt); |
292 | if (atomic_dec_and_test(&blk_probes_ref)) | 292 | if (atomic_dec_and_test(&blk_probes_ref)) |
293 | blk_unregister_tracepoints(); | 293 | blk_unregister_tracepoints(); |
294 | } | 294 | } |
295 | 295 | ||
296 | int blk_trace_remove(struct request_queue *q) | 296 | int blk_trace_remove(struct request_queue *q) |
297 | { | 297 | { |
298 | struct blk_trace *bt; | 298 | struct blk_trace *bt; |
299 | 299 | ||
300 | bt = xchg(&q->blk_trace, NULL); | 300 | bt = xchg(&q->blk_trace, NULL); |
301 | if (!bt) | 301 | if (!bt) |
302 | return -EINVAL; | 302 | return -EINVAL; |
303 | 303 | ||
304 | if (bt->trace_state != Blktrace_running) | 304 | if (bt->trace_state != Blktrace_running) |
305 | blk_trace_cleanup(bt); | 305 | blk_trace_cleanup(bt); |
306 | 306 | ||
307 | return 0; | 307 | return 0; |
308 | } | 308 | } |
309 | EXPORT_SYMBOL_GPL(blk_trace_remove); | 309 | EXPORT_SYMBOL_GPL(blk_trace_remove); |
310 | 310 | ||
311 | static int blk_dropped_open(struct inode *inode, struct file *filp) | 311 | static int blk_dropped_open(struct inode *inode, struct file *filp) |
312 | { | 312 | { |
313 | filp->private_data = inode->i_private; | 313 | filp->private_data = inode->i_private; |
314 | 314 | ||
315 | return 0; | 315 | return 0; |
316 | } | 316 | } |
317 | 317 | ||
318 | static ssize_t blk_dropped_read(struct file *filp, char __user *buffer, | 318 | static ssize_t blk_dropped_read(struct file *filp, char __user *buffer, |
319 | size_t count, loff_t *ppos) | 319 | size_t count, loff_t *ppos) |
320 | { | 320 | { |
321 | struct blk_trace *bt = filp->private_data; | 321 | struct blk_trace *bt = filp->private_data; |
322 | char buf[16]; | 322 | char buf[16]; |
323 | 323 | ||
324 | snprintf(buf, sizeof(buf), "%u\n", atomic_read(&bt->dropped)); | 324 | snprintf(buf, sizeof(buf), "%u\n", atomic_read(&bt->dropped)); |
325 | 325 | ||
326 | return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf)); | 326 | return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf)); |
327 | } | 327 | } |
328 | 328 | ||
329 | static const struct file_operations blk_dropped_fops = { | 329 | static const struct file_operations blk_dropped_fops = { |
330 | .owner = THIS_MODULE, | 330 | .owner = THIS_MODULE, |
331 | .open = blk_dropped_open, | 331 | .open = blk_dropped_open, |
332 | .read = blk_dropped_read, | 332 | .read = blk_dropped_read, |
333 | .llseek = default_llseek, | 333 | .llseek = default_llseek, |
334 | }; | 334 | }; |
335 | 335 | ||
336 | static int blk_msg_open(struct inode *inode, struct file *filp) | 336 | static int blk_msg_open(struct inode *inode, struct file *filp) |
337 | { | 337 | { |
338 | filp->private_data = inode->i_private; | 338 | filp->private_data = inode->i_private; |
339 | 339 | ||
340 | return 0; | 340 | return 0; |
341 | } | 341 | } |
342 | 342 | ||
343 | static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, | 343 | static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, |
344 | size_t count, loff_t *ppos) | 344 | size_t count, loff_t *ppos) |
345 | { | 345 | { |
346 | char *msg; | 346 | char *msg; |
347 | struct blk_trace *bt; | 347 | struct blk_trace *bt; |
348 | 348 | ||
349 | if (count >= BLK_TN_MAX_MSG) | 349 | if (count >= BLK_TN_MAX_MSG) |
350 | return -EINVAL; | 350 | return -EINVAL; |
351 | 351 | ||
352 | msg = kmalloc(count + 1, GFP_KERNEL); | 352 | msg = kmalloc(count + 1, GFP_KERNEL); |
353 | if (msg == NULL) | 353 | if (msg == NULL) |
354 | return -ENOMEM; | 354 | return -ENOMEM; |
355 | 355 | ||
356 | if (copy_from_user(msg, buffer, count)) { | 356 | if (copy_from_user(msg, buffer, count)) { |
357 | kfree(msg); | 357 | kfree(msg); |
358 | return -EFAULT; | 358 | return -EFAULT; |
359 | } | 359 | } |
360 | 360 | ||
361 | msg[count] = '\0'; | 361 | msg[count] = '\0'; |
362 | bt = filp->private_data; | 362 | bt = filp->private_data; |
363 | __trace_note_message(bt, "%s", msg); | 363 | __trace_note_message(bt, "%s", msg); |
364 | kfree(msg); | 364 | kfree(msg); |
365 | 365 | ||
366 | return count; | 366 | return count; |
367 | } | 367 | } |
368 | 368 | ||
369 | static const struct file_operations blk_msg_fops = { | 369 | static const struct file_operations blk_msg_fops = { |
370 | .owner = THIS_MODULE, | 370 | .owner = THIS_MODULE, |
371 | .open = blk_msg_open, | 371 | .open = blk_msg_open, |
372 | .write = blk_msg_write, | 372 | .write = blk_msg_write, |
373 | .llseek = noop_llseek, | 373 | .llseek = noop_llseek, |
374 | }; | 374 | }; |
375 | 375 | ||
376 | /* | 376 | /* |
377 | * Keep track of how many times we encountered a full subbuffer, to aid | 377 | * Keep track of how many times we encountered a full subbuffer, to aid |
378 | * the user space app in telling how many lost events there were. | 378 | * the user space app in telling how many lost events there were. |
379 | */ | 379 | */ |
380 | static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf, | 380 | static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf, |
381 | void *prev_subbuf, size_t prev_padding) | 381 | void *prev_subbuf, size_t prev_padding) |
382 | { | 382 | { |
383 | struct blk_trace *bt; | 383 | struct blk_trace *bt; |
384 | 384 | ||
385 | if (!relay_buf_full(buf)) | 385 | if (!relay_buf_full(buf)) |
386 | return 1; | 386 | return 1; |
387 | 387 | ||
388 | bt = buf->chan->private_data; | 388 | bt = buf->chan->private_data; |
389 | atomic_inc(&bt->dropped); | 389 | atomic_inc(&bt->dropped); |
390 | return 0; | 390 | return 0; |
391 | } | 391 | } |
392 | 392 | ||
393 | static int blk_remove_buf_file_callback(struct dentry *dentry) | 393 | static int blk_remove_buf_file_callback(struct dentry *dentry) |
394 | { | 394 | { |
395 | debugfs_remove(dentry); | 395 | debugfs_remove(dentry); |
396 | 396 | ||
397 | return 0; | 397 | return 0; |
398 | } | 398 | } |
399 | 399 | ||
400 | static struct dentry *blk_create_buf_file_callback(const char *filename, | 400 | static struct dentry *blk_create_buf_file_callback(const char *filename, |
401 | struct dentry *parent, | 401 | struct dentry *parent, |
402 | int mode, | 402 | int mode, |
403 | struct rchan_buf *buf, | 403 | struct rchan_buf *buf, |
404 | int *is_global) | 404 | int *is_global) |
405 | { | 405 | { |
406 | return debugfs_create_file(filename, mode, parent, buf, | 406 | return debugfs_create_file(filename, mode, parent, buf, |
407 | &relay_file_operations); | 407 | &relay_file_operations); |
408 | } | 408 | } |
409 | 409 | ||
410 | static struct rchan_callbacks blk_relay_callbacks = { | 410 | static struct rchan_callbacks blk_relay_callbacks = { |
411 | .subbuf_start = blk_subbuf_start_callback, | 411 | .subbuf_start = blk_subbuf_start_callback, |
412 | .create_buf_file = blk_create_buf_file_callback, | 412 | .create_buf_file = blk_create_buf_file_callback, |
413 | .remove_buf_file = blk_remove_buf_file_callback, | 413 | .remove_buf_file = blk_remove_buf_file_callback, |
414 | }; | 414 | }; |
415 | 415 | ||
416 | static void blk_trace_setup_lba(struct blk_trace *bt, | 416 | static void blk_trace_setup_lba(struct blk_trace *bt, |
417 | struct block_device *bdev) | 417 | struct block_device *bdev) |
418 | { | 418 | { |
419 | struct hd_struct *part = NULL; | 419 | struct hd_struct *part = NULL; |
420 | 420 | ||
421 | if (bdev) | 421 | if (bdev) |
422 | part = bdev->bd_part; | 422 | part = bdev->bd_part; |
423 | 423 | ||
424 | if (part) { | 424 | if (part) { |
425 | bt->start_lba = part->start_sect; | 425 | bt->start_lba = part->start_sect; |
426 | bt->end_lba = part->start_sect + part->nr_sects; | 426 | bt->end_lba = part->start_sect + part->nr_sects; |
427 | } else { | 427 | } else { |
428 | bt->start_lba = 0; | 428 | bt->start_lba = 0; |
429 | bt->end_lba = -1ULL; | 429 | bt->end_lba = -1ULL; |
430 | } | 430 | } |
431 | } | 431 | } |
432 | 432 | ||
433 | /* | 433 | /* |
434 | * Setup everything required to start tracing | 434 | * Setup everything required to start tracing |
435 | */ | 435 | */ |
436 | int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, | 436 | int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, |
437 | struct block_device *bdev, | 437 | struct block_device *bdev, |
438 | struct blk_user_trace_setup *buts) | 438 | struct blk_user_trace_setup *buts) |
439 | { | 439 | { |
440 | struct blk_trace *old_bt, *bt = NULL; | 440 | struct blk_trace *old_bt, *bt = NULL; |
441 | struct dentry *dir = NULL; | 441 | struct dentry *dir = NULL; |
442 | int ret, i; | 442 | int ret, i; |
443 | 443 | ||
444 | if (!buts->buf_size || !buts->buf_nr) | 444 | if (!buts->buf_size || !buts->buf_nr) |
445 | return -EINVAL; | 445 | return -EINVAL; |
446 | 446 | ||
447 | strncpy(buts->name, name, BLKTRACE_BDEV_SIZE); | 447 | strncpy(buts->name, name, BLKTRACE_BDEV_SIZE); |
448 | buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0'; | 448 | buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0'; |
449 | 449 | ||
450 | /* | 450 | /* |
451 | * some device names contain path separators - convert the slashes | 451 | * some device names contain path separators - convert the slashes |
452 | * to underscores for this to work as expected | 452 | * to underscores for this to work as expected |
453 | */ | 453 | */ |
454 | for (i = 0; i < strlen(buts->name); i++) | 454 | for (i = 0; i < strlen(buts->name); i++) |
455 | if (buts->name[i] == '/') | 455 | if (buts->name[i] == '/') |
456 | buts->name[i] = '_'; | 456 | buts->name[i] = '_'; |
457 | 457 | ||
458 | bt = kzalloc(sizeof(*bt), GFP_KERNEL); | 458 | bt = kzalloc(sizeof(*bt), GFP_KERNEL); |
459 | if (!bt) | 459 | if (!bt) |
460 | return -ENOMEM; | 460 | return -ENOMEM; |
461 | 461 | ||
462 | ret = -ENOMEM; | 462 | ret = -ENOMEM; |
463 | bt->sequence = alloc_percpu(unsigned long); | 463 | bt->sequence = alloc_percpu(unsigned long); |
464 | if (!bt->sequence) | 464 | if (!bt->sequence) |
465 | goto err; | 465 | goto err; |
466 | 466 | ||
467 | bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char)); | 467 | bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char)); |
468 | if (!bt->msg_data) | 468 | if (!bt->msg_data) |
469 | goto err; | 469 | goto err; |
470 | 470 | ||
471 | ret = -ENOENT; | 471 | ret = -ENOENT; |
472 | 472 | ||
473 | mutex_lock(&blk_tree_mutex); | 473 | mutex_lock(&blk_tree_mutex); |
474 | if (!blk_tree_root) { | 474 | if (!blk_tree_root) { |
475 | blk_tree_root = debugfs_create_dir("block", NULL); | 475 | blk_tree_root = debugfs_create_dir("block", NULL); |
476 | if (!blk_tree_root) { | 476 | if (!blk_tree_root) { |
477 | mutex_unlock(&blk_tree_mutex); | 477 | mutex_unlock(&blk_tree_mutex); |
478 | goto err; | 478 | goto err; |
479 | } | 479 | } |
480 | } | 480 | } |
481 | mutex_unlock(&blk_tree_mutex); | 481 | mutex_unlock(&blk_tree_mutex); |
482 | 482 | ||
483 | dir = debugfs_create_dir(buts->name, blk_tree_root); | 483 | dir = debugfs_create_dir(buts->name, blk_tree_root); |
484 | 484 | ||
485 | if (!dir) | 485 | if (!dir) |
486 | goto err; | 486 | goto err; |
487 | 487 | ||
488 | bt->dir = dir; | 488 | bt->dir = dir; |
489 | bt->dev = dev; | 489 | bt->dev = dev; |
490 | atomic_set(&bt->dropped, 0); | 490 | atomic_set(&bt->dropped, 0); |
491 | 491 | ||
492 | ret = -EIO; | 492 | ret = -EIO; |
493 | bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt, | 493 | bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt, |
494 | &blk_dropped_fops); | 494 | &blk_dropped_fops); |
495 | if (!bt->dropped_file) | 495 | if (!bt->dropped_file) |
496 | goto err; | 496 | goto err; |
497 | 497 | ||
498 | bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops); | 498 | bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops); |
499 | if (!bt->msg_file) | 499 | if (!bt->msg_file) |
500 | goto err; | 500 | goto err; |
501 | 501 | ||
502 | bt->rchan = relay_open("trace", dir, buts->buf_size, | 502 | bt->rchan = relay_open("trace", dir, buts->buf_size, |
503 | buts->buf_nr, &blk_relay_callbacks, bt); | 503 | buts->buf_nr, &blk_relay_callbacks, bt); |
504 | if (!bt->rchan) | 504 | if (!bt->rchan) |
505 | goto err; | 505 | goto err; |
506 | 506 | ||
507 | bt->act_mask = buts->act_mask; | 507 | bt->act_mask = buts->act_mask; |
508 | if (!bt->act_mask) | 508 | if (!bt->act_mask) |
509 | bt->act_mask = (u16) -1; | 509 | bt->act_mask = (u16) -1; |
510 | 510 | ||
511 | blk_trace_setup_lba(bt, bdev); | 511 | blk_trace_setup_lba(bt, bdev); |
512 | 512 | ||
513 | /* overwrite with user settings */ | 513 | /* overwrite with user settings */ |
514 | if (buts->start_lba) | 514 | if (buts->start_lba) |
515 | bt->start_lba = buts->start_lba; | 515 | bt->start_lba = buts->start_lba; |
516 | if (buts->end_lba) | 516 | if (buts->end_lba) |
517 | bt->end_lba = buts->end_lba; | 517 | bt->end_lba = buts->end_lba; |
518 | 518 | ||
519 | bt->pid = buts->pid; | 519 | bt->pid = buts->pid; |
520 | bt->trace_state = Blktrace_setup; | 520 | bt->trace_state = Blktrace_setup; |
521 | 521 | ||
522 | ret = -EBUSY; | 522 | ret = -EBUSY; |
523 | old_bt = xchg(&q->blk_trace, bt); | 523 | old_bt = xchg(&q->blk_trace, bt); |
524 | if (old_bt) { | 524 | if (old_bt) { |
525 | (void) xchg(&q->blk_trace, old_bt); | 525 | (void) xchg(&q->blk_trace, old_bt); |
526 | goto err; | 526 | goto err; |
527 | } | 527 | } |
528 | 528 | ||
529 | if (atomic_inc_return(&blk_probes_ref) == 1) | 529 | if (atomic_inc_return(&blk_probes_ref) == 1) |
530 | blk_register_tracepoints(); | 530 | blk_register_tracepoints(); |
531 | 531 | ||
532 | return 0; | 532 | return 0; |
533 | err: | 533 | err: |
534 | blk_trace_free(bt); | 534 | blk_trace_free(bt); |
535 | return ret; | 535 | return ret; |
536 | } | 536 | } |
537 | 537 | ||
538 | int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, | 538 | int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, |
539 | struct block_device *bdev, | 539 | struct block_device *bdev, |
540 | char __user *arg) | 540 | char __user *arg) |
541 | { | 541 | { |
542 | struct blk_user_trace_setup buts; | 542 | struct blk_user_trace_setup buts; |
543 | int ret; | 543 | int ret; |
544 | 544 | ||
545 | ret = copy_from_user(&buts, arg, sizeof(buts)); | 545 | ret = copy_from_user(&buts, arg, sizeof(buts)); |
546 | if (ret) | 546 | if (ret) |
547 | return -EFAULT; | 547 | return -EFAULT; |
548 | 548 | ||
549 | ret = do_blk_trace_setup(q, name, dev, bdev, &buts); | 549 | ret = do_blk_trace_setup(q, name, dev, bdev, &buts); |
550 | if (ret) | 550 | if (ret) |
551 | return ret; | 551 | return ret; |
552 | 552 | ||
553 | if (copy_to_user(arg, &buts, sizeof(buts))) { | 553 | if (copy_to_user(arg, &buts, sizeof(buts))) { |
554 | blk_trace_remove(q); | 554 | blk_trace_remove(q); |
555 | return -EFAULT; | 555 | return -EFAULT; |
556 | } | 556 | } |
557 | return 0; | 557 | return 0; |
558 | } | 558 | } |
559 | EXPORT_SYMBOL_GPL(blk_trace_setup); | 559 | EXPORT_SYMBOL_GPL(blk_trace_setup); |
560 | 560 | ||
561 | #if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64) | 561 | #if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64) |
562 | static int compat_blk_trace_setup(struct request_queue *q, char *name, | 562 | static int compat_blk_trace_setup(struct request_queue *q, char *name, |
563 | dev_t dev, struct block_device *bdev, | 563 | dev_t dev, struct block_device *bdev, |
564 | char __user *arg) | 564 | char __user *arg) |
565 | { | 565 | { |
566 | struct blk_user_trace_setup buts; | 566 | struct blk_user_trace_setup buts; |
567 | struct compat_blk_user_trace_setup cbuts; | 567 | struct compat_blk_user_trace_setup cbuts; |
568 | int ret; | 568 | int ret; |
569 | 569 | ||
570 | if (copy_from_user(&cbuts, arg, sizeof(cbuts))) | 570 | if (copy_from_user(&cbuts, arg, sizeof(cbuts))) |
571 | return -EFAULT; | 571 | return -EFAULT; |
572 | 572 | ||
573 | buts = (struct blk_user_trace_setup) { | 573 | buts = (struct blk_user_trace_setup) { |
574 | .act_mask = cbuts.act_mask, | 574 | .act_mask = cbuts.act_mask, |
575 | .buf_size = cbuts.buf_size, | 575 | .buf_size = cbuts.buf_size, |
576 | .buf_nr = cbuts.buf_nr, | 576 | .buf_nr = cbuts.buf_nr, |
577 | .start_lba = cbuts.start_lba, | 577 | .start_lba = cbuts.start_lba, |
578 | .end_lba = cbuts.end_lba, | 578 | .end_lba = cbuts.end_lba, |
579 | .pid = cbuts.pid, | 579 | .pid = cbuts.pid, |
580 | }; | 580 | }; |
581 | memcpy(&buts.name, &cbuts.name, 32); | 581 | memcpy(&buts.name, &cbuts.name, 32); |
582 | 582 | ||
583 | ret = do_blk_trace_setup(q, name, dev, bdev, &buts); | 583 | ret = do_blk_trace_setup(q, name, dev, bdev, &buts); |
584 | if (ret) | 584 | if (ret) |
585 | return ret; | 585 | return ret; |
586 | 586 | ||
587 | if (copy_to_user(arg, &buts.name, 32)) { | 587 | if (copy_to_user(arg, &buts.name, 32)) { |
588 | blk_trace_remove(q); | 588 | blk_trace_remove(q); |
589 | return -EFAULT; | 589 | return -EFAULT; |
590 | } | 590 | } |
591 | 591 | ||
592 | return 0; | 592 | return 0; |
593 | } | 593 | } |
594 | #endif | 594 | #endif |
595 | 595 | ||
596 | int blk_trace_startstop(struct request_queue *q, int start) | 596 | int blk_trace_startstop(struct request_queue *q, int start) |
597 | { | 597 | { |
598 | int ret; | 598 | int ret; |
599 | struct blk_trace *bt = q->blk_trace; | 599 | struct blk_trace *bt = q->blk_trace; |
600 | 600 | ||
601 | if (bt == NULL) | 601 | if (bt == NULL) |
602 | return -EINVAL; | 602 | return -EINVAL; |
603 | 603 | ||
604 | /* | 604 | /* |
605 | * For starting a trace, we can transition from a setup or stopped | 605 | * For starting a trace, we can transition from a setup or stopped |
606 | * trace. For stopping a trace, the state must be running | 606 | * trace. For stopping a trace, the state must be running |
607 | */ | 607 | */ |
608 | ret = -EINVAL; | 608 | ret = -EINVAL; |
609 | if (start) { | 609 | if (start) { |
610 | if (bt->trace_state == Blktrace_setup || | 610 | if (bt->trace_state == Blktrace_setup || |
611 | bt->trace_state == Blktrace_stopped) { | 611 | bt->trace_state == Blktrace_stopped) { |
612 | blktrace_seq++; | 612 | blktrace_seq++; |
613 | smp_mb(); | 613 | smp_mb(); |
614 | bt->trace_state = Blktrace_running; | 614 | bt->trace_state = Blktrace_running; |
615 | 615 | ||
616 | trace_note_time(bt); | 616 | trace_note_time(bt); |
617 | ret = 0; | 617 | ret = 0; |
618 | } | 618 | } |
619 | } else { | 619 | } else { |
620 | if (bt->trace_state == Blktrace_running) { | 620 | if (bt->trace_state == Blktrace_running) { |
621 | bt->trace_state = Blktrace_stopped; | 621 | bt->trace_state = Blktrace_stopped; |
622 | relay_flush(bt->rchan); | 622 | relay_flush(bt->rchan); |
623 | ret = 0; | 623 | ret = 0; |
624 | } | 624 | } |
625 | } | 625 | } |
626 | 626 | ||
627 | return ret; | 627 | return ret; |
628 | } | 628 | } |
629 | EXPORT_SYMBOL_GPL(blk_trace_startstop); | 629 | EXPORT_SYMBOL_GPL(blk_trace_startstop); |
630 | 630 | ||
631 | /** | 631 | /** |
632 | * blk_trace_ioctl: - handle the ioctls associated with tracing | 632 | * blk_trace_ioctl: - handle the ioctls associated with tracing |
633 | * @bdev: the block device | 633 | * @bdev: the block device |
634 | * @cmd: the ioctl cmd | 634 | * @cmd: the ioctl cmd |
635 | * @arg: the argument data, if any | 635 | * @arg: the argument data, if any |
636 | * | 636 | * |
637 | **/ | 637 | **/ |
638 | int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) | 638 | int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) |
639 | { | 639 | { |
640 | struct request_queue *q; | 640 | struct request_queue *q; |
641 | int ret, start = 0; | 641 | int ret, start = 0; |
642 | char b[BDEVNAME_SIZE]; | 642 | char b[BDEVNAME_SIZE]; |
643 | 643 | ||
644 | q = bdev_get_queue(bdev); | 644 | q = bdev_get_queue(bdev); |
645 | if (!q) | 645 | if (!q) |
646 | return -ENXIO; | 646 | return -ENXIO; |
647 | 647 | ||
648 | mutex_lock(&bdev->bd_mutex); | 648 | mutex_lock(&bdev->bd_mutex); |
649 | 649 | ||
650 | switch (cmd) { | 650 | switch (cmd) { |
651 | case BLKTRACESETUP: | 651 | case BLKTRACESETUP: |
652 | bdevname(bdev, b); | 652 | bdevname(bdev, b); |
653 | ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); | 653 | ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); |
654 | break; | 654 | break; |
655 | #if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64) | 655 | #if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64) |
656 | case BLKTRACESETUP32: | 656 | case BLKTRACESETUP32: |
657 | bdevname(bdev, b); | 657 | bdevname(bdev, b); |
658 | ret = compat_blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); | 658 | ret = compat_blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); |
659 | break; | 659 | break; |
660 | #endif | 660 | #endif |
661 | case BLKTRACESTART: | 661 | case BLKTRACESTART: |
662 | start = 1; | 662 | start = 1; |
663 | case BLKTRACESTOP: | 663 | case BLKTRACESTOP: |
664 | ret = blk_trace_startstop(q, start); | 664 | ret = blk_trace_startstop(q, start); |
665 | break; | 665 | break; |
666 | case BLKTRACETEARDOWN: | 666 | case BLKTRACETEARDOWN: |
667 | ret = blk_trace_remove(q); | 667 | ret = blk_trace_remove(q); |
668 | break; | 668 | break; |
669 | default: | 669 | default: |
670 | ret = -ENOTTY; | 670 | ret = -ENOTTY; |
671 | break; | 671 | break; |
672 | } | 672 | } |
673 | 673 | ||
674 | mutex_unlock(&bdev->bd_mutex); | 674 | mutex_unlock(&bdev->bd_mutex); |
675 | return ret; | 675 | return ret; |
676 | } | 676 | } |
677 | 677 | ||
678 | /** | 678 | /** |
679 | * blk_trace_shutdown: - stop and cleanup trace structures | 679 | * blk_trace_shutdown: - stop and cleanup trace structures |
680 | * @q: the request queue associated with the device | 680 | * @q: the request queue associated with the device |
681 | * | 681 | * |
682 | **/ | 682 | **/ |
683 | void blk_trace_shutdown(struct request_queue *q) | 683 | void blk_trace_shutdown(struct request_queue *q) |
684 | { | 684 | { |
685 | if (q->blk_trace) { | 685 | if (q->blk_trace) { |
686 | blk_trace_startstop(q, 0); | 686 | blk_trace_startstop(q, 0); |
687 | blk_trace_remove(q); | 687 | blk_trace_remove(q); |
688 | } | 688 | } |
689 | } | 689 | } |
690 | 690 | ||
691 | /* | 691 | /* |
692 | * blktrace probes | 692 | * blktrace probes |
693 | */ | 693 | */ |
694 | 694 | ||
695 | /** | 695 | /** |
696 | * blk_add_trace_rq - Add a trace for a request oriented action | 696 | * blk_add_trace_rq - Add a trace for a request oriented action |
697 | * @q: queue the io is for | 697 | * @q: queue the io is for |
698 | * @rq: the source request | 698 | * @rq: the source request |
699 | * @what: the action | 699 | * @what: the action |
700 | * | 700 | * |
701 | * Description: | 701 | * Description: |
702 | * Records an action against a request. Will log the bio offset + size. | 702 | * Records an action against a request. Will log the bio offset + size. |
703 | * | 703 | * |
704 | **/ | 704 | **/ |
705 | static void blk_add_trace_rq(struct request_queue *q, struct request *rq, | 705 | static void blk_add_trace_rq(struct request_queue *q, struct request *rq, |
706 | u32 what) | 706 | u32 what) |
707 | { | 707 | { |
708 | struct blk_trace *bt = q->blk_trace; | 708 | struct blk_trace *bt = q->blk_trace; |
709 | 709 | ||
710 | if (likely(!bt)) | 710 | if (likely(!bt)) |
711 | return; | 711 | return; |
712 | 712 | ||
713 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { | 713 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { |
714 | what |= BLK_TC_ACT(BLK_TC_PC); | 714 | what |= BLK_TC_ACT(BLK_TC_PC); |
715 | __blk_add_trace(bt, 0, blk_rq_bytes(rq), rq->cmd_flags, | 715 | __blk_add_trace(bt, 0, blk_rq_bytes(rq), rq->cmd_flags, |
716 | what, rq->errors, rq->cmd_len, rq->cmd); | 716 | what, rq->errors, rq->cmd_len, rq->cmd); |
717 | } else { | 717 | } else { |
718 | what |= BLK_TC_ACT(BLK_TC_FS); | 718 | what |= BLK_TC_ACT(BLK_TC_FS); |
719 | __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), | 719 | __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), |
720 | rq->cmd_flags, what, rq->errors, 0, NULL); | 720 | rq->cmd_flags, what, rq->errors, 0, NULL); |
721 | } | 721 | } |
722 | } | 722 | } |
723 | 723 | ||
724 | static void blk_add_trace_rq_abort(void *ignore, | 724 | static void blk_add_trace_rq_abort(void *ignore, |
725 | struct request_queue *q, struct request *rq) | 725 | struct request_queue *q, struct request *rq) |
726 | { | 726 | { |
727 | blk_add_trace_rq(q, rq, BLK_TA_ABORT); | 727 | blk_add_trace_rq(q, rq, BLK_TA_ABORT); |
728 | } | 728 | } |
729 | 729 | ||
730 | static void blk_add_trace_rq_insert(void *ignore, | 730 | static void blk_add_trace_rq_insert(void *ignore, |
731 | struct request_queue *q, struct request *rq) | 731 | struct request_queue *q, struct request *rq) |
732 | { | 732 | { |
733 | blk_add_trace_rq(q, rq, BLK_TA_INSERT); | 733 | blk_add_trace_rq(q, rq, BLK_TA_INSERT); |
734 | } | 734 | } |
735 | 735 | ||
736 | static void blk_add_trace_rq_issue(void *ignore, | 736 | static void blk_add_trace_rq_issue(void *ignore, |
737 | struct request_queue *q, struct request *rq) | 737 | struct request_queue *q, struct request *rq) |
738 | { | 738 | { |
739 | blk_add_trace_rq(q, rq, BLK_TA_ISSUE); | 739 | blk_add_trace_rq(q, rq, BLK_TA_ISSUE); |
740 | } | 740 | } |
741 | 741 | ||
742 | static void blk_add_trace_rq_requeue(void *ignore, | 742 | static void blk_add_trace_rq_requeue(void *ignore, |
743 | struct request_queue *q, | 743 | struct request_queue *q, |
744 | struct request *rq) | 744 | struct request *rq) |
745 | { | 745 | { |
746 | blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); | 746 | blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); |
747 | } | 747 | } |
748 | 748 | ||
749 | static void blk_add_trace_rq_complete(void *ignore, | 749 | static void blk_add_trace_rq_complete(void *ignore, |
750 | struct request_queue *q, | 750 | struct request_queue *q, |
751 | struct request *rq) | 751 | struct request *rq) |
752 | { | 752 | { |
753 | blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); | 753 | blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); |
754 | } | 754 | } |
755 | 755 | ||
756 | /** | 756 | /** |
757 | * blk_add_trace_bio - Add a trace for a bio oriented action | 757 | * blk_add_trace_bio - Add a trace for a bio oriented action |
758 | * @q: queue the io is for | 758 | * @q: queue the io is for |
759 | * @bio: the source bio | 759 | * @bio: the source bio |
760 | * @what: the action | 760 | * @what: the action |
761 | * @error: error, if any | 761 | * @error: error, if any |
762 | * | 762 | * |
763 | * Description: | 763 | * Description: |
764 | * Records an action against a bio. Will log the bio offset + size. | 764 | * Records an action against a bio. Will log the bio offset + size. |
765 | * | 765 | * |
766 | **/ | 766 | **/ |
767 | static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, | 767 | static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, |
768 | u32 what, int error) | 768 | u32 what, int error) |
769 | { | 769 | { |
770 | struct blk_trace *bt = q->blk_trace; | 770 | struct blk_trace *bt = q->blk_trace; |
771 | 771 | ||
772 | if (likely(!bt)) | 772 | if (likely(!bt)) |
773 | return; | 773 | return; |
774 | 774 | ||
775 | if (!error && !bio_flagged(bio, BIO_UPTODATE)) | 775 | if (!error && !bio_flagged(bio, BIO_UPTODATE)) |
776 | error = EIO; | 776 | error = EIO; |
777 | 777 | ||
778 | __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, | 778 | __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, |
779 | error, 0, NULL); | 779 | error, 0, NULL); |
780 | } | 780 | } |
781 | 781 | ||
782 | static void blk_add_trace_bio_bounce(void *ignore, | 782 | static void blk_add_trace_bio_bounce(void *ignore, |
783 | struct request_queue *q, struct bio *bio) | 783 | struct request_queue *q, struct bio *bio) |
784 | { | 784 | { |
785 | blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0); | 785 | blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0); |
786 | } | 786 | } |
787 | 787 | ||
788 | static void blk_add_trace_bio_complete(void *ignore, | 788 | static void blk_add_trace_bio_complete(void *ignore, |
789 | struct request_queue *q, struct bio *bio, | 789 | struct request_queue *q, struct bio *bio, |
790 | int error) | 790 | int error) |
791 | { | 791 | { |
792 | blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error); | 792 | blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error); |
793 | } | 793 | } |
794 | 794 | ||
795 | static void blk_add_trace_bio_backmerge(void *ignore, | 795 | static void blk_add_trace_bio_backmerge(void *ignore, |
796 | struct request_queue *q, | 796 | struct request_queue *q, |
797 | struct bio *bio) | 797 | struct bio *bio) |
798 | { | 798 | { |
799 | blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0); | 799 | blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0); |
800 | } | 800 | } |
801 | 801 | ||
802 | static void blk_add_trace_bio_frontmerge(void *ignore, | 802 | static void blk_add_trace_bio_frontmerge(void *ignore, |
803 | struct request_queue *q, | 803 | struct request_queue *q, |
804 | struct bio *bio) | 804 | struct bio *bio) |
805 | { | 805 | { |
806 | blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0); | 806 | blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0); |
807 | } | 807 | } |
808 | 808 | ||
809 | static void blk_add_trace_bio_queue(void *ignore, | 809 | static void blk_add_trace_bio_queue(void *ignore, |
810 | struct request_queue *q, struct bio *bio) | 810 | struct request_queue *q, struct bio *bio) |
811 | { | 811 | { |
812 | blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0); | 812 | blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0); |
813 | } | 813 | } |
814 | 814 | ||
815 | static void blk_add_trace_getrq(void *ignore, | 815 | static void blk_add_trace_getrq(void *ignore, |
816 | struct request_queue *q, | 816 | struct request_queue *q, |
817 | struct bio *bio, int rw) | 817 | struct bio *bio, int rw) |
818 | { | 818 | { |
819 | if (bio) | 819 | if (bio) |
820 | blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0); | 820 | blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0); |
821 | else { | 821 | else { |
822 | struct blk_trace *bt = q->blk_trace; | 822 | struct blk_trace *bt = q->blk_trace; |
823 | 823 | ||
824 | if (bt) | 824 | if (bt) |
825 | __blk_add_trace(bt, 0, 0, rw, BLK_TA_GETRQ, 0, 0, NULL); | 825 | __blk_add_trace(bt, 0, 0, rw, BLK_TA_GETRQ, 0, 0, NULL); |
826 | } | 826 | } |
827 | } | 827 | } |
828 | 828 | ||
829 | 829 | ||
830 | static void blk_add_trace_sleeprq(void *ignore, | 830 | static void blk_add_trace_sleeprq(void *ignore, |
831 | struct request_queue *q, | 831 | struct request_queue *q, |
832 | struct bio *bio, int rw) | 832 | struct bio *bio, int rw) |
833 | { | 833 | { |
834 | if (bio) | 834 | if (bio) |
835 | blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0); | 835 | blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0); |
836 | else { | 836 | else { |
837 | struct blk_trace *bt = q->blk_trace; | 837 | struct blk_trace *bt = q->blk_trace; |
838 | 838 | ||
839 | if (bt) | 839 | if (bt) |
840 | __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ, | 840 | __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ, |
841 | 0, 0, NULL); | 841 | 0, 0, NULL); |
842 | } | 842 | } |
843 | } | 843 | } |
844 | 844 | ||
845 | static void blk_add_trace_plug(void *ignore, struct request_queue *q) | 845 | static void blk_add_trace_plug(void *ignore, struct request_queue *q) |
846 | { | 846 | { |
847 | struct blk_trace *bt = q->blk_trace; | 847 | struct blk_trace *bt = q->blk_trace; |
848 | 848 | ||
849 | if (bt) | 849 | if (bt) |
850 | __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL); | 850 | __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL); |
851 | } | 851 | } |
852 | 852 | ||
853 | static void blk_add_trace_unplug_io(void *ignore, struct request_queue *q, | 853 | static void blk_add_trace_unplug(void *ignore, struct request_queue *q, |
854 | unsigned int depth) | 854 | unsigned int depth, bool explicit) |
855 | { | 855 | { |
856 | struct blk_trace *bt = q->blk_trace; | 856 | struct blk_trace *bt = q->blk_trace; |
857 | 857 | ||
858 | if (bt) { | 858 | if (bt) { |
859 | __be64 rpdu = cpu_to_be64(depth); | 859 | __be64 rpdu = cpu_to_be64(depth); |
860 | u32 what; | ||
860 | 861 | ||
861 | __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0, | 862 | if (explicit) |
862 | sizeof(rpdu), &rpdu); | 863 | what = BLK_TA_UNPLUG_IO; |
864 | else | ||
865 | what = BLK_TA_UNPLUG_TIMER; | ||
866 | |||
867 | __blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu); | ||
863 | } | 868 | } |
864 | } | 869 | } |
865 | 870 | ||
866 | static void blk_add_trace_split(void *ignore, | 871 | static void blk_add_trace_split(void *ignore, |
867 | struct request_queue *q, struct bio *bio, | 872 | struct request_queue *q, struct bio *bio, |
868 | unsigned int pdu) | 873 | unsigned int pdu) |
869 | { | 874 | { |
870 | struct blk_trace *bt = q->blk_trace; | 875 | struct blk_trace *bt = q->blk_trace; |
871 | 876 | ||
872 | if (bt) { | 877 | if (bt) { |
873 | __be64 rpdu = cpu_to_be64(pdu); | 878 | __be64 rpdu = cpu_to_be64(pdu); |
874 | 879 | ||
875 | __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, | 880 | __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, |
876 | BLK_TA_SPLIT, !bio_flagged(bio, BIO_UPTODATE), | 881 | BLK_TA_SPLIT, !bio_flagged(bio, BIO_UPTODATE), |
877 | sizeof(rpdu), &rpdu); | 882 | sizeof(rpdu), &rpdu); |
878 | } | 883 | } |
879 | } | 884 | } |
880 | 885 | ||
881 | /** | 886 | /** |
882 | * blk_add_trace_bio_remap - Add a trace for a bio-remap operation | 887 | * blk_add_trace_bio_remap - Add a trace for a bio-remap operation |
883 | * @ignore: trace callback data parameter (not used) | 888 | * @ignore: trace callback data parameter (not used) |
884 | * @q: queue the io is for | 889 | * @q: queue the io is for |
885 | * @bio: the source bio | 890 | * @bio: the source bio |
886 | * @dev: target device | 891 | * @dev: target device |
887 | * @from: source sector | 892 | * @from: source sector |
888 | * | 893 | * |
889 | * Description: | 894 | * Description: |
890 | * A device mapper or raid target sometimes needs to split a bio because | 895 | * A device mapper or raid target sometimes needs to split a bio because |
891 | * it spans a stripe (or similar). Add a trace for that action. | 896 | * it spans a stripe (or similar). Add a trace for that action. |
892 | * | 897 | * |
893 | **/ | 898 | **/ |
894 | static void blk_add_trace_bio_remap(void *ignore, | 899 | static void blk_add_trace_bio_remap(void *ignore, |
895 | struct request_queue *q, struct bio *bio, | 900 | struct request_queue *q, struct bio *bio, |
896 | dev_t dev, sector_t from) | 901 | dev_t dev, sector_t from) |
897 | { | 902 | { |
898 | struct blk_trace *bt = q->blk_trace; | 903 | struct blk_trace *bt = q->blk_trace; |
899 | struct blk_io_trace_remap r; | 904 | struct blk_io_trace_remap r; |
900 | 905 | ||
901 | if (likely(!bt)) | 906 | if (likely(!bt)) |
902 | return; | 907 | return; |
903 | 908 | ||
904 | r.device_from = cpu_to_be32(dev); | 909 | r.device_from = cpu_to_be32(dev); |
905 | r.device_to = cpu_to_be32(bio->bi_bdev->bd_dev); | 910 | r.device_to = cpu_to_be32(bio->bi_bdev->bd_dev); |
906 | r.sector_from = cpu_to_be64(from); | 911 | r.sector_from = cpu_to_be64(from); |
907 | 912 | ||
908 | __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, | 913 | __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, |
909 | BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), | 914 | BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), |
910 | sizeof(r), &r); | 915 | sizeof(r), &r); |
911 | } | 916 | } |
912 | 917 | ||
913 | /** | 918 | /** |
914 | * blk_add_trace_rq_remap - Add a trace for a request-remap operation | 919 | * blk_add_trace_rq_remap - Add a trace for a request-remap operation |
915 | * @ignore: trace callback data parameter (not used) | 920 | * @ignore: trace callback data parameter (not used) |
916 | * @q: queue the io is for | 921 | * @q: queue the io is for |
917 | * @rq: the source request | 922 | * @rq: the source request |
918 | * @dev: target device | 923 | * @dev: target device |
919 | * @from: source sector | 924 | * @from: source sector |
920 | * | 925 | * |
921 | * Description: | 926 | * Description: |
922 | * Device mapper remaps requests to other devices. | 927 | * Device mapper remaps requests to other devices. |
923 | * Add a trace for that action. | 928 | * Add a trace for that action. |
924 | * | 929 | * |
925 | **/ | 930 | **/ |
926 | static void blk_add_trace_rq_remap(void *ignore, | 931 | static void blk_add_trace_rq_remap(void *ignore, |
927 | struct request_queue *q, | 932 | struct request_queue *q, |
928 | struct request *rq, dev_t dev, | 933 | struct request *rq, dev_t dev, |
929 | sector_t from) | 934 | sector_t from) |
930 | { | 935 | { |
931 | struct blk_trace *bt = q->blk_trace; | 936 | struct blk_trace *bt = q->blk_trace; |
932 | struct blk_io_trace_remap r; | 937 | struct blk_io_trace_remap r; |
933 | 938 | ||
934 | if (likely(!bt)) | 939 | if (likely(!bt)) |
935 | return; | 940 | return; |
936 | 941 | ||
937 | r.device_from = cpu_to_be32(dev); | 942 | r.device_from = cpu_to_be32(dev); |
938 | r.device_to = cpu_to_be32(disk_devt(rq->rq_disk)); | 943 | r.device_to = cpu_to_be32(disk_devt(rq->rq_disk)); |
939 | r.sector_from = cpu_to_be64(from); | 944 | r.sector_from = cpu_to_be64(from); |
940 | 945 | ||
941 | __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), | 946 | __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), |
942 | rq_data_dir(rq), BLK_TA_REMAP, !!rq->errors, | 947 | rq_data_dir(rq), BLK_TA_REMAP, !!rq->errors, |
943 | sizeof(r), &r); | 948 | sizeof(r), &r); |
944 | } | 949 | } |
945 | 950 | ||
946 | /** | 951 | /** |
947 | * blk_add_driver_data - Add binary message with driver-specific data | 952 | * blk_add_driver_data - Add binary message with driver-specific data |
948 | * @q: queue the io is for | 953 | * @q: queue the io is for |
949 | * @rq: io request | 954 | * @rq: io request |
950 | * @data: driver-specific data | 955 | * @data: driver-specific data |
951 | * @len: length of driver-specific data | 956 | * @len: length of driver-specific data |
952 | * | 957 | * |
953 | * Description: | 958 | * Description: |
954 | * Some drivers might want to write driver-specific data per request. | 959 | * Some drivers might want to write driver-specific data per request. |
955 | * | 960 | * |
956 | **/ | 961 | **/ |
957 | void blk_add_driver_data(struct request_queue *q, | 962 | void blk_add_driver_data(struct request_queue *q, |
958 | struct request *rq, | 963 | struct request *rq, |
959 | void *data, size_t len) | 964 | void *data, size_t len) |
960 | { | 965 | { |
961 | struct blk_trace *bt = q->blk_trace; | 966 | struct blk_trace *bt = q->blk_trace; |
962 | 967 | ||
963 | if (likely(!bt)) | 968 | if (likely(!bt)) |
964 | return; | 969 | return; |
965 | 970 | ||
966 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC) | 971 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC) |
967 | __blk_add_trace(bt, 0, blk_rq_bytes(rq), 0, | 972 | __blk_add_trace(bt, 0, blk_rq_bytes(rq), 0, |
968 | BLK_TA_DRV_DATA, rq->errors, len, data); | 973 | BLK_TA_DRV_DATA, rq->errors, len, data); |
969 | else | 974 | else |
970 | __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), 0, | 975 | __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), 0, |
971 | BLK_TA_DRV_DATA, rq->errors, len, data); | 976 | BLK_TA_DRV_DATA, rq->errors, len, data); |
972 | } | 977 | } |
973 | EXPORT_SYMBOL_GPL(blk_add_driver_data); | 978 | EXPORT_SYMBOL_GPL(blk_add_driver_data); |
974 | 979 | ||
975 | static void blk_register_tracepoints(void) | 980 | static void blk_register_tracepoints(void) |
976 | { | 981 | { |
977 | int ret; | 982 | int ret; |
978 | 983 | ||
979 | ret = register_trace_block_rq_abort(blk_add_trace_rq_abort, NULL); | 984 | ret = register_trace_block_rq_abort(blk_add_trace_rq_abort, NULL); |
980 | WARN_ON(ret); | 985 | WARN_ON(ret); |
981 | ret = register_trace_block_rq_insert(blk_add_trace_rq_insert, NULL); | 986 | ret = register_trace_block_rq_insert(blk_add_trace_rq_insert, NULL); |
982 | WARN_ON(ret); | 987 | WARN_ON(ret); |
983 | ret = register_trace_block_rq_issue(blk_add_trace_rq_issue, NULL); | 988 | ret = register_trace_block_rq_issue(blk_add_trace_rq_issue, NULL); |
984 | WARN_ON(ret); | 989 | WARN_ON(ret); |
985 | ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL); | 990 | ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL); |
986 | WARN_ON(ret); | 991 | WARN_ON(ret); |
987 | ret = register_trace_block_rq_complete(blk_add_trace_rq_complete, NULL); | 992 | ret = register_trace_block_rq_complete(blk_add_trace_rq_complete, NULL); |
988 | WARN_ON(ret); | 993 | WARN_ON(ret); |
989 | ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL); | 994 | ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL); |
990 | WARN_ON(ret); | 995 | WARN_ON(ret); |
991 | ret = register_trace_block_bio_complete(blk_add_trace_bio_complete, NULL); | 996 | ret = register_trace_block_bio_complete(blk_add_trace_bio_complete, NULL); |
992 | WARN_ON(ret); | 997 | WARN_ON(ret); |
993 | ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge, NULL); | 998 | ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge, NULL); |
994 | WARN_ON(ret); | 999 | WARN_ON(ret); |
995 | ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL); | 1000 | ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL); |
996 | WARN_ON(ret); | 1001 | WARN_ON(ret); |
997 | ret = register_trace_block_bio_queue(blk_add_trace_bio_queue, NULL); | 1002 | ret = register_trace_block_bio_queue(blk_add_trace_bio_queue, NULL); |
998 | WARN_ON(ret); | 1003 | WARN_ON(ret); |
999 | ret = register_trace_block_getrq(blk_add_trace_getrq, NULL); | 1004 | ret = register_trace_block_getrq(blk_add_trace_getrq, NULL); |
1000 | WARN_ON(ret); | 1005 | WARN_ON(ret); |
1001 | ret = register_trace_block_sleeprq(blk_add_trace_sleeprq, NULL); | 1006 | ret = register_trace_block_sleeprq(blk_add_trace_sleeprq, NULL); |
1002 | WARN_ON(ret); | 1007 | WARN_ON(ret); |
1003 | ret = register_trace_block_plug(blk_add_trace_plug, NULL); | 1008 | ret = register_trace_block_plug(blk_add_trace_plug, NULL); |
1004 | WARN_ON(ret); | 1009 | WARN_ON(ret); |
1005 | ret = register_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); | 1010 | ret = register_trace_block_unplug(blk_add_trace_unplug, NULL); |
1006 | WARN_ON(ret); | 1011 | WARN_ON(ret); |
1007 | ret = register_trace_block_split(blk_add_trace_split, NULL); | 1012 | ret = register_trace_block_split(blk_add_trace_split, NULL); |
1008 | WARN_ON(ret); | 1013 | WARN_ON(ret); |
1009 | ret = register_trace_block_bio_remap(blk_add_trace_bio_remap, NULL); | 1014 | ret = register_trace_block_bio_remap(blk_add_trace_bio_remap, NULL); |
1010 | WARN_ON(ret); | 1015 | WARN_ON(ret); |
1011 | ret = register_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); | 1016 | ret = register_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); |
1012 | WARN_ON(ret); | 1017 | WARN_ON(ret); |
1013 | } | 1018 | } |
1014 | 1019 | ||
1015 | static void blk_unregister_tracepoints(void) | 1020 | static void blk_unregister_tracepoints(void) |
1016 | { | 1021 | { |
1017 | unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); | 1022 | unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); |
1018 | unregister_trace_block_bio_remap(blk_add_trace_bio_remap, NULL); | 1023 | unregister_trace_block_bio_remap(blk_add_trace_bio_remap, NULL); |
1019 | unregister_trace_block_split(blk_add_trace_split, NULL); | 1024 | unregister_trace_block_split(blk_add_trace_split, NULL); |
1020 | unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); | 1025 | unregister_trace_block_unplug(blk_add_trace_unplug, NULL); |
1021 | unregister_trace_block_plug(blk_add_trace_plug, NULL); | 1026 | unregister_trace_block_plug(blk_add_trace_plug, NULL); |
1022 | unregister_trace_block_sleeprq(blk_add_trace_sleeprq, NULL); | 1027 | unregister_trace_block_sleeprq(blk_add_trace_sleeprq, NULL); |
1023 | unregister_trace_block_getrq(blk_add_trace_getrq, NULL); | 1028 | unregister_trace_block_getrq(blk_add_trace_getrq, NULL); |
1024 | unregister_trace_block_bio_queue(blk_add_trace_bio_queue, NULL); | 1029 | unregister_trace_block_bio_queue(blk_add_trace_bio_queue, NULL); |
1025 | unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL); | 1030 | unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL); |
1026 | unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge, NULL); | 1031 | unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge, NULL); |
1027 | unregister_trace_block_bio_complete(blk_add_trace_bio_complete, NULL); | 1032 | unregister_trace_block_bio_complete(blk_add_trace_bio_complete, NULL); |
1028 | unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL); | 1033 | unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL); |
1029 | unregister_trace_block_rq_complete(blk_add_trace_rq_complete, NULL); | 1034 | unregister_trace_block_rq_complete(blk_add_trace_rq_complete, NULL); |
1030 | unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL); | 1035 | unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL); |
1031 | unregister_trace_block_rq_issue(blk_add_trace_rq_issue, NULL); | 1036 | unregister_trace_block_rq_issue(blk_add_trace_rq_issue, NULL); |
1032 | unregister_trace_block_rq_insert(blk_add_trace_rq_insert, NULL); | 1037 | unregister_trace_block_rq_insert(blk_add_trace_rq_insert, NULL); |
1033 | unregister_trace_block_rq_abort(blk_add_trace_rq_abort, NULL); | 1038 | unregister_trace_block_rq_abort(blk_add_trace_rq_abort, NULL); |
1034 | 1039 | ||
1035 | tracepoint_synchronize_unregister(); | 1040 | tracepoint_synchronize_unregister(); |
1036 | } | 1041 | } |
1037 | 1042 | ||
1038 | /* | 1043 | /* |
1039 | * struct blk_io_tracer formatting routines | 1044 | * struct blk_io_tracer formatting routines |
1040 | */ | 1045 | */ |
1041 | 1046 | ||
1042 | static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) | 1047 | static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) |
1043 | { | 1048 | { |
1044 | int i = 0; | 1049 | int i = 0; |
1045 | int tc = t->action >> BLK_TC_SHIFT; | 1050 | int tc = t->action >> BLK_TC_SHIFT; |
1046 | 1051 | ||
1047 | if (t->action == BLK_TN_MESSAGE) { | 1052 | if (t->action == BLK_TN_MESSAGE) { |
1048 | rwbs[i++] = 'N'; | 1053 | rwbs[i++] = 'N'; |
1049 | goto out; | 1054 | goto out; |
1050 | } | 1055 | } |
1051 | 1056 | ||
1052 | if (tc & BLK_TC_DISCARD) | 1057 | if (tc & BLK_TC_DISCARD) |
1053 | rwbs[i++] = 'D'; | 1058 | rwbs[i++] = 'D'; |
1054 | else if (tc & BLK_TC_WRITE) | 1059 | else if (tc & BLK_TC_WRITE) |
1055 | rwbs[i++] = 'W'; | 1060 | rwbs[i++] = 'W'; |
1056 | else if (t->bytes) | 1061 | else if (t->bytes) |
1057 | rwbs[i++] = 'R'; | 1062 | rwbs[i++] = 'R'; |
1058 | else | 1063 | else |
1059 | rwbs[i++] = 'N'; | 1064 | rwbs[i++] = 'N'; |
1060 | 1065 | ||
1061 | if (tc & BLK_TC_AHEAD) | 1066 | if (tc & BLK_TC_AHEAD) |
1062 | rwbs[i++] = 'A'; | 1067 | rwbs[i++] = 'A'; |
1063 | if (tc & BLK_TC_BARRIER) | 1068 | if (tc & BLK_TC_BARRIER) |
1064 | rwbs[i++] = 'B'; | 1069 | rwbs[i++] = 'B'; |
1065 | if (tc & BLK_TC_SYNC) | 1070 | if (tc & BLK_TC_SYNC) |
1066 | rwbs[i++] = 'S'; | 1071 | rwbs[i++] = 'S'; |
1067 | if (tc & BLK_TC_META) | 1072 | if (tc & BLK_TC_META) |
1068 | rwbs[i++] = 'M'; | 1073 | rwbs[i++] = 'M'; |
1069 | out: | 1074 | out: |
1070 | rwbs[i] = '\0'; | 1075 | rwbs[i] = '\0'; |
1071 | } | 1076 | } |
1072 | 1077 | ||
1073 | static inline | 1078 | static inline |
1074 | const struct blk_io_trace *te_blk_io_trace(const struct trace_entry *ent) | 1079 | const struct blk_io_trace *te_blk_io_trace(const struct trace_entry *ent) |
1075 | { | 1080 | { |
1076 | return (const struct blk_io_trace *)ent; | 1081 | return (const struct blk_io_trace *)ent; |
1077 | } | 1082 | } |
1078 | 1083 | ||
1079 | static inline const void *pdu_start(const struct trace_entry *ent) | 1084 | static inline const void *pdu_start(const struct trace_entry *ent) |
1080 | { | 1085 | { |
1081 | return te_blk_io_trace(ent) + 1; | 1086 | return te_blk_io_trace(ent) + 1; |
1082 | } | 1087 | } |
1083 | 1088 | ||
1084 | static inline u32 t_action(const struct trace_entry *ent) | 1089 | static inline u32 t_action(const struct trace_entry *ent) |
1085 | { | 1090 | { |
1086 | return te_blk_io_trace(ent)->action; | 1091 | return te_blk_io_trace(ent)->action; |
1087 | } | 1092 | } |
1088 | 1093 | ||
1089 | static inline u32 t_bytes(const struct trace_entry *ent) | 1094 | static inline u32 t_bytes(const struct trace_entry *ent) |
1090 | { | 1095 | { |
1091 | return te_blk_io_trace(ent)->bytes; | 1096 | return te_blk_io_trace(ent)->bytes; |
1092 | } | 1097 | } |
1093 | 1098 | ||
1094 | static inline u32 t_sec(const struct trace_entry *ent) | 1099 | static inline u32 t_sec(const struct trace_entry *ent) |
1095 | { | 1100 | { |
1096 | return te_blk_io_trace(ent)->bytes >> 9; | 1101 | return te_blk_io_trace(ent)->bytes >> 9; |
1097 | } | 1102 | } |
1098 | 1103 | ||
1099 | static inline unsigned long long t_sector(const struct trace_entry *ent) | 1104 | static inline unsigned long long t_sector(const struct trace_entry *ent) |
1100 | { | 1105 | { |
1101 | return te_blk_io_trace(ent)->sector; | 1106 | return te_blk_io_trace(ent)->sector; |
1102 | } | 1107 | } |
1103 | 1108 | ||
1104 | static inline __u16 t_error(const struct trace_entry *ent) | 1109 | static inline __u16 t_error(const struct trace_entry *ent) |
1105 | { | 1110 | { |
1106 | return te_blk_io_trace(ent)->error; | 1111 | return te_blk_io_trace(ent)->error; |
1107 | } | 1112 | } |
1108 | 1113 | ||
1109 | static __u64 get_pdu_int(const struct trace_entry *ent) | 1114 | static __u64 get_pdu_int(const struct trace_entry *ent) |
1110 | { | 1115 | { |
1111 | const __u64 *val = pdu_start(ent); | 1116 | const __u64 *val = pdu_start(ent); |
1112 | return be64_to_cpu(*val); | 1117 | return be64_to_cpu(*val); |
1113 | } | 1118 | } |
1114 | 1119 | ||
1115 | static void get_pdu_remap(const struct trace_entry *ent, | 1120 | static void get_pdu_remap(const struct trace_entry *ent, |
1116 | struct blk_io_trace_remap *r) | 1121 | struct blk_io_trace_remap *r) |
1117 | { | 1122 | { |
1118 | const struct blk_io_trace_remap *__r = pdu_start(ent); | 1123 | const struct blk_io_trace_remap *__r = pdu_start(ent); |
1119 | __u64 sector_from = __r->sector_from; | 1124 | __u64 sector_from = __r->sector_from; |
1120 | 1125 | ||
1121 | r->device_from = be32_to_cpu(__r->device_from); | 1126 | r->device_from = be32_to_cpu(__r->device_from); |
1122 | r->device_to = be32_to_cpu(__r->device_to); | 1127 | r->device_to = be32_to_cpu(__r->device_to); |
1123 | r->sector_from = be64_to_cpu(sector_from); | 1128 | r->sector_from = be64_to_cpu(sector_from); |
1124 | } | 1129 | } |
1125 | 1130 | ||
1126 | typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act); | 1131 | typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act); |
1127 | 1132 | ||
1128 | static int blk_log_action_classic(struct trace_iterator *iter, const char *act) | 1133 | static int blk_log_action_classic(struct trace_iterator *iter, const char *act) |
1129 | { | 1134 | { |
1130 | char rwbs[6]; | 1135 | char rwbs[6]; |
1131 | unsigned long long ts = iter->ts; | 1136 | unsigned long long ts = iter->ts; |
1132 | unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC); | 1137 | unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC); |
1133 | unsigned secs = (unsigned long)ts; | 1138 | unsigned secs = (unsigned long)ts; |
1134 | const struct blk_io_trace *t = te_blk_io_trace(iter->ent); | 1139 | const struct blk_io_trace *t = te_blk_io_trace(iter->ent); |
1135 | 1140 | ||
1136 | fill_rwbs(rwbs, t); | 1141 | fill_rwbs(rwbs, t); |
1137 | 1142 | ||
1138 | return trace_seq_printf(&iter->seq, | 1143 | return trace_seq_printf(&iter->seq, |
1139 | "%3d,%-3d %2d %5d.%09lu %5u %2s %3s ", | 1144 | "%3d,%-3d %2d %5d.%09lu %5u %2s %3s ", |
1140 | MAJOR(t->device), MINOR(t->device), iter->cpu, | 1145 | MAJOR(t->device), MINOR(t->device), iter->cpu, |
1141 | secs, nsec_rem, iter->ent->pid, act, rwbs); | 1146 | secs, nsec_rem, iter->ent->pid, act, rwbs); |
1142 | } | 1147 | } |
1143 | 1148 | ||
1144 | static int blk_log_action(struct trace_iterator *iter, const char *act) | 1149 | static int blk_log_action(struct trace_iterator *iter, const char *act) |
1145 | { | 1150 | { |
1146 | char rwbs[6]; | 1151 | char rwbs[6]; |
1147 | const struct blk_io_trace *t = te_blk_io_trace(iter->ent); | 1152 | const struct blk_io_trace *t = te_blk_io_trace(iter->ent); |
1148 | 1153 | ||
1149 | fill_rwbs(rwbs, t); | 1154 | fill_rwbs(rwbs, t); |
1150 | return trace_seq_printf(&iter->seq, "%3d,%-3d %2s %3s ", | 1155 | return trace_seq_printf(&iter->seq, "%3d,%-3d %2s %3s ", |
1151 | MAJOR(t->device), MINOR(t->device), act, rwbs); | 1156 | MAJOR(t->device), MINOR(t->device), act, rwbs); |
1152 | } | 1157 | } |
1153 | 1158 | ||
1154 | static int blk_log_dump_pdu(struct trace_seq *s, const struct trace_entry *ent) | 1159 | static int blk_log_dump_pdu(struct trace_seq *s, const struct trace_entry *ent) |
1155 | { | 1160 | { |
1156 | const unsigned char *pdu_buf; | 1161 | const unsigned char *pdu_buf; |
1157 | int pdu_len; | 1162 | int pdu_len; |
1158 | int i, end, ret; | 1163 | int i, end, ret; |
1159 | 1164 | ||
1160 | pdu_buf = pdu_start(ent); | 1165 | pdu_buf = pdu_start(ent); |
1161 | pdu_len = te_blk_io_trace(ent)->pdu_len; | 1166 | pdu_len = te_blk_io_trace(ent)->pdu_len; |
1162 | 1167 | ||
1163 | if (!pdu_len) | 1168 | if (!pdu_len) |
1164 | return 1; | 1169 | return 1; |
1165 | 1170 | ||
1166 | /* find the last zero that needs to be printed */ | 1171 | /* find the last zero that needs to be printed */ |
1167 | for (end = pdu_len - 1; end >= 0; end--) | 1172 | for (end = pdu_len - 1; end >= 0; end--) |
1168 | if (pdu_buf[end]) | 1173 | if (pdu_buf[end]) |
1169 | break; | 1174 | break; |
1170 | end++; | 1175 | end++; |
1171 | 1176 | ||
1172 | if (!trace_seq_putc(s, '(')) | 1177 | if (!trace_seq_putc(s, '(')) |
1173 | return 0; | 1178 | return 0; |
1174 | 1179 | ||
1175 | for (i = 0; i < pdu_len; i++) { | 1180 | for (i = 0; i < pdu_len; i++) { |
1176 | 1181 | ||
1177 | ret = trace_seq_printf(s, "%s%02x", | 1182 | ret = trace_seq_printf(s, "%s%02x", |
1178 | i == 0 ? "" : " ", pdu_buf[i]); | 1183 | i == 0 ? "" : " ", pdu_buf[i]); |
1179 | if (!ret) | 1184 | if (!ret) |
1180 | return ret; | 1185 | return ret; |
1181 | 1186 | ||
1182 | /* | 1187 | /* |
1183 | * stop when the rest is just zeroes and indicate so | 1188 | * stop when the rest is just zeroes and indicate so |
1184 | * with a ".." appended | 1189 | * with a ".." appended |
1185 | */ | 1190 | */ |
1186 | if (i == end && end != pdu_len - 1) | 1191 | if (i == end && end != pdu_len - 1) |
1187 | return trace_seq_puts(s, " ..) "); | 1192 | return trace_seq_puts(s, " ..) "); |
1188 | } | 1193 | } |
1189 | 1194 | ||
1190 | return trace_seq_puts(s, ") "); | 1195 | return trace_seq_puts(s, ") "); |
1191 | } | 1196 | } |
1192 | 1197 | ||
1193 | static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent) | 1198 | static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent) |
1194 | { | 1199 | { |
1195 | char cmd[TASK_COMM_LEN]; | 1200 | char cmd[TASK_COMM_LEN]; |
1196 | 1201 | ||
1197 | trace_find_cmdline(ent->pid, cmd); | 1202 | trace_find_cmdline(ent->pid, cmd); |
1198 | 1203 | ||
1199 | if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) { | 1204 | if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) { |
1200 | int ret; | 1205 | int ret; |
1201 | 1206 | ||
1202 | ret = trace_seq_printf(s, "%u ", t_bytes(ent)); | 1207 | ret = trace_seq_printf(s, "%u ", t_bytes(ent)); |
1203 | if (!ret) | 1208 | if (!ret) |
1204 | return 0; | 1209 | return 0; |
1205 | ret = blk_log_dump_pdu(s, ent); | 1210 | ret = blk_log_dump_pdu(s, ent); |
1206 | if (!ret) | 1211 | if (!ret) |
1207 | return 0; | 1212 | return 0; |
1208 | return trace_seq_printf(s, "[%s]\n", cmd); | 1213 | return trace_seq_printf(s, "[%s]\n", cmd); |
1209 | } else { | 1214 | } else { |
1210 | if (t_sec(ent)) | 1215 | if (t_sec(ent)) |
1211 | return trace_seq_printf(s, "%llu + %u [%s]\n", | 1216 | return trace_seq_printf(s, "%llu + %u [%s]\n", |
1212 | t_sector(ent), t_sec(ent), cmd); | 1217 | t_sector(ent), t_sec(ent), cmd); |
1213 | return trace_seq_printf(s, "[%s]\n", cmd); | 1218 | return trace_seq_printf(s, "[%s]\n", cmd); |
1214 | } | 1219 | } |
1215 | } | 1220 | } |
1216 | 1221 | ||
1217 | static int blk_log_with_error(struct trace_seq *s, | 1222 | static int blk_log_with_error(struct trace_seq *s, |
1218 | const struct trace_entry *ent) | 1223 | const struct trace_entry *ent) |
1219 | { | 1224 | { |
1220 | if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) { | 1225 | if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) { |
1221 | int ret; | 1226 | int ret; |
1222 | 1227 | ||
1223 | ret = blk_log_dump_pdu(s, ent); | 1228 | ret = blk_log_dump_pdu(s, ent); |
1224 | if (ret) | 1229 | if (ret) |
1225 | return trace_seq_printf(s, "[%d]\n", t_error(ent)); | 1230 | return trace_seq_printf(s, "[%d]\n", t_error(ent)); |
1226 | return 0; | 1231 | return 0; |
1227 | } else { | 1232 | } else { |
1228 | if (t_sec(ent)) | 1233 | if (t_sec(ent)) |
1229 | return trace_seq_printf(s, "%llu + %u [%d]\n", | 1234 | return trace_seq_printf(s, "%llu + %u [%d]\n", |
1230 | t_sector(ent), | 1235 | t_sector(ent), |
1231 | t_sec(ent), t_error(ent)); | 1236 | t_sec(ent), t_error(ent)); |
1232 | return trace_seq_printf(s, "%llu [%d]\n", | 1237 | return trace_seq_printf(s, "%llu [%d]\n", |
1233 | t_sector(ent), t_error(ent)); | 1238 | t_sector(ent), t_error(ent)); |
1234 | } | 1239 | } |
1235 | } | 1240 | } |
1236 | 1241 | ||
1237 | static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent) | 1242 | static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent) |
1238 | { | 1243 | { |
1239 | struct blk_io_trace_remap r = { .device_from = 0, }; | 1244 | struct blk_io_trace_remap r = { .device_from = 0, }; |
1240 | 1245 | ||
1241 | get_pdu_remap(ent, &r); | 1246 | get_pdu_remap(ent, &r); |
1242 | return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n", | 1247 | return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n", |
1243 | t_sector(ent), t_sec(ent), | 1248 | t_sector(ent), t_sec(ent), |
1244 | MAJOR(r.device_from), MINOR(r.device_from), | 1249 | MAJOR(r.device_from), MINOR(r.device_from), |
1245 | (unsigned long long)r.sector_from); | 1250 | (unsigned long long)r.sector_from); |
1246 | } | 1251 | } |
1247 | 1252 | ||
1248 | static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent) | 1253 | static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent) |
1249 | { | 1254 | { |
1250 | char cmd[TASK_COMM_LEN]; | 1255 | char cmd[TASK_COMM_LEN]; |
1251 | 1256 | ||
1252 | trace_find_cmdline(ent->pid, cmd); | 1257 | trace_find_cmdline(ent->pid, cmd); |
1253 | 1258 | ||
1254 | return trace_seq_printf(s, "[%s]\n", cmd); | 1259 | return trace_seq_printf(s, "[%s]\n", cmd); |
1255 | } | 1260 | } |
1256 | 1261 | ||
1257 | static int blk_log_unplug(struct trace_seq *s, const struct trace_entry *ent) | 1262 | static int blk_log_unplug(struct trace_seq *s, const struct trace_entry *ent) |
1258 | { | 1263 | { |
1259 | char cmd[TASK_COMM_LEN]; | 1264 | char cmd[TASK_COMM_LEN]; |
1260 | 1265 | ||
1261 | trace_find_cmdline(ent->pid, cmd); | 1266 | trace_find_cmdline(ent->pid, cmd); |
1262 | 1267 | ||
1263 | return trace_seq_printf(s, "[%s] %llu\n", cmd, get_pdu_int(ent)); | 1268 | return trace_seq_printf(s, "[%s] %llu\n", cmd, get_pdu_int(ent)); |
1264 | } | 1269 | } |
1265 | 1270 | ||
1266 | static int blk_log_split(struct trace_seq *s, const struct trace_entry *ent) | 1271 | static int blk_log_split(struct trace_seq *s, const struct trace_entry *ent) |
1267 | { | 1272 | { |
1268 | char cmd[TASK_COMM_LEN]; | 1273 | char cmd[TASK_COMM_LEN]; |
1269 | 1274 | ||
1270 | trace_find_cmdline(ent->pid, cmd); | 1275 | trace_find_cmdline(ent->pid, cmd); |
1271 | 1276 | ||
1272 | return trace_seq_printf(s, "%llu / %llu [%s]\n", t_sector(ent), | 1277 | return trace_seq_printf(s, "%llu / %llu [%s]\n", t_sector(ent), |
1273 | get_pdu_int(ent), cmd); | 1278 | get_pdu_int(ent), cmd); |
1274 | } | 1279 | } |
1275 | 1280 | ||
1276 | static int blk_log_msg(struct trace_seq *s, const struct trace_entry *ent) | 1281 | static int blk_log_msg(struct trace_seq *s, const struct trace_entry *ent) |
1277 | { | 1282 | { |
1278 | int ret; | 1283 | int ret; |
1279 | const struct blk_io_trace *t = te_blk_io_trace(ent); | 1284 | const struct blk_io_trace *t = te_blk_io_trace(ent); |
1280 | 1285 | ||
1281 | ret = trace_seq_putmem(s, t + 1, t->pdu_len); | 1286 | ret = trace_seq_putmem(s, t + 1, t->pdu_len); |
1282 | if (ret) | 1287 | if (ret) |
1283 | return trace_seq_putc(s, '\n'); | 1288 | return trace_seq_putc(s, '\n'); |
1284 | return ret; | 1289 | return ret; |
1285 | } | 1290 | } |
1286 | 1291 | ||
1287 | /* | 1292 | /* |
1288 | * struct tracer operations | 1293 | * struct tracer operations |
1289 | */ | 1294 | */ |
1290 | 1295 | ||
1291 | static void blk_tracer_print_header(struct seq_file *m) | 1296 | static void blk_tracer_print_header(struct seq_file *m) |
1292 | { | 1297 | { |
1293 | if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) | 1298 | if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) |
1294 | return; | 1299 | return; |
1295 | seq_puts(m, "# DEV CPU TIMESTAMP PID ACT FLG\n" | 1300 | seq_puts(m, "# DEV CPU TIMESTAMP PID ACT FLG\n" |
1296 | "# | | | | | |\n"); | 1301 | "# | | | | | |\n"); |
1297 | } | 1302 | } |
1298 | 1303 | ||
1299 | static void blk_tracer_start(struct trace_array *tr) | 1304 | static void blk_tracer_start(struct trace_array *tr) |
1300 | { | 1305 | { |
1301 | blk_tracer_enabled = true; | 1306 | blk_tracer_enabled = true; |
1302 | } | 1307 | } |
1303 | 1308 | ||
1304 | static int blk_tracer_init(struct trace_array *tr) | 1309 | static int blk_tracer_init(struct trace_array *tr) |
1305 | { | 1310 | { |
1306 | blk_tr = tr; | 1311 | blk_tr = tr; |
1307 | blk_tracer_start(tr); | 1312 | blk_tracer_start(tr); |
1308 | return 0; | 1313 | return 0; |
1309 | } | 1314 | } |
1310 | 1315 | ||
1311 | static void blk_tracer_stop(struct trace_array *tr) | 1316 | static void blk_tracer_stop(struct trace_array *tr) |
1312 | { | 1317 | { |
1313 | blk_tracer_enabled = false; | 1318 | blk_tracer_enabled = false; |
1314 | } | 1319 | } |
1315 | 1320 | ||
1316 | static void blk_tracer_reset(struct trace_array *tr) | 1321 | static void blk_tracer_reset(struct trace_array *tr) |
1317 | { | 1322 | { |
1318 | blk_tracer_stop(tr); | 1323 | blk_tracer_stop(tr); |
1319 | } | 1324 | } |
1320 | 1325 | ||
1321 | static const struct { | 1326 | static const struct { |
1322 | const char *act[2]; | 1327 | const char *act[2]; |
1323 | int (*print)(struct trace_seq *s, const struct trace_entry *ent); | 1328 | int (*print)(struct trace_seq *s, const struct trace_entry *ent); |
1324 | } what2act[] = { | 1329 | } what2act[] = { |
1325 | [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic }, | 1330 | [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic }, |
1326 | [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic }, | 1331 | [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic }, |
1327 | [__BLK_TA_FRONTMERGE] = {{ "F", "frontmerge" }, blk_log_generic }, | 1332 | [__BLK_TA_FRONTMERGE] = {{ "F", "frontmerge" }, blk_log_generic }, |
1328 | [__BLK_TA_GETRQ] = {{ "G", "getrq" }, blk_log_generic }, | 1333 | [__BLK_TA_GETRQ] = {{ "G", "getrq" }, blk_log_generic }, |
1329 | [__BLK_TA_SLEEPRQ] = {{ "S", "sleeprq" }, blk_log_generic }, | 1334 | [__BLK_TA_SLEEPRQ] = {{ "S", "sleeprq" }, blk_log_generic }, |
1330 | [__BLK_TA_REQUEUE] = {{ "R", "requeue" }, blk_log_with_error }, | 1335 | [__BLK_TA_REQUEUE] = {{ "R", "requeue" }, blk_log_with_error }, |
1331 | [__BLK_TA_ISSUE] = {{ "D", "issue" }, blk_log_generic }, | 1336 | [__BLK_TA_ISSUE] = {{ "D", "issue" }, blk_log_generic }, |
1332 | [__BLK_TA_COMPLETE] = {{ "C", "complete" }, blk_log_with_error }, | 1337 | [__BLK_TA_COMPLETE] = {{ "C", "complete" }, blk_log_with_error }, |
1333 | [__BLK_TA_PLUG] = {{ "P", "plug" }, blk_log_plug }, | 1338 | [__BLK_TA_PLUG] = {{ "P", "plug" }, blk_log_plug }, |
1334 | [__BLK_TA_UNPLUG_IO] = {{ "U", "unplug_io" }, blk_log_unplug }, | 1339 | [__BLK_TA_UNPLUG_IO] = {{ "U", "unplug_io" }, blk_log_unplug }, |
| | 1340 | [__BLK_TA_UNPLUG_TIMER] = {{ "UT", "unplug_timer" }, blk_log_unplug }, |
1335 | [__BLK_TA_INSERT] = {{ "I", "insert" }, blk_log_generic }, | 1341 | [__BLK_TA_INSERT] = {{ "I", "insert" }, blk_log_generic }, |
1336 | [__BLK_TA_SPLIT] = {{ "X", "split" }, blk_log_split }, | 1342 | [__BLK_TA_SPLIT] = {{ "X", "split" }, blk_log_split }, |
1337 | [__BLK_TA_BOUNCE] = {{ "B", "bounce" }, blk_log_generic }, | 1343 | [__BLK_TA_BOUNCE] = {{ "B", "bounce" }, blk_log_generic }, |
1338 | [__BLK_TA_REMAP] = {{ "A", "remap" }, blk_log_remap }, | 1344 | [__BLK_TA_REMAP] = {{ "A", "remap" }, blk_log_remap }, |
1339 | }; | 1345 | }; |
1340 | 1346 | ||
1341 | static enum print_line_t print_one_line(struct trace_iterator *iter, | 1347 | static enum print_line_t print_one_line(struct trace_iterator *iter, |
1342 | bool classic) | 1348 | bool classic) |
1343 | { | 1349 | { |
1344 | struct trace_seq *s = &iter->seq; | 1350 | struct trace_seq *s = &iter->seq; |
1345 | const struct blk_io_trace *t; | 1351 | const struct blk_io_trace *t; |
1346 | u16 what; | 1352 | u16 what; |
1347 | int ret; | 1353 | int ret; |
1348 | bool long_act; | 1354 | bool long_act; |
1349 | blk_log_action_t *log_action; | 1355 | blk_log_action_t *log_action; |
1350 | 1356 | ||
1351 | t = te_blk_io_trace(iter->ent); | 1357 | t = te_blk_io_trace(iter->ent); |
1352 | what = t->action & ((1 << BLK_TC_SHIFT) - 1); | 1358 | what = t->action & ((1 << BLK_TC_SHIFT) - 1); |
1353 | long_act = !!(trace_flags & TRACE_ITER_VERBOSE); | 1359 | long_act = !!(trace_flags & TRACE_ITER_VERBOSE); |
1354 | log_action = classic ? &blk_log_action_classic : &blk_log_action; | 1360 | log_action = classic ? &blk_log_action_classic : &blk_log_action; |
1355 | 1361 | ||
1356 | if (t->action == BLK_TN_MESSAGE) { | 1362 | if (t->action == BLK_TN_MESSAGE) { |
1357 | ret = log_action(iter, long_act ? "message" : "m"); | 1363 | ret = log_action(iter, long_act ? "message" : "m"); |
1358 | if (ret) | 1364 | if (ret) |
1359 | ret = blk_log_msg(s, iter->ent); | 1365 | ret = blk_log_msg(s, iter->ent); |
1360 | goto out; | 1366 | goto out; |
1361 | } | 1367 | } |
1362 | 1368 | ||
1363 | if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act))) | 1369 | if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act))) |
1364 | ret = trace_seq_printf(s, "Unknown action %x\n", what); | 1370 | ret = trace_seq_printf(s, "Unknown action %x\n", what); |
1365 | else { | 1371 | else { |
1366 | ret = log_action(iter, what2act[what].act[long_act]); | 1372 | ret = log_action(iter, what2act[what].act[long_act]); |
1367 | if (ret) | 1373 | if (ret) |
1368 | ret = what2act[what].print(s, iter->ent); | 1374 | ret = what2act[what].print(s, iter->ent); |
1369 | } | 1375 | } |
1370 | out: | 1376 | out: |
1371 | return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; | 1377 | return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; |
1372 | } | 1378 | } |
1373 | 1379 | ||
1374 | static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, | 1380 | static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, |
1375 | int flags, struct trace_event *event) | 1381 | int flags, struct trace_event *event) |
1376 | { | 1382 | { |
1377 | return print_one_line(iter, false); | 1383 | return print_one_line(iter, false); |
1378 | } | 1384 | } |
1379 | 1385 | ||
1380 | static int blk_trace_synthesize_old_trace(struct trace_iterator *iter) | 1386 | static int blk_trace_synthesize_old_trace(struct trace_iterator *iter) |
1381 | { | 1387 | { |
1382 | struct trace_seq *s = &iter->seq; | 1388 | struct trace_seq *s = &iter->seq; |
1383 | struct blk_io_trace *t = (struct blk_io_trace *)iter->ent; | 1389 | struct blk_io_trace *t = (struct blk_io_trace *)iter->ent; |
1384 | const int offset = offsetof(struct blk_io_trace, sector); | 1390 | const int offset = offsetof(struct blk_io_trace, sector); |
1385 | struct blk_io_trace old = { | 1391 | struct blk_io_trace old = { |
1386 | .magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION, | 1392 | .magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION, |
1387 | .time = iter->ts, | 1393 | .time = iter->ts, |
1388 | }; | 1394 | }; |
1389 | 1395 | ||
1390 | if (!trace_seq_putmem(s, &old, offset)) | 1396 | if (!trace_seq_putmem(s, &old, offset)) |
1391 | return 0; | 1397 | return 0; |
1392 | return trace_seq_putmem(s, &t->sector, | 1398 | return trace_seq_putmem(s, &t->sector, |
1393 | sizeof(old) - offset + t->pdu_len); | 1399 | sizeof(old) - offset + t->pdu_len); |
1394 | } | 1400 | } |
1395 | 1401 | ||
1396 | static enum print_line_t | 1402 | static enum print_line_t |
1397 | blk_trace_event_print_binary(struct trace_iterator *iter, int flags, | 1403 | blk_trace_event_print_binary(struct trace_iterator *iter, int flags, |
1398 | struct trace_event *event) | 1404 | struct trace_event *event) |
1399 | { | 1405 | { |
1400 | return blk_trace_synthesize_old_trace(iter) ? | 1406 | return blk_trace_synthesize_old_trace(iter) ? |
1401 | TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; | 1407 | TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; |
1402 | } | 1408 | } |
1403 | 1409 | ||
1404 | static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter) | 1410 | static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter) |
1405 | { | 1411 | { |
1406 | if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) | 1412 | if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) |
1407 | return TRACE_TYPE_UNHANDLED; | 1413 | return TRACE_TYPE_UNHANDLED; |
1408 | 1414 | ||
1409 | return print_one_line(iter, true); | 1415 | return print_one_line(iter, true); |
1410 | } | 1416 | } |
1411 | 1417 | ||
1412 | static int blk_tracer_set_flag(u32 old_flags, u32 bit, int set) | 1418 | static int blk_tracer_set_flag(u32 old_flags, u32 bit, int set) |
1413 | { | 1419 | { |
1414 | /* don't output context-info for blk_classic output */ | 1420 | /* don't output context-info for blk_classic output */ |
1415 | if (bit == TRACE_BLK_OPT_CLASSIC) { | 1421 | if (bit == TRACE_BLK_OPT_CLASSIC) { |
1416 | if (set) | 1422 | if (set) |
1417 | trace_flags &= ~TRACE_ITER_CONTEXT_INFO; | 1423 | trace_flags &= ~TRACE_ITER_CONTEXT_INFO; |
1418 | else | 1424 | else |
1419 | trace_flags |= TRACE_ITER_CONTEXT_INFO; | 1425 | trace_flags |= TRACE_ITER_CONTEXT_INFO; |
1420 | } | 1426 | } |
1421 | return 0; | 1427 | return 0; |
1422 | } | 1428 | } |
1423 | 1429 | ||
1424 | static struct tracer blk_tracer __read_mostly = { | 1430 | static struct tracer blk_tracer __read_mostly = { |
1425 | .name = "blk", | 1431 | .name = "blk", |
1426 | .init = blk_tracer_init, | 1432 | .init = blk_tracer_init, |
1427 | .reset = blk_tracer_reset, | 1433 | .reset = blk_tracer_reset, |
1428 | .start = blk_tracer_start, | 1434 | .start = blk_tracer_start, |
1429 | .stop = blk_tracer_stop, | 1435 | .stop = blk_tracer_stop, |
1430 | .print_header = blk_tracer_print_header, | 1436 | .print_header = blk_tracer_print_header, |
1431 | .print_line = blk_tracer_print_line, | 1437 | .print_line = blk_tracer_print_line, |
1432 | .flags = &blk_tracer_flags, | 1438 | .flags = &blk_tracer_flags, |
1433 | .set_flag = blk_tracer_set_flag, | 1439 | .set_flag = blk_tracer_set_flag, |
1434 | }; | 1440 | }; |
1435 | 1441 | ||
1436 | static struct trace_event_functions trace_blk_event_funcs = { | 1442 | static struct trace_event_functions trace_blk_event_funcs = { |
1437 | .trace = blk_trace_event_print, | 1443 | .trace = blk_trace_event_print, |
1438 | .binary = blk_trace_event_print_binary, | 1444 | .binary = blk_trace_event_print_binary, |
1439 | }; | 1445 | }; |
1440 | 1446 | ||
1441 | static struct trace_event trace_blk_event = { | 1447 | static struct trace_event trace_blk_event = { |
1442 | .type = TRACE_BLK, | 1448 | .type = TRACE_BLK, |
1443 | .funcs = &trace_blk_event_funcs, | 1449 | .funcs = &trace_blk_event_funcs, |
1444 | }; | 1450 | }; |
1445 | 1451 | ||
1446 | static int __init init_blk_tracer(void) | 1452 | static int __init init_blk_tracer(void) |
1447 | { | 1453 | { |
1448 | if (!register_ftrace_event(&trace_blk_event)) { | 1454 | if (!register_ftrace_event(&trace_blk_event)) { |
1449 | pr_warning("Warning: could not register block events\n"); | 1455 | pr_warning("Warning: could not register block events\n"); |
1450 | return 1; | 1456 | return 1; |
1451 | } | 1457 | } |
1452 | 1458 | ||
1453 | if (register_tracer(&blk_tracer) != 0) { | 1459 | if (register_tracer(&blk_tracer) != 0) { |
1454 | pr_warning("Warning: could not register the block tracer\n"); | 1460 | pr_warning("Warning: could not register the block tracer\n"); |
1455 | unregister_ftrace_event(&trace_blk_event); | 1461 | unregister_ftrace_event(&trace_blk_event); |
1456 | return 1; | 1462 | return 1; |
1457 | } | 1463 | } |
1458 | 1464 | ||
1459 | return 0; | 1465 | return 0; |
1460 | } | 1466 | } |
1461 | 1467 | ||
1462 | device_initcall(init_blk_tracer); | 1468 | device_initcall(init_blk_tracer); |
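With init_blk_tracer() registered via device_initcall(), the "blk" tracer becomes selectable through the standard ftrace interface. A rough userspace sketch, assuming debugfs is mounted at /sys/kernel/debug and with error handling trimmed:

    #include <stdio.h>

    /* select the "blk" tracer registered above; writing "nop" (or any
     * other tracer name) switches away from it again */
    static int select_blk_tracer(void)
    {
            FILE *f = fopen("/sys/kernel/debug/tracing/current_tracer", "w");

            if (!f)
                    return -1;
            fputs("blk\n", f);
            return fclose(f);
    }

Output from queues that have tracing enabled (see the sysfs attributes below) then appears in the usual trace/trace_pipe files.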
1463 | 1469 | ||
1464 | static int blk_trace_remove_queue(struct request_queue *q) | 1470 | static int blk_trace_remove_queue(struct request_queue *q) |
1465 | { | 1471 | { |
1466 | struct blk_trace *bt; | 1472 | struct blk_trace *bt; |
1467 | 1473 | ||
1468 | bt = xchg(&q->blk_trace, NULL); | 1474 | bt = xchg(&q->blk_trace, NULL); |
1469 | if (bt == NULL) | 1475 | if (bt == NULL) |
1470 | return -EINVAL; | 1476 | return -EINVAL; |
1471 | 1477 | ||
1472 | if (atomic_dec_and_test(&blk_probes_ref)) | 1478 | if (atomic_dec_and_test(&blk_probes_ref)) |
1473 | blk_unregister_tracepoints(); | 1479 | blk_unregister_tracepoints(); |
1474 | 1480 | ||
1475 | blk_trace_free(bt); | 1481 | blk_trace_free(bt); |
1476 | return 0; | 1482 | return 0; |
1477 | } | 1483 | } |
1478 | 1484 | ||
1479 | /* | 1485 | /* |
1480 | * Setup everything required to start tracing | 1486 | * Setup everything required to start tracing |
1481 | */ | 1487 | */ |
1482 | static int blk_trace_setup_queue(struct request_queue *q, | 1488 | static int blk_trace_setup_queue(struct request_queue *q, |
1483 | struct block_device *bdev) | 1489 | struct block_device *bdev) |
1484 | { | 1490 | { |
1485 | struct blk_trace *old_bt, *bt = NULL; | 1491 | struct blk_trace *old_bt, *bt = NULL; |
1486 | int ret = -ENOMEM; | 1492 | int ret = -ENOMEM; |
1487 | 1493 | ||
1488 | bt = kzalloc(sizeof(*bt), GFP_KERNEL); | 1494 | bt = kzalloc(sizeof(*bt), GFP_KERNEL); |
1489 | if (!bt) | 1495 | if (!bt) |
1490 | return -ENOMEM; | 1496 | return -ENOMEM; |
1491 | 1497 | ||
1492 | bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char)); | 1498 | bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char)); |
1493 | if (!bt->msg_data) | 1499 | if (!bt->msg_data) |
1494 | goto free_bt; | 1500 | goto free_bt; |
1495 | 1501 | ||
1496 | bt->dev = bdev->bd_dev; | 1502 | bt->dev = bdev->bd_dev; |
1497 | bt->act_mask = (u16)-1; | 1503 | bt->act_mask = (u16)-1; |
1498 | 1504 | ||
1499 | blk_trace_setup_lba(bt, bdev); | 1505 | blk_trace_setup_lba(bt, bdev); |
1500 | 1506 | ||
1501 | old_bt = xchg(&q->blk_trace, bt); | 1507 | old_bt = xchg(&q->blk_trace, bt); |
1502 | if (old_bt != NULL) { | 1508 | if (old_bt != NULL) { |
1503 | (void)xchg(&q->blk_trace, old_bt); | 1509 | (void)xchg(&q->blk_trace, old_bt); |
1504 | ret = -EBUSY; | 1510 | ret = -EBUSY; |
1505 | goto free_bt; | 1511 | goto free_bt; |
1506 | } | 1512 | } |
1507 | 1513 | ||
1508 | if (atomic_inc_return(&blk_probes_ref) == 1) | 1514 | if (atomic_inc_return(&blk_probes_ref) == 1) |
1509 | blk_register_tracepoints(); | 1515 | blk_register_tracepoints(); |
1510 | return 0; | 1516 | return 0; |
1511 | 1517 | ||
1512 | free_bt: | 1518 | free_bt: |
1513 | blk_trace_free(bt); | 1519 | blk_trace_free(bt); |
1514 | return ret; | 1520 | return ret; |
1515 | } | 1521 | } |
1516 | 1522 | ||
1517 | /* | 1523 | /* |
1518 | * sysfs interface to enable and configure tracing | 1524 | * sysfs interface to enable and configure tracing |
1519 | */ | 1525 | */ |
1520 | 1526 | ||
1521 | static ssize_t sysfs_blk_trace_attr_show(struct device *dev, | 1527 | static ssize_t sysfs_blk_trace_attr_show(struct device *dev, |
1522 | struct device_attribute *attr, | 1528 | struct device_attribute *attr, |
1523 | char *buf); | 1529 | char *buf); |
1524 | static ssize_t sysfs_blk_trace_attr_store(struct device *dev, | 1530 | static ssize_t sysfs_blk_trace_attr_store(struct device *dev, |
1525 | struct device_attribute *attr, | 1531 | struct device_attribute *attr, |
1526 | const char *buf, size_t count); | 1532 | const char *buf, size_t count); |
1527 | #define BLK_TRACE_DEVICE_ATTR(_name) \ | 1533 | #define BLK_TRACE_DEVICE_ATTR(_name) \ |
1528 | DEVICE_ATTR(_name, S_IRUGO | S_IWUSR, \ | 1534 | DEVICE_ATTR(_name, S_IRUGO | S_IWUSR, \ |
1529 | sysfs_blk_trace_attr_show, \ | 1535 | sysfs_blk_trace_attr_show, \ |
1530 | sysfs_blk_trace_attr_store) | 1536 | sysfs_blk_trace_attr_store) |
1531 | 1537 | ||
1532 | static BLK_TRACE_DEVICE_ATTR(enable); | 1538 | static BLK_TRACE_DEVICE_ATTR(enable); |
1533 | static BLK_TRACE_DEVICE_ATTR(act_mask); | 1539 | static BLK_TRACE_DEVICE_ATTR(act_mask); |
1534 | static BLK_TRACE_DEVICE_ATTR(pid); | 1540 | static BLK_TRACE_DEVICE_ATTR(pid); |
1535 | static BLK_TRACE_DEVICE_ATTR(start_lba); | 1541 | static BLK_TRACE_DEVICE_ATTR(start_lba); |
1536 | static BLK_TRACE_DEVICE_ATTR(end_lba); | 1542 | static BLK_TRACE_DEVICE_ATTR(end_lba); |
1537 | 1543 | ||
1538 | static struct attribute *blk_trace_attrs[] = { | 1544 | static struct attribute *blk_trace_attrs[] = { |
1539 | &dev_attr_enable.attr, | 1545 | &dev_attr_enable.attr, |
1540 | &dev_attr_act_mask.attr, | 1546 | &dev_attr_act_mask.attr, |
1541 | &dev_attr_pid.attr, | 1547 | &dev_attr_pid.attr, |
1542 | &dev_attr_start_lba.attr, | 1548 | &dev_attr_start_lba.attr, |
1543 | &dev_attr_end_lba.attr, | 1549 | &dev_attr_end_lba.attr, |
1544 | NULL | 1550 | NULL |
1545 | }; | 1551 | }; |
1546 | 1552 | ||
1547 | struct attribute_group blk_trace_attr_group = { | 1553 | struct attribute_group blk_trace_attr_group = { |
1548 | .name = "trace", | 1554 | .name = "trace", |
1549 | .attrs = blk_trace_attrs, | 1555 | .attrs = blk_trace_attrs, |
1550 | }; | 1556 | }; |
1551 | 1557 | ||
1552 | static const struct { | 1558 | static const struct { |
1553 | int mask; | 1559 | int mask; |
1554 | const char *str; | 1560 | const char *str; |
1555 | } mask_maps[] = { | 1561 | } mask_maps[] = { |
1556 | { BLK_TC_READ, "read" }, | 1562 | { BLK_TC_READ, "read" }, |
1557 | { BLK_TC_WRITE, "write" }, | 1563 | { BLK_TC_WRITE, "write" }, |
1558 | { BLK_TC_BARRIER, "barrier" }, | 1564 | { BLK_TC_BARRIER, "barrier" }, |
1559 | { BLK_TC_SYNC, "sync" }, | 1565 | { BLK_TC_SYNC, "sync" }, |
1560 | { BLK_TC_QUEUE, "queue" }, | 1566 | { BLK_TC_QUEUE, "queue" }, |
1561 | { BLK_TC_REQUEUE, "requeue" }, | 1567 | { BLK_TC_REQUEUE, "requeue" }, |
1562 | { BLK_TC_ISSUE, "issue" }, | 1568 | { BLK_TC_ISSUE, "issue" }, |
1563 | { BLK_TC_COMPLETE, "complete" }, | 1569 | { BLK_TC_COMPLETE, "complete" }, |
1564 | { BLK_TC_FS, "fs" }, | 1570 | { BLK_TC_FS, "fs" }, |
1565 | { BLK_TC_PC, "pc" }, | 1571 | { BLK_TC_PC, "pc" }, |
1566 | { BLK_TC_AHEAD, "ahead" }, | 1572 | { BLK_TC_AHEAD, "ahead" }, |
1567 | { BLK_TC_META, "meta" }, | 1573 | { BLK_TC_META, "meta" }, |
1568 | { BLK_TC_DISCARD, "discard" }, | 1574 | { BLK_TC_DISCARD, "discard" }, |
1569 | { BLK_TC_DRV_DATA, "drv_data" }, | 1575 | { BLK_TC_DRV_DATA, "drv_data" }, |
1570 | }; | 1576 | }; |
1571 | 1577 | ||
1572 | static int blk_trace_str2mask(const char *str) | 1578 | static int blk_trace_str2mask(const char *str) |
1573 | { | 1579 | { |
1574 | int i; | 1580 | int i; |
1575 | int mask = 0; | 1581 | int mask = 0; |
1576 | char *buf, *s, *token; | 1582 | char *buf, *s, *token; |
1577 | 1583 | ||
1578 | buf = kstrdup(str, GFP_KERNEL); | 1584 | buf = kstrdup(str, GFP_KERNEL); |
1579 | if (buf == NULL) | 1585 | if (buf == NULL) |
1580 | return -ENOMEM; | 1586 | return -ENOMEM; |
1581 | s = strstrip(buf); | 1587 | s = strstrip(buf); |
1582 | 1588 | ||
1583 | while (1) { | 1589 | while (1) { |
1584 | token = strsep(&s, ","); | 1590 | token = strsep(&s, ","); |
1585 | if (token == NULL) | 1591 | if (token == NULL) |
1586 | break; | 1592 | break; |
1587 | 1593 | ||
1588 | if (*token == '\0') | 1594 | if (*token == '\0') |
1589 | continue; | 1595 | continue; |
1590 | 1596 | ||
1591 | for (i = 0; i < ARRAY_SIZE(mask_maps); i++) { | 1597 | for (i = 0; i < ARRAY_SIZE(mask_maps); i++) { |
1592 | if (strcasecmp(token, mask_maps[i].str) == 0) { | 1598 | if (strcasecmp(token, mask_maps[i].str) == 0) { |
1593 | mask |= mask_maps[i].mask; | 1599 | mask |= mask_maps[i].mask; |
1594 | break; | 1600 | break; |
1595 | } | 1601 | } |
1596 | } | 1602 | } |
1597 | if (i == ARRAY_SIZE(mask_maps)) { | 1603 | if (i == ARRAY_SIZE(mask_maps)) { |
1598 | mask = -EINVAL; | 1604 | mask = -EINVAL; |
1599 | break; | 1605 | break; |
1600 | } | 1606 | } |
1601 | } | 1607 | } |
1602 | kfree(buf); | 1608 | kfree(buf); |
1603 | 1609 | ||
1604 | return mask; | 1610 | return mask; |
1605 | } | 1611 | } |
1606 | 1612 | ||
1607 | static ssize_t blk_trace_mask2str(char *buf, int mask) | 1613 | static ssize_t blk_trace_mask2str(char *buf, int mask) |
1608 | { | 1614 | { |
1609 | int i; | 1615 | int i; |
1610 | char *p = buf; | 1616 | char *p = buf; |
1611 | 1617 | ||
1612 | for (i = 0; i < ARRAY_SIZE(mask_maps); i++) { | 1618 | for (i = 0; i < ARRAY_SIZE(mask_maps); i++) { |
1613 | if (mask & mask_maps[i].mask) { | 1619 | if (mask & mask_maps[i].mask) { |
1614 | p += sprintf(p, "%s%s", | 1620 | p += sprintf(p, "%s%s", |
1615 | (p == buf) ? "" : ",", mask_maps[i].str); | 1621 | (p == buf) ? "" : ",", mask_maps[i].str); |
1616 | } | 1622 | } |
1617 | } | 1623 | } |
1618 | *p++ = '\n'; | 1624 | *p++ = '\n'; |
1619 | 1625 | ||
1620 | return p - buf; | 1626 | return p - buf; |
1621 | } | 1627 | } |
1622 | 1628 | ||
1623 | static struct request_queue *blk_trace_get_queue(struct block_device *bdev) | 1629 | static struct request_queue *blk_trace_get_queue(struct block_device *bdev) |
1624 | { | 1630 | { |
1625 | if (bdev->bd_disk == NULL) | 1631 | if (bdev->bd_disk == NULL) |
1626 | return NULL; | 1632 | return NULL; |
1627 | 1633 | ||
1628 | return bdev_get_queue(bdev); | 1634 | return bdev_get_queue(bdev); |
1629 | } | 1635 | } |
1630 | 1636 | ||
1631 | static ssize_t sysfs_blk_trace_attr_show(struct device *dev, | 1637 | static ssize_t sysfs_blk_trace_attr_show(struct device *dev, |
1632 | struct device_attribute *attr, | 1638 | struct device_attribute *attr, |
1633 | char *buf) | 1639 | char *buf) |
1634 | { | 1640 | { |
1635 | struct hd_struct *p = dev_to_part(dev); | 1641 | struct hd_struct *p = dev_to_part(dev); |
1636 | struct request_queue *q; | 1642 | struct request_queue *q; |
1637 | struct block_device *bdev; | 1643 | struct block_device *bdev; |
1638 | ssize_t ret = -ENXIO; | 1644 | ssize_t ret = -ENXIO; |
1639 | 1645 | ||
1640 | bdev = bdget(part_devt(p)); | 1646 | bdev = bdget(part_devt(p)); |
1641 | if (bdev == NULL) | 1647 | if (bdev == NULL) |
1642 | goto out; | 1648 | goto out; |
1643 | 1649 | ||
1644 | q = blk_trace_get_queue(bdev); | 1650 | q = blk_trace_get_queue(bdev); |
1645 | if (q == NULL) | 1651 | if (q == NULL) |
1646 | goto out_bdput; | 1652 | goto out_bdput; |
1647 | 1653 | ||
1648 | mutex_lock(&bdev->bd_mutex); | 1654 | mutex_lock(&bdev->bd_mutex); |
1649 | 1655 | ||
1650 | if (attr == &dev_attr_enable) { | 1656 | if (attr == &dev_attr_enable) { |
1651 | ret = sprintf(buf, "%u\n", !!q->blk_trace); | 1657 | ret = sprintf(buf, "%u\n", !!q->blk_trace); |
1652 | goto out_unlock_bdev; | 1658 | goto out_unlock_bdev; |
1653 | } | 1659 | } |
1654 | 1660 | ||
1655 | if (q->blk_trace == NULL) | 1661 | if (q->blk_trace == NULL) |
1656 | ret = sprintf(buf, "disabled\n"); | 1662 | ret = sprintf(buf, "disabled\n"); |
1657 | else if (attr == &dev_attr_act_mask) | 1663 | else if (attr == &dev_attr_act_mask) |
1658 | ret = blk_trace_mask2str(buf, q->blk_trace->act_mask); | 1664 | ret = blk_trace_mask2str(buf, q->blk_trace->act_mask); |
1659 | else if (attr == &dev_attr_pid) | 1665 | else if (attr == &dev_attr_pid) |
1660 | ret = sprintf(buf, "%u\n", q->blk_trace->pid); | 1666 | ret = sprintf(buf, "%u\n", q->blk_trace->pid); |
1661 | else if (attr == &dev_attr_start_lba) | 1667 | else if (attr == &dev_attr_start_lba) |
1662 | ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba); | 1668 | ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba); |
1663 | else if (attr == &dev_attr_end_lba) | 1669 | else if (attr == &dev_attr_end_lba) |
1664 | ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba); | 1670 | ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba); |
1665 | 1671 | ||
1666 | out_unlock_bdev: | 1672 | out_unlock_bdev: |
1667 | mutex_unlock(&bdev->bd_mutex); | 1673 | mutex_unlock(&bdev->bd_mutex); |
1668 | out_bdput: | 1674 | out_bdput: |
1669 | bdput(bdev); | 1675 | bdput(bdev); |
1670 | out: | 1676 | out: |
1671 | return ret; | 1677 | return ret; |
1672 | } | 1678 | } |
1673 | 1679 | ||
1674 | static ssize_t sysfs_blk_trace_attr_store(struct device *dev, | 1680 | static ssize_t sysfs_blk_trace_attr_store(struct device *dev, |
1675 | struct device_attribute *attr, | 1681 | struct device_attribute *attr, |
1676 | const char *buf, size_t count) | 1682 | const char *buf, size_t count) |
1677 | { | 1683 | { |
1678 | struct block_device *bdev; | 1684 | struct block_device *bdev; |
1679 | struct request_queue *q; | 1685 | struct request_queue *q; |
1680 | struct hd_struct *p; | 1686 | struct hd_struct *p; |
1681 | u64 value; | 1687 | u64 value; |
1682 | ssize_t ret = -EINVAL; | 1688 | ssize_t ret = -EINVAL; |
1683 | 1689 | ||
1684 | if (count == 0) | 1690 | if (count == 0) |
1685 | goto out; | 1691 | goto out; |
1686 | 1692 | ||
1687 | if (attr == &dev_attr_act_mask) { | 1693 | if (attr == &dev_attr_act_mask) { |
1688 | if (sscanf(buf, "%llx", &value) != 1) { | 1694 | if (sscanf(buf, "%llx", &value) != 1) { |
1689 | /* Assume it is a list of trace category names */ | 1695 | /* Assume it is a list of trace category names */ |
1690 | ret = blk_trace_str2mask(buf); | 1696 | ret = blk_trace_str2mask(buf); |
1691 | if (ret < 0) | 1697 | if (ret < 0) |
1692 | goto out; | 1698 | goto out; |
1693 | value = ret; | 1699 | value = ret; |
1694 | } | 1700 | } |
1695 | } else if (sscanf(buf, "%llu", &value) != 1) | 1701 | } else if (sscanf(buf, "%llu", &value) != 1) |
1696 | goto out; | 1702 | goto out; |
1697 | 1703 | ||
1698 | ret = -ENXIO; | 1704 | ret = -ENXIO; |
1699 | 1705 | ||
1700 | p = dev_to_part(dev); | 1706 | p = dev_to_part(dev); |
1701 | bdev = bdget(part_devt(p)); | 1707 | bdev = bdget(part_devt(p)); |
1702 | if (bdev == NULL) | 1708 | if (bdev == NULL) |
1703 | goto out; | 1709 | goto out; |
1704 | 1710 | ||
1705 | q = blk_trace_get_queue(bdev); | 1711 | q = blk_trace_get_queue(bdev); |
1706 | if (q == NULL) | 1712 | if (q == NULL) |
1707 | goto out_bdput; | 1713 | goto out_bdput; |
1708 | 1714 | ||
1709 | mutex_lock(&bdev->bd_mutex); | 1715 | mutex_lock(&bdev->bd_mutex); |
1710 | 1716 | ||
1711 | if (attr == &dev_attr_enable) { | 1717 | if (attr == &dev_attr_enable) { |
1712 | if (value) | 1718 | if (value) |
1713 | ret = blk_trace_setup_queue(q, bdev); | 1719 | ret = blk_trace_setup_queue(q, bdev); |
1714 | else | 1720 | else |
1715 | ret = blk_trace_remove_queue(q); | 1721 | ret = blk_trace_remove_queue(q); |
1716 | goto out_unlock_bdev; | 1722 | goto out_unlock_bdev; |
1717 | } | 1723 | } |
1718 | 1724 | ||
1719 | ret = 0; | 1725 | ret = 0; |
1720 | if (q->blk_trace == NULL) | 1726 | if (q->blk_trace == NULL) |
1721 | ret = blk_trace_setup_queue(q, bdev); | 1727 | ret = blk_trace_setup_queue(q, bdev); |
1722 | 1728 | ||
1723 | if (ret == 0) { | 1729 | if (ret == 0) { |
1724 | if (attr == &dev_attr_act_mask) | 1730 | if (attr == &dev_attr_act_mask) |
1725 | q->blk_trace->act_mask = value; | 1731 | q->blk_trace->act_mask = value; |
1726 | else if (attr == &dev_attr_pid) | 1732 | else if (attr == &dev_attr_pid) |
1727 | q->blk_trace->pid = value; | 1733 | q->blk_trace->pid = value; |
1728 | else if (attr == &dev_attr_start_lba) | 1734 | else if (attr == &dev_attr_start_lba) |
1729 | q->blk_trace->start_lba = value; | 1735 | q->blk_trace->start_lba = value; |
1730 | else if (attr == &dev_attr_end_lba) | 1736 | else if (attr == &dev_attr_end_lba) |
1731 | q->blk_trace->end_lba = value; | 1737 | q->blk_trace->end_lba = value; |
1732 | } | 1738 | } |
1733 | 1739 | ||
1734 | out_unlock_bdev: | 1740 | out_unlock_bdev: |
1735 | mutex_unlock(&bdev->bd_mutex); | 1741 | mutex_unlock(&bdev->bd_mutex); |
1736 | out_bdput: | 1742 | out_bdput: |
1737 | bdput(bdev); | 1743 | bdput(bdev); |
1738 | out: | 1744 | out: |
1739 | return ret ? ret : count; | 1745 | return ret ? ret : count; |
1740 | } | 1746 | } |
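The show/store pair above backs the per-device trace/ directory in sysfs. A minimal userspace sketch of enabling tracing on one disk and narrowing its action mask; the device name and category list are only examples, and writing act_mask first would also set the queue up implicitly via blk_trace_setup_queue():

    #include <stdio.h>

    static int enable_sda_blktrace(void)
    {
            FILE *f;

            /* "1" -> sysfs_blk_trace_attr_store() -> blk_trace_setup_queue() */
            f = fopen("/sys/block/sda/trace/enable", "w");
            if (!f)
                    return -1;
            fputs("1\n", f);
            fclose(f);

            /* comma-separated category names are parsed by blk_trace_str2mask() */
            f = fopen("/sys/block/sda/trace/act_mask", "w");
            if (!f)
                    return -1;
            fputs("read,sync\n", f);
            return fclose(f);
    }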
1741 | 1747 | ||
1742 | int blk_trace_init_sysfs(struct device *dev) | 1748 | int blk_trace_init_sysfs(struct device *dev) |
1743 | { | 1749 | { |
1744 | return sysfs_create_group(&dev->kobj, &blk_trace_attr_group); | 1750 | return sysfs_create_group(&dev->kobj, &blk_trace_attr_group); |
1745 | } | 1751 | } |
1746 | 1752 | ||
1747 | void blk_trace_remove_sysfs(struct device *dev) | 1753 | void blk_trace_remove_sysfs(struct device *dev) |
1748 | { | 1754 | { |
1749 | sysfs_remove_group(&dev->kobj, &blk_trace_attr_group); | 1755 | sysfs_remove_group(&dev->kobj, &blk_trace_attr_group); |
1750 | } | 1756 | } |
1751 | 1757 | ||
1752 | #endif /* CONFIG_BLK_DEV_IO_TRACE */ | 1758 | #endif /* CONFIG_BLK_DEV_IO_TRACE */ |
1753 | 1759 | ||
1754 | #ifdef CONFIG_EVENT_TRACING | 1760 | #ifdef CONFIG_EVENT_TRACING |
1755 | 1761 | ||
1756 | void blk_dump_cmd(char *buf, struct request *rq) | 1762 | void blk_dump_cmd(char *buf, struct request *rq) |
1757 | { | 1763 | { |
1758 | int i, end; | 1764 | int i, end; |
1759 | int len = rq->cmd_len; | 1765 | int len = rq->cmd_len; |
1760 | unsigned char *cmd = rq->cmd; | 1766 | unsigned char *cmd = rq->cmd; |
1761 | 1767 | ||
1762 | if (rq->cmd_type != REQ_TYPE_BLOCK_PC) { | 1768 | if (rq->cmd_type != REQ_TYPE_BLOCK_PC) { |
1763 | buf[0] = '\0'; | 1769 | buf[0] = '\0'; |
1764 | return; | 1770 | return; |
1765 | } | 1771 | } |
1766 | 1772 | ||
1767 | for (end = len - 1; end >= 0; end--) | 1773 | for (end = len - 1; end >= 0; end--) |
1768 | if (cmd[end]) | 1774 | if (cmd[end]) |
1769 | break; | 1775 | break; |
1770 | end++; | 1776 | end++; |
1771 | 1777 | ||
1772 | for (i = 0; i < len; i++) { | 1778 | for (i = 0; i < len; i++) { |
1773 | buf += sprintf(buf, "%s%02x", i == 0 ? "" : " ", cmd[i]); | 1779 | buf += sprintf(buf, "%s%02x", i == 0 ? "" : " ", cmd[i]); |
1774 | if (i == end && end != len - 1) { | 1780 | if (i == end && end != len - 1) { |
1775 | sprintf(buf, " .."); | 1781 | sprintf(buf, " .."); |
1776 | break; | 1782 | break; |
1777 | } | 1783 | } |
1778 | } | 1784 | } |
1779 | } | 1785 | } |
1780 | 1786 | ||
1781 | void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) | 1787 | void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) |
1782 | { | 1788 | { |
1783 | int i = 0; | 1789 | int i = 0; |
1784 | 1790 | ||
1785 | if (rw & WRITE) | 1791 | if (rw & WRITE) |
1786 | rwbs[i++] = 'W'; | 1792 | rwbs[i++] = 'W'; |
1787 | else if (rw & REQ_DISCARD) | 1793 | else if (rw & REQ_DISCARD) |
1788 | rwbs[i++] = 'D'; | 1794 | rwbs[i++] = 'D'; |
1789 | else if (bytes) | 1795 | else if (bytes) |
1790 | rwbs[i++] = 'R'; | 1796 | rwbs[i++] = 'R'; |
1791 | else | 1797 | else |
1792 | rwbs[i++] = 'N'; | 1798 | rwbs[i++] = 'N'; |
1793 | 1799 | ||
1794 | if (rw & REQ_RAHEAD) | 1800 | if (rw & REQ_RAHEAD) |
1795 | rwbs[i++] = 'A'; | 1801 | rwbs[i++] = 'A'; |
1796 | if (rw & REQ_SYNC) | 1802 | if (rw & REQ_SYNC) |
1797 | rwbs[i++] = 'S'; | 1803 | rwbs[i++] = 'S'; |
1798 | if (rw & REQ_META) | 1804 | if (rw & REQ_META) |
1799 | rwbs[i++] = 'M'; | 1805 | rwbs[i++] = 'M'; |
1800 | if (rw & REQ_SECURE) | 1806 | if (rw & REQ_SECURE) |
1801 | rwbs[i++] = 'E'; | 1807 | rwbs[i++] = 'E'; |
1802 | 1808 | ||
1803 | rwbs[i] = '\0'; | 1809 | rwbs[i] = '\0'; |
1804 | } | 1810 | } |
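blk_fill_rwbs() is the lighter-weight variant used by the block TRACE_EVENTs. A small kernel-context sketch of the strings it produces, assuming the usual declaration in blktrace_api.h:

    #include <linux/blkdev.h>
    #include <linux/blktrace_api.h>

    static void rwbs_demo(void)
    {
            char rwbs[6];   /* direction char + up to four flag chars + NUL */

            /* a synchronous write: 'W' from the direction, 'S' from REQ_SYNC -> "WS" */
            blk_fill_rwbs(rwbs, WRITE | REQ_SYNC, 4096);

            /* no data and no direction bits set -> just "N" */
            blk_fill_rwbs(rwbs, 0, 0);
    }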
1805 | 1811 | ||
1806 | #endif /* CONFIG_EVENT_TRACING */ | 1812 | #endif /* CONFIG_EVENT_TRACING */ |
1807 | 1813 | ||
1808 | 1814 |