Commit 49cac01e1fa74174d72adb0e872504a7fefd7c01

Authored by Jens Axboe
1 parent a237c1c5bc

block: make unplug timer trace event correspond to the schedule() unplug

It's a pretty close match to what we had before: the timer triggering
meant that nobody unplugged the plug in due time, and in the new scheme
that corresponds very closely to the schedule() unplug. It's essentially
the difference between an explicit unplug (IO unplug) and an implicit
unplug (timer unplug, where we scheduled with pending IO queued).

Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
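
For context, a minimal sketch (not part of this commit) of how the two unplug
cases arise with the on-stack plugging API. blk_start_plug()/blk_finish_plug()
and submit_bio() are the real 2.6.39-era interfaces; the wrapper function and
bio arguments are purely illustrative.

    #include <linux/blkdev.h>
    #include <linux/bio.h>

    /* Hypothetical helper: batch two bios under one plug. */
    static void submit_two_bios(struct bio *bio1, struct bio *bio2)
    {
    	struct blk_plug plug;

    	blk_start_plug(&plug);		/* IO is held on the per-task plug list */
    	submit_bio(WRITE, bio1);
    	submit_bio(WRITE, bio2);

    	/*
    	 * Explicit unplug: flushing the plug here is what the "IO unplug"
    	 * trace event corresponds to.
    	 */
    	blk_finish_plug(&plug);

    	/*
    	 * Implicit unplug: had the task blocked (schedule()) while the plug
    	 * still held requests, the scheduler hook would have flushed it
    	 * instead -- the case this commit maps onto the old "timer unplug"
    	 * trace event.
    	 */
    }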

Showing 3 changed files with 31 additions and 18 deletions

1 /* 1 /*
2 * Copyright (C) 1991, 1992 Linus Torvalds 2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 1994, Karl Keyte: Added support for disk statistics 3 * Copyright (C) 1994, Karl Keyte: Added support for disk statistics
4 * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE 4 * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
5 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de> 5 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
6 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> 6 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au>
7 * - July2000 7 * - July2000
8 * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001 8 * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001
9 */ 9 */
10 10
11 /* 11 /*
12 * This handles all read/write requests to block devices 12 * This handles all read/write requests to block devices
13 */ 13 */
14 #include <linux/kernel.h> 14 #include <linux/kernel.h>
15 #include <linux/module.h> 15 #include <linux/module.h>
16 #include <linux/backing-dev.h> 16 #include <linux/backing-dev.h>
17 #include <linux/bio.h> 17 #include <linux/bio.h>
18 #include <linux/blkdev.h> 18 #include <linux/blkdev.h>
19 #include <linux/highmem.h> 19 #include <linux/highmem.h>
20 #include <linux/mm.h> 20 #include <linux/mm.h>
21 #include <linux/kernel_stat.h> 21 #include <linux/kernel_stat.h>
22 #include <linux/string.h> 22 #include <linux/string.h>
23 #include <linux/init.h> 23 #include <linux/init.h>
24 #include <linux/completion.h> 24 #include <linux/completion.h>
25 #include <linux/slab.h> 25 #include <linux/slab.h>
26 #include <linux/swap.h> 26 #include <linux/swap.h>
27 #include <linux/writeback.h> 27 #include <linux/writeback.h>
28 #include <linux/task_io_accounting_ops.h> 28 #include <linux/task_io_accounting_ops.h>
29 #include <linux/fault-inject.h> 29 #include <linux/fault-inject.h>
30 #include <linux/list_sort.h> 30 #include <linux/list_sort.h>
31 31
32 #define CREATE_TRACE_POINTS 32 #define CREATE_TRACE_POINTS
33 #include <trace/events/block.h> 33 #include <trace/events/block.h>
34 34
35 #include "blk.h" 35 #include "blk.h"
36 36
37 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); 37 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
38 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); 38 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
39 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); 39 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
40 40
41 static int __make_request(struct request_queue *q, struct bio *bio); 41 static int __make_request(struct request_queue *q, struct bio *bio);
42 42
43 /* 43 /*
44 * For the allocated request tables 44 * For the allocated request tables
45 */ 45 */
46 static struct kmem_cache *request_cachep; 46 static struct kmem_cache *request_cachep;
47 47
48 /* 48 /*
49 * For queue allocation 49 * For queue allocation
50 */ 50 */
51 struct kmem_cache *blk_requestq_cachep; 51 struct kmem_cache *blk_requestq_cachep;
52 52
53 /* 53 /*
54 * Controlling structure to kblockd 54 * Controlling structure to kblockd
55 */ 55 */
56 static struct workqueue_struct *kblockd_workqueue; 56 static struct workqueue_struct *kblockd_workqueue;
57 57
58 static void drive_stat_acct(struct request *rq, int new_io) 58 static void drive_stat_acct(struct request *rq, int new_io)
59 { 59 {
60 struct hd_struct *part; 60 struct hd_struct *part;
61 int rw = rq_data_dir(rq); 61 int rw = rq_data_dir(rq);
62 int cpu; 62 int cpu;
63 63
64 if (!blk_do_io_stat(rq)) 64 if (!blk_do_io_stat(rq))
65 return; 65 return;
66 66
67 cpu = part_stat_lock(); 67 cpu = part_stat_lock();
68 68
69 if (!new_io) { 69 if (!new_io) {
70 part = rq->part; 70 part = rq->part;
71 part_stat_inc(cpu, part, merges[rw]); 71 part_stat_inc(cpu, part, merges[rw]);
72 } else { 72 } else {
73 part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq)); 73 part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
74 if (!hd_struct_try_get(part)) { 74 if (!hd_struct_try_get(part)) {
75 /* 75 /*
76 * The partition is already being removed, 76 * The partition is already being removed,
77 * the request will be accounted on the disk only 77 * the request will be accounted on the disk only
78 * 78 *
79 * We take a reference on disk->part0 although that 79 * We take a reference on disk->part0 although that
80 * partition will never be deleted, so we can treat 80 * partition will never be deleted, so we can treat
81 * it as any other partition. 81 * it as any other partition.
82 */ 82 */
83 part = &rq->rq_disk->part0; 83 part = &rq->rq_disk->part0;
84 hd_struct_get(part); 84 hd_struct_get(part);
85 } 85 }
86 part_round_stats(cpu, part); 86 part_round_stats(cpu, part);
87 part_inc_in_flight(part, rw); 87 part_inc_in_flight(part, rw);
88 rq->part = part; 88 rq->part = part;
89 } 89 }
90 90
91 part_stat_unlock(); 91 part_stat_unlock();
92 } 92 }
93 93
94 void blk_queue_congestion_threshold(struct request_queue *q) 94 void blk_queue_congestion_threshold(struct request_queue *q)
95 { 95 {
96 int nr; 96 int nr;
97 97
98 nr = q->nr_requests - (q->nr_requests / 8) + 1; 98 nr = q->nr_requests - (q->nr_requests / 8) + 1;
99 if (nr > q->nr_requests) 99 if (nr > q->nr_requests)
100 nr = q->nr_requests; 100 nr = q->nr_requests;
101 q->nr_congestion_on = nr; 101 q->nr_congestion_on = nr;
102 102
103 nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1; 103 nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;
104 if (nr < 1) 104 if (nr < 1)
105 nr = 1; 105 nr = 1;
106 q->nr_congestion_off = nr; 106 q->nr_congestion_off = nr;
107 } 107 }
108 108
109 /** 109 /**
110 * blk_get_backing_dev_info - get the address of a queue's backing_dev_info 110 * blk_get_backing_dev_info - get the address of a queue's backing_dev_info
111 * @bdev: device 111 * @bdev: device
112 * 112 *
113 * Locates the passed device's request queue and returns the address of its 113 * Locates the passed device's request queue and returns the address of its
114 * backing_dev_info 114 * backing_dev_info
115 * 115 *
116 * Will return NULL if the request queue cannot be located. 116 * Will return NULL if the request queue cannot be located.
117 */ 117 */
118 struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) 118 struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
119 { 119 {
120 struct backing_dev_info *ret = NULL; 120 struct backing_dev_info *ret = NULL;
121 struct request_queue *q = bdev_get_queue(bdev); 121 struct request_queue *q = bdev_get_queue(bdev);
122 122
123 if (q) 123 if (q)
124 ret = &q->backing_dev_info; 124 ret = &q->backing_dev_info;
125 return ret; 125 return ret;
126 } 126 }
127 EXPORT_SYMBOL(blk_get_backing_dev_info); 127 EXPORT_SYMBOL(blk_get_backing_dev_info);
128 128
129 void blk_rq_init(struct request_queue *q, struct request *rq) 129 void blk_rq_init(struct request_queue *q, struct request *rq)
130 { 130 {
131 memset(rq, 0, sizeof(*rq)); 131 memset(rq, 0, sizeof(*rq));
132 132
133 INIT_LIST_HEAD(&rq->queuelist); 133 INIT_LIST_HEAD(&rq->queuelist);
134 INIT_LIST_HEAD(&rq->timeout_list); 134 INIT_LIST_HEAD(&rq->timeout_list);
135 rq->cpu = -1; 135 rq->cpu = -1;
136 rq->q = q; 136 rq->q = q;
137 rq->__sector = (sector_t) -1; 137 rq->__sector = (sector_t) -1;
138 INIT_HLIST_NODE(&rq->hash); 138 INIT_HLIST_NODE(&rq->hash);
139 RB_CLEAR_NODE(&rq->rb_node); 139 RB_CLEAR_NODE(&rq->rb_node);
140 rq->cmd = rq->__cmd; 140 rq->cmd = rq->__cmd;
141 rq->cmd_len = BLK_MAX_CDB; 141 rq->cmd_len = BLK_MAX_CDB;
142 rq->tag = -1; 142 rq->tag = -1;
143 rq->ref_count = 1; 143 rq->ref_count = 1;
144 rq->start_time = jiffies; 144 rq->start_time = jiffies;
145 set_start_time_ns(rq); 145 set_start_time_ns(rq);
146 rq->part = NULL; 146 rq->part = NULL;
147 } 147 }
148 EXPORT_SYMBOL(blk_rq_init); 148 EXPORT_SYMBOL(blk_rq_init);
149 149
150 static void req_bio_endio(struct request *rq, struct bio *bio, 150 static void req_bio_endio(struct request *rq, struct bio *bio,
151 unsigned int nbytes, int error) 151 unsigned int nbytes, int error)
152 { 152 {
153 if (error) 153 if (error)
154 clear_bit(BIO_UPTODATE, &bio->bi_flags); 154 clear_bit(BIO_UPTODATE, &bio->bi_flags);
155 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) 155 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
156 error = -EIO; 156 error = -EIO;
157 157
158 if (unlikely(nbytes > bio->bi_size)) { 158 if (unlikely(nbytes > bio->bi_size)) {
159 printk(KERN_ERR "%s: want %u bytes done, %u left\n", 159 printk(KERN_ERR "%s: want %u bytes done, %u left\n",
160 __func__, nbytes, bio->bi_size); 160 __func__, nbytes, bio->bi_size);
161 nbytes = bio->bi_size; 161 nbytes = bio->bi_size;
162 } 162 }
163 163
164 if (unlikely(rq->cmd_flags & REQ_QUIET)) 164 if (unlikely(rq->cmd_flags & REQ_QUIET))
165 set_bit(BIO_QUIET, &bio->bi_flags); 165 set_bit(BIO_QUIET, &bio->bi_flags);
166 166
167 bio->bi_size -= nbytes; 167 bio->bi_size -= nbytes;
168 bio->bi_sector += (nbytes >> 9); 168 bio->bi_sector += (nbytes >> 9);
169 169
170 if (bio_integrity(bio)) 170 if (bio_integrity(bio))
171 bio_integrity_advance(bio, nbytes); 171 bio_integrity_advance(bio, nbytes);
172 172
173 /* don't actually finish bio if it's part of flush sequence */ 173 /* don't actually finish bio if it's part of flush sequence */
174 if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ)) 174 if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
175 bio_endio(bio, error); 175 bio_endio(bio, error);
176 } 176 }
177 177
178 void blk_dump_rq_flags(struct request *rq, char *msg) 178 void blk_dump_rq_flags(struct request *rq, char *msg)
179 { 179 {
180 int bit; 180 int bit;
181 181
182 printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg, 182 printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg,
183 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, 183 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
184 rq->cmd_flags); 184 rq->cmd_flags);
185 185
186 printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n", 186 printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n",
187 (unsigned long long)blk_rq_pos(rq), 187 (unsigned long long)blk_rq_pos(rq),
188 blk_rq_sectors(rq), blk_rq_cur_sectors(rq)); 188 blk_rq_sectors(rq), blk_rq_cur_sectors(rq));
189 printk(KERN_INFO " bio %p, biotail %p, buffer %p, len %u\n", 189 printk(KERN_INFO " bio %p, biotail %p, buffer %p, len %u\n",
190 rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq)); 190 rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq));
191 191
192 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { 192 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
193 printk(KERN_INFO " cdb: "); 193 printk(KERN_INFO " cdb: ");
194 for (bit = 0; bit < BLK_MAX_CDB; bit++) 194 for (bit = 0; bit < BLK_MAX_CDB; bit++)
195 printk("%02x ", rq->cmd[bit]); 195 printk("%02x ", rq->cmd[bit]);
196 printk("\n"); 196 printk("\n");
197 } 197 }
198 } 198 }
199 EXPORT_SYMBOL(blk_dump_rq_flags); 199 EXPORT_SYMBOL(blk_dump_rq_flags);
200 200
201 static void blk_delay_work(struct work_struct *work) 201 static void blk_delay_work(struct work_struct *work)
202 { 202 {
203 struct request_queue *q; 203 struct request_queue *q;
204 204
205 q = container_of(work, struct request_queue, delay_work.work); 205 q = container_of(work, struct request_queue, delay_work.work);
206 spin_lock_irq(q->queue_lock); 206 spin_lock_irq(q->queue_lock);
207 __blk_run_queue(q, false); 207 __blk_run_queue(q, false);
208 spin_unlock_irq(q->queue_lock); 208 spin_unlock_irq(q->queue_lock);
209 } 209 }
210 210
211 /** 211 /**
212 * blk_delay_queue - restart queueing after defined interval 212 * blk_delay_queue - restart queueing after defined interval
213 * @q: The &struct request_queue in question 213 * @q: The &struct request_queue in question
214 * @msecs: Delay in msecs 214 * @msecs: Delay in msecs
215 * 215 *
216 * Description: 216 * Description:
217 * Sometimes queueing needs to be postponed for a little while, to allow 217 * Sometimes queueing needs to be postponed for a little while, to allow
218 * resources to come back. This function will make sure that queueing is 218 * resources to come back. This function will make sure that queueing is
219 * restarted around the specified time. 219 * restarted around the specified time.
220 */ 220 */
221 void blk_delay_queue(struct request_queue *q, unsigned long msecs) 221 void blk_delay_queue(struct request_queue *q, unsigned long msecs)
222 { 222 {
223 schedule_delayed_work(&q->delay_work, msecs_to_jiffies(msecs)); 223 schedule_delayed_work(&q->delay_work, msecs_to_jiffies(msecs));
224 } 224 }
225 EXPORT_SYMBOL(blk_delay_queue); 225 EXPORT_SYMBOL(blk_delay_queue);
226 226
227 /** 227 /**
228 * blk_start_queue - restart a previously stopped queue 228 * blk_start_queue - restart a previously stopped queue
229 * @q: The &struct request_queue in question 229 * @q: The &struct request_queue in question
230 * 230 *
231 * Description: 231 * Description:
232 * blk_start_queue() will clear the stop flag on the queue, and call 232 * blk_start_queue() will clear the stop flag on the queue, and call
233 * the request_fn for the queue if it was in a stopped state when 233 * the request_fn for the queue if it was in a stopped state when
234 * entered. Also see blk_stop_queue(). Queue lock must be held. 234 * entered. Also see blk_stop_queue(). Queue lock must be held.
235 **/ 235 **/
236 void blk_start_queue(struct request_queue *q) 236 void blk_start_queue(struct request_queue *q)
237 { 237 {
238 WARN_ON(!irqs_disabled()); 238 WARN_ON(!irqs_disabled());
239 239
240 queue_flag_clear(QUEUE_FLAG_STOPPED, q); 240 queue_flag_clear(QUEUE_FLAG_STOPPED, q);
241 __blk_run_queue(q, false); 241 __blk_run_queue(q, false);
242 } 242 }
243 EXPORT_SYMBOL(blk_start_queue); 243 EXPORT_SYMBOL(blk_start_queue);
244 244
245 /** 245 /**
246 * blk_stop_queue - stop a queue 246 * blk_stop_queue - stop a queue
247 * @q: The &struct request_queue in question 247 * @q: The &struct request_queue in question
248 * 248 *
249 * Description: 249 * Description:
250 * The Linux block layer assumes that a block driver will consume all 250 * The Linux block layer assumes that a block driver will consume all
251 * entries on the request queue when the request_fn strategy is called. 251 * entries on the request queue when the request_fn strategy is called.
252 * Often this will not happen, because of hardware limitations (queue 252 * Often this will not happen, because of hardware limitations (queue
253 * depth settings). If a device driver gets a 'queue full' response, 253 * depth settings). If a device driver gets a 'queue full' response,
254 * or if it simply chooses not to queue more I/O at one point, it can 254 * or if it simply chooses not to queue more I/O at one point, it can
255 * call this function to prevent the request_fn from being called until 255 * call this function to prevent the request_fn from being called until
256 * the driver has signalled it's ready to go again. This happens by calling 256 * the driver has signalled it's ready to go again. This happens by calling
257 * blk_start_queue() to restart queue operations. Queue lock must be held. 257 * blk_start_queue() to restart queue operations. Queue lock must be held.
258 **/ 258 **/
259 void blk_stop_queue(struct request_queue *q) 259 void blk_stop_queue(struct request_queue *q)
260 { 260 {
261 __cancel_delayed_work(&q->delay_work); 261 __cancel_delayed_work(&q->delay_work);
262 queue_flag_set(QUEUE_FLAG_STOPPED, q); 262 queue_flag_set(QUEUE_FLAG_STOPPED, q);
263 } 263 }
264 EXPORT_SYMBOL(blk_stop_queue); 264 EXPORT_SYMBOL(blk_stop_queue);
265 265
266 /** 266 /**
267 * blk_sync_queue - cancel any pending callbacks on a queue 267 * blk_sync_queue - cancel any pending callbacks on a queue
268 * @q: the queue 268 * @q: the queue
269 * 269 *
270 * Description: 270 * Description:
271 * The block layer may perform asynchronous callback activity 271 * The block layer may perform asynchronous callback activity
272 * on a queue, such as calling the unplug function after a timeout. 272 * on a queue, such as calling the unplug function after a timeout.
273 * A block device may call blk_sync_queue to ensure that any 273 * A block device may call blk_sync_queue to ensure that any
274 * such activity is cancelled, thus allowing it to release resources 274 * such activity is cancelled, thus allowing it to release resources
275 * that the callbacks might use. The caller must already have made sure 275 * that the callbacks might use. The caller must already have made sure
276 * that its ->make_request_fn will not re-add plugging prior to calling 276 * that its ->make_request_fn will not re-add plugging prior to calling
277 * this function. 277 * this function.
278 * 278 *
279 * This function does not cancel any asynchronous activity arising 279 * This function does not cancel any asynchronous activity arising
280 * out of elevator or throttling code. That would require elevaotor_exit() 280 * out of elevator or throttling code. That would require elevaotor_exit()
281 * and blk_throtl_exit() to be called with queue lock initialized. 281 * and blk_throtl_exit() to be called with queue lock initialized.
282 * 282 *
283 */ 283 */
284 void blk_sync_queue(struct request_queue *q) 284 void blk_sync_queue(struct request_queue *q)
285 { 285 {
286 del_timer_sync(&q->timeout); 286 del_timer_sync(&q->timeout);
287 cancel_delayed_work_sync(&q->delay_work); 287 cancel_delayed_work_sync(&q->delay_work);
288 } 288 }
289 EXPORT_SYMBOL(blk_sync_queue); 289 EXPORT_SYMBOL(blk_sync_queue);
290 290
291 /** 291 /**
292 * __blk_run_queue - run a single device queue 292 * __blk_run_queue - run a single device queue
293 * @q: The queue to run 293 * @q: The queue to run
294 * @force_kblockd: Don't run @q->request_fn directly. Use kblockd. 294 * @force_kblockd: Don't run @q->request_fn directly. Use kblockd.
295 * 295 *
296 * Description: 296 * Description:
297 * See @blk_run_queue. This variant must be called with the queue lock 297 * See @blk_run_queue. This variant must be called with the queue lock
298 * held and interrupts disabled. 298 * held and interrupts disabled.
299 * 299 *
300 */ 300 */
301 void __blk_run_queue(struct request_queue *q, bool force_kblockd) 301 void __blk_run_queue(struct request_queue *q, bool force_kblockd)
302 { 302 {
303 if (unlikely(blk_queue_stopped(q))) 303 if (unlikely(blk_queue_stopped(q)))
304 return; 304 return;
305 305
306 /* 306 /*
307 * Only recurse once to avoid overrunning the stack, let the unplug 307 * Only recurse once to avoid overrunning the stack, let the unplug
308 * handling reinvoke the handler shortly if we already got there. 308 * handling reinvoke the handler shortly if we already got there.
309 */ 309 */
310 if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { 310 if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
311 q->request_fn(q); 311 q->request_fn(q);
312 queue_flag_clear(QUEUE_FLAG_REENTER, q); 312 queue_flag_clear(QUEUE_FLAG_REENTER, q);
313 } else 313 } else
314 queue_delayed_work(kblockd_workqueue, &q->delay_work, 0); 314 queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);
315 } 315 }
316 EXPORT_SYMBOL(__blk_run_queue); 316 EXPORT_SYMBOL(__blk_run_queue);
317 317
318 /** 318 /**
319 * blk_run_queue - run a single device queue 319 * blk_run_queue - run a single device queue
320 * @q: The queue to run 320 * @q: The queue to run
321 * 321 *
322 * Description: 322 * Description:
323 * Invoke request handling on this queue, if it has pending work to do. 323 * Invoke request handling on this queue, if it has pending work to do.
324 * May be used to restart queueing when a request has completed. 324 * May be used to restart queueing when a request has completed.
325 */ 325 */
326 void blk_run_queue(struct request_queue *q) 326 void blk_run_queue(struct request_queue *q)
327 { 327 {
328 unsigned long flags; 328 unsigned long flags;
329 329
330 spin_lock_irqsave(q->queue_lock, flags); 330 spin_lock_irqsave(q->queue_lock, flags);
331 __blk_run_queue(q, false); 331 __blk_run_queue(q, false);
332 spin_unlock_irqrestore(q->queue_lock, flags); 332 spin_unlock_irqrestore(q->queue_lock, flags);
333 } 333 }
334 EXPORT_SYMBOL(blk_run_queue); 334 EXPORT_SYMBOL(blk_run_queue);
335 335
336 void blk_put_queue(struct request_queue *q) 336 void blk_put_queue(struct request_queue *q)
337 { 337 {
338 kobject_put(&q->kobj); 338 kobject_put(&q->kobj);
339 } 339 }
340 340
341 /* 341 /*
342 * Note: If a driver supplied the queue lock, it should not zap that lock 342 * Note: If a driver supplied the queue lock, it should not zap that lock
343 * unexpectedly as some queue cleanup components like elevator_exit() and 343 * unexpectedly as some queue cleanup components like elevator_exit() and
344 * blk_throtl_exit() need queue lock. 344 * blk_throtl_exit() need queue lock.
345 */ 345 */
346 void blk_cleanup_queue(struct request_queue *q) 346 void blk_cleanup_queue(struct request_queue *q)
347 { 347 {
348 /* 348 /*
349 * We know we have process context here, so we can be a little 349 * We know we have process context here, so we can be a little
350 * cautious and ensure that pending block actions on this device 350 * cautious and ensure that pending block actions on this device
351 * are done before moving on. Going into this function, we should 351 * are done before moving on. Going into this function, we should
352 * not have processes doing IO to this device. 352 * not have processes doing IO to this device.
353 */ 353 */
354 blk_sync_queue(q); 354 blk_sync_queue(q);
355 355
356 del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer); 356 del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
357 mutex_lock(&q->sysfs_lock); 357 mutex_lock(&q->sysfs_lock);
358 queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); 358 queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
359 mutex_unlock(&q->sysfs_lock); 359 mutex_unlock(&q->sysfs_lock);
360 360
361 if (q->elevator) 361 if (q->elevator)
362 elevator_exit(q->elevator); 362 elevator_exit(q->elevator);
363 363
364 blk_throtl_exit(q); 364 blk_throtl_exit(q);
365 365
366 blk_put_queue(q); 366 blk_put_queue(q);
367 } 367 }
368 EXPORT_SYMBOL(blk_cleanup_queue); 368 EXPORT_SYMBOL(blk_cleanup_queue);
369 369
370 static int blk_init_free_list(struct request_queue *q) 370 static int blk_init_free_list(struct request_queue *q)
371 { 371 {
372 struct request_list *rl = &q->rq; 372 struct request_list *rl = &q->rq;
373 373
374 if (unlikely(rl->rq_pool)) 374 if (unlikely(rl->rq_pool))
375 return 0; 375 return 0;
376 376
377 rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0; 377 rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
378 rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0; 378 rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
379 rl->elvpriv = 0; 379 rl->elvpriv = 0;
380 init_waitqueue_head(&rl->wait[BLK_RW_SYNC]); 380 init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
381 init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]); 381 init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);
382 382
383 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, 383 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
384 mempool_free_slab, request_cachep, q->node); 384 mempool_free_slab, request_cachep, q->node);
385 385
386 if (!rl->rq_pool) 386 if (!rl->rq_pool)
387 return -ENOMEM; 387 return -ENOMEM;
388 388
389 return 0; 389 return 0;
390 } 390 }
391 391
392 struct request_queue *blk_alloc_queue(gfp_t gfp_mask) 392 struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
393 { 393 {
394 return blk_alloc_queue_node(gfp_mask, -1); 394 return blk_alloc_queue_node(gfp_mask, -1);
395 } 395 }
396 EXPORT_SYMBOL(blk_alloc_queue); 396 EXPORT_SYMBOL(blk_alloc_queue);
397 397
398 struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) 398 struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
399 { 399 {
400 struct request_queue *q; 400 struct request_queue *q;
401 int err; 401 int err;
402 402
403 q = kmem_cache_alloc_node(blk_requestq_cachep, 403 q = kmem_cache_alloc_node(blk_requestq_cachep,
404 gfp_mask | __GFP_ZERO, node_id); 404 gfp_mask | __GFP_ZERO, node_id);
405 if (!q) 405 if (!q)
406 return NULL; 406 return NULL;
407 407
408 q->backing_dev_info.ra_pages = 408 q->backing_dev_info.ra_pages =
409 (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; 409 (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
410 q->backing_dev_info.state = 0; 410 q->backing_dev_info.state = 0;
411 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; 411 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
412 q->backing_dev_info.name = "block"; 412 q->backing_dev_info.name = "block";
413 413
414 err = bdi_init(&q->backing_dev_info); 414 err = bdi_init(&q->backing_dev_info);
415 if (err) { 415 if (err) {
416 kmem_cache_free(blk_requestq_cachep, q); 416 kmem_cache_free(blk_requestq_cachep, q);
417 return NULL; 417 return NULL;
418 } 418 }
419 419
420 if (blk_throtl_init(q)) { 420 if (blk_throtl_init(q)) {
421 kmem_cache_free(blk_requestq_cachep, q); 421 kmem_cache_free(blk_requestq_cachep, q);
422 return NULL; 422 return NULL;
423 } 423 }
424 424
425 setup_timer(&q->backing_dev_info.laptop_mode_wb_timer, 425 setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
426 laptop_mode_timer_fn, (unsigned long) q); 426 laptop_mode_timer_fn, (unsigned long) q);
427 setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); 427 setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
428 INIT_LIST_HEAD(&q->timeout_list); 428 INIT_LIST_HEAD(&q->timeout_list);
429 INIT_LIST_HEAD(&q->flush_queue[0]); 429 INIT_LIST_HEAD(&q->flush_queue[0]);
430 INIT_LIST_HEAD(&q->flush_queue[1]); 430 INIT_LIST_HEAD(&q->flush_queue[1]);
431 INIT_LIST_HEAD(&q->flush_data_in_flight); 431 INIT_LIST_HEAD(&q->flush_data_in_flight);
432 INIT_DELAYED_WORK(&q->delay_work, blk_delay_work); 432 INIT_DELAYED_WORK(&q->delay_work, blk_delay_work);
433 433
434 kobject_init(&q->kobj, &blk_queue_ktype); 434 kobject_init(&q->kobj, &blk_queue_ktype);
435 435
436 mutex_init(&q->sysfs_lock); 436 mutex_init(&q->sysfs_lock);
437 spin_lock_init(&q->__queue_lock); 437 spin_lock_init(&q->__queue_lock);
438 438
439 /* 439 /*
440 * By default initialize queue_lock to internal lock and driver can 440 * By default initialize queue_lock to internal lock and driver can
441 * override it later if need be. 441 * override it later if need be.
442 */ 442 */
443 q->queue_lock = &q->__queue_lock; 443 q->queue_lock = &q->__queue_lock;
444 444
445 return q; 445 return q;
446 } 446 }
447 EXPORT_SYMBOL(blk_alloc_queue_node); 447 EXPORT_SYMBOL(blk_alloc_queue_node);
448 448
449 /** 449 /**
450 * blk_init_queue - prepare a request queue for use with a block device 450 * blk_init_queue - prepare a request queue for use with a block device
451 * @rfn: The function to be called to process requests that have been 451 * @rfn: The function to be called to process requests that have been
452 * placed on the queue. 452 * placed on the queue.
453 * @lock: Request queue spin lock 453 * @lock: Request queue spin lock
454 * 454 *
455 * Description: 455 * Description:
456 * If a block device wishes to use the standard request handling procedures, 456 * If a block device wishes to use the standard request handling procedures,
457 * which sorts requests and coalesces adjacent requests, then it must 457 * which sorts requests and coalesces adjacent requests, then it must
458 * call blk_init_queue(). The function @rfn will be called when there 458 * call blk_init_queue(). The function @rfn will be called when there
459 * are requests on the queue that need to be processed. If the device 459 * are requests on the queue that need to be processed. If the device
460 * supports plugging, then @rfn may not be called immediately when requests 460 * supports plugging, then @rfn may not be called immediately when requests
461 * are available on the queue, but may be called at some time later instead. 461 * are available on the queue, but may be called at some time later instead.
462 * Plugged queues are generally unplugged when a buffer belonging to one 462 * Plugged queues are generally unplugged when a buffer belonging to one
463 * of the requests on the queue is needed, or due to memory pressure. 463 * of the requests on the queue is needed, or due to memory pressure.
464 * 464 *
465 * @rfn is not required, or even expected, to remove all requests off the 465 * @rfn is not required, or even expected, to remove all requests off the
466 * queue, but only as many as it can handle at a time. If it does leave 466 * queue, but only as many as it can handle at a time. If it does leave
467 * requests on the queue, it is responsible for arranging that the requests 467 * requests on the queue, it is responsible for arranging that the requests
468 * get dealt with eventually. 468 * get dealt with eventually.
469 * 469 *
470 * The queue spin lock must be held while manipulating the requests on the 470 * The queue spin lock must be held while manipulating the requests on the
471 * request queue; this lock will be taken also from interrupt context, so irq 471 * request queue; this lock will be taken also from interrupt context, so irq
472 * disabling is needed for it. 472 * disabling is needed for it.
473 * 473 *
474 * Function returns a pointer to the initialized request queue, or %NULL if 474 * Function returns a pointer to the initialized request queue, or %NULL if
475 * it didn't succeed. 475 * it didn't succeed.
476 * 476 *
477 * Note: 477 * Note:
478 * blk_init_queue() must be paired with a blk_cleanup_queue() call 478 * blk_init_queue() must be paired with a blk_cleanup_queue() call
479 * when the block device is deactivated (such as at module unload). 479 * when the block device is deactivated (such as at module unload).
480 **/ 480 **/
481 481
482 struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) 482 struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
483 { 483 {
484 return blk_init_queue_node(rfn, lock, -1); 484 return blk_init_queue_node(rfn, lock, -1);
485 } 485 }
486 EXPORT_SYMBOL(blk_init_queue); 486 EXPORT_SYMBOL(blk_init_queue);
487 487
488 struct request_queue * 488 struct request_queue *
489 blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) 489 blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
490 { 490 {
491 struct request_queue *uninit_q, *q; 491 struct request_queue *uninit_q, *q;
492 492
493 uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id); 493 uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id);
494 if (!uninit_q) 494 if (!uninit_q)
495 return NULL; 495 return NULL;
496 496
497 q = blk_init_allocated_queue_node(uninit_q, rfn, lock, node_id); 497 q = blk_init_allocated_queue_node(uninit_q, rfn, lock, node_id);
498 if (!q) 498 if (!q)
499 blk_cleanup_queue(uninit_q); 499 blk_cleanup_queue(uninit_q);
500 500
501 return q; 501 return q;
502 } 502 }
503 EXPORT_SYMBOL(blk_init_queue_node); 503 EXPORT_SYMBOL(blk_init_queue_node);
504 504
505 struct request_queue * 505 struct request_queue *
506 blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, 506 blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
507 spinlock_t *lock) 507 spinlock_t *lock)
508 { 508 {
509 return blk_init_allocated_queue_node(q, rfn, lock, -1); 509 return blk_init_allocated_queue_node(q, rfn, lock, -1);
510 } 510 }
511 EXPORT_SYMBOL(blk_init_allocated_queue); 511 EXPORT_SYMBOL(blk_init_allocated_queue);
512 512
513 struct request_queue * 513 struct request_queue *
514 blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn, 514 blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
515 spinlock_t *lock, int node_id) 515 spinlock_t *lock, int node_id)
516 { 516 {
517 if (!q) 517 if (!q)
518 return NULL; 518 return NULL;
519 519
520 q->node = node_id; 520 q->node = node_id;
521 if (blk_init_free_list(q)) 521 if (blk_init_free_list(q))
522 return NULL; 522 return NULL;
523 523
524 q->request_fn = rfn; 524 q->request_fn = rfn;
525 q->prep_rq_fn = NULL; 525 q->prep_rq_fn = NULL;
526 q->unprep_rq_fn = NULL; 526 q->unprep_rq_fn = NULL;
527 q->queue_flags = QUEUE_FLAG_DEFAULT; 527 q->queue_flags = QUEUE_FLAG_DEFAULT;
528 528
529 /* Override internal queue lock with supplied lock pointer */ 529 /* Override internal queue lock with supplied lock pointer */
530 if (lock) 530 if (lock)
531 q->queue_lock = lock; 531 q->queue_lock = lock;
532 532
533 /* 533 /*
534 * This also sets hw/phys segments, boundary and size 534 * This also sets hw/phys segments, boundary and size
535 */ 535 */
536 blk_queue_make_request(q, __make_request); 536 blk_queue_make_request(q, __make_request);
537 537
538 q->sg_reserved_size = INT_MAX; 538 q->sg_reserved_size = INT_MAX;
539 539
540 /* 540 /*
541 * all done 541 * all done
542 */ 542 */
543 if (!elevator_init(q, NULL)) { 543 if (!elevator_init(q, NULL)) {
544 blk_queue_congestion_threshold(q); 544 blk_queue_congestion_threshold(q);
545 return q; 545 return q;
546 } 546 }
547 547
548 return NULL; 548 return NULL;
549 } 549 }
550 EXPORT_SYMBOL(blk_init_allocated_queue_node); 550 EXPORT_SYMBOL(blk_init_allocated_queue_node);
551 551
552 int blk_get_queue(struct request_queue *q) 552 int blk_get_queue(struct request_queue *q)
553 { 553 {
554 if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { 554 if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
555 kobject_get(&q->kobj); 555 kobject_get(&q->kobj);
556 return 0; 556 return 0;
557 } 557 }
558 558
559 return 1; 559 return 1;
560 } 560 }
561 561
562 static inline void blk_free_request(struct request_queue *q, struct request *rq) 562 static inline void blk_free_request(struct request_queue *q, struct request *rq)
563 { 563 {
564 BUG_ON(rq->cmd_flags & REQ_ON_PLUG); 564 BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
565 565
566 if (rq->cmd_flags & REQ_ELVPRIV) 566 if (rq->cmd_flags & REQ_ELVPRIV)
567 elv_put_request(q, rq); 567 elv_put_request(q, rq);
568 mempool_free(rq, q->rq.rq_pool); 568 mempool_free(rq, q->rq.rq_pool);
569 } 569 }
570 570
571 static struct request * 571 static struct request *
572 blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask) 572 blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask)
573 { 573 {
574 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); 574 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
575 575
576 if (!rq) 576 if (!rq)
577 return NULL; 577 return NULL;
578 578
579 blk_rq_init(q, rq); 579 blk_rq_init(q, rq);
580 580
581 rq->cmd_flags = flags | REQ_ALLOCED; 581 rq->cmd_flags = flags | REQ_ALLOCED;
582 582
583 if (priv) { 583 if (priv) {
584 if (unlikely(elv_set_request(q, rq, gfp_mask))) { 584 if (unlikely(elv_set_request(q, rq, gfp_mask))) {
585 mempool_free(rq, q->rq.rq_pool); 585 mempool_free(rq, q->rq.rq_pool);
586 return NULL; 586 return NULL;
587 } 587 }
588 rq->cmd_flags |= REQ_ELVPRIV; 588 rq->cmd_flags |= REQ_ELVPRIV;
589 } 589 }
590 590
591 return rq; 591 return rq;
592 } 592 }
593 593
594 /* 594 /*
595 * ioc_batching returns true if the ioc is a valid batching request and 595 * ioc_batching returns true if the ioc is a valid batching request and
596 * should be given priority access to a request. 596 * should be given priority access to a request.
597 */ 597 */
598 static inline int ioc_batching(struct request_queue *q, struct io_context *ioc) 598 static inline int ioc_batching(struct request_queue *q, struct io_context *ioc)
599 { 599 {
600 if (!ioc) 600 if (!ioc)
601 return 0; 601 return 0;
602 602
603 /* 603 /*
604 * Make sure the process is able to allocate at least 1 request 604 * Make sure the process is able to allocate at least 1 request
605 * even if the batch times out, otherwise we could theoretically 605 * even if the batch times out, otherwise we could theoretically
606 * lose wakeups. 606 * lose wakeups.
607 */ 607 */
608 return ioc->nr_batch_requests == q->nr_batching || 608 return ioc->nr_batch_requests == q->nr_batching ||
609 (ioc->nr_batch_requests > 0 609 (ioc->nr_batch_requests > 0
610 && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME)); 610 && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
611 } 611 }
612 612
613 /* 613 /*
614 * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This 614 * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This
615 * will cause the process to be a "batcher" on all queues in the system. This 615 * will cause the process to be a "batcher" on all queues in the system. This
616 * is the behaviour we want though - once it gets a wakeup it should be given 616 * is the behaviour we want though - once it gets a wakeup it should be given
617 * a nice run. 617 * a nice run.
618 */ 618 */
619 static void ioc_set_batching(struct request_queue *q, struct io_context *ioc) 619 static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
620 { 620 {
621 if (!ioc || ioc_batching(q, ioc)) 621 if (!ioc || ioc_batching(q, ioc))
622 return; 622 return;
623 623
624 ioc->nr_batch_requests = q->nr_batching; 624 ioc->nr_batch_requests = q->nr_batching;
625 ioc->last_waited = jiffies; 625 ioc->last_waited = jiffies;
626 } 626 }
627 627
628 static void __freed_request(struct request_queue *q, int sync) 628 static void __freed_request(struct request_queue *q, int sync)
629 { 629 {
630 struct request_list *rl = &q->rq; 630 struct request_list *rl = &q->rq;
631 631
632 if (rl->count[sync] < queue_congestion_off_threshold(q)) 632 if (rl->count[sync] < queue_congestion_off_threshold(q))
633 blk_clear_queue_congested(q, sync); 633 blk_clear_queue_congested(q, sync);
634 634
635 if (rl->count[sync] + 1 <= q->nr_requests) { 635 if (rl->count[sync] + 1 <= q->nr_requests) {
636 if (waitqueue_active(&rl->wait[sync])) 636 if (waitqueue_active(&rl->wait[sync]))
637 wake_up(&rl->wait[sync]); 637 wake_up(&rl->wait[sync]);
638 638
639 blk_clear_queue_full(q, sync); 639 blk_clear_queue_full(q, sync);
640 } 640 }
641 } 641 }
642 642
643 /* 643 /*
644 * A request has just been released. Account for it, update the full and 644 * A request has just been released. Account for it, update the full and
645 * congestion status, wake up any waiters. Called under q->queue_lock. 645 * congestion status, wake up any waiters. Called under q->queue_lock.
646 */ 646 */
647 static void freed_request(struct request_queue *q, int sync, int priv) 647 static void freed_request(struct request_queue *q, int sync, int priv)
648 { 648 {
649 struct request_list *rl = &q->rq; 649 struct request_list *rl = &q->rq;
650 650
651 rl->count[sync]--; 651 rl->count[sync]--;
652 if (priv) 652 if (priv)
653 rl->elvpriv--; 653 rl->elvpriv--;
654 654
655 __freed_request(q, sync); 655 __freed_request(q, sync);
656 656
657 if (unlikely(rl->starved[sync ^ 1])) 657 if (unlikely(rl->starved[sync ^ 1]))
658 __freed_request(q, sync ^ 1); 658 __freed_request(q, sync ^ 1);
659 } 659 }
660 660
661 /* 661 /*
662 * Determine if elevator data should be initialized when allocating the 662 * Determine if elevator data should be initialized when allocating the
663 * request associated with @bio. 663 * request associated with @bio.
664 */ 664 */
665 static bool blk_rq_should_init_elevator(struct bio *bio) 665 static bool blk_rq_should_init_elevator(struct bio *bio)
666 { 666 {
667 if (!bio) 667 if (!bio)
668 return true; 668 return true;
669 669
670 /* 670 /*
671 * Flush requests do not use the elevator so skip initialization. 671 * Flush requests do not use the elevator so skip initialization.
672 * This allows a request to share the flush and elevator data. 672 * This allows a request to share the flush and elevator data.
673 */ 673 */
674 if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) 674 if (bio->bi_rw & (REQ_FLUSH | REQ_FUA))
675 return false; 675 return false;
676 676
677 return true; 677 return true;
678 } 678 }
679 679
680 /* 680 /*
681 * Get a free request, queue_lock must be held. 681 * Get a free request, queue_lock must be held.
682 * Returns NULL on failure, with queue_lock held. 682 * Returns NULL on failure, with queue_lock held.
683 * Returns !NULL on success, with queue_lock *not held*. 683 * Returns !NULL on success, with queue_lock *not held*.
684 */ 684 */
685 static struct request *get_request(struct request_queue *q, int rw_flags, 685 static struct request *get_request(struct request_queue *q, int rw_flags,
686 struct bio *bio, gfp_t gfp_mask) 686 struct bio *bio, gfp_t gfp_mask)
687 { 687 {
688 struct request *rq = NULL; 688 struct request *rq = NULL;
689 struct request_list *rl = &q->rq; 689 struct request_list *rl = &q->rq;
690 struct io_context *ioc = NULL; 690 struct io_context *ioc = NULL;
691 const bool is_sync = rw_is_sync(rw_flags) != 0; 691 const bool is_sync = rw_is_sync(rw_flags) != 0;
692 int may_queue, priv = 0; 692 int may_queue, priv = 0;
693 693
694 may_queue = elv_may_queue(q, rw_flags); 694 may_queue = elv_may_queue(q, rw_flags);
695 if (may_queue == ELV_MQUEUE_NO) 695 if (may_queue == ELV_MQUEUE_NO)
696 goto rq_starved; 696 goto rq_starved;
697 697
698 if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) { 698 if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
699 if (rl->count[is_sync]+1 >= q->nr_requests) { 699 if (rl->count[is_sync]+1 >= q->nr_requests) {
700 ioc = current_io_context(GFP_ATOMIC, q->node); 700 ioc = current_io_context(GFP_ATOMIC, q->node);
701 /* 701 /*
702 * The queue will fill after this allocation, so set 702 * The queue will fill after this allocation, so set
703 * it as full, and mark this process as "batching". 703 * it as full, and mark this process as "batching".
704 * This process will be allowed to complete a batch of 704 * This process will be allowed to complete a batch of
705 * requests, others will be blocked. 705 * requests, others will be blocked.
706 */ 706 */
707 if (!blk_queue_full(q, is_sync)) { 707 if (!blk_queue_full(q, is_sync)) {
708 ioc_set_batching(q, ioc); 708 ioc_set_batching(q, ioc);
709 blk_set_queue_full(q, is_sync); 709 blk_set_queue_full(q, is_sync);
710 } else { 710 } else {
711 if (may_queue != ELV_MQUEUE_MUST 711 if (may_queue != ELV_MQUEUE_MUST
712 && !ioc_batching(q, ioc)) { 712 && !ioc_batching(q, ioc)) {
713 /* 713 /*
714 * The queue is full and the allocating 714 * The queue is full and the allocating
715 * process is not a "batcher", and not 715 * process is not a "batcher", and not
716 * exempted by the IO scheduler 716 * exempted by the IO scheduler
717 */ 717 */
718 goto out; 718 goto out;
719 } 719 }
720 } 720 }
721 } 721 }
722 blk_set_queue_congested(q, is_sync); 722 blk_set_queue_congested(q, is_sync);
723 } 723 }
724 724
725 /* 725 /*
726 * Only allow batching queuers to allocate up to 50% over the defined 726 * Only allow batching queuers to allocate up to 50% over the defined
727 * limit of requests, otherwise we could have thousands of requests 727 * limit of requests, otherwise we could have thousands of requests
728 * allocated with any setting of ->nr_requests 728 * allocated with any setting of ->nr_requests
729 */ 729 */
730 if (rl->count[is_sync] >= (3 * q->nr_requests / 2)) 730 if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
731 goto out; 731 goto out;
732 732
733 rl->count[is_sync]++; 733 rl->count[is_sync]++;
734 rl->starved[is_sync] = 0; 734 rl->starved[is_sync] = 0;
735 735
736 if (blk_rq_should_init_elevator(bio)) { 736 if (blk_rq_should_init_elevator(bio)) {
737 priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); 737 priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
738 if (priv) 738 if (priv)
739 rl->elvpriv++; 739 rl->elvpriv++;
740 } 740 }
741 741
742 if (blk_queue_io_stat(q)) 742 if (blk_queue_io_stat(q))
743 rw_flags |= REQ_IO_STAT; 743 rw_flags |= REQ_IO_STAT;
744 spin_unlock_irq(q->queue_lock); 744 spin_unlock_irq(q->queue_lock);
745 745
746 rq = blk_alloc_request(q, rw_flags, priv, gfp_mask); 746 rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);
747 if (unlikely(!rq)) { 747 if (unlikely(!rq)) {
748 /* 748 /*
749 * Allocation failed presumably due to memory. Undo anything 749 * Allocation failed presumably due to memory. Undo anything
750 * we might have messed up. 750 * we might have messed up.
751 * 751 *
752 * Allocating task should really be put onto the front of the 752 * Allocating task should really be put onto the front of the
753 * wait queue, but this is pretty rare. 753 * wait queue, but this is pretty rare.
754 */ 754 */
755 spin_lock_irq(q->queue_lock); 755 spin_lock_irq(q->queue_lock);
756 freed_request(q, is_sync, priv); 756 freed_request(q, is_sync, priv);
757 757
758 /* 758 /*
759 * in the very unlikely event that allocation failed and no 759 * in the very unlikely event that allocation failed and no
760 * requests for this direction was pending, mark us starved 760 * requests for this direction was pending, mark us starved
761 * so that freeing of a request in the other direction will 761 * so that freeing of a request in the other direction will
762 * notice us. another possible fix would be to split the 762 * notice us. another possible fix would be to split the
763 * rq mempool into READ and WRITE 763 * rq mempool into READ and WRITE
764 */ 764 */
765 rq_starved: 765 rq_starved:
766 if (unlikely(rl->count[is_sync] == 0)) 766 if (unlikely(rl->count[is_sync] == 0))
767 rl->starved[is_sync] = 1; 767 rl->starved[is_sync] = 1;
768 768
769 goto out; 769 goto out;
770 } 770 }
771 771
772 /* 772 /*
773 * ioc may be NULL here, and ioc_batching will be false. That's 773 * ioc may be NULL here, and ioc_batching will be false. That's
774 * OK, if the queue is under the request limit then requests need 774 * OK, if the queue is under the request limit then requests need
775 * not count toward the nr_batch_requests limit. There will always 775 * not count toward the nr_batch_requests limit. There will always
776 * be some limit enforced by BLK_BATCH_TIME. 776 * be some limit enforced by BLK_BATCH_TIME.
777 */ 777 */
778 if (ioc_batching(q, ioc)) 778 if (ioc_batching(q, ioc))
779 ioc->nr_batch_requests--; 779 ioc->nr_batch_requests--;
780 780
781 trace_block_getrq(q, bio, rw_flags & 1); 781 trace_block_getrq(q, bio, rw_flags & 1);
782 out: 782 out:
783 return rq; 783 return rq;
784 } 784 }
785 785
786 /* 786 /*
787 * No available requests for this queue, wait for some requests to become 787 * No available requests for this queue, wait for some requests to become
788 * available. 788 * available.
789 * 789 *
790 * Called with q->queue_lock held, and returns with it unlocked. 790 * Called with q->queue_lock held, and returns with it unlocked.
791 */ 791 */
792 static struct request *get_request_wait(struct request_queue *q, int rw_flags, 792 static struct request *get_request_wait(struct request_queue *q, int rw_flags,
793 struct bio *bio) 793 struct bio *bio)
794 { 794 {
795 const bool is_sync = rw_is_sync(rw_flags) != 0; 795 const bool is_sync = rw_is_sync(rw_flags) != 0;
796 struct request *rq; 796 struct request *rq;
797 797
798 rq = get_request(q, rw_flags, bio, GFP_NOIO); 798 rq = get_request(q, rw_flags, bio, GFP_NOIO);
799 while (!rq) { 799 while (!rq) {
800 DEFINE_WAIT(wait); 800 DEFINE_WAIT(wait);
801 struct io_context *ioc; 801 struct io_context *ioc;
802 struct request_list *rl = &q->rq; 802 struct request_list *rl = &q->rq;
803 803
804 prepare_to_wait_exclusive(&rl->wait[is_sync], &wait, 804 prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
805 TASK_UNINTERRUPTIBLE); 805 TASK_UNINTERRUPTIBLE);
806 806
807 trace_block_sleeprq(q, bio, rw_flags & 1); 807 trace_block_sleeprq(q, bio, rw_flags & 1);
808 808
809 spin_unlock_irq(q->queue_lock); 809 spin_unlock_irq(q->queue_lock);
810 io_schedule(); 810 io_schedule();
811 811
812 /* 812 /*
813 * After sleeping, we become a "batching" process and 813 * After sleeping, we become a "batching" process and
814 * will be able to allocate at least one request, and 814 * will be able to allocate at least one request, and
815 * up to a big batch of them for a small period time. 815 * up to a big batch of them for a small period time.
816 * See ioc_batching, ioc_set_batching 816 * See ioc_batching, ioc_set_batching
817 */ 817 */
818 ioc = current_io_context(GFP_NOIO, q->node); 818 ioc = current_io_context(GFP_NOIO, q->node);
819 ioc_set_batching(q, ioc); 819 ioc_set_batching(q, ioc);
820 820
821 spin_lock_irq(q->queue_lock); 821 spin_lock_irq(q->queue_lock);
822 finish_wait(&rl->wait[is_sync], &wait); 822 finish_wait(&rl->wait[is_sync], &wait);
823 823
824 rq = get_request(q, rw_flags, bio, GFP_NOIO); 824 rq = get_request(q, rw_flags, bio, GFP_NOIO);
825 }; 825 };
826 826
827 return rq; 827 return rq;
828 } 828 }
829 829
830 struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) 830 struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
831 { 831 {
832 struct request *rq; 832 struct request *rq;
833 833
834 BUG_ON(rw != READ && rw != WRITE); 834 BUG_ON(rw != READ && rw != WRITE);
835 835
836 spin_lock_irq(q->queue_lock); 836 spin_lock_irq(q->queue_lock);
837 if (gfp_mask & __GFP_WAIT) { 837 if (gfp_mask & __GFP_WAIT) {
838 rq = get_request_wait(q, rw, NULL); 838 rq = get_request_wait(q, rw, NULL);
839 } else { 839 } else {
840 rq = get_request(q, rw, NULL, gfp_mask); 840 rq = get_request(q, rw, NULL, gfp_mask);
841 if (!rq) 841 if (!rq)
842 spin_unlock_irq(q->queue_lock); 842 spin_unlock_irq(q->queue_lock);
843 } 843 }
844 /* q->queue_lock is unlocked at this point */ 844 /* q->queue_lock is unlocked at this point */
845 845
846 return rq; 846 return rq;
847 } 847 }
848 EXPORT_SYMBOL(blk_get_request); 848 EXPORT_SYMBOL(blk_get_request);
849 849
850 /** 850 /**
851 * blk_make_request - given a bio, allocate a corresponding struct request. 851 * blk_make_request - given a bio, allocate a corresponding struct request.
852 * @q: target request queue 852 * @q: target request queue
853 * @bio: The bio describing the memory mappings that will be submitted for IO. 853 * @bio: The bio describing the memory mappings that will be submitted for IO.
854 * It may be a chained-bio properly constructed by block/bio layer. 854 * It may be a chained-bio properly constructed by block/bio layer.
855 * @gfp_mask: gfp flags to be used for memory allocation 855 * @gfp_mask: gfp flags to be used for memory allocation
856 * 856 *
857 * blk_make_request is the parallel of generic_make_request for BLOCK_PC 857 * blk_make_request is the parallel of generic_make_request for BLOCK_PC
858 * type commands. Where the struct request needs to be farther initialized by 858 * type commands. Where the struct request needs to be farther initialized by
859 * the caller. It is passed a &struct bio, which describes the memory info of 859 * the caller. It is passed a &struct bio, which describes the memory info of
860 * the I/O transfer. 860 * the I/O transfer.
861 * 861 *
862 * The caller of blk_make_request must make sure that bi_io_vec 862 * The caller of blk_make_request must make sure that bi_io_vec
863 * are set to describe the memory buffers. That bio_data_dir() will return 863 * are set to describe the memory buffers. That bio_data_dir() will return
864 * the needed direction of the request. (And all bio's in the passed bio-chain 864 * the needed direction of the request. (And all bio's in the passed bio-chain
865 * are properly set accordingly) 865 * are properly set accordingly)
866 * 866 *
867 * If called under none-sleepable conditions, mapped bio buffers must not 867 * If called under none-sleepable conditions, mapped bio buffers must not
868 * need bouncing, by calling the appropriate masked or flagged allocator, 868 * need bouncing, by calling the appropriate masked or flagged allocator,
869 * suitable for the target device. Otherwise the call to blk_queue_bounce will 869 * suitable for the target device. Otherwise the call to blk_queue_bounce will
870 * BUG. 870 * BUG.
871 * 871 *
872 * WARNING: When allocating/cloning a bio-chain, careful consideration should be 872 * WARNING: When allocating/cloning a bio-chain, careful consideration should be
873 * given to how you allocate bios. In particular, you cannot use __GFP_WAIT for 873 * given to how you allocate bios. In particular, you cannot use __GFP_WAIT for
874 * anything but the first bio in the chain. Otherwise you risk waiting for IO 874 * anything but the first bio in the chain. Otherwise you risk waiting for IO
875 * completion of a bio that hasn't been submitted yet, thus resulting in a 875 * completion of a bio that hasn't been submitted yet, thus resulting in a
876 * deadlock. Alternatively bios should be allocated using bio_kmalloc() instead 876 * deadlock. Alternatively bios should be allocated using bio_kmalloc() instead
877 * of bio_alloc(), as that avoids the mempool deadlock. 877 * of bio_alloc(), as that avoids the mempool deadlock.
878 * If possible a big IO should be split into smaller parts when allocation 878 * If possible a big IO should be split into smaller parts when allocation
879 * fails. Partial allocation should not be an error, or you risk a live-lock. 879 * fails. Partial allocation should not be an error, or you risk a live-lock.
880 */ 880 */
881 struct request *blk_make_request(struct request_queue *q, struct bio *bio, 881 struct request *blk_make_request(struct request_queue *q, struct bio *bio,
882 gfp_t gfp_mask) 882 gfp_t gfp_mask)
883 { 883 {
884 struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask); 884 struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask);
885 885
886 if (unlikely(!rq)) 886 if (unlikely(!rq))
887 return ERR_PTR(-ENOMEM); 887 return ERR_PTR(-ENOMEM);
888 888
889 for_each_bio(bio) { 889 for_each_bio(bio) {
890 struct bio *bounce_bio = bio; 890 struct bio *bounce_bio = bio;
891 int ret; 891 int ret;
892 892
893 blk_queue_bounce(q, &bounce_bio); 893 blk_queue_bounce(q, &bounce_bio);
894 ret = blk_rq_append_bio(q, rq, bounce_bio); 894 ret = blk_rq_append_bio(q, rq, bounce_bio);
895 if (unlikely(ret)) { 895 if (unlikely(ret)) {
896 blk_put_request(rq); 896 blk_put_request(rq);
897 return ERR_PTR(ret); 897 return ERR_PTR(ret);
898 } 898 }
899 } 899 }
900 900
901 return rq; 901 return rq;
902 } 902 }
903 EXPORT_SYMBOL(blk_make_request); 903 EXPORT_SYMBOL(blk_make_request);
904 904
905 /** 905 /**
906 * blk_requeue_request - put a request back on queue 906 * blk_requeue_request - put a request back on queue
907 * @q: request queue where request should be inserted 907 * @q: request queue where request should be inserted
908 * @rq: request to be inserted 908 * @rq: request to be inserted
909 * 909 *
910 * Description: 910 * Description:
911 * Drivers often keep queueing requests until the hardware cannot accept 911 * Drivers often keep queueing requests until the hardware cannot accept
912 * more, when that condition happens we need to put the request back 912 * more, when that condition happens we need to put the request back
913 * on the queue. Must be called with queue lock held. 913 * on the queue. Must be called with queue lock held.
914 */ 914 */
915 void blk_requeue_request(struct request_queue *q, struct request *rq) 915 void blk_requeue_request(struct request_queue *q, struct request *rq)
916 { 916 {
917 blk_delete_timer(rq); 917 blk_delete_timer(rq);
918 blk_clear_rq_complete(rq); 918 blk_clear_rq_complete(rq);
919 trace_block_rq_requeue(q, rq); 919 trace_block_rq_requeue(q, rq);
920 920
921 if (blk_rq_tagged(rq)) 921 if (blk_rq_tagged(rq))
922 blk_queue_end_tag(q, rq); 922 blk_queue_end_tag(q, rq);
923 923
924 BUG_ON(blk_queued_rq(rq)); 924 BUG_ON(blk_queued_rq(rq));
925 925
926 elv_requeue_request(q, rq); 926 elv_requeue_request(q, rq);
927 } 927 }
928 EXPORT_SYMBOL(blk_requeue_request); 928 EXPORT_SYMBOL(blk_requeue_request);
929 929
930 static void add_acct_request(struct request_queue *q, struct request *rq, 930 static void add_acct_request(struct request_queue *q, struct request *rq,
931 int where) 931 int where)
932 { 932 {
933 drive_stat_acct(rq, 1); 933 drive_stat_acct(rq, 1);
934 __elv_add_request(q, rq, where); 934 __elv_add_request(q, rq, where);
935 } 935 }
936 936
937 /** 937 /**
938 * blk_insert_request - insert a special request into a request queue 938 * blk_insert_request - insert a special request into a request queue
939 * @q: request queue where request should be inserted 939 * @q: request queue where request should be inserted
940 * @rq: request to be inserted 940 * @rq: request to be inserted
941 * @at_head: insert request at head or tail of queue 941 * @at_head: insert request at head or tail of queue
942 * @data: private data 942 * @data: private data
943 * 943 *
944 * Description: 944 * Description:
945 * Many block devices need to execute commands asynchronously, so they don't 945 * Many block devices need to execute commands asynchronously, so they don't
946 * block the whole kernel from preemption during request execution. This is 946 * block the whole kernel from preemption during request execution. This is
947 * accomplished normally by inserting artificial requests tagged as 947 * accomplished normally by inserting artificial requests tagged as
948 * REQ_TYPE_SPECIAL into the corresponding request queue, and letting them 948 * REQ_TYPE_SPECIAL into the corresponding request queue, and letting them
949 * be scheduled for actual execution by the request queue. 949 * be scheduled for actual execution by the request queue.
950 * 950 *
951 * We have the option of inserting at the head or the tail of the queue. 951 * We have the option of inserting at the head or the tail of the queue.
952 * Typically we use the tail for new ioctls and so forth. We use the head 952 * Typically we use the tail for new ioctls and so forth. We use the head
953 * of the queue for things like a QUEUE_FULL message from a device, or a 953 * of the queue for things like a QUEUE_FULL message from a device, or a
954 * host that is unable to accept a particular command. 954 * host that is unable to accept a particular command.
955 */ 955 */
956 void blk_insert_request(struct request_queue *q, struct request *rq, 956 void blk_insert_request(struct request_queue *q, struct request *rq,
957 int at_head, void *data) 957 int at_head, void *data)
958 { 958 {
959 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; 959 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
960 unsigned long flags; 960 unsigned long flags;
961 961
962 /* 962 /*
963 * tell I/O scheduler that this isn't a regular read/write (ie it 963 * tell I/O scheduler that this isn't a regular read/write (ie it
964 * must not attempt merges on this) and that it acts as a soft 964 * must not attempt merges on this) and that it acts as a soft
965 * barrier 965 * barrier
966 */ 966 */
967 rq->cmd_type = REQ_TYPE_SPECIAL; 967 rq->cmd_type = REQ_TYPE_SPECIAL;
968 968
969 rq->special = data; 969 rq->special = data;
970 970
971 spin_lock_irqsave(q->queue_lock, flags); 971 spin_lock_irqsave(q->queue_lock, flags);
972 972
973 /* 973 /*
974 * If command is tagged, release the tag 974 * If command is tagged, release the tag
975 */ 975 */
976 if (blk_rq_tagged(rq)) 976 if (blk_rq_tagged(rq))
977 blk_queue_end_tag(q, rq); 977 blk_queue_end_tag(q, rq);
978 978
979 add_acct_request(q, rq, where); 979 add_acct_request(q, rq, where);
980 __blk_run_queue(q, false); 980 __blk_run_queue(q, false);
981 spin_unlock_irqrestore(q->queue_lock, flags); 981 spin_unlock_irqrestore(q->queue_lock, flags);
982 } 982 }
983 EXPORT_SYMBOL(blk_insert_request); 983 EXPORT_SYMBOL(blk_insert_request);
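/*
 * Illustrative sketch, not part of this file: a hypothetical driver queueing
 * a private, out-of-band command with blk_insert_request().  The request
 * carries only driver data in ->special; at_head = 1 makes it run before the
 * normal read/write stream, as the comment above describes.
 */
static int example_queue_special_command(struct request_queue *q, void *cmd)
{
	struct request *rq;

	rq = blk_get_request(q, READ, GFP_KERNEL);
	if (!rq)
		return -ENOMEM;

	blk_insert_request(q, rq, 1, cmd);
	return 0;
}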
984 984
985 static void part_round_stats_single(int cpu, struct hd_struct *part, 985 static void part_round_stats_single(int cpu, struct hd_struct *part,
986 unsigned long now) 986 unsigned long now)
987 { 987 {
988 if (now == part->stamp) 988 if (now == part->stamp)
989 return; 989 return;
990 990
991 if (part_in_flight(part)) { 991 if (part_in_flight(part)) {
992 __part_stat_add(cpu, part, time_in_queue, 992 __part_stat_add(cpu, part, time_in_queue,
993 part_in_flight(part) * (now - part->stamp)); 993 part_in_flight(part) * (now - part->stamp));
994 __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); 994 __part_stat_add(cpu, part, io_ticks, (now - part->stamp));
995 } 995 }
996 part->stamp = now; 996 part->stamp = now;
997 } 997 }
998 998
999 /** 999 /**
1000 * part_round_stats() - Round off the performance stats on a struct disk_stats. 1000 * part_round_stats() - Round off the performance stats on a struct disk_stats.
1001 * @cpu: cpu number for stats access 1001 * @cpu: cpu number for stats access
1002 * @part: target partition 1002 * @part: target partition
1003 * 1003 *
1004 * The average IO queue length and utilisation statistics are maintained 1004 * The average IO queue length and utilisation statistics are maintained
1005 * by observing the current state of the queue length and the amount of 1005 * by observing the current state of the queue length and the amount of
1006 * time it has been in this state. 1006 * time it has been in this state.
1007 * 1007 *
1008 * Normally, that accounting is done on IO completion, but that can result 1008 * Normally, that accounting is done on IO completion, but that can result
1009 * in more than a second's worth of IO being accounted for within any one 1009 * in more than a second's worth of IO being accounted for within any one
1010 * second, leading to >100% utilisation. To deal with that, we call this 1010 * second, leading to >100% utilisation. To deal with that, we call this
1011 * function to do a round-off before returning the results when reading 1011 * function to do a round-off before returning the results when reading
1012 * /proc/diskstats. This accounts immediately for all queue usage up to 1012 * /proc/diskstats. This accounts immediately for all queue usage up to
1013 * the current jiffies and restarts the counters again. 1013 * the current jiffies and restarts the counters again.
1014 */ 1014 */
1015 void part_round_stats(int cpu, struct hd_struct *part) 1015 void part_round_stats(int cpu, struct hd_struct *part)
1016 { 1016 {
1017 unsigned long now = jiffies; 1017 unsigned long now = jiffies;
1018 1018
1019 if (part->partno) 1019 if (part->partno)
1020 part_round_stats_single(cpu, &part_to_disk(part)->part0, now); 1020 part_round_stats_single(cpu, &part_to_disk(part)->part0, now);
1021 part_round_stats_single(cpu, part, now); 1021 part_round_stats_single(cpu, part, now);
1022 } 1022 }
1023 EXPORT_SYMBOL_GPL(part_round_stats); 1023 EXPORT_SYMBOL_GPL(part_round_stats);
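/*
 * Illustrative sketch, not part of this file: the calling convention for
 * part_round_stats().  The per-cpu stats must be accessed under
 * part_stat_lock(), exactly as the accounting helpers later in this file do.
 */
static void example_flush_part_stats(struct hd_struct *part)
{
	int cpu = part_stat_lock();

	part_round_stats(cpu, part);
	part_stat_unlock();
}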
1024 1024
1025 /* 1025 /*
1026 * queue lock must be held 1026 * queue lock must be held
1027 */ 1027 */
1028 void __blk_put_request(struct request_queue *q, struct request *req) 1028 void __blk_put_request(struct request_queue *q, struct request *req)
1029 { 1029 {
1030 if (unlikely(!q)) 1030 if (unlikely(!q))
1031 return; 1031 return;
1032 if (unlikely(--req->ref_count)) 1032 if (unlikely(--req->ref_count))
1033 return; 1033 return;
1034 1034
1035 elv_completed_request(q, req); 1035 elv_completed_request(q, req);
1036 1036
1037 /* this is a bio leak */ 1037 /* this is a bio leak */
1038 WARN_ON(req->bio != NULL); 1038 WARN_ON(req->bio != NULL);
1039 1039
1040 /* 1040 /*
1041 * Request may not have originated from ll_rw_blk. If not, 1041 * Request may not have originated from ll_rw_blk. If not,
1042 * it didn't come out of our reserved rq pools 1042 * it didn't come out of our reserved rq pools
1043 */ 1043 */
1044 if (req->cmd_flags & REQ_ALLOCED) { 1044 if (req->cmd_flags & REQ_ALLOCED) {
1045 int is_sync = rq_is_sync(req) != 0; 1045 int is_sync = rq_is_sync(req) != 0;
1046 int priv = req->cmd_flags & REQ_ELVPRIV; 1046 int priv = req->cmd_flags & REQ_ELVPRIV;
1047 1047
1048 BUG_ON(!list_empty(&req->queuelist)); 1048 BUG_ON(!list_empty(&req->queuelist));
1049 BUG_ON(!hlist_unhashed(&req->hash)); 1049 BUG_ON(!hlist_unhashed(&req->hash));
1050 1050
1051 blk_free_request(q, req); 1051 blk_free_request(q, req);
1052 freed_request(q, is_sync, priv); 1052 freed_request(q, is_sync, priv);
1053 } 1053 }
1054 } 1054 }
1055 EXPORT_SYMBOL_GPL(__blk_put_request); 1055 EXPORT_SYMBOL_GPL(__blk_put_request);
1056 1056
1057 void blk_put_request(struct request *req) 1057 void blk_put_request(struct request *req)
1058 { 1058 {
1059 unsigned long flags; 1059 unsigned long flags;
1060 struct request_queue *q = req->q; 1060 struct request_queue *q = req->q;
1061 1061
1062 spin_lock_irqsave(q->queue_lock, flags); 1062 spin_lock_irqsave(q->queue_lock, flags);
1063 __blk_put_request(q, req); 1063 __blk_put_request(q, req);
1064 spin_unlock_irqrestore(q->queue_lock, flags); 1064 spin_unlock_irqrestore(q->queue_lock, flags);
1065 } 1065 }
1066 EXPORT_SYMBOL(blk_put_request); 1066 EXPORT_SYMBOL(blk_put_request);
1067 1067
1068 /** 1068 /**
1069 * blk_add_request_payload - add a payload to a request 1069 * blk_add_request_payload - add a payload to a request
1070 * @rq: request to update 1070 * @rq: request to update
1071 * @page: page backing the payload 1071 * @page: page backing the payload
1072 * @len: length of the payload. 1072 * @len: length of the payload.
1073 * 1073 *
1074 * This allows a block driver to later add a payload to an already 1074 * This allows a block driver to later add a payload to an already
1075 * submitted request. The driver needs to take care of freeing the payload 1075 * submitted request. The driver needs to take care of freeing the payload
1076 * itself. 1076 * itself.
1077 * 1077 *
1078 * Note that this is a quite horrible hack and nothing but handling of 1078 * Note that this is a quite horrible hack and nothing but handling of
1079 * discard requests should ever use it. 1079 * discard requests should ever use it.
1080 */ 1080 */
1081 void blk_add_request_payload(struct request *rq, struct page *page, 1081 void blk_add_request_payload(struct request *rq, struct page *page,
1082 unsigned int len) 1082 unsigned int len)
1083 { 1083 {
1084 struct bio *bio = rq->bio; 1084 struct bio *bio = rq->bio;
1085 1085
1086 bio->bi_io_vec->bv_page = page; 1086 bio->bi_io_vec->bv_page = page;
1087 bio->bi_io_vec->bv_offset = 0; 1087 bio->bi_io_vec->bv_offset = 0;
1088 bio->bi_io_vec->bv_len = len; 1088 bio->bi_io_vec->bv_len = len;
1089 1089
1090 bio->bi_size = len; 1090 bio->bi_size = len;
1091 bio->bi_vcnt = 1; 1091 bio->bi_vcnt = 1;
1092 bio->bi_phys_segments = 1; 1092 bio->bi_phys_segments = 1;
1093 1093
1094 rq->__data_len = rq->resid_len = len; 1094 rq->__data_len = rq->resid_len = len;
1095 rq->nr_phys_segments = 1; 1095 rq->nr_phys_segments = 1;
1096 rq->buffer = bio_data(bio); 1096 rq->buffer = bio_data(bio);
1097 } 1097 }
1098 EXPORT_SYMBOL_GPL(blk_add_request_payload); 1098 EXPORT_SYMBOL_GPL(blk_add_request_payload);
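/*
 * Illustrative sketch, not part of this file: how a hypothetical driver's
 * ->prep_rq_fn might attach a zeroed page as the payload of a discard
 * request, the only intended user of this interface.  The 512-byte length
 * and the use of ->special to remember the page are assumptions; the driver
 * must free the page itself when the request completes.
 */
static int example_prep_discard(struct request_queue *q, struct request *rq)
{
	struct page *page;

	if (!(rq->cmd_flags & REQ_DISCARD))
		return BLKPREP_OK;

	page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
	if (!page)
		return BLKPREP_DEFER;

	blk_add_request_payload(rq, page, 512);
	rq->special = page;
	return BLKPREP_OK;
}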
1099 1099
1100 static bool bio_attempt_back_merge(struct request_queue *q, struct request *req, 1100 static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
1101 struct bio *bio) 1101 struct bio *bio)
1102 { 1102 {
1103 const int ff = bio->bi_rw & REQ_FAILFAST_MASK; 1103 const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
1104 1104
1105 /* 1105 /*
1106 * Debug stuff, kill later 1106 * Debug stuff, kill later
1107 */ 1107 */
1108 if (!rq_mergeable(req)) { 1108 if (!rq_mergeable(req)) {
1109 blk_dump_rq_flags(req, "back"); 1109 blk_dump_rq_flags(req, "back");
1110 return false; 1110 return false;
1111 } 1111 }
1112 1112
1113 if (!ll_back_merge_fn(q, req, bio)) 1113 if (!ll_back_merge_fn(q, req, bio))
1114 return false; 1114 return false;
1115 1115
1116 trace_block_bio_backmerge(q, bio); 1116 trace_block_bio_backmerge(q, bio);
1117 1117
1118 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) 1118 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
1119 blk_rq_set_mixed_merge(req); 1119 blk_rq_set_mixed_merge(req);
1120 1120
1121 req->biotail->bi_next = bio; 1121 req->biotail->bi_next = bio;
1122 req->biotail = bio; 1122 req->biotail = bio;
1123 req->__data_len += bio->bi_size; 1123 req->__data_len += bio->bi_size;
1124 req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); 1124 req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
1125 1125
1126 drive_stat_acct(req, 0); 1126 drive_stat_acct(req, 0);
1127 return true; 1127 return true;
1128 } 1128 }
1129 1129
1130 static bool bio_attempt_front_merge(struct request_queue *q, 1130 static bool bio_attempt_front_merge(struct request_queue *q,
1131 struct request *req, struct bio *bio) 1131 struct request *req, struct bio *bio)
1132 { 1132 {
1133 const int ff = bio->bi_rw & REQ_FAILFAST_MASK; 1133 const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
1134 sector_t sector; 1134 sector_t sector;
1135 1135
1136 /* 1136 /*
1137 * Debug stuff, kill later 1137 * Debug stuff, kill later
1138 */ 1138 */
1139 if (!rq_mergeable(req)) { 1139 if (!rq_mergeable(req)) {
1140 blk_dump_rq_flags(req, "front"); 1140 blk_dump_rq_flags(req, "front");
1141 return false; 1141 return false;
1142 } 1142 }
1143 1143
1144 if (!ll_front_merge_fn(q, req, bio)) 1144 if (!ll_front_merge_fn(q, req, bio))
1145 return false; 1145 return false;
1146 1146
1147 trace_block_bio_frontmerge(q, bio); 1147 trace_block_bio_frontmerge(q, bio);
1148 1148
1149 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) 1149 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
1150 blk_rq_set_mixed_merge(req); 1150 blk_rq_set_mixed_merge(req);
1151 1151
1152 sector = bio->bi_sector; 1152 sector = bio->bi_sector;
1153 1153
1154 bio->bi_next = req->bio; 1154 bio->bi_next = req->bio;
1155 req->bio = bio; 1155 req->bio = bio;
1156 1156
1157 /* 1157 /*
1158 * may not be valid. If the low level driver said 1158 * may not be valid. If the low level driver said
1159 * it didn't need a bounce buffer then it better 1159 * it didn't need a bounce buffer then it better
1160 * not touch req->buffer either... 1160 * not touch req->buffer either...
1161 */ 1161 */
1162 req->buffer = bio_data(bio); 1162 req->buffer = bio_data(bio);
1163 req->__sector = bio->bi_sector; 1163 req->__sector = bio->bi_sector;
1164 req->__data_len += bio->bi_size; 1164 req->__data_len += bio->bi_size;
1165 req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); 1165 req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
1166 1166
1167 drive_stat_acct(req, 0); 1167 drive_stat_acct(req, 0);
1168 return true; 1168 return true;
1169 } 1169 }
1170 1170
1171 /* 1171 /*
1172 * Attempts to merge with the plugged list in the current process. Returns 1172 * Attempts to merge with the plugged list in the current process. Returns
1173 * true if merge was successful, otherwise false. 1173 * true if merge was successful, otherwise false.
1174 */ 1174 */
1175 static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q, 1175 static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q,
1176 struct bio *bio) 1176 struct bio *bio)
1177 { 1177 {
1178 struct blk_plug *plug; 1178 struct blk_plug *plug;
1179 struct request *rq; 1179 struct request *rq;
1180 bool ret = false; 1180 bool ret = false;
1181 1181
1182 plug = tsk->plug; 1182 plug = tsk->plug;
1183 if (!plug) 1183 if (!plug)
1184 goto out; 1184 goto out;
1185 1185
1186 list_for_each_entry_reverse(rq, &plug->list, queuelist) { 1186 list_for_each_entry_reverse(rq, &plug->list, queuelist) {
1187 int el_ret; 1187 int el_ret;
1188 1188
1189 if (rq->q != q) 1189 if (rq->q != q)
1190 continue; 1190 continue;
1191 1191
1192 el_ret = elv_try_merge(rq, bio); 1192 el_ret = elv_try_merge(rq, bio);
1193 if (el_ret == ELEVATOR_BACK_MERGE) { 1193 if (el_ret == ELEVATOR_BACK_MERGE) {
1194 ret = bio_attempt_back_merge(q, rq, bio); 1194 ret = bio_attempt_back_merge(q, rq, bio);
1195 if (ret) 1195 if (ret)
1196 break; 1196 break;
1197 } else if (el_ret == ELEVATOR_FRONT_MERGE) { 1197 } else if (el_ret == ELEVATOR_FRONT_MERGE) {
1198 ret = bio_attempt_front_merge(q, rq, bio); 1198 ret = bio_attempt_front_merge(q, rq, bio);
1199 if (ret) 1199 if (ret)
1200 break; 1200 break;
1201 } 1201 }
1202 } 1202 }
1203 out: 1203 out:
1204 return ret; 1204 return ret;
1205 } 1205 }
1206 1206
1207 void init_request_from_bio(struct request *req, struct bio *bio) 1207 void init_request_from_bio(struct request *req, struct bio *bio)
1208 { 1208 {
1209 req->cpu = bio->bi_comp_cpu; 1209 req->cpu = bio->bi_comp_cpu;
1210 req->cmd_type = REQ_TYPE_FS; 1210 req->cmd_type = REQ_TYPE_FS;
1211 1211
1212 req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK; 1212 req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK;
1213 if (bio->bi_rw & REQ_RAHEAD) 1213 if (bio->bi_rw & REQ_RAHEAD)
1214 req->cmd_flags |= REQ_FAILFAST_MASK; 1214 req->cmd_flags |= REQ_FAILFAST_MASK;
1215 1215
1216 req->errors = 0; 1216 req->errors = 0;
1217 req->__sector = bio->bi_sector; 1217 req->__sector = bio->bi_sector;
1218 req->ioprio = bio_prio(bio); 1218 req->ioprio = bio_prio(bio);
1219 blk_rq_bio_prep(req->q, req, bio); 1219 blk_rq_bio_prep(req->q, req, bio);
1220 } 1220 }
1221 1221
1222 static int __make_request(struct request_queue *q, struct bio *bio) 1222 static int __make_request(struct request_queue *q, struct bio *bio)
1223 { 1223 {
1224 const bool sync = !!(bio->bi_rw & REQ_SYNC); 1224 const bool sync = !!(bio->bi_rw & REQ_SYNC);
1225 struct blk_plug *plug; 1225 struct blk_plug *plug;
1226 int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT; 1226 int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
1227 struct request *req; 1227 struct request *req;
1228 1228
1229 /* 1229 /*
1230 * low level driver can indicate that it wants pages above a 1230 * low level driver can indicate that it wants pages above a
1231 * certain limit bounced to low memory (ie for highmem, or even 1231 * certain limit bounced to low memory (ie for highmem, or even
1232 * ISA dma in theory) 1232 * ISA dma in theory)
1233 */ 1233 */
1234 blk_queue_bounce(q, &bio); 1234 blk_queue_bounce(q, &bio);
1235 1235
1236 if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { 1236 if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
1237 spin_lock_irq(q->queue_lock); 1237 spin_lock_irq(q->queue_lock);
1238 where = ELEVATOR_INSERT_FLUSH; 1238 where = ELEVATOR_INSERT_FLUSH;
1239 goto get_rq; 1239 goto get_rq;
1240 } 1240 }
1241 1241
1242 /* 1242 /*
1243 * Check if we can merge with the plugged list before grabbing 1243 * Check if we can merge with the plugged list before grabbing
1244 * any locks. 1244 * any locks.
1245 */ 1245 */
1246 if (attempt_plug_merge(current, q, bio)) 1246 if (attempt_plug_merge(current, q, bio))
1247 goto out; 1247 goto out;
1248 1248
1249 spin_lock_irq(q->queue_lock); 1249 spin_lock_irq(q->queue_lock);
1250 1250
1251 el_ret = elv_merge(q, &req, bio); 1251 el_ret = elv_merge(q, &req, bio);
1252 if (el_ret == ELEVATOR_BACK_MERGE) { 1252 if (el_ret == ELEVATOR_BACK_MERGE) {
1253 BUG_ON(req->cmd_flags & REQ_ON_PLUG); 1253 BUG_ON(req->cmd_flags & REQ_ON_PLUG);
1254 if (bio_attempt_back_merge(q, req, bio)) { 1254 if (bio_attempt_back_merge(q, req, bio)) {
1255 if (!attempt_back_merge(q, req)) 1255 if (!attempt_back_merge(q, req))
1256 elv_merged_request(q, req, el_ret); 1256 elv_merged_request(q, req, el_ret);
1257 goto out_unlock; 1257 goto out_unlock;
1258 } 1258 }
1259 } else if (el_ret == ELEVATOR_FRONT_MERGE) { 1259 } else if (el_ret == ELEVATOR_FRONT_MERGE) {
1260 BUG_ON(req->cmd_flags & REQ_ON_PLUG); 1260 BUG_ON(req->cmd_flags & REQ_ON_PLUG);
1261 if (bio_attempt_front_merge(q, req, bio)) { 1261 if (bio_attempt_front_merge(q, req, bio)) {
1262 if (!attempt_front_merge(q, req)) 1262 if (!attempt_front_merge(q, req))
1263 elv_merged_request(q, req, el_ret); 1263 elv_merged_request(q, req, el_ret);
1264 goto out_unlock; 1264 goto out_unlock;
1265 } 1265 }
1266 } 1266 }
1267 1267
1268 get_rq: 1268 get_rq:
1269 /* 1269 /*
1270 * This sync check and mask will be re-done in init_request_from_bio(), 1270 * This sync check and mask will be re-done in init_request_from_bio(),
1271 * but we need to set it earlier to expose the sync flag to the 1271 * but we need to set it earlier to expose the sync flag to the
1272 * rq allocator and io schedulers. 1272 * rq allocator and io schedulers.
1273 */ 1273 */
1274 rw_flags = bio_data_dir(bio); 1274 rw_flags = bio_data_dir(bio);
1275 if (sync) 1275 if (sync)
1276 rw_flags |= REQ_SYNC; 1276 rw_flags |= REQ_SYNC;
1277 1277
1278 /* 1278 /*
1279 * Grab a free request. This might sleep but cannot fail. 1279 * Grab a free request. This might sleep but cannot fail.
1280 * Returns with the queue unlocked. 1280 * Returns with the queue unlocked.
1281 */ 1281 */
1282 req = get_request_wait(q, rw_flags, bio); 1282 req = get_request_wait(q, rw_flags, bio);
1283 1283
1284 /* 1284 /*
1285 * After dropping the lock and possibly sleeping here, our request 1285 * After dropping the lock and possibly sleeping here, our request
1286 * may now be mergeable after it had proven unmergeable (above). 1286 * may now be mergeable after it had proven unmergeable (above).
1287 * We don't worry about that case for efficiency. It won't happen 1287 * We don't worry about that case for efficiency. It won't happen
1288 * often, and the elevators are able to handle it. 1288 * often, and the elevators are able to handle it.
1289 */ 1289 */
1290 init_request_from_bio(req, bio); 1290 init_request_from_bio(req, bio);
1291 1291
1292 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || 1292 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
1293 bio_flagged(bio, BIO_CPU_AFFINE)) { 1293 bio_flagged(bio, BIO_CPU_AFFINE)) {
1294 req->cpu = blk_cpu_to_group(get_cpu()); 1294 req->cpu = blk_cpu_to_group(get_cpu());
1295 put_cpu(); 1295 put_cpu();
1296 } 1296 }
1297 1297
1298 plug = current->plug; 1298 plug = current->plug;
1299 if (plug) { 1299 if (plug) {
1300 /* 1300 /*
1301 * If this is the first request added after a plug, fire 1301 * If this is the first request added after a plug, fire
1302 * off a plug trace. If others have been added before, check 1302 * off a plug trace. If others have been added before, check
1303 * if we have multiple devices in this plug. If so, make a 1303 * if we have multiple devices in this plug. If so, make a
1304 * note to sort the list before dispatch (a plug usage sketch follows this function). 1304 * note to sort the list before dispatch (a plug usage sketch follows this function).
1305 */ 1305 */
1306 if (list_empty(&plug->list)) 1306 if (list_empty(&plug->list))
1307 trace_block_plug(q); 1307 trace_block_plug(q);
1308 else if (!plug->should_sort) { 1308 else if (!plug->should_sort) {
1309 struct request *__rq; 1309 struct request *__rq;
1310 1310
1311 __rq = list_entry_rq(plug->list.prev); 1311 __rq = list_entry_rq(plug->list.prev);
1312 if (__rq->q != q) 1312 if (__rq->q != q)
1313 plug->should_sort = 1; 1313 plug->should_sort = 1;
1314 } 1314 }
1315 /* 1315 /*
1316 * Debug flag, kill later 1316 * Debug flag, kill later
1317 */ 1317 */
1318 req->cmd_flags |= REQ_ON_PLUG; 1318 req->cmd_flags |= REQ_ON_PLUG;
1319 list_add_tail(&req->queuelist, &plug->list); 1319 list_add_tail(&req->queuelist, &plug->list);
1320 drive_stat_acct(req, 1); 1320 drive_stat_acct(req, 1);
1321 } else { 1321 } else {
1322 spin_lock_irq(q->queue_lock); 1322 spin_lock_irq(q->queue_lock);
1323 add_acct_request(q, req, where); 1323 add_acct_request(q, req, where);
1324 __blk_run_queue(q, false); 1324 __blk_run_queue(q, false);
1325 out_unlock: 1325 out_unlock:
1326 spin_unlock_irq(q->queue_lock); 1326 spin_unlock_irq(q->queue_lock);
1327 } 1327 }
1328 out: 1328 out:
1329 return 0; 1329 return 0;
1330 } 1330 }
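/*
 * Illustrative sketch, not part of this file: the plug that __make_request()
 * checks via current->plug is set up by the submitter with an on-stack
 * struct blk_plug.  Requests issued inside the plugged section are held on
 * the plug list and are flushed at blk_finish_plug(), or implicitly when the
 * task schedules with IO still pending.
 */
static void example_plugged_submission(struct bio **bios, int nr)
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);
	for (i = 0; i < nr; i++)
		submit_bio(WRITE, bios[i]);
	blk_finish_plug(&plug);
}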
1331 1331
1332 /* 1332 /*
1333 * If bio->bi_bdev is a partition, remap the location 1333 * If bio->bi_bdev is a partition, remap the location
1334 */ 1334 */
1335 static inline void blk_partition_remap(struct bio *bio) 1335 static inline void blk_partition_remap(struct bio *bio)
1336 { 1336 {
1337 struct block_device *bdev = bio->bi_bdev; 1337 struct block_device *bdev = bio->bi_bdev;
1338 1338
1339 if (bio_sectors(bio) && bdev != bdev->bd_contains) { 1339 if (bio_sectors(bio) && bdev != bdev->bd_contains) {
1340 struct hd_struct *p = bdev->bd_part; 1340 struct hd_struct *p = bdev->bd_part;
1341 1341
1342 bio->bi_sector += p->start_sect; 1342 bio->bi_sector += p->start_sect;
1343 bio->bi_bdev = bdev->bd_contains; 1343 bio->bi_bdev = bdev->bd_contains;
1344 1344
1345 trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio, 1345 trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,
1346 bdev->bd_dev, 1346 bdev->bd_dev,
1347 bio->bi_sector - p->start_sect); 1347 bio->bi_sector - p->start_sect);
1348 } 1348 }
1349 } 1349 }
1350 1350
1351 static void handle_bad_sector(struct bio *bio) 1351 static void handle_bad_sector(struct bio *bio)
1352 { 1352 {
1353 char b[BDEVNAME_SIZE]; 1353 char b[BDEVNAME_SIZE];
1354 1354
1355 printk(KERN_INFO "attempt to access beyond end of device\n"); 1355 printk(KERN_INFO "attempt to access beyond end of device\n");
1356 printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", 1356 printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",
1357 bdevname(bio->bi_bdev, b), 1357 bdevname(bio->bi_bdev, b),
1358 bio->bi_rw, 1358 bio->bi_rw,
1359 (unsigned long long)bio->bi_sector + bio_sectors(bio), 1359 (unsigned long long)bio->bi_sector + bio_sectors(bio),
1360 (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9)); 1360 (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9));
1361 1361
1362 set_bit(BIO_EOF, &bio->bi_flags); 1362 set_bit(BIO_EOF, &bio->bi_flags);
1363 } 1363 }
1364 1364
1365 #ifdef CONFIG_FAIL_MAKE_REQUEST 1365 #ifdef CONFIG_FAIL_MAKE_REQUEST
1366 1366
1367 static DECLARE_FAULT_ATTR(fail_make_request); 1367 static DECLARE_FAULT_ATTR(fail_make_request);
1368 1368
1369 static int __init setup_fail_make_request(char *str) 1369 static int __init setup_fail_make_request(char *str)
1370 { 1370 {
1371 return setup_fault_attr(&fail_make_request, str); 1371 return setup_fault_attr(&fail_make_request, str);
1372 } 1372 }
1373 __setup("fail_make_request=", setup_fail_make_request); 1373 __setup("fail_make_request=", setup_fail_make_request);
1374 1374
1375 static int should_fail_request(struct bio *bio) 1375 static int should_fail_request(struct bio *bio)
1376 { 1376 {
1377 struct hd_struct *part = bio->bi_bdev->bd_part; 1377 struct hd_struct *part = bio->bi_bdev->bd_part;
1378 1378
1379 if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail) 1379 if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail)
1380 return should_fail(&fail_make_request, bio->bi_size); 1380 return should_fail(&fail_make_request, bio->bi_size);
1381 1381
1382 return 0; 1382 return 0;
1383 } 1383 }
1384 1384
1385 static int __init fail_make_request_debugfs(void) 1385 static int __init fail_make_request_debugfs(void)
1386 { 1386 {
1387 return init_fault_attr_dentries(&fail_make_request, 1387 return init_fault_attr_dentries(&fail_make_request,
1388 "fail_make_request"); 1388 "fail_make_request");
1389 } 1389 }
1390 1390
1391 late_initcall(fail_make_request_debugfs); 1391 late_initcall(fail_make_request_debugfs);
1392 1392
1393 #else /* CONFIG_FAIL_MAKE_REQUEST */ 1393 #else /* CONFIG_FAIL_MAKE_REQUEST */
1394 1394
1395 static inline int should_fail_request(struct bio *bio) 1395 static inline int should_fail_request(struct bio *bio)
1396 { 1396 {
1397 return 0; 1397 return 0;
1398 } 1398 }
1399 1399
1400 #endif /* CONFIG_FAIL_MAKE_REQUEST */ 1400 #endif /* CONFIG_FAIL_MAKE_REQUEST */
1401 1401
1402 /* 1402 /*
1403 * Check whether this bio extends beyond the end of the device. 1403 * Check whether this bio extends beyond the end of the device.
1404 */ 1404 */
1405 static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors) 1405 static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
1406 { 1406 {
1407 sector_t maxsector; 1407 sector_t maxsector;
1408 1408
1409 if (!nr_sectors) 1409 if (!nr_sectors)
1410 return 0; 1410 return 0;
1411 1411
1412 /* Test device or partition size, when known. */ 1412 /* Test device or partition size, when known. */
1413 maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9; 1413 maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
1414 if (maxsector) { 1414 if (maxsector) {
1415 sector_t sector = bio->bi_sector; 1415 sector_t sector = bio->bi_sector;
1416 1416
1417 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { 1417 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
1418 /* 1418 /*
1419 * This may well happen - the kernel calls bread() 1419 * This may well happen - the kernel calls bread()
1420 * without checking the size of the device, e.g., when 1420 * without checking the size of the device, e.g., when
1421 * mounting a device. 1421 * mounting a device.
1422 */ 1422 */
1423 handle_bad_sector(bio); 1423 handle_bad_sector(bio);
1424 return 1; 1424 return 1;
1425 } 1425 }
1426 } 1426 }
1427 1427
1428 return 0; 1428 return 0;
1429 } 1429 }
1430 1430
1431 /** 1431 /**
1432 * generic_make_request - hand a buffer to its device driver for I/O 1432 * generic_make_request - hand a buffer to its device driver for I/O
1433 * @bio: The bio describing the location in memory and on the device. 1433 * @bio: The bio describing the location in memory and on the device.
1434 * 1434 *
1435 * generic_make_request() is used to make I/O requests of block 1435 * generic_make_request() is used to make I/O requests of block
1436 * devices. It is passed a &struct bio, which describes the I/O that needs 1436 * devices. It is passed a &struct bio, which describes the I/O that needs
1437 * to be done. 1437 * to be done.
1438 * 1438 *
1439 * generic_make_request() does not return any status. The 1439 * generic_make_request() does not return any status. The
1440 * success/failure status of the request, along with notification of 1440 * success/failure status of the request, along with notification of
1441 * completion, is delivered asynchronously through the bio->bi_end_io 1441 * completion, is delivered asynchronously through the bio->bi_end_io
1442 * function described (one day) elsewhere. 1442 * function described (one day) elsewhere.
1443 * 1443 *
1444 * The caller of generic_make_request must make sure that bi_io_vec 1444 * The caller of generic_make_request must make sure that bi_io_vec
1445 * are set to describe the memory buffer, and that bi_bdev and bi_sector are 1445 * are set to describe the memory buffer, and that bi_bdev and bi_sector are
1446 * set to describe the device address, and the 1446 * set to describe the device address, and the
1447 * bi_end_io and optionally bi_private are set to describe how 1447 * bi_end_io and optionally bi_private are set to describe how
1448 * completion notification should be signaled. 1448 * completion notification should be signaled.
1449 * 1449 *
1450 * generic_make_request and the drivers it calls may use bi_next if this 1450 * generic_make_request and the drivers it calls may use bi_next if this
1451 * bio happens to be merged with someone else, and may change bi_bdev and 1451 * bio happens to be merged with someone else, and may change bi_bdev and
1452 * bi_sector for remaps as it sees fit. So the values of these fields 1452 * bi_sector for remaps as it sees fit. So the values of these fields
1453 * should NOT be depended on after the call to generic_make_request. 1453 * should NOT be depended on after the call to generic_make_request.
1454 */ 1454 */
1455 static inline void __generic_make_request(struct bio *bio) 1455 static inline void __generic_make_request(struct bio *bio)
1456 { 1456 {
1457 struct request_queue *q; 1457 struct request_queue *q;
1458 sector_t old_sector; 1458 sector_t old_sector;
1459 int ret, nr_sectors = bio_sectors(bio); 1459 int ret, nr_sectors = bio_sectors(bio);
1460 dev_t old_dev; 1460 dev_t old_dev;
1461 int err = -EIO; 1461 int err = -EIO;
1462 1462
1463 might_sleep(); 1463 might_sleep();
1464 1464
1465 if (bio_check_eod(bio, nr_sectors)) 1465 if (bio_check_eod(bio, nr_sectors))
1466 goto end_io; 1466 goto end_io;
1467 1467
1468 /* 1468 /*
1469 * Resolve the mapping until finished. (drivers are 1469 * Resolve the mapping until finished. (drivers are
1470 * still free to implement/resolve their own stacking 1470 * still free to implement/resolve their own stacking
1471 * by explicitly returning 0) 1471 * by explicitly returning 0)
1472 * 1472 *
1473 * NOTE: we don't repeat the blk_size check for each new device. 1473 * NOTE: we don't repeat the blk_size check for each new device.
1474 * Stacking drivers are expected to know what they are doing. 1474 * Stacking drivers are expected to know what they are doing.
1475 */ 1475 */
1476 old_sector = -1; 1476 old_sector = -1;
1477 old_dev = 0; 1477 old_dev = 0;
1478 do { 1478 do {
1479 char b[BDEVNAME_SIZE]; 1479 char b[BDEVNAME_SIZE];
1480 1480
1481 q = bdev_get_queue(bio->bi_bdev); 1481 q = bdev_get_queue(bio->bi_bdev);
1482 if (unlikely(!q)) { 1482 if (unlikely(!q)) {
1483 printk(KERN_ERR 1483 printk(KERN_ERR
1484 "generic_make_request: Trying to access " 1484 "generic_make_request: Trying to access "
1485 "nonexistent block-device %s (%Lu)\n", 1485 "nonexistent block-device %s (%Lu)\n",
1486 bdevname(bio->bi_bdev, b), 1486 bdevname(bio->bi_bdev, b),
1487 (long long) bio->bi_sector); 1487 (long long) bio->bi_sector);
1488 goto end_io; 1488 goto end_io;
1489 } 1489 }
1490 1490
1491 if (unlikely(!(bio->bi_rw & REQ_DISCARD) && 1491 if (unlikely(!(bio->bi_rw & REQ_DISCARD) &&
1492 nr_sectors > queue_max_hw_sectors(q))) { 1492 nr_sectors > queue_max_hw_sectors(q))) {
1493 printk(KERN_ERR "bio too big device %s (%u > %u)\n", 1493 printk(KERN_ERR "bio too big device %s (%u > %u)\n",
1494 bdevname(bio->bi_bdev, b), 1494 bdevname(bio->bi_bdev, b),
1495 bio_sectors(bio), 1495 bio_sectors(bio),
1496 queue_max_hw_sectors(q)); 1496 queue_max_hw_sectors(q));
1497 goto end_io; 1497 goto end_io;
1498 } 1498 }
1499 1499
1500 if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) 1500 if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
1501 goto end_io; 1501 goto end_io;
1502 1502
1503 if (should_fail_request(bio)) 1503 if (should_fail_request(bio))
1504 goto end_io; 1504 goto end_io;
1505 1505
1506 /* 1506 /*
1507 * If this device has partitions, remap block n 1507 * If this device has partitions, remap block n
1508 * of partition p to block n+start(p) of the disk. 1508 * of partition p to block n+start(p) of the disk.
1509 */ 1509 */
1510 blk_partition_remap(bio); 1510 blk_partition_remap(bio);
1511 1511
1512 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) 1512 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio))
1513 goto end_io; 1513 goto end_io;
1514 1514
1515 if (old_sector != -1) 1515 if (old_sector != -1)
1516 trace_block_bio_remap(q, bio, old_dev, old_sector); 1516 trace_block_bio_remap(q, bio, old_dev, old_sector);
1517 1517
1518 old_sector = bio->bi_sector; 1518 old_sector = bio->bi_sector;
1519 old_dev = bio->bi_bdev->bd_dev; 1519 old_dev = bio->bi_bdev->bd_dev;
1520 1520
1521 if (bio_check_eod(bio, nr_sectors)) 1521 if (bio_check_eod(bio, nr_sectors))
1522 goto end_io; 1522 goto end_io;
1523 1523
1524 /* 1524 /*
1525 * Filter flush bios early so that make_request based 1525 * Filter flush bios early so that make_request based
1526 * drivers without flush support don't have to worry 1526 * drivers without flush support don't have to worry
1527 * about them. 1527 * about them.
1528 */ 1528 */
1529 if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) { 1529 if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) {
1530 bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA); 1530 bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA);
1531 if (!nr_sectors) { 1531 if (!nr_sectors) {
1532 err = 0; 1532 err = 0;
1533 goto end_io; 1533 goto end_io;
1534 } 1534 }
1535 } 1535 }
1536 1536
1537 if ((bio->bi_rw & REQ_DISCARD) && 1537 if ((bio->bi_rw & REQ_DISCARD) &&
1538 (!blk_queue_discard(q) || 1538 (!blk_queue_discard(q) ||
1539 ((bio->bi_rw & REQ_SECURE) && 1539 ((bio->bi_rw & REQ_SECURE) &&
1540 !blk_queue_secdiscard(q)))) { 1540 !blk_queue_secdiscard(q)))) {
1541 err = -EOPNOTSUPP; 1541 err = -EOPNOTSUPP;
1542 goto end_io; 1542 goto end_io;
1543 } 1543 }
1544 1544
1545 blk_throtl_bio(q, &bio); 1545 blk_throtl_bio(q, &bio);
1546 1546
1547 /* 1547 /*
1548 * If bio is NULL, it has been throttled and will be submitted 1548 * If bio is NULL, it has been throttled and will be submitted
1549 * later. 1549 * later.
1550 */ 1550 */
1551 if (!bio) 1551 if (!bio)
1552 break; 1552 break;
1553 1553
1554 trace_block_bio_queue(q, bio); 1554 trace_block_bio_queue(q, bio);
1555 1555
1556 ret = q->make_request_fn(q, bio); 1556 ret = q->make_request_fn(q, bio);
1557 } while (ret); 1557 } while (ret);
1558 1558
1559 return; 1559 return;
1560 1560
1561 end_io: 1561 end_io:
1562 bio_endio(bio, err); 1562 bio_endio(bio, err);
1563 } 1563 }
1564 1564
1565 /* 1565 /*
1566 * We only want one ->make_request_fn to be active at a time, 1566 * We only want one ->make_request_fn to be active at a time,
1567 * else stack usage with stacked devices could be a problem. 1567 * else stack usage with stacked devices could be a problem.
1568 * So use current->bio_list to keep a list of requests 1568 * So use current->bio_list to keep a list of requests
1569 * submitted by a make_request_fn function. 1569 * submitted by a make_request_fn function.
1570 * current->bio_list is also used as a flag to say if 1570 * current->bio_list is also used as a flag to say if
1571 * generic_make_request is currently active in this task or not. 1571 * generic_make_request is currently active in this task or not.
1572 * If it is NULL, then no make_request is active. If it is non-NULL, 1572 * If it is NULL, then no make_request is active. If it is non-NULL,
1573 * then a make_request is active, and new requests should be added 1573 * then a make_request is active, and new requests should be added
1574 * at the tail 1574 * at the tail
1575 */ 1575 */
1576 void generic_make_request(struct bio *bio) 1576 void generic_make_request(struct bio *bio)
1577 { 1577 {
1578 struct bio_list bio_list_on_stack; 1578 struct bio_list bio_list_on_stack;
1579 1579
1580 if (current->bio_list) { 1580 if (current->bio_list) {
1581 /* make_request is active */ 1581 /* make_request is active */
1582 bio_list_add(current->bio_list, bio); 1582 bio_list_add(current->bio_list, bio);
1583 return; 1583 return;
1584 } 1584 }
1585 /* following loop may be a bit non-obvious, and so deserves some 1585 /* following loop may be a bit non-obvious, and so deserves some
1586 * explanation. 1586 * explanation.
1587 * Before entering the loop, bio->bi_next is NULL (as all callers 1587 * Before entering the loop, bio->bi_next is NULL (as all callers
1588 * ensure that) so we have a list with a single bio. 1588 * ensure that) so we have a list with a single bio.
1589 * We pretend that we have just taken it off a longer list, so 1589 * We pretend that we have just taken it off a longer list, so
1590 * we assign bio_list to a pointer to the bio_list_on_stack, 1590 * we assign bio_list to a pointer to the bio_list_on_stack,
1591 * thus initialising the bio_list of new bios to be 1591 * thus initialising the bio_list of new bios to be
1592 * added. __generic_make_request may indeed add some more bios 1592 * added. __generic_make_request may indeed add some more bios
1593 * through a recursive call to generic_make_request. If it 1593 * through a recursive call to generic_make_request. If it
1594 * did, we find a non-NULL value in bio_list and re-enter the loop 1594 * did, we find a non-NULL value in bio_list and re-enter the loop
1595 * from the top. In this case we really did just take the bio 1595 * from the top. In this case we really did just take the bio
1596 * off the top of the list (no pretending) and so remove it from 1596 * off the top of the list (no pretending) and so remove it from
1597 * bio_list, and call into __generic_make_request again. 1597 * bio_list, and call into __generic_make_request again.
1598 * 1598 *
1599 * The loop was structured like this to make only one call to 1599 * The loop was structured like this to make only one call to
1600 * __generic_make_request (which is important as it is large and 1600 * __generic_make_request (which is important as it is large and
1601 * inlined) and to keep the structure simple. 1601 * inlined) and to keep the structure simple.
1602 */ 1602 */
1603 BUG_ON(bio->bi_next); 1603 BUG_ON(bio->bi_next);
1604 bio_list_init(&bio_list_on_stack); 1604 bio_list_init(&bio_list_on_stack);
1605 current->bio_list = &bio_list_on_stack; 1605 current->bio_list = &bio_list_on_stack;
1606 do { 1606 do {
1607 __generic_make_request(bio); 1607 __generic_make_request(bio);
1608 bio = bio_list_pop(current->bio_list); 1608 bio = bio_list_pop(current->bio_list);
1609 } while (bio); 1609 } while (bio);
1610 current->bio_list = NULL; /* deactivate */ 1610 current->bio_list = NULL; /* deactivate */
1611 } 1611 }
1612 EXPORT_SYMBOL(generic_make_request); 1612 EXPORT_SYMBOL(generic_make_request);
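/*
 * Illustrative sketch, not part of this file: the minimum a caller has to
 * set up before handing a bio to generic_make_request(), per the comment
 * above.  The target device, sector, page and the completion handler are
 * placeholders supplied by the hypothetical caller.
 */
static void example_end_io(struct bio *bio, int error)
{
	/* completion handling would go here; drop our reference */
	bio_put(bio);
}

static void example_read_one_page(struct block_device *bdev, sector_t sector,
				  struct page *page)
{
	struct bio *bio = bio_alloc(GFP_NOIO, 1);

	if (!bio)
		return;

	bio->bi_bdev = bdev;
	bio->bi_sector = sector;
	bio->bi_end_io = example_end_io;
	bio_add_page(bio, page, PAGE_SIZE, 0);

	/* bi_rw defaults to a READ; a write would set WRITE in bi_rw */
	generic_make_request(bio);
}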
1613 1613
1614 /** 1614 /**
1615 * submit_bio - submit a bio to the block device layer for I/O 1615 * submit_bio - submit a bio to the block device layer for I/O
1616 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) 1616 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
1617 * @bio: The &struct bio which describes the I/O 1617 * @bio: The &struct bio which describes the I/O
1618 * 1618 *
1619 * submit_bio() is very similar in purpose to generic_make_request(), and 1619 * submit_bio() is very similar in purpose to generic_make_request(), and
1620 * uses that function to do most of the work. Both are fairly rough 1620 * uses that function to do most of the work. Both are fairly rough
1621 * interfaces; @bio must be set up and ready for I/O. 1621 * interfaces; @bio must be set up and ready for I/O.
1622 * 1622 *
1623 */ 1623 */
1624 void submit_bio(int rw, struct bio *bio) 1624 void submit_bio(int rw, struct bio *bio)
1625 { 1625 {
1626 int count = bio_sectors(bio); 1626 int count = bio_sectors(bio);
1627 1627
1628 bio->bi_rw |= rw; 1628 bio->bi_rw |= rw;
1629 1629
1630 /* 1630 /*
1631 * If it's a regular read/write or a barrier with data attached, 1631 * If it's a regular read/write or a barrier with data attached,
1632 * go through the normal accounting stuff before submission. 1632 * go through the normal accounting stuff before submission.
1633 */ 1633 */
1634 if (bio_has_data(bio) && !(rw & REQ_DISCARD)) { 1634 if (bio_has_data(bio) && !(rw & REQ_DISCARD)) {
1635 if (rw & WRITE) { 1635 if (rw & WRITE) {
1636 count_vm_events(PGPGOUT, count); 1636 count_vm_events(PGPGOUT, count);
1637 } else { 1637 } else {
1638 task_io_account_read(bio->bi_size); 1638 task_io_account_read(bio->bi_size);
1639 count_vm_events(PGPGIN, count); 1639 count_vm_events(PGPGIN, count);
1640 } 1640 }
1641 1641
1642 if (unlikely(block_dump)) { 1642 if (unlikely(block_dump)) {
1643 char b[BDEVNAME_SIZE]; 1643 char b[BDEVNAME_SIZE];
1644 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n", 1644 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",
1645 current->comm, task_pid_nr(current), 1645 current->comm, task_pid_nr(current),
1646 (rw & WRITE) ? "WRITE" : "READ", 1646 (rw & WRITE) ? "WRITE" : "READ",
1647 (unsigned long long)bio->bi_sector, 1647 (unsigned long long)bio->bi_sector,
1648 bdevname(bio->bi_bdev, b), 1648 bdevname(bio->bi_bdev, b),
1649 count); 1649 count);
1650 } 1650 }
1651 } 1651 }
1652 1652
1653 generic_make_request(bio); 1653 generic_make_request(bio);
1654 } 1654 }
1655 EXPORT_SYMBOL(submit_bio); 1655 EXPORT_SYMBOL(submit_bio);
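/*
 * Illustrative sketch, not part of this file: synchronous submission built on
 * submit_bio().  The caller parks on an on-stack completion which the
 * hypothetical end_io handler fires; BIO_UPTODATE is checked afterwards to
 * turn the asynchronous status into a return value.
 */
static void example_sync_end_io(struct bio *bio, int error)
{
	complete(bio->bi_private);
}

static int example_submit_and_wait(int rw, struct bio *bio)
{
	DECLARE_COMPLETION_ONSTACK(done);

	bio->bi_private = &done;
	bio->bi_end_io = example_sync_end_io;
	submit_bio(rw, bio);
	wait_for_completion(&done);

	return test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : -EIO;
}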
1656 1656
1657 /** 1657 /**
1658 * blk_rq_check_limits - Helper function to check a request for the queue limit 1658 * blk_rq_check_limits - Helper function to check a request for the queue limit
1659 * @q: the queue 1659 * @q: the queue
1660 * @rq: the request being checked 1660 * @rq: the request being checked
1661 * 1661 *
1662 * Description: 1662 * Description:
1663 * @rq may have been made based on weaker limitations of upper-level queues 1663 * @rq may have been made based on weaker limitations of upper-level queues
1664 * in request stacking drivers, and it may violate the limitation of @q. 1664 * in request stacking drivers, and it may violate the limitation of @q.
1665 * Since the block layer and the underlying device driver trust @rq 1665 * Since the block layer and the underlying device driver trust @rq
1666 * after it is inserted to @q, it should be checked against @q before 1666 * after it is inserted to @q, it should be checked against @q before
1667 * the insertion using this generic function. 1667 * the insertion using this generic function.
1668 * 1668 *
1669 * This function should also be useful for request stacking drivers 1669 * This function should also be useful for request stacking drivers
1670 * in some cases below, so export this function. 1670 * in some cases below, so export this function.
1671 * Request stacking drivers like request-based dm may change the queue 1671 * Request stacking drivers like request-based dm may change the queue
1672 * limits while requests are in the queue (e.g. dm's table swapping). 1672 * limits while requests are in the queue (e.g. dm's table swapping).
1673 * Such request stacking drivers should check those requests against 1673 * Such request stacking drivers should check those requests against
1674 * the new queue limits again when they dispatch those requests, 1674 * the new queue limits again when they dispatch those requests,
1675 * although such checks are also done against the old queue limits 1675 * although such checks are also done against the old queue limits
1676 * when submitting requests. 1676 * when submitting requests.
1677 */ 1677 */
1678 int blk_rq_check_limits(struct request_queue *q, struct request *rq) 1678 int blk_rq_check_limits(struct request_queue *q, struct request *rq)
1679 { 1679 {
1680 if (rq->cmd_flags & REQ_DISCARD) 1680 if (rq->cmd_flags & REQ_DISCARD)
1681 return 0; 1681 return 0;
1682 1682
1683 if (blk_rq_sectors(rq) > queue_max_sectors(q) || 1683 if (blk_rq_sectors(rq) > queue_max_sectors(q) ||
1684 blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) { 1684 blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) {
1685 printk(KERN_ERR "%s: over max size limit.\n", __func__); 1685 printk(KERN_ERR "%s: over max size limit.\n", __func__);
1686 return -EIO; 1686 return -EIO;
1687 } 1687 }
1688 1688
1689 /* 1689 /*
1690 * queue's settings related to segment counting like q->bounce_pfn 1690 * queue's settings related to segment counting like q->bounce_pfn
1691 * may differ from that of other stacking queues. 1691 * may differ from that of other stacking queues.
1692 * Recalculate it to check the request correctly on this queue's 1692 * Recalculate it to check the request correctly on this queue's
1693 * limitation. 1693 * limitation.
1694 */ 1694 */
1695 blk_recalc_rq_segments(rq); 1695 blk_recalc_rq_segments(rq);
1696 if (rq->nr_phys_segments > queue_max_segments(q)) { 1696 if (rq->nr_phys_segments > queue_max_segments(q)) {
1697 printk(KERN_ERR "%s: over max segments limit.\n", __func__); 1697 printk(KERN_ERR "%s: over max segments limit.\n", __func__);
1698 return -EIO; 1698 return -EIO;
1699 } 1699 }
1700 1700
1701 return 0; 1701 return 0;
1702 } 1702 }
1703 EXPORT_SYMBOL_GPL(blk_rq_check_limits); 1703 EXPORT_SYMBOL_GPL(blk_rq_check_limits);
1704 1704
1705 /** 1705 /**
1706 * blk_insert_cloned_request - Helper for stacking drivers to submit a request 1706 * blk_insert_cloned_request - Helper for stacking drivers to submit a request
1707 * @q: the queue to submit the request 1707 * @q: the queue to submit the request
1708 * @rq: the request being queued 1708 * @rq: the request being queued
1709 */ 1709 */
1710 int blk_insert_cloned_request(struct request_queue *q, struct request *rq) 1710 int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
1711 { 1711 {
1712 unsigned long flags; 1712 unsigned long flags;
1713 1713
1714 if (blk_rq_check_limits(q, rq)) 1714 if (blk_rq_check_limits(q, rq))
1715 return -EIO; 1715 return -EIO;
1716 1716
1717 #ifdef CONFIG_FAIL_MAKE_REQUEST 1717 #ifdef CONFIG_FAIL_MAKE_REQUEST
1718 if (rq->rq_disk && rq->rq_disk->part0.make_it_fail && 1718 if (rq->rq_disk && rq->rq_disk->part0.make_it_fail &&
1719 should_fail(&fail_make_request, blk_rq_bytes(rq))) 1719 should_fail(&fail_make_request, blk_rq_bytes(rq)))
1720 return -EIO; 1720 return -EIO;
1721 #endif 1721 #endif
1722 1722
1723 spin_lock_irqsave(q->queue_lock, flags); 1723 spin_lock_irqsave(q->queue_lock, flags);
1724 1724
1725 /* 1725 /*
1726 * The request being submitted must be dequeued before calling this function 1726 * The request being submitted must be dequeued before calling this function
1727 * because it will be linked to another request_queue. 1727 * because it will be linked to another request_queue.
1728 */ 1728 */
1729 BUG_ON(blk_queued_rq(rq)); 1729 BUG_ON(blk_queued_rq(rq));
1730 1730
1731 add_acct_request(q, rq, ELEVATOR_INSERT_BACK); 1731 add_acct_request(q, rq, ELEVATOR_INSERT_BACK);
1732 spin_unlock_irqrestore(q->queue_lock, flags); 1732 spin_unlock_irqrestore(q->queue_lock, flags);
1733 1733
1734 return 0; 1734 return 0;
1735 } 1735 }
1736 EXPORT_SYMBOL_GPL(blk_insert_cloned_request); 1736 EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
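/*
 * Illustrative sketch, not part of this file: how a request-based stacking
 * driver would hand an already cloned request to the queue of the underlying
 * device.  Building the clone is driver specific and not shown here.
 */
static int example_dispatch_clone(struct request_queue *lower_q,
				  struct request *clone)
{
	/*
	 * blk_insert_cloned_request() runs blk_rq_check_limits() itself, so a
	 * clone built against a stacked queue's weaker limits is rejected
	 * with -EIO instead of confusing the lower-level driver.
	 */
	return blk_insert_cloned_request(lower_q, clone);
}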
1737 1737
1738 /** 1738 /**
1739 * blk_rq_err_bytes - determine number of bytes till the next failure boundary 1739 * blk_rq_err_bytes - determine number of bytes till the next failure boundary
1740 * @rq: request to examine 1740 * @rq: request to examine
1741 * 1741 *
1742 * Description: 1742 * Description:
1743 * A request could be a merge of IOs which require different failure 1743 * A request could be a merge of IOs which require different failure
1744 * handling. This function determines the number of bytes which 1744 * handling. This function determines the number of bytes which
1745 * can be failed from the beginning of the request without 1745 * can be failed from the beginning of the request without
1746 * crossing into an area which needs to be retried further. 1746 * crossing into an area which needs to be retried further.
1747 * 1747 *
1748 * Return: 1748 * Return:
1749 * The number of bytes to fail. 1749 * The number of bytes to fail.
1750 * 1750 *
1751 * Context: 1751 * Context:
1752 * queue_lock must be held. 1752 * queue_lock must be held.
1753 */ 1753 */
1754 unsigned int blk_rq_err_bytes(const struct request *rq) 1754 unsigned int blk_rq_err_bytes(const struct request *rq)
1755 { 1755 {
1756 unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK; 1756 unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
1757 unsigned int bytes = 0; 1757 unsigned int bytes = 0;
1758 struct bio *bio; 1758 struct bio *bio;
1759 1759
1760 if (!(rq->cmd_flags & REQ_MIXED_MERGE)) 1760 if (!(rq->cmd_flags & REQ_MIXED_MERGE))
1761 return blk_rq_bytes(rq); 1761 return blk_rq_bytes(rq);
1762 1762
1763 /* 1763 /*
1764 * Currently the only 'mixing' which can happen is between 1764 * Currently the only 'mixing' which can happen is between
1765 * different failfast types. We can safely fail portions 1765 * different failfast types. We can safely fail portions
1766 * which have all the failfast bits that the first one has - 1766 * which have all the failfast bits that the first one has -
1767 * the ones which are at least as eager to fail as the first 1767 * the ones which are at least as eager to fail as the first
1768 * one. 1768 * one.
1769 */ 1769 */
1770 for (bio = rq->bio; bio; bio = bio->bi_next) { 1770 for (bio = rq->bio; bio; bio = bio->bi_next) {
1771 if ((bio->bi_rw & ff) != ff) 1771 if ((bio->bi_rw & ff) != ff)
1772 break; 1772 break;
1773 bytes += bio->bi_size; 1773 bytes += bio->bi_size;
1774 } 1774 }
1775 1775
1776 /* this could lead to infinite loop */ 1776 /* this could lead to infinite loop */
1777 BUG_ON(blk_rq_bytes(rq) && !bytes); 1777 BUG_ON(blk_rq_bytes(rq) && !bytes);
1778 return bytes; 1778 return bytes;
1779 } 1779 }
1780 EXPORT_SYMBOL_GPL(blk_rq_err_bytes); 1780 EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
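/*
 * Illustrative sketch, not part of this file: failing only the leading part
 * of a mixed-merge request that shares the first bio's failfast policy, and
 * leaving the rest to be retried.  blk_end_request() takes the queue lock
 * itself, so this would be called without it held.
 */
static bool example_fail_failfast_part(struct request *rq)
{
	/* returns true while part of the request remains to be retried */
	return blk_end_request(rq, -EIO, blk_rq_err_bytes(rq));
}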
1781 1781
1782 static void blk_account_io_completion(struct request *req, unsigned int bytes) 1782 static void blk_account_io_completion(struct request *req, unsigned int bytes)
1783 { 1783 {
1784 if (blk_do_io_stat(req)) { 1784 if (blk_do_io_stat(req)) {
1785 const int rw = rq_data_dir(req); 1785 const int rw = rq_data_dir(req);
1786 struct hd_struct *part; 1786 struct hd_struct *part;
1787 int cpu; 1787 int cpu;
1788 1788
1789 cpu = part_stat_lock(); 1789 cpu = part_stat_lock();
1790 part = req->part; 1790 part = req->part;
1791 part_stat_add(cpu, part, sectors[rw], bytes >> 9); 1791 part_stat_add(cpu, part, sectors[rw], bytes >> 9);
1792 part_stat_unlock(); 1792 part_stat_unlock();
1793 } 1793 }
1794 } 1794 }
1795 1795
1796 static void blk_account_io_done(struct request *req) 1796 static void blk_account_io_done(struct request *req)
1797 { 1797 {
1798 /* 1798 /*
1799 * Account IO completion. flush_rq isn't accounted as a 1799 * Account IO completion. flush_rq isn't accounted as a
1800 * normal IO on queueing nor completion. Accounting the 1800 * normal IO on queueing nor completion. Accounting the
1801 * containing request is enough. 1801 * containing request is enough.
1802 */ 1802 */
1803 if (blk_do_io_stat(req) && !(req->cmd_flags & REQ_FLUSH_SEQ)) { 1803 if (blk_do_io_stat(req) && !(req->cmd_flags & REQ_FLUSH_SEQ)) {
1804 unsigned long duration = jiffies - req->start_time; 1804 unsigned long duration = jiffies - req->start_time;
1805 const int rw = rq_data_dir(req); 1805 const int rw = rq_data_dir(req);
1806 struct hd_struct *part; 1806 struct hd_struct *part;
1807 int cpu; 1807 int cpu;
1808 1808
1809 cpu = part_stat_lock(); 1809 cpu = part_stat_lock();
1810 part = req->part; 1810 part = req->part;
1811 1811
1812 part_stat_inc(cpu, part, ios[rw]); 1812 part_stat_inc(cpu, part, ios[rw]);
1813 part_stat_add(cpu, part, ticks[rw], duration); 1813 part_stat_add(cpu, part, ticks[rw], duration);
1814 part_round_stats(cpu, part); 1814 part_round_stats(cpu, part);
1815 part_dec_in_flight(part, rw); 1815 part_dec_in_flight(part, rw);
1816 1816
1817 hd_struct_put(part); 1817 hd_struct_put(part);
1818 part_stat_unlock(); 1818 part_stat_unlock();
1819 } 1819 }
1820 } 1820 }
1821 1821
1822 /** 1822 /**
1823 * blk_peek_request - peek at the top of a request queue 1823 * blk_peek_request - peek at the top of a request queue
1824 * @q: request queue to peek at 1824 * @q: request queue to peek at
1825 * 1825 *
1826 * Description: 1826 * Description:
1827 * Return the request at the top of @q. The returned request 1827 * Return the request at the top of @q. The returned request
1828 * should be started using blk_start_request() before LLD starts 1828 * should be started using blk_start_request() before LLD starts
1829 * processing it. 1829 * processing it.
1830 * 1830 *
1831 * Return: 1831 * Return:
1832 * Pointer to the request at the top of @q if available. Null 1832 * Pointer to the request at the top of @q if available. Null
1833 * otherwise. 1833 * otherwise.
1834 * 1834 *
1835 * Context: 1835 * Context:
1836 * queue_lock must be held. 1836 * queue_lock must be held.
1837 */ 1837 */
1838 struct request *blk_peek_request(struct request_queue *q) 1838 struct request *blk_peek_request(struct request_queue *q)
1839 { 1839 {
1840 struct request *rq; 1840 struct request *rq;
1841 int ret; 1841 int ret;
1842 1842
1843 while ((rq = __elv_next_request(q)) != NULL) { 1843 while ((rq = __elv_next_request(q)) != NULL) {
1844 if (!(rq->cmd_flags & REQ_STARTED)) { 1844 if (!(rq->cmd_flags & REQ_STARTED)) {
1845 /* 1845 /*
1846 * This is the first time the device driver 1846 * This is the first time the device driver
1847 * sees this request (possibly after 1847 * sees this request (possibly after
1848 * requeueing). Notify IO scheduler. 1848 * requeueing). Notify IO scheduler.
1849 */ 1849 */
1850 if (rq->cmd_flags & REQ_SORTED) 1850 if (rq->cmd_flags & REQ_SORTED)
1851 elv_activate_rq(q, rq); 1851 elv_activate_rq(q, rq);
1852 1852
1853 /* 1853 /*
1854 * just mark as started even if we don't start 1854 * just mark as started even if we don't start
1855 * it; a request that has been delayed should 1855 * it; a request that has been delayed should
1856 * not be passed by new incoming requests 1856 * not be passed by new incoming requests
1857 */ 1857 */
1858 rq->cmd_flags |= REQ_STARTED; 1858 rq->cmd_flags |= REQ_STARTED;
1859 trace_block_rq_issue(q, rq); 1859 trace_block_rq_issue(q, rq);
1860 } 1860 }
1861 1861
1862 if (!q->boundary_rq || q->boundary_rq == rq) { 1862 if (!q->boundary_rq || q->boundary_rq == rq) {
1863 q->end_sector = rq_end_sector(rq); 1863 q->end_sector = rq_end_sector(rq);
1864 q->boundary_rq = NULL; 1864 q->boundary_rq = NULL;
1865 } 1865 }
1866 1866
1867 if (rq->cmd_flags & REQ_DONTPREP) 1867 if (rq->cmd_flags & REQ_DONTPREP)
1868 break; 1868 break;
1869 1869
1870 if (q->dma_drain_size && blk_rq_bytes(rq)) { 1870 if (q->dma_drain_size && blk_rq_bytes(rq)) {
1871 /* 1871 /*
1872 * make sure space for the drain appears. We 1872 * make sure space for the drain appears. We
1873 * know we can do this because max_hw_segments 1873 * know we can do this because max_hw_segments
1874 * has been adjusted to be one fewer than the 1874 * has been adjusted to be one fewer than the
1875 * device can handle 1875 * device can handle
1876 */ 1876 */
1877 rq->nr_phys_segments++; 1877 rq->nr_phys_segments++;
1878 } 1878 }
1879 1879
1880 if (!q->prep_rq_fn) 1880 if (!q->prep_rq_fn)
1881 break; 1881 break;
1882 1882
1883 ret = q->prep_rq_fn(q, rq); 1883 ret = q->prep_rq_fn(q, rq);
1884 if (ret == BLKPREP_OK) { 1884 if (ret == BLKPREP_OK) {
1885 break; 1885 break;
1886 } else if (ret == BLKPREP_DEFER) { 1886 } else if (ret == BLKPREP_DEFER) {
1887 /* 1887 /*
1888 * the request may have been (partially) prepped. 1888 * the request may have been (partially) prepped.
1889 * we need to keep this request in the front to 1889 * we need to keep this request in the front to
1890 * avoid resource deadlock. REQ_STARTED will 1890 * avoid resource deadlock. REQ_STARTED will
1891 * prevent other fs requests from passing this one. 1891 * prevent other fs requests from passing this one.
1892 */ 1892 */
1893 if (q->dma_drain_size && blk_rq_bytes(rq) && 1893 if (q->dma_drain_size && blk_rq_bytes(rq) &&
1894 !(rq->cmd_flags & REQ_DONTPREP)) { 1894 !(rq->cmd_flags & REQ_DONTPREP)) {
1895 /* 1895 /*
1896 * remove the space for the drain we added 1896 * remove the space for the drain we added
1897 * so that we don't add it again 1897 * so that we don't add it again
1898 */ 1898 */
1899 --rq->nr_phys_segments; 1899 --rq->nr_phys_segments;
1900 } 1900 }
1901 1901
1902 rq = NULL; 1902 rq = NULL;
1903 break; 1903 break;
1904 } else if (ret == BLKPREP_KILL) { 1904 } else if (ret == BLKPREP_KILL) {
1905 rq->cmd_flags |= REQ_QUIET; 1905 rq->cmd_flags |= REQ_QUIET;
1906 /* 1906 /*
1907 * Mark this request as started so we don't trigger 1907 * Mark this request as started so we don't trigger
1908 * any debug logic in the end I/O path. 1908 * any debug logic in the end I/O path.
1909 */ 1909 */
1910 blk_start_request(rq); 1910 blk_start_request(rq);
1911 __blk_end_request_all(rq, -EIO); 1911 __blk_end_request_all(rq, -EIO);
1912 } else { 1912 } else {
1913 printk(KERN_ERR "%s: bad return=%d\n", __func__, ret); 1913 printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
1914 break; 1914 break;
1915 } 1915 }
1916 } 1916 }
1917 1917
1918 return rq; 1918 return rq;
1919 } 1919 }
1920 EXPORT_SYMBOL(blk_peek_request); 1920 EXPORT_SYMBOL(blk_peek_request);
1921 1921
1922 void blk_dequeue_request(struct request *rq) 1922 void blk_dequeue_request(struct request *rq)
1923 { 1923 {
1924 struct request_queue *q = rq->q; 1924 struct request_queue *q = rq->q;
1925 1925
1926 BUG_ON(list_empty(&rq->queuelist)); 1926 BUG_ON(list_empty(&rq->queuelist));
1927 BUG_ON(ELV_ON_HASH(rq)); 1927 BUG_ON(ELV_ON_HASH(rq));
1928 1928
1929 list_del_init(&rq->queuelist); 1929 list_del_init(&rq->queuelist);
1930 1930
1931 /* 1931 /*
1932 * the time frame between a request being removed from the lists 1932 * the time frame between a request being removed from the lists
1933 * and when it is freed is accounted as io that is in progress at 1933 * and when it is freed is accounted as io that is in progress at
1934 * the driver side. 1934 * the driver side.
1935 */ 1935 */
1936 if (blk_account_rq(rq)) { 1936 if (blk_account_rq(rq)) {
1937 q->in_flight[rq_is_sync(rq)]++; 1937 q->in_flight[rq_is_sync(rq)]++;
1938 set_io_start_time_ns(rq); 1938 set_io_start_time_ns(rq);
1939 } 1939 }
1940 } 1940 }
1941 1941
1942 /** 1942 /**
1943 * blk_start_request - start request processing on the driver 1943 * blk_start_request - start request processing on the driver
1944 * @req: request to dequeue 1944 * @req: request to dequeue
1945 * 1945 *
1946 * Description: 1946 * Description:
1947 * Dequeue @req and start timeout timer on it. This hands off the 1947 * Dequeue @req and start timeout timer on it. This hands off the
1948 * request to the driver. 1948 * request to the driver.
1949 * 1949 *
1950 * Block internal functions which don't want to start timer should 1950 * Block internal functions which don't want to start timer should
1951 * call blk_dequeue_request(). 1951 * call blk_dequeue_request().
1952 * 1952 *
1953 * Context: 1953 * Context:
1954 * queue_lock must be held. 1954 * queue_lock must be held.
1955 */ 1955 */
1956 void blk_start_request(struct request *req) 1956 void blk_start_request(struct request *req)
1957 { 1957 {
1958 blk_dequeue_request(req); 1958 blk_dequeue_request(req);
1959 1959
1960 /* 1960 /*
1961 * We are now handing the request to the hardware, initialize 1961 * We are now handing the request to the hardware, initialize
1962 * resid_len to full count and add the timeout handler. 1962 * resid_len to full count and add the timeout handler.
1963 */ 1963 */
1964 req->resid_len = blk_rq_bytes(req); 1964 req->resid_len = blk_rq_bytes(req);
1965 if (unlikely(blk_bidi_rq(req))) 1965 if (unlikely(blk_bidi_rq(req)))
1966 req->next_rq->resid_len = blk_rq_bytes(req->next_rq); 1966 req->next_rq->resid_len = blk_rq_bytes(req->next_rq);
1967 1967
1968 blk_add_timer(req); 1968 blk_add_timer(req);
1969 } 1969 }
1970 EXPORT_SYMBOL(blk_start_request); 1970 EXPORT_SYMBOL(blk_start_request);
1971 1971
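Taken together, blk_peek_request() and blk_start_request() give a driver the peek-then-commit pattern the kernel-doc above describes: look at the head of the queue, and only dequeue the request (and arm its timeout) once the hardware can actually take it. A minimal sketch of a ->request_fn built on that split; struct my_dev, my_dev_busy() and my_dev_issue() are illustrative assumptions, and the queue lock is already held when the block layer invokes ->request_fn, as both helpers require.

static void my_request_fn(struct request_queue *q)
{
	struct my_dev *dev = q->queuedata;	/* assumed driver-private data */
	struct request *rq;

	while ((rq = blk_peek_request(q)) != NULL) {
		if (my_dev_busy(dev))
			break;			/* leave rq queued, try again later */

		blk_start_request(rq);		/* dequeue + start the timeout timer */
		my_dev_issue(dev, rq);		/* hand the request to the hardware */
	}
}
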
1972 /** 1972 /**
1973 * blk_fetch_request - fetch a request from a request queue 1973 * blk_fetch_request - fetch a request from a request queue
1974 * @q: request queue to fetch a request from 1974 * @q: request queue to fetch a request from
1975 * 1975 *
1976 * Description: 1976 * Description:
1977 * Return the request at the top of @q. The request is started on 1977 * Return the request at the top of @q. The request is started on
1978 * return and LLD can start processing it immediately. 1978 * return and LLD can start processing it immediately.
1979 * 1979 *
1980 * Return: 1980 * Return:
1981 * Pointer to the request at the top of @q if available. Null 1981 * Pointer to the request at the top of @q if available. Null
1982 * otherwise. 1982 * otherwise.
1983 * 1983 *
1984 * Context: 1984 * Context:
1985 * queue_lock must be held. 1985 * queue_lock must be held.
1986 */ 1986 */
1987 struct request *blk_fetch_request(struct request_queue *q) 1987 struct request *blk_fetch_request(struct request_queue *q)
1988 { 1988 {
1989 struct request *rq; 1989 struct request *rq;
1990 1990
1991 rq = blk_peek_request(q); 1991 rq = blk_peek_request(q);
1992 if (rq) 1992 if (rq)
1993 blk_start_request(rq); 1993 blk_start_request(rq);
1994 return rq; 1994 return rq;
1995 } 1995 }
1996 EXPORT_SYMBOL(blk_fetch_request); 1996 EXPORT_SYMBOL(blk_fetch_request);
1997 1997
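blk_fetch_request() is simply the peek + start combination above, which is all a driver needs if it can always accept the next request. A sketch of the simplest synchronous ->request_fn built on it; my_do_transfer() (returning 0 or a negative errno) is an assumption, and the __ variant of the completion helper is used because the queue lock is still held here.

static void my_simple_request_fn(struct request_queue *q)
{
	struct request *rq;

	while ((rq = blk_fetch_request(q)) != NULL) {
		int err = my_do_transfer(q->queuedata, rq);	/* assumed helper */

		__blk_end_request_all(rq, err);	/* locked variant: queue lock held */
	}
}
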
1998 /** 1998 /**
1999 * blk_update_request - Special helper function for request stacking drivers 1999 * blk_update_request - Special helper function for request stacking drivers
2000 * @req: the request being processed 2000 * @req: the request being processed
2001 * @error: %0 for success, < %0 for error 2001 * @error: %0 for success, < %0 for error
2002 * @nr_bytes: number of bytes to complete @req 2002 * @nr_bytes: number of bytes to complete @req
2003 * 2003 *
2004 * Description: 2004 * Description:
2005 * Ends I/O on a number of bytes attached to @req, but doesn't complete 2005 * Ends I/O on a number of bytes attached to @req, but doesn't complete
2006 * the request structure even if @req doesn't have leftover. 2006 * the request structure even if @req doesn't have leftover.
2007 * If @req has leftover, sets it up for the next range of segments. 2007 * If @req has leftover, sets it up for the next range of segments.
2008 * 2008 *
2009 * This special helper function is only for request stacking drivers 2009 * This special helper function is only for request stacking drivers
2010 * (e.g. request-based dm) so that they can handle partial completion. 2010 * (e.g. request-based dm) so that they can handle partial completion.
2011 * Actual device drivers should use blk_end_request instead. 2011 * Actual device drivers should use blk_end_request instead.
2012 * 2012 *
2013 * Passing the result of blk_rq_bytes() as @nr_bytes guarantees 2013 * Passing the result of blk_rq_bytes() as @nr_bytes guarantees
2014 * %false return from this function. 2014 * %false return from this function.
2015 * 2015 *
2016 * Return: 2016 * Return:
2017 * %false - this request doesn't have any more data 2017 * %false - this request doesn't have any more data
2018 * %true - this request has more data 2018 * %true - this request has more data
2019 **/ 2019 **/
2020 bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) 2020 bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
2021 { 2021 {
2022 int total_bytes, bio_nbytes, next_idx = 0; 2022 int total_bytes, bio_nbytes, next_idx = 0;
2023 struct bio *bio; 2023 struct bio *bio;
2024 2024
2025 if (!req->bio) 2025 if (!req->bio)
2026 return false; 2026 return false;
2027 2027
2028 trace_block_rq_complete(req->q, req); 2028 trace_block_rq_complete(req->q, req);
2029 2029
2030 /* 2030 /*
2031 * For fs requests, rq is just carrier of independent bio's 2031 * For fs requests, rq is just carrier of independent bio's
2032 * and each partial completion should be handled separately. 2032 * and each partial completion should be handled separately.
2033 * Reset per-request error on each partial completion. 2033 * Reset per-request error on each partial completion.
2034 * 2034 *
2035 * TODO: tj: This is too subtle. It would be better to let 2035 * TODO: tj: This is too subtle. It would be better to let
2036 * low level drivers do what they see fit. 2036 * low level drivers do what they see fit.
2037 */ 2037 */
2038 if (req->cmd_type == REQ_TYPE_FS) 2038 if (req->cmd_type == REQ_TYPE_FS)
2039 req->errors = 0; 2039 req->errors = 0;
2040 2040
2041 if (error && req->cmd_type == REQ_TYPE_FS && 2041 if (error && req->cmd_type == REQ_TYPE_FS &&
2042 !(req->cmd_flags & REQ_QUIET)) { 2042 !(req->cmd_flags & REQ_QUIET)) {
2043 char *error_type; 2043 char *error_type;
2044 2044
2045 switch (error) { 2045 switch (error) {
2046 case -ENOLINK: 2046 case -ENOLINK:
2047 error_type = "recoverable transport"; 2047 error_type = "recoverable transport";
2048 break; 2048 break;
2049 case -EREMOTEIO: 2049 case -EREMOTEIO:
2050 error_type = "critical target"; 2050 error_type = "critical target";
2051 break; 2051 break;
2052 case -EBADE: 2052 case -EBADE:
2053 error_type = "critical nexus"; 2053 error_type = "critical nexus";
2054 break; 2054 break;
2055 case -EIO: 2055 case -EIO:
2056 default: 2056 default:
2057 error_type = "I/O"; 2057 error_type = "I/O";
2058 break; 2058 break;
2059 } 2059 }
2060 printk(KERN_ERR "end_request: %s error, dev %s, sector %llu\n", 2060 printk(KERN_ERR "end_request: %s error, dev %s, sector %llu\n",
2061 error_type, req->rq_disk ? req->rq_disk->disk_name : "?", 2061 error_type, req->rq_disk ? req->rq_disk->disk_name : "?",
2062 (unsigned long long)blk_rq_pos(req)); 2062 (unsigned long long)blk_rq_pos(req));
2063 } 2063 }
2064 2064
2065 blk_account_io_completion(req, nr_bytes); 2065 blk_account_io_completion(req, nr_bytes);
2066 2066
2067 total_bytes = bio_nbytes = 0; 2067 total_bytes = bio_nbytes = 0;
2068 while ((bio = req->bio) != NULL) { 2068 while ((bio = req->bio) != NULL) {
2069 int nbytes; 2069 int nbytes;
2070 2070
2071 if (nr_bytes >= bio->bi_size) { 2071 if (nr_bytes >= bio->bi_size) {
2072 req->bio = bio->bi_next; 2072 req->bio = bio->bi_next;
2073 nbytes = bio->bi_size; 2073 nbytes = bio->bi_size;
2074 req_bio_endio(req, bio, nbytes, error); 2074 req_bio_endio(req, bio, nbytes, error);
2075 next_idx = 0; 2075 next_idx = 0;
2076 bio_nbytes = 0; 2076 bio_nbytes = 0;
2077 } else { 2077 } else {
2078 int idx = bio->bi_idx + next_idx; 2078 int idx = bio->bi_idx + next_idx;
2079 2079
2080 if (unlikely(idx >= bio->bi_vcnt)) { 2080 if (unlikely(idx >= bio->bi_vcnt)) {
2081 blk_dump_rq_flags(req, "__end_that"); 2081 blk_dump_rq_flags(req, "__end_that");
2082 printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n", 2082 printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n",
2083 __func__, idx, bio->bi_vcnt); 2083 __func__, idx, bio->bi_vcnt);
2084 break; 2084 break;
2085 } 2085 }
2086 2086
2087 nbytes = bio_iovec_idx(bio, idx)->bv_len; 2087 nbytes = bio_iovec_idx(bio, idx)->bv_len;
2088 BIO_BUG_ON(nbytes > bio->bi_size); 2088 BIO_BUG_ON(nbytes > bio->bi_size);
2089 2089
2090 /* 2090 /*
2091 * not a complete bvec done 2091 * not a complete bvec done
2092 */ 2092 */
2093 if (unlikely(nbytes > nr_bytes)) { 2093 if (unlikely(nbytes > nr_bytes)) {
2094 bio_nbytes += nr_bytes; 2094 bio_nbytes += nr_bytes;
2095 total_bytes += nr_bytes; 2095 total_bytes += nr_bytes;
2096 break; 2096 break;
2097 } 2097 }
2098 2098
2099 /* 2099 /*
2100 * advance to the next vector 2100 * advance to the next vector
2101 */ 2101 */
2102 next_idx++; 2102 next_idx++;
2103 bio_nbytes += nbytes; 2103 bio_nbytes += nbytes;
2104 } 2104 }
2105 2105
2106 total_bytes += nbytes; 2106 total_bytes += nbytes;
2107 nr_bytes -= nbytes; 2107 nr_bytes -= nbytes;
2108 2108
2109 bio = req->bio; 2109 bio = req->bio;
2110 if (bio) { 2110 if (bio) {
2111 /* 2111 /*
2112 * end more in this run, or just return 'not-done' 2112 * end more in this run, or just return 'not-done'
2113 */ 2113 */
2114 if (unlikely(nr_bytes <= 0)) 2114 if (unlikely(nr_bytes <= 0))
2115 break; 2115 break;
2116 } 2116 }
2117 } 2117 }
2118 2118
2119 /* 2119 /*
2120 * completely done 2120 * completely done
2121 */ 2121 */
2122 if (!req->bio) { 2122 if (!req->bio) {
2123 /* 2123 /*
2124 * Reset counters so that the request stacking driver 2124 * Reset counters so that the request stacking driver
2125 * can find how many bytes remain in the request 2125 * can find how many bytes remain in the request
2126 * later. 2126 * later.
2127 */ 2127 */
2128 req->__data_len = 0; 2128 req->__data_len = 0;
2129 return false; 2129 return false;
2130 } 2130 }
2131 2131
2132 /* 2132 /*
2133 * if the request wasn't completed, update state 2133 * if the request wasn't completed, update state
2134 */ 2134 */
2135 if (bio_nbytes) { 2135 if (bio_nbytes) {
2136 req_bio_endio(req, bio, bio_nbytes, error); 2136 req_bio_endio(req, bio, bio_nbytes, error);
2137 bio->bi_idx += next_idx; 2137 bio->bi_idx += next_idx;
2138 bio_iovec(bio)->bv_offset += nr_bytes; 2138 bio_iovec(bio)->bv_offset += nr_bytes;
2139 bio_iovec(bio)->bv_len -= nr_bytes; 2139 bio_iovec(bio)->bv_len -= nr_bytes;
2140 } 2140 }
2141 2141
2142 req->__data_len -= total_bytes; 2142 req->__data_len -= total_bytes;
2143 req->buffer = bio_data(req->bio); 2143 req->buffer = bio_data(req->bio);
2144 2144
2145 /* update sector only for requests with clear definition of sector */ 2145 /* update sector only for requests with clear definition of sector */
2146 if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD)) 2146 if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD))
2147 req->__sector += total_bytes >> 9; 2147 req->__sector += total_bytes >> 9;
2148 2148
2149 /* mixed attributes always follow the first bio */ 2149 /* mixed attributes always follow the first bio */
2150 if (req->cmd_flags & REQ_MIXED_MERGE) { 2150 if (req->cmd_flags & REQ_MIXED_MERGE) {
2151 req->cmd_flags &= ~REQ_FAILFAST_MASK; 2151 req->cmd_flags &= ~REQ_FAILFAST_MASK;
2152 req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK; 2152 req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK;
2153 } 2153 }
2154 2154
2155 /* 2155 /*
2156 * If total number of sectors is less than the first segment 2156 * If total number of sectors is less than the first segment
2157 * size, something has gone terribly wrong. 2157 * size, something has gone terribly wrong.
2158 */ 2158 */
2159 if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) { 2159 if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
2160 blk_dump_rq_flags(req, "request botched"); 2160 blk_dump_rq_flags(req, "request botched");
2161 req->__data_len = blk_rq_cur_bytes(req); 2161 req->__data_len = blk_rq_cur_bytes(req);
2162 } 2162 }
2163 2163
2164 /* recalculate the number of segments */ 2164 /* recalculate the number of segments */
2165 blk_recalc_rq_segments(req); 2165 blk_recalc_rq_segments(req);
2166 2166
2167 return true; 2167 return true;
2168 } 2168 }
2169 EXPORT_SYMBOL_GPL(blk_update_request); 2169 EXPORT_SYMBOL_GPL(blk_update_request);
2170 2170
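For a request-stacking driver the important part of the contract is the return value: %true means the request still has bytes outstanding, %false means everything has been accounted for but the request itself still has to be finished explicitly. A rough sketch of that pattern, not dm's actual completion path; where the byte count comes from is left as an assumption.

static void my_stack_complete_bytes(struct request *rq, int error,
				    unsigned int done)
{
	if (blk_update_request(rq, error, done))
		return;			/* partial completion, more to come */

	/* all bytes accounted for; now really finish the request */
	blk_end_request_all(rq, error);
}
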
2171 static bool blk_update_bidi_request(struct request *rq, int error, 2171 static bool blk_update_bidi_request(struct request *rq, int error,
2172 unsigned int nr_bytes, 2172 unsigned int nr_bytes,
2173 unsigned int bidi_bytes) 2173 unsigned int bidi_bytes)
2174 { 2174 {
2175 if (blk_update_request(rq, error, nr_bytes)) 2175 if (blk_update_request(rq, error, nr_bytes))
2176 return true; 2176 return true;
2177 2177
2178 /* Bidi request must be completed as a whole */ 2178 /* Bidi request must be completed as a whole */
2179 if (unlikely(blk_bidi_rq(rq)) && 2179 if (unlikely(blk_bidi_rq(rq)) &&
2180 blk_update_request(rq->next_rq, error, bidi_bytes)) 2180 blk_update_request(rq->next_rq, error, bidi_bytes))
2181 return true; 2181 return true;
2182 2182
2183 if (blk_queue_add_random(rq->q)) 2183 if (blk_queue_add_random(rq->q))
2184 add_disk_randomness(rq->rq_disk); 2184 add_disk_randomness(rq->rq_disk);
2185 2185
2186 return false; 2186 return false;
2187 } 2187 }
2188 2188
2189 /** 2189 /**
2190 * blk_unprep_request - unprepare a request 2190 * blk_unprep_request - unprepare a request
2191 * @req: the request 2191 * @req: the request
2192 * 2192 *
2193 * This function makes a request ready for complete resubmission (or 2193 * This function makes a request ready for complete resubmission (or
2194 * completion). It happens only after all error handling is complete, 2194 * completion). It happens only after all error handling is complete,
2195 * so represents the appropriate moment to deallocate any resources 2195 * so represents the appropriate moment to deallocate any resources
2196 * that were allocated to the request in the prep_rq_fn. The queue 2196 * that were allocated to the request in the prep_rq_fn. The queue
2197 * lock is held when calling this. 2197 * lock is held when calling this.
2198 */ 2198 */
2199 void blk_unprep_request(struct request *req) 2199 void blk_unprep_request(struct request *req)
2200 { 2200 {
2201 struct request_queue *q = req->q; 2201 struct request_queue *q = req->q;
2202 2202
2203 req->cmd_flags &= ~REQ_DONTPREP; 2203 req->cmd_flags &= ~REQ_DONTPREP;
2204 if (q->unprep_rq_fn) 2204 if (q->unprep_rq_fn)
2205 q->unprep_rq_fn(q, req); 2205 q->unprep_rq_fn(q, req);
2206 } 2206 }
2207 EXPORT_SYMBOL_GPL(blk_unprep_request); 2207 EXPORT_SYMBOL_GPL(blk_unprep_request);
2208 2208
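The prep/unprep hooks are symmetric: whatever ->prep_rq_fn allocates for a request is released in ->unprep_rq_fn once error handling is done with it. A sketch under that assumption; my_alloc_cmd()/my_free_cmd() are made up, and the hooks would be registered at init time with blk_queue_prep_rq() and blk_queue_unprep_rq().

static int my_prep_rq(struct request_queue *q, struct request *rq)
{
	rq->special = my_alloc_cmd(rq);		/* assumed per-request resource */
	if (!rq->special)
		return BLKPREP_DEFER;		/* retry later, rq stays at the head */

	rq->cmd_flags |= REQ_DONTPREP;		/* don't prep again after a requeue */
	return BLKPREP_OK;
}

static void my_unprep_rq(struct request_queue *q, struct request *rq)
{
	my_free_cmd(rq->special);
	rq->special = NULL;
}
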
2209 /* 2209 /*
2210 * queue lock must be held 2210 * queue lock must be held
2211 */ 2211 */
2212 static void blk_finish_request(struct request *req, int error) 2212 static void blk_finish_request(struct request *req, int error)
2213 { 2213 {
2214 if (blk_rq_tagged(req)) 2214 if (blk_rq_tagged(req))
2215 blk_queue_end_tag(req->q, req); 2215 blk_queue_end_tag(req->q, req);
2216 2216
2217 BUG_ON(blk_queued_rq(req)); 2217 BUG_ON(blk_queued_rq(req));
2218 2218
2219 if (unlikely(laptop_mode) && req->cmd_type == REQ_TYPE_FS) 2219 if (unlikely(laptop_mode) && req->cmd_type == REQ_TYPE_FS)
2220 laptop_io_completion(&req->q->backing_dev_info); 2220 laptop_io_completion(&req->q->backing_dev_info);
2221 2221
2222 blk_delete_timer(req); 2222 blk_delete_timer(req);
2223 2223
2224 if (req->cmd_flags & REQ_DONTPREP) 2224 if (req->cmd_flags & REQ_DONTPREP)
2225 blk_unprep_request(req); 2225 blk_unprep_request(req);
2226 2226
2227 2227
2228 blk_account_io_done(req); 2228 blk_account_io_done(req);
2229 2229
2230 if (req->end_io) 2230 if (req->end_io)
2231 req->end_io(req, error); 2231 req->end_io(req, error);
2232 else { 2232 else {
2233 if (blk_bidi_rq(req)) 2233 if (blk_bidi_rq(req))
2234 __blk_put_request(req->next_rq->q, req->next_rq); 2234 __blk_put_request(req->next_rq->q, req->next_rq);
2235 2235
2236 __blk_put_request(req->q, req); 2236 __blk_put_request(req->q, req);
2237 } 2237 }
2238 } 2238 }
2239 2239
2240 /** 2240 /**
2241 * blk_end_bidi_request - Complete a bidi request 2241 * blk_end_bidi_request - Complete a bidi request
2242 * @rq: the request to complete 2242 * @rq: the request to complete
2243 * @error: %0 for success, < %0 for error 2243 * @error: %0 for success, < %0 for error
2244 * @nr_bytes: number of bytes to complete @rq 2244 * @nr_bytes: number of bytes to complete @rq
2245 * @bidi_bytes: number of bytes to complete @rq->next_rq 2245 * @bidi_bytes: number of bytes to complete @rq->next_rq
2246 * 2246 *
2247 * Description: 2247 * Description:
2248 * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. 2248 * Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
2249 * Drivers that support bidi can safely call this function for any 2249 * Drivers that support bidi can safely call this function for any
2250 * type of request, bidi or uni. In the latter case @bidi_bytes is 2250 * type of request, bidi or uni. In the latter case @bidi_bytes is
2251 * just ignored. 2251 * just ignored.
2252 * 2252 *
2253 * Return: 2253 * Return:
2254 * %false - we are done with this request 2254 * %false - we are done with this request
2255 * %true - still buffers pending for this request 2255 * %true - still buffers pending for this request
2256 **/ 2256 **/
2257 static bool blk_end_bidi_request(struct request *rq, int error, 2257 static bool blk_end_bidi_request(struct request *rq, int error,
2258 unsigned int nr_bytes, unsigned int bidi_bytes) 2258 unsigned int nr_bytes, unsigned int bidi_bytes)
2259 { 2259 {
2260 struct request_queue *q = rq->q; 2260 struct request_queue *q = rq->q;
2261 unsigned long flags; 2261 unsigned long flags;
2262 2262
2263 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) 2263 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
2264 return true; 2264 return true;
2265 2265
2266 spin_lock_irqsave(q->queue_lock, flags); 2266 spin_lock_irqsave(q->queue_lock, flags);
2267 blk_finish_request(rq, error); 2267 blk_finish_request(rq, error);
2268 spin_unlock_irqrestore(q->queue_lock, flags); 2268 spin_unlock_irqrestore(q->queue_lock, flags);
2269 2269
2270 return false; 2270 return false;
2271 } 2271 }
2272 2272
2273 /** 2273 /**
2274 * __blk_end_bidi_request - Complete a bidi request with queue lock held 2274 * __blk_end_bidi_request - Complete a bidi request with queue lock held
2275 * @rq: the request to complete 2275 * @rq: the request to complete
2276 * @error: %0 for success, < %0 for error 2276 * @error: %0 for success, < %0 for error
2277 * @nr_bytes: number of bytes to complete @rq 2277 * @nr_bytes: number of bytes to complete @rq
2278 * @bidi_bytes: number of bytes to complete @rq->next_rq 2278 * @bidi_bytes: number of bytes to complete @rq->next_rq
2279 * 2279 *
2280 * Description: 2280 * Description:
2281 * Identical to blk_end_bidi_request() except that queue lock is 2281 * Identical to blk_end_bidi_request() except that queue lock is
2282 * assumed to be locked on entry and remains so on return. 2282 * assumed to be locked on entry and remains so on return.
2283 * 2283 *
2284 * Return: 2284 * Return:
2285 * %false - we are done with this request 2285 * %false - we are done with this request
2286 * %true - still buffers pending for this request 2286 * %true - still buffers pending for this request
2287 **/ 2287 **/
2288 static bool __blk_end_bidi_request(struct request *rq, int error, 2288 static bool __blk_end_bidi_request(struct request *rq, int error,
2289 unsigned int nr_bytes, unsigned int bidi_bytes) 2289 unsigned int nr_bytes, unsigned int bidi_bytes)
2290 { 2290 {
2291 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) 2291 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
2292 return true; 2292 return true;
2293 2293
2294 blk_finish_request(rq, error); 2294 blk_finish_request(rq, error);
2295 2295
2296 return false; 2296 return false;
2297 } 2297 }
2298 2298
2299 /** 2299 /**
2300 * blk_end_request - Helper function for drivers to complete the request. 2300 * blk_end_request - Helper function for drivers to complete the request.
2301 * @rq: the request being processed 2301 * @rq: the request being processed
2302 * @error: %0 for success, < %0 for error 2302 * @error: %0 for success, < %0 for error
2303 * @nr_bytes: number of bytes to complete 2303 * @nr_bytes: number of bytes to complete
2304 * 2304 *
2305 * Description: 2305 * Description:
2306 * Ends I/O on a number of bytes attached to @rq. 2306 * Ends I/O on a number of bytes attached to @rq.
2307 * If @rq has leftover, sets it up for the next range of segments. 2307 * If @rq has leftover, sets it up for the next range of segments.
2308 * 2308 *
2309 * Return: 2309 * Return:
2310 * %false - we are done with this request 2310 * %false - we are done with this request
2311 * %true - still buffers pending for this request 2311 * %true - still buffers pending for this request
2312 **/ 2312 **/
2313 bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes) 2313 bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
2314 { 2314 {
2315 return blk_end_bidi_request(rq, error, nr_bytes, 0); 2315 return blk_end_bidi_request(rq, error, nr_bytes, 0);
2316 } 2316 }
2317 EXPORT_SYMBOL(blk_end_request); 2317 EXPORT_SYMBOL(blk_end_request);
2318 2318
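blk_end_request() is the unlocked variant a driver typically calls from its completion (e.g. interrupt) path; it takes the queue lock itself once the request is finished. A sketch in which the hardware reports how many bytes completed; my_hw_bytes_done() and my_dev_issue() are assumptions carried over from the earlier sketches.

static void my_complete_irq(struct my_dev *dev, struct request *rq, int error)
{
	unsigned int bytes = my_hw_bytes_done(dev);	/* assumed HW status read */

	if (blk_end_request(rq, error, bytes))
		my_dev_issue(dev, rq);	/* not done yet: program the next chunk */
}
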
2319 /** 2319 /**
2320 * blk_end_request_all - Helper function for drivers to finish the request. 2320 * blk_end_request_all - Helper function for drivers to finish the request.
2321 * @rq: the request to finish 2321 * @rq: the request to finish
2322 * @error: %0 for success, < %0 for error 2322 * @error: %0 for success, < %0 for error
2323 * 2323 *
2324 * Description: 2324 * Description:
2325 * Completely finish @rq. 2325 * Completely finish @rq.
2326 */ 2326 */
2327 void blk_end_request_all(struct request *rq, int error) 2327 void blk_end_request_all(struct request *rq, int error)
2328 { 2328 {
2329 bool pending; 2329 bool pending;
2330 unsigned int bidi_bytes = 0; 2330 unsigned int bidi_bytes = 0;
2331 2331
2332 if (unlikely(blk_bidi_rq(rq))) 2332 if (unlikely(blk_bidi_rq(rq)))
2333 bidi_bytes = blk_rq_bytes(rq->next_rq); 2333 bidi_bytes = blk_rq_bytes(rq->next_rq);
2334 2334
2335 pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); 2335 pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
2336 BUG_ON(pending); 2336 BUG_ON(pending);
2337 } 2337 }
2338 EXPORT_SYMBOL(blk_end_request_all); 2338 EXPORT_SYMBOL(blk_end_request_all);
2339 2339
2340 /** 2340 /**
2341 * blk_end_request_cur - Helper function to finish the current request chunk. 2341 * blk_end_request_cur - Helper function to finish the current request chunk.
2342 * @rq: the request to finish the current chunk for 2342 * @rq: the request to finish the current chunk for
2343 * @error: %0 for success, < %0 for error 2343 * @error: %0 for success, < %0 for error
2344 * 2344 *
2345 * Description: 2345 * Description:
2346 * Complete the current consecutively mapped chunk from @rq. 2346 * Complete the current consecutively mapped chunk from @rq.
2347 * 2347 *
2348 * Return: 2348 * Return:
2349 * %false - we are done with this request 2349 * %false - we are done with this request
2350 * %true - still buffers pending for this request 2350 * %true - still buffers pending for this request
2351 */ 2351 */
2352 bool blk_end_request_cur(struct request *rq, int error) 2352 bool blk_end_request_cur(struct request *rq, int error)
2353 { 2353 {
2354 return blk_end_request(rq, error, blk_rq_cur_bytes(rq)); 2354 return blk_end_request(rq, error, blk_rq_cur_bytes(rq));
2355 } 2355 }
2356 EXPORT_SYMBOL(blk_end_request_cur); 2356 EXPORT_SYMBOL(blk_end_request_cur);
2357 2357
2358 /** 2358 /**
2359 * blk_end_request_err - Finish a request till the next failure boundary. 2359 * blk_end_request_err - Finish a request till the next failure boundary.
2360 * @rq: the request to finish till the next failure boundary for 2360 * @rq: the request to finish till the next failure boundary for
2361 * @error: must be negative errno 2361 * @error: must be negative errno
2362 * 2362 *
2363 * Description: 2363 * Description:
2364 * Complete @rq till the next failure boundary. 2364 * Complete @rq till the next failure boundary.
2365 * 2365 *
2366 * Return: 2366 * Return:
2367 * %false - we are done with this request 2367 * %false - we are done with this request
2368 * %true - still buffers pending for this request 2368 * %true - still buffers pending for this request
2369 */ 2369 */
2370 bool blk_end_request_err(struct request *rq, int error) 2370 bool blk_end_request_err(struct request *rq, int error)
2371 { 2371 {
2372 WARN_ON(error >= 0); 2372 WARN_ON(error >= 0);
2373 return blk_end_request(rq, error, blk_rq_err_bytes(rq)); 2373 return blk_end_request(rq, error, blk_rq_err_bytes(rq));
2374 } 2374 }
2375 EXPORT_SYMBOL_GPL(blk_end_request_err); 2375 EXPORT_SYMBOL_GPL(blk_end_request_err);
2376 2376
2377 /** 2377 /**
2378 * __blk_end_request - Helper function for drivers to complete the request. 2378 * __blk_end_request - Helper function for drivers to complete the request.
2379 * @rq: the request being processed 2379 * @rq: the request being processed
2380 * @error: %0 for success, < %0 for error 2380 * @error: %0 for success, < %0 for error
2381 * @nr_bytes: number of bytes to complete 2381 * @nr_bytes: number of bytes to complete
2382 * 2382 *
2383 * Description: 2383 * Description:
2384 * Must be called with queue lock held unlike blk_end_request(). 2384 * Must be called with queue lock held unlike blk_end_request().
2385 * 2385 *
2386 * Return: 2386 * Return:
2387 * %false - we are done with this request 2387 * %false - we are done with this request
2388 * %true - still buffers pending for this request 2388 * %true - still buffers pending for this request
2389 **/ 2389 **/
2390 bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) 2390 bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
2391 { 2391 {
2392 return __blk_end_bidi_request(rq, error, nr_bytes, 0); 2392 return __blk_end_bidi_request(rq, error, nr_bytes, 0);
2393 } 2393 }
2394 EXPORT_SYMBOL(__blk_end_request); 2394 EXPORT_SYMBOL(__blk_end_request);
2395 2395
2396 /** 2396 /**
2397 * __blk_end_request_all - Helper function for drivers to finish the request. 2397 * __blk_end_request_all - Helper function for drivers to finish the request.
2398 * @rq: the request to finish 2398 * @rq: the request to finish
2399 * @error: %0 for success, < %0 for error 2399 * @error: %0 for success, < %0 for error
2400 * 2400 *
2401 * Description: 2401 * Description:
2402 * Completely finish @rq. Must be called with queue lock held. 2402 * Completely finish @rq. Must be called with queue lock held.
2403 */ 2403 */
2404 void __blk_end_request_all(struct request *rq, int error) 2404 void __blk_end_request_all(struct request *rq, int error)
2405 { 2405 {
2406 bool pending; 2406 bool pending;
2407 unsigned int bidi_bytes = 0; 2407 unsigned int bidi_bytes = 0;
2408 2408
2409 if (unlikely(blk_bidi_rq(rq))) 2409 if (unlikely(blk_bidi_rq(rq)))
2410 bidi_bytes = blk_rq_bytes(rq->next_rq); 2410 bidi_bytes = blk_rq_bytes(rq->next_rq);
2411 2411
2412 pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); 2412 pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
2413 BUG_ON(pending); 2413 BUG_ON(pending);
2414 } 2414 }
2415 EXPORT_SYMBOL(__blk_end_request_all); 2415 EXPORT_SYMBOL(__blk_end_request_all);
2416 2416
2417 /** 2417 /**
2418 * __blk_end_request_cur - Helper function to finish the current request chunk. 2418 * __blk_end_request_cur - Helper function to finish the current request chunk.
2419 * @rq: the request to finish the current chunk for 2419 * @rq: the request to finish the current chunk for
2420 * @error: %0 for success, < %0 for error 2420 * @error: %0 for success, < %0 for error
2421 * 2421 *
2422 * Description: 2422 * Description:
2423 * Complete the current consecutively mapped chunk from @rq. Must 2423 * Complete the current consecutively mapped chunk from @rq. Must
2424 * be called with queue lock held. 2424 * be called with queue lock held.
2425 * 2425 *
2426 * Return: 2426 * Return:
2427 * %false - we are done with this request 2427 * %false - we are done with this request
2428 * %true - still buffers pending for this request 2428 * %true - still buffers pending for this request
2429 */ 2429 */
2430 bool __blk_end_request_cur(struct request *rq, int error) 2430 bool __blk_end_request_cur(struct request *rq, int error)
2431 { 2431 {
2432 return __blk_end_request(rq, error, blk_rq_cur_bytes(rq)); 2432 return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
2433 } 2433 }
2434 EXPORT_SYMBOL(__blk_end_request_cur); 2434 EXPORT_SYMBOL(__blk_end_request_cur);
2435 2435
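The _cur helpers fit the classic chunk-at-a-time teaching drivers: transfer the current contiguous piece (blk_rq_cur_bytes() worth of data starting at blk_rq_pos()), complete just that much, and stay on the same request while the helper returns %true. A sketch with the queue lock held, hence the __ variant; my_transfer_chunk() is an assumption.

static void my_chunked_request_fn(struct request_queue *q)
{
	struct request *rq = blk_fetch_request(q);

	while (rq) {
		int err = my_transfer_chunk(q->queuedata, blk_rq_pos(rq),
					    rq->buffer, blk_rq_cur_bytes(rq));

		if (!__blk_end_request_cur(rq, err))
			rq = blk_fetch_request(q);	/* done, move to the next one */
	}
}
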
2436 /** 2436 /**
2437 * __blk_end_request_err - Finish a request till the next failure boundary. 2437 * __blk_end_request_err - Finish a request till the next failure boundary.
2438 * @rq: the request to finish till the next failure boundary for 2438 * @rq: the request to finish till the next failure boundary for
2439 * @error: must be negative errno 2439 * @error: must be negative errno
2440 * 2440 *
2441 * Description: 2441 * Description:
2442 * Complete @rq till the next failure boundary. Must be called 2442 * Complete @rq till the next failure boundary. Must be called
2443 * with queue lock held. 2443 * with queue lock held.
2444 * 2444 *
2445 * Return: 2445 * Return:
2446 * %false - we are done with this request 2446 * %false - we are done with this request
2447 * %true - still buffers pending for this request 2447 * %true - still buffers pending for this request
2448 */ 2448 */
2449 bool __blk_end_request_err(struct request *rq, int error) 2449 bool __blk_end_request_err(struct request *rq, int error)
2450 { 2450 {
2451 WARN_ON(error >= 0); 2451 WARN_ON(error >= 0);
2452 return __blk_end_request(rq, error, blk_rq_err_bytes(rq)); 2452 return __blk_end_request(rq, error, blk_rq_err_bytes(rq));
2453 } 2453 }
2454 EXPORT_SYMBOL_GPL(__blk_end_request_err); 2454 EXPORT_SYMBOL_GPL(__blk_end_request_err);
2455 2455
2456 void blk_rq_bio_prep(struct request_queue *q, struct request *rq, 2456 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
2457 struct bio *bio) 2457 struct bio *bio)
2458 { 2458 {
2459 /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */ 2459 /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */
2460 rq->cmd_flags |= bio->bi_rw & REQ_WRITE; 2460 rq->cmd_flags |= bio->bi_rw & REQ_WRITE;
2461 2461
2462 if (bio_has_data(bio)) { 2462 if (bio_has_data(bio)) {
2463 rq->nr_phys_segments = bio_phys_segments(q, bio); 2463 rq->nr_phys_segments = bio_phys_segments(q, bio);
2464 rq->buffer = bio_data(bio); 2464 rq->buffer = bio_data(bio);
2465 } 2465 }
2466 rq->__data_len = bio->bi_size; 2466 rq->__data_len = bio->bi_size;
2467 rq->bio = rq->biotail = bio; 2467 rq->bio = rq->biotail = bio;
2468 2468
2469 if (bio->bi_bdev) 2469 if (bio->bi_bdev)
2470 rq->rq_disk = bio->bi_bdev->bd_disk; 2470 rq->rq_disk = bio->bi_bdev->bd_disk;
2471 } 2471 }
2472 2472
2473 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 2473 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
2474 /** 2474 /**
2475 * rq_flush_dcache_pages - Helper function to flush all pages in a request 2475 * rq_flush_dcache_pages - Helper function to flush all pages in a request
2476 * @rq: the request to be flushed 2476 * @rq: the request to be flushed
2477 * 2477 *
2478 * Description: 2478 * Description:
2479 * Flush all pages in @rq. 2479 * Flush all pages in @rq.
2480 */ 2480 */
2481 void rq_flush_dcache_pages(struct request *rq) 2481 void rq_flush_dcache_pages(struct request *rq)
2482 { 2482 {
2483 struct req_iterator iter; 2483 struct req_iterator iter;
2484 struct bio_vec *bvec; 2484 struct bio_vec *bvec;
2485 2485
2486 rq_for_each_segment(bvec, rq, iter) 2486 rq_for_each_segment(bvec, rq, iter)
2487 flush_dcache_page(bvec->bv_page); 2487 flush_dcache_page(bvec->bv_page);
2488 } 2488 }
2489 EXPORT_SYMBOL_GPL(rq_flush_dcache_pages); 2489 EXPORT_SYMBOL_GPL(rq_flush_dcache_pages);
2490 #endif 2490 #endif
2491 2491
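On architectures that define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE, a driver that fills a read request by CPU copy should flush the request's pages before completing it, which is exactly what the helper above wraps. A small sketch; my_copy_to_request() is an assumption.

static void my_finish_read(struct request *rq)
{
	my_copy_to_request(rq);		/* CPU writes data into the request's pages */
	rq_flush_dcache_pages(rq);	/* keep the D-cache coherent with user mappings */
	blk_end_request_all(rq, 0);
}
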
2492 /** 2492 /**
2493 * blk_lld_busy - Check if underlying low-level drivers of a device are busy 2493 * blk_lld_busy - Check if underlying low-level drivers of a device are busy
2494 * @q : the queue of the device being checked 2494 * @q : the queue of the device being checked
2495 * 2495 *
2496 * Description: 2496 * Description:
2497 * Check if underlying low-level drivers of a device are busy. 2497 * Check if underlying low-level drivers of a device are busy.
2498 * If the drivers want to export their busy state, they must set own 2498 * If the drivers want to export their busy state, they must set own
2499 * exporting function using blk_queue_lld_busy() first. 2499 * exporting function using blk_queue_lld_busy() first.
2500 * 2500 *
2501 * Basically, this function is used only by request stacking drivers 2501 * Basically, this function is used only by request stacking drivers
2502 * to stop dispatching requests to underlying devices when underlying 2502 * to stop dispatching requests to underlying devices when underlying
2503 * devices are busy. This behavior helps more I/O merging on the queue 2503 * devices are busy. This behavior helps more I/O merging on the queue
2504 * of the request stacking driver and prevents I/O throughput regression 2504 * of the request stacking driver and prevents I/O throughput regression
2505 * on burst I/O load. 2505 * on burst I/O load.
2506 * 2506 *
2507 * Return: 2507 * Return:
2508 * 0 - Not busy (The request stacking driver should dispatch request) 2508 * 0 - Not busy (The request stacking driver should dispatch request)
2509 * 1 - Busy (The request stacking driver should stop dispatching request) 2509 * 1 - Busy (The request stacking driver should stop dispatching request)
2510 */ 2510 */
2511 int blk_lld_busy(struct request_queue *q) 2511 int blk_lld_busy(struct request_queue *q)
2512 { 2512 {
2513 if (q->lld_busy_fn) 2513 if (q->lld_busy_fn)
2514 return q->lld_busy_fn(q); 2514 return q->lld_busy_fn(q);
2515 2515
2516 return 0; 2516 return 0;
2517 } 2517 }
2518 EXPORT_SYMBOL_GPL(blk_lld_busy); 2518 EXPORT_SYMBOL_GPL(blk_lld_busy);
2519 2519
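Both halves of that contract are small. The low-level driver publishes a callback (registered with blk_queue_lld_busy()) reporting whether its hardware queue is saturated, and the stacking driver polls it before dispatching. A sketch under those assumptions; struct my_dev and my_hw_queue_full() are made up.

static int my_lld_busy(struct request_queue *q)
{
	struct my_dev *dev = q->queuedata;

	return my_hw_queue_full(dev);		/* non-zero means busy */
}

/* low-level driver init: blk_queue_lld_busy(q, my_lld_busy); */

static bool my_stack_may_dispatch(struct request_queue *underlying_q)
{
	return !blk_lld_busy(underlying_q);	/* hold requests back while busy */
}
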
2520 /** 2520 /**
2521 * blk_rq_unprep_clone - Helper function to free all bios in a cloned request 2521 * blk_rq_unprep_clone - Helper function to free all bios in a cloned request
2522 * @rq: the clone request to be cleaned up 2522 * @rq: the clone request to be cleaned up
2523 * 2523 *
2524 * Description: 2524 * Description:
2525 * Free all bios in @rq for a cloned request. 2525 * Free all bios in @rq for a cloned request.
2526 */ 2526 */
2527 void blk_rq_unprep_clone(struct request *rq) 2527 void blk_rq_unprep_clone(struct request *rq)
2528 { 2528 {
2529 struct bio *bio; 2529 struct bio *bio;
2530 2530
2531 while ((bio = rq->bio) != NULL) { 2531 while ((bio = rq->bio) != NULL) {
2532 rq->bio = bio->bi_next; 2532 rq->bio = bio->bi_next;
2533 2533
2534 bio_put(bio); 2534 bio_put(bio);
2535 } 2535 }
2536 } 2536 }
2537 EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); 2537 EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
2538 2538
2539 /* 2539 /*
2540 * Copy attributes of the original request to the clone request. 2540 * Copy attributes of the original request to the clone request.
2541 * The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied. 2541 * The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied.
2542 */ 2542 */
2543 static void __blk_rq_prep_clone(struct request *dst, struct request *src) 2543 static void __blk_rq_prep_clone(struct request *dst, struct request *src)
2544 { 2544 {
2545 dst->cpu = src->cpu; 2545 dst->cpu = src->cpu;
2546 dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE; 2546 dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE;
2547 dst->cmd_type = src->cmd_type; 2547 dst->cmd_type = src->cmd_type;
2548 dst->__sector = blk_rq_pos(src); 2548 dst->__sector = blk_rq_pos(src);
2549 dst->__data_len = blk_rq_bytes(src); 2549 dst->__data_len = blk_rq_bytes(src);
2550 dst->nr_phys_segments = src->nr_phys_segments; 2550 dst->nr_phys_segments = src->nr_phys_segments;
2551 dst->ioprio = src->ioprio; 2551 dst->ioprio = src->ioprio;
2552 dst->extra_len = src->extra_len; 2552 dst->extra_len = src->extra_len;
2553 } 2553 }
2554 2554
2555 /** 2555 /**
2556 * blk_rq_prep_clone - Helper function to setup clone request 2556 * blk_rq_prep_clone - Helper function to setup clone request
2557 * @rq: the request to be setup 2557 * @rq: the request to be setup
2558 * @rq_src: original request to be cloned 2558 * @rq_src: original request to be cloned
2559 * @bs: bio_set that bios for clone are allocated from 2559 * @bs: bio_set that bios for clone are allocated from
2560 * @gfp_mask: memory allocation mask for bio 2560 * @gfp_mask: memory allocation mask for bio
2561 * @bio_ctr: setup function to be called for each clone bio. 2561 * @bio_ctr: setup function to be called for each clone bio.
2562 * Returns %0 for success, non %0 for failure. 2562 * Returns %0 for success, non %0 for failure.
2563 * @data: private data to be passed to @bio_ctr 2563 * @data: private data to be passed to @bio_ctr
2564 * 2564 *
2565 * Description: 2565 * Description:
2566 * Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq. 2566 * Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.
2567 * The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense) 2567 * The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense)
2568 * are not copied, and copying such parts is the caller's responsibility. 2568 * are not copied, and copying such parts is the caller's responsibility.
2569 * Also, pages which the original bios are pointing to are not copied 2569 * Also, pages which the original bios are pointing to are not copied
2570 * and the cloned bios just point to the same pages. 2570 * and the cloned bios just point to the same pages.
2571 * So cloned bios must be completed before original bios, which means 2571 * So cloned bios must be completed before original bios, which means
2572 * the caller must complete @rq before @rq_src. 2572 * the caller must complete @rq before @rq_src.
2573 */ 2573 */
2574 int blk_rq_prep_clone(struct request *rq, struct request *rq_src, 2574 int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
2575 struct bio_set *bs, gfp_t gfp_mask, 2575 struct bio_set *bs, gfp_t gfp_mask,
2576 int (*bio_ctr)(struct bio *, struct bio *, void *), 2576 int (*bio_ctr)(struct bio *, struct bio *, void *),
2577 void *data) 2577 void *data)
2578 { 2578 {
2579 struct bio *bio, *bio_src; 2579 struct bio *bio, *bio_src;
2580 2580
2581 if (!bs) 2581 if (!bs)
2582 bs = fs_bio_set; 2582 bs = fs_bio_set;
2583 2583
2584 blk_rq_init(NULL, rq); 2584 blk_rq_init(NULL, rq);
2585 2585
2586 __rq_for_each_bio(bio_src, rq_src) { 2586 __rq_for_each_bio(bio_src, rq_src) {
2587 bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs); 2587 bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs);
2588 if (!bio) 2588 if (!bio)
2589 goto free_and_out; 2589 goto free_and_out;
2590 2590
2591 __bio_clone(bio, bio_src); 2591 __bio_clone(bio, bio_src);
2592 2592
2593 if (bio_integrity(bio_src) && 2593 if (bio_integrity(bio_src) &&
2594 bio_integrity_clone(bio, bio_src, gfp_mask, bs)) 2594 bio_integrity_clone(bio, bio_src, gfp_mask, bs))
2595 goto free_and_out; 2595 goto free_and_out;
2596 2596
2597 if (bio_ctr && bio_ctr(bio, bio_src, data)) 2597 if (bio_ctr && bio_ctr(bio, bio_src, data))
2598 goto free_and_out; 2598 goto free_and_out;
2599 2599
2600 if (rq->bio) { 2600 if (rq->bio) {
2601 rq->biotail->bi_next = bio; 2601 rq->biotail->bi_next = bio;
2602 rq->biotail = bio; 2602 rq->biotail = bio;
2603 } else 2603 } else
2604 rq->bio = rq->biotail = bio; 2604 rq->bio = rq->biotail = bio;
2605 } 2605 }
2606 2606
2607 __blk_rq_prep_clone(rq, rq_src); 2607 __blk_rq_prep_clone(rq, rq_src);
2608 2608
2609 return 0; 2609 return 0;
2610 2610
2611 free_and_out: 2611 free_and_out:
2612 if (bio) 2612 if (bio)
2613 bio_free(bio, bs); 2613 bio_free(bio, bs);
2614 blk_rq_unprep_clone(rq); 2614 blk_rq_unprep_clone(rq);
2615 2615
2616 return -ENOMEM; 2616 return -ENOMEM;
2617 } 2617 }
2618 EXPORT_SYMBOL_GPL(blk_rq_prep_clone); 2618 EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
2619 2619
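Seen from a stacking driver, the pair above is allocate-clone / issue / free-clone. A minimal sketch; passing a NULL bio_set (meaning fs_bio_set, as the code shows) and GFP_ATOMIC are assumptions about the caller's context, as is stashing the original request in end_io_data.

static int my_stack_setup_clone(struct request *clone, struct request *orig)
{
	int ret;

	ret = blk_rq_prep_clone(clone, orig, NULL, GFP_ATOMIC, NULL, NULL);
	if (ret)
		return ret;			/* -ENOMEM: bios could not be cloned */

	clone->end_io_data = orig;		/* assumed back-pointer for completion */
	return 0;
}

static void my_stack_free_clone(struct request *clone)
{
	blk_rq_unprep_clone(clone);		/* drop the cloned bios */
}
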
2620 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work) 2620 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
2621 { 2621 {
2622 return queue_work(kblockd_workqueue, work); 2622 return queue_work(kblockd_workqueue, work);
2623 } 2623 }
2624 EXPORT_SYMBOL(kblockd_schedule_work); 2624 EXPORT_SYMBOL(kblockd_schedule_work);
2625 2625
2626 int kblockd_schedule_delayed_work(struct request_queue *q, 2626 int kblockd_schedule_delayed_work(struct request_queue *q,
2627 struct delayed_work *dwork, unsigned long delay) 2627 struct delayed_work *dwork, unsigned long delay)
2628 { 2628 {
2629 return queue_delayed_work(kblockd_workqueue, dwork, delay); 2629 return queue_delayed_work(kblockd_workqueue, dwork, delay);
2630 } 2630 }
2631 EXPORT_SYMBOL(kblockd_schedule_delayed_work); 2631 EXPORT_SYMBOL(kblockd_schedule_delayed_work);
2632 2632
2633 #define PLUG_MAGIC 0x91827364 2633 #define PLUG_MAGIC 0x91827364
2634 2634
2635 void blk_start_plug(struct blk_plug *plug) 2635 void blk_start_plug(struct blk_plug *plug)
2636 { 2636 {
2637 struct task_struct *tsk = current; 2637 struct task_struct *tsk = current;
2638 2638
2639 plug->magic = PLUG_MAGIC; 2639 plug->magic = PLUG_MAGIC;
2640 INIT_LIST_HEAD(&plug->list); 2640 INIT_LIST_HEAD(&plug->list);
2641 plug->should_sort = 0; 2641 plug->should_sort = 0;
2642 2642
2643 /* 2643 /*
2644 * If this is a nested plug, don't actually assign it. It will be 2644 * If this is a nested plug, don't actually assign it. It will be
2645 * flushed on its own. 2645 * flushed on its own.
2646 */ 2646 */
2647 if (!tsk->plug) { 2647 if (!tsk->plug) {
2648 /* 2648 /*
2649 * Store ordering should not be needed here, since a potential 2649 * Store ordering should not be needed here, since a potential
2650 * preempt will imply a full memory barrier 2650 * preempt will imply a full memory barrier
2651 */ 2651 */
2652 tsk->plug = plug; 2652 tsk->plug = plug;
2653 } 2653 }
2654 } 2654 }
2655 EXPORT_SYMBOL(blk_start_plug); 2655 EXPORT_SYMBOL(blk_start_plug);
2656 2656
2657 static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b) 2657 static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
2658 { 2658 {
2659 struct request *rqa = container_of(a, struct request, queuelist); 2659 struct request *rqa = container_of(a, struct request, queuelist);
2660 struct request *rqb = container_of(b, struct request, queuelist); 2660 struct request *rqb = container_of(b, struct request, queuelist);
2661 2661
2662 return !(rqa->q <= rqb->q); 2662 return !(rqa->q <= rqb->q);
2663 } 2663 }
2664 2664
2665 /*
2666 * If 'from_schedule' is true, then postpone the dispatch of requests
2667 * until a safe kblockd context. We do this to avoid accidental big
2668 * additional stack usage in driver dispatch, in places where the original
2669 * plugger did not intend it.
2670 */
2665 static void queue_unplugged(struct request_queue *q, unsigned int depth, 2671 static void queue_unplugged(struct request_queue *q, unsigned int depth,
2666 bool force_kblockd) 2672 bool from_schedule)
2667 { 2673 {
2668 trace_block_unplug_io(q, depth); 2674 trace_block_unplug(q, depth, !from_schedule);
2669 __blk_run_queue(q, force_kblockd); 2675 __blk_run_queue(q, from_schedule);
2670 2676
2671 if (q->unplugged_fn) 2677 if (q->unplugged_fn)
2672 q->unplugged_fn(q); 2678 q->unplugged_fn(q);
2673 } 2679 }
2674 2680
2675 void blk_flush_plug_list(struct blk_plug *plug, bool force_kblockd) 2681 void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
2676 { 2682 {
2677 struct request_queue *q; 2683 struct request_queue *q;
2678 unsigned long flags; 2684 unsigned long flags;
2679 struct request *rq; 2685 struct request *rq;
2680 LIST_HEAD(list); 2686 LIST_HEAD(list);
2681 unsigned int depth; 2687 unsigned int depth;
2682 2688
2683 BUG_ON(plug->magic != PLUG_MAGIC); 2689 BUG_ON(plug->magic != PLUG_MAGIC);
2684 2690
2685 if (list_empty(&plug->list)) 2691 if (list_empty(&plug->list))
2686 return; 2692 return;
2687 2693
2688 list_splice_init(&plug->list, &list); 2694 list_splice_init(&plug->list, &list);
2689 2695
2690 if (plug->should_sort) { 2696 if (plug->should_sort) {
2691 list_sort(NULL, &list, plug_rq_cmp); 2697 list_sort(NULL, &list, plug_rq_cmp);
2692 plug->should_sort = 0; 2698 plug->should_sort = 0;
2693 } 2699 }
2694 2700
2695 q = NULL; 2701 q = NULL;
2696 depth = 0; 2702 depth = 0;
2697 2703
2698 /* 2704 /*
2699 * Save and disable interrupts here, to avoid doing it for every 2705 * Save and disable interrupts here, to avoid doing it for every
2700 * queue lock we have to take. 2706 * queue lock we have to take.
2701 */ 2707 */
2702 local_irq_save(flags); 2708 local_irq_save(flags);
2703 while (!list_empty(&list)) { 2709 while (!list_empty(&list)) {
2704 rq = list_entry_rq(list.next); 2710 rq = list_entry_rq(list.next);
2705 list_del_init(&rq->queuelist); 2711 list_del_init(&rq->queuelist);
2706 BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG)); 2712 BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG));
2707 BUG_ON(!rq->q); 2713 BUG_ON(!rq->q);
2708 if (rq->q != q) { 2714 if (rq->q != q) {
2709 if (q) { 2715 if (q) {
2710 queue_unplugged(q, depth, force_kblockd); 2716 queue_unplugged(q, depth, from_schedule);
2711 spin_unlock(q->queue_lock); 2717 spin_unlock(q->queue_lock);
2712 } 2718 }
2713 q = rq->q; 2719 q = rq->q;
2714 depth = 0; 2720 depth = 0;
2715 spin_lock(q->queue_lock); 2721 spin_lock(q->queue_lock);
2716 } 2722 }
2717 rq->cmd_flags &= ~REQ_ON_PLUG; 2723 rq->cmd_flags &= ~REQ_ON_PLUG;
2718 2724
2719 /* 2725 /*
2720 * rq is already accounted, so use raw insert 2726 * rq is already accounted, so use raw insert
2721 */ 2727 */
2722 if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) 2728 if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA))
2723 __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH); 2729 __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
2724 else 2730 else
2725 __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE); 2731 __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
2726 2732
2727 depth++; 2733 depth++;
2728 } 2734 }
2729 2735
2730 if (q) { 2736 if (q) {
2731 queue_unplugged(q, depth, force_kblockd); 2737 queue_unplugged(q, depth, from_schedule);
2732 spin_unlock(q->queue_lock); 2738 spin_unlock(q->queue_lock);
2733 } 2739 }
2734 2740
2735 local_irq_restore(flags); 2741 local_irq_restore(flags);
2736 } 2742 }
2737 EXPORT_SYMBOL(blk_flush_plug_list); 2743 EXPORT_SYMBOL(blk_flush_plug_list);
2738 2744
2739 void blk_finish_plug(struct blk_plug *plug) 2745 void blk_finish_plug(struct blk_plug *plug)
2740 { 2746 {
2741 blk_flush_plug_list(plug, false); 2747 blk_flush_plug_list(plug, false);
2742 2748
2743 if (plug == current->plug) 2749 if (plug == current->plug)
2744 current->plug = NULL; 2750 current->plug = NULL;
2745 } 2751 }
2746 EXPORT_SYMBOL(blk_finish_plug); 2752 EXPORT_SYMBOL(blk_finish_plug);
2747 2753
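The submit side is symmetric: open a plug, queue a run of IO, close the plug. Closing it is the explicit (IO) unplug; if the task blocks and schedules while the plug is still set, the list is flushed from the schedule() path instead, which is the implicit unplug the renamed trace event now reports. A sketch; the array of prepared read bios is an assumption.

static void my_submit_batch(struct bio **bios, int nr)
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);
	for (i = 0; i < nr; i++)
		submit_bio(READ, bios[i]);	/* queued on the task's plug list */
	blk_finish_plug(&plug);			/* explicit unplug, from_schedule == false */
}
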
2748 int __init blk_dev_init(void) 2754 int __init blk_dev_init(void)
2749 { 2755 {
2750 BUILD_BUG_ON(__REQ_NR_BITS > 8 * 2756 BUILD_BUG_ON(__REQ_NR_BITS > 8 *
2751 sizeof(((struct request *)0)->cmd_flags)); 2757 sizeof(((struct request *)0)->cmd_flags));
2752 2758
2753 /* used for unplugging and affects IO latency/throughput - HIGHPRI */ 2759 /* used for unplugging and affects IO latency/throughput - HIGHPRI */
2754 kblockd_workqueue = alloc_workqueue("kblockd", 2760 kblockd_workqueue = alloc_workqueue("kblockd",
2755 WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); 2761 WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
2756 if (!kblockd_workqueue) 2762 if (!kblockd_workqueue)
2757 panic("Failed to create kblockd\n"); 2763 panic("Failed to create kblockd\n");
2758 2764
2759 request_cachep = kmem_cache_create("blkdev_requests", 2765 request_cachep = kmem_cache_create("blkdev_requests",
2760 sizeof(struct request), 0, SLAB_PANIC, NULL); 2766 sizeof(struct request), 0, SLAB_PANIC, NULL);
2761 2767
2762 blk_requestq_cachep = kmem_cache_create("blkdev_queue", 2768 blk_requestq_cachep = kmem_cache_create("blkdev_queue",
2763 sizeof(struct request_queue), 0, SLAB_PANIC, NULL); 2769 sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
2764 2770
2765 return 0; 2771 return 0;
2766 } 2772 }
2767 2773
include/trace/events/block.h
1 #undef TRACE_SYSTEM 1 #undef TRACE_SYSTEM
2 #define TRACE_SYSTEM block 2 #define TRACE_SYSTEM block
3 3
4 #if !defined(_TRACE_BLOCK_H) || defined(TRACE_HEADER_MULTI_READ) 4 #if !defined(_TRACE_BLOCK_H) || defined(TRACE_HEADER_MULTI_READ)
5 #define _TRACE_BLOCK_H 5 #define _TRACE_BLOCK_H
6 6
7 #include <linux/blktrace_api.h> 7 #include <linux/blktrace_api.h>
8 #include <linux/blkdev.h> 8 #include <linux/blkdev.h>
9 #include <linux/tracepoint.h> 9 #include <linux/tracepoint.h>
10 10
11 DECLARE_EVENT_CLASS(block_rq_with_error, 11 DECLARE_EVENT_CLASS(block_rq_with_error,
12 12
13 TP_PROTO(struct request_queue *q, struct request *rq), 13 TP_PROTO(struct request_queue *q, struct request *rq),
14 14
15 TP_ARGS(q, rq), 15 TP_ARGS(q, rq),
16 16
17 TP_STRUCT__entry( 17 TP_STRUCT__entry(
18 __field( dev_t, dev ) 18 __field( dev_t, dev )
19 __field( sector_t, sector ) 19 __field( sector_t, sector )
20 __field( unsigned int, nr_sector ) 20 __field( unsigned int, nr_sector )
21 __field( int, errors ) 21 __field( int, errors )
22 __array( char, rwbs, 6 ) 22 __array( char, rwbs, 6 )
23 __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) 23 __dynamic_array( char, cmd, blk_cmd_buf_len(rq) )
24 ), 24 ),
25 25
26 TP_fast_assign( 26 TP_fast_assign(
27 __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; 27 __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
28 __entry->sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? 28 __entry->sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
29 0 : blk_rq_pos(rq); 29 0 : blk_rq_pos(rq);
30 __entry->nr_sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? 30 __entry->nr_sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
31 0 : blk_rq_sectors(rq); 31 0 : blk_rq_sectors(rq);
32 __entry->errors = rq->errors; 32 __entry->errors = rq->errors;
33 33
34 blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq)); 34 blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
35 blk_dump_cmd(__get_str(cmd), rq); 35 blk_dump_cmd(__get_str(cmd), rq);
36 ), 36 ),
37 37
38 TP_printk("%d,%d %s (%s) %llu + %u [%d]", 38 TP_printk("%d,%d %s (%s) %llu + %u [%d]",
39 MAJOR(__entry->dev), MINOR(__entry->dev), 39 MAJOR(__entry->dev), MINOR(__entry->dev),
40 __entry->rwbs, __get_str(cmd), 40 __entry->rwbs, __get_str(cmd),
41 (unsigned long long)__entry->sector, 41 (unsigned long long)__entry->sector,
42 __entry->nr_sector, __entry->errors) 42 __entry->nr_sector, __entry->errors)
43 ); 43 );
44 44
45 /** 45 /**
46 * block_rq_abort - abort block operation request 46 * block_rq_abort - abort block operation request
47 * @q: queue containing the block operation request 47 * @q: queue containing the block operation request
48 * @rq: block IO operation request 48 * @rq: block IO operation request
49 * 49 *
50 * Called immediately after pending block IO operation request @rq in 50 * Called immediately after pending block IO operation request @rq in
51 * queue @q is aborted. The fields in the operation request @rq 51 * queue @q is aborted. The fields in the operation request @rq
52 * can be examined to determine which device and sectors the pending 52 * can be examined to determine which device and sectors the pending
53 * operation would access. 53 * operation would access.
54 */ 54 */
55 DEFINE_EVENT(block_rq_with_error, block_rq_abort, 55 DEFINE_EVENT(block_rq_with_error, block_rq_abort,
56 56
57 TP_PROTO(struct request_queue *q, struct request *rq), 57 TP_PROTO(struct request_queue *q, struct request *rq),
58 58
59 TP_ARGS(q, rq) 59 TP_ARGS(q, rq)
60 ); 60 );
61 61
62 /** 62 /**
63 * block_rq_requeue - place block IO request back on a queue 63 * block_rq_requeue - place block IO request back on a queue
64 * @q: queue holding operation 64 * @q: queue holding operation
65 * @rq: block IO operation request 65 * @rq: block IO operation request
66 * 66 *
67 * The block operation request @rq is being placed back into queue 67 * The block operation request @rq is being placed back into queue
68 * @q. For some reason the request was not completed and needs to be 68 * @q. For some reason the request was not completed and needs to be
69 * put back in the queue. 69 * put back in the queue.
70 */ 70 */
71 DEFINE_EVENT(block_rq_with_error, block_rq_requeue, 71 DEFINE_EVENT(block_rq_with_error, block_rq_requeue,
72 72
73 TP_PROTO(struct request_queue *q, struct request *rq), 73 TP_PROTO(struct request_queue *q, struct request *rq),
74 74
75 TP_ARGS(q, rq) 75 TP_ARGS(q, rq)
76 ); 76 );
77 77
78 /** 78 /**
79 * block_rq_complete - block IO operation completed by device driver 79 * block_rq_complete - block IO operation completed by device driver
80 * @q: queue containing the block operation request 80 * @q: queue containing the block operation request
81 * @rq: block operations request 81 * @rq: block operations request
82 * 82 *
83 * The block_rq_complete tracepoint event indicates that some portion 83 * The block_rq_complete tracepoint event indicates that some portion
 84 * of the operation request has been completed by the device driver. If 84 * of the operation request has been completed by the device driver. If
85 * the @rq->bio is %NULL, then there is absolutely no additional work to 85 * the @rq->bio is %NULL, then there is absolutely no additional work to
86 * do for the request. If @rq->bio is non-NULL then there is 86 * do for the request. If @rq->bio is non-NULL then there is
87 * additional work required to complete the request. 87 * additional work required to complete the request.
88 */ 88 */
89 DEFINE_EVENT(block_rq_with_error, block_rq_complete, 89 DEFINE_EVENT(block_rq_with_error, block_rq_complete,
90 90
91 TP_PROTO(struct request_queue *q, struct request *rq), 91 TP_PROTO(struct request_queue *q, struct request *rq),
92 92
93 TP_ARGS(q, rq) 93 TP_ARGS(q, rq)
94 ); 94 );
95 95
96 DECLARE_EVENT_CLASS(block_rq, 96 DECLARE_EVENT_CLASS(block_rq,
97 97
98 TP_PROTO(struct request_queue *q, struct request *rq), 98 TP_PROTO(struct request_queue *q, struct request *rq),
99 99
100 TP_ARGS(q, rq), 100 TP_ARGS(q, rq),
101 101
102 TP_STRUCT__entry( 102 TP_STRUCT__entry(
103 __field( dev_t, dev ) 103 __field( dev_t, dev )
104 __field( sector_t, sector ) 104 __field( sector_t, sector )
105 __field( unsigned int, nr_sector ) 105 __field( unsigned int, nr_sector )
106 __field( unsigned int, bytes ) 106 __field( unsigned int, bytes )
107 __array( char, rwbs, 6 ) 107 __array( char, rwbs, 6 )
108 __array( char, comm, TASK_COMM_LEN ) 108 __array( char, comm, TASK_COMM_LEN )
109 __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) 109 __dynamic_array( char, cmd, blk_cmd_buf_len(rq) )
110 ), 110 ),
111 111
112 TP_fast_assign( 112 TP_fast_assign(
113 __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; 113 __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
114 __entry->sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? 114 __entry->sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
115 0 : blk_rq_pos(rq); 115 0 : blk_rq_pos(rq);
116 __entry->nr_sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? 116 __entry->nr_sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
117 0 : blk_rq_sectors(rq); 117 0 : blk_rq_sectors(rq);
118 __entry->bytes = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? 118 __entry->bytes = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
119 blk_rq_bytes(rq) : 0; 119 blk_rq_bytes(rq) : 0;
120 120
121 blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq)); 121 blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
122 blk_dump_cmd(__get_str(cmd), rq); 122 blk_dump_cmd(__get_str(cmd), rq);
123 memcpy(__entry->comm, current->comm, TASK_COMM_LEN); 123 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
124 ), 124 ),
125 125
126 TP_printk("%d,%d %s %u (%s) %llu + %u [%s]", 126 TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
127 MAJOR(__entry->dev), MINOR(__entry->dev), 127 MAJOR(__entry->dev), MINOR(__entry->dev),
128 __entry->rwbs, __entry->bytes, __get_str(cmd), 128 __entry->rwbs, __entry->bytes, __get_str(cmd),
129 (unsigned long long)__entry->sector, 129 (unsigned long long)__entry->sector,
130 __entry->nr_sector, __entry->comm) 130 __entry->nr_sector, __entry->comm)
131 ); 131 );
132 132
133 /** 133 /**
134 * block_rq_insert - insert block operation request into queue 134 * block_rq_insert - insert block operation request into queue
135 * @q: target queue 135 * @q: target queue
136 * @rq: block IO operation request 136 * @rq: block IO operation request
137 * 137 *
138 * Called immediately before block operation request @rq is inserted 138 * Called immediately before block operation request @rq is inserted
139 * into queue @q. The fields in the operation request @rq struct can 139 * into queue @q. The fields in the operation request @rq struct can
140 * be examined to determine which device and sectors the pending 140 * be examined to determine which device and sectors the pending
141 * operation would access. 141 * operation would access.
142 */ 142 */
143 DEFINE_EVENT(block_rq, block_rq_insert, 143 DEFINE_EVENT(block_rq, block_rq_insert,
144 144
145 TP_PROTO(struct request_queue *q, struct request *rq), 145 TP_PROTO(struct request_queue *q, struct request *rq),
146 146
147 TP_ARGS(q, rq) 147 TP_ARGS(q, rq)
148 ); 148 );
149 149
150 /** 150 /**
151 * block_rq_issue - issue pending block IO request operation to device driver 151 * block_rq_issue - issue pending block IO request operation to device driver
152 * @q: queue holding operation 152 * @q: queue holding operation
153 * @rq: block IO operation operation request 153 * @rq: block IO operation operation request
154 * 154 *
155 * Called when block operation request @rq from queue @q is sent to a 155 * Called when block operation request @rq from queue @q is sent to a
156 * device driver for processing. 156 * device driver for processing.
157 */ 157 */
158 DEFINE_EVENT(block_rq, block_rq_issue, 158 DEFINE_EVENT(block_rq, block_rq_issue,
159 159
160 TP_PROTO(struct request_queue *q, struct request *rq), 160 TP_PROTO(struct request_queue *q, struct request *rq),
161 161
162 TP_ARGS(q, rq) 162 TP_ARGS(q, rq)
163 ); 163 );
164 164
165 /** 165 /**
166 * block_bio_bounce - used bounce buffer when processing block operation 166 * block_bio_bounce - used bounce buffer when processing block operation
167 * @q: queue holding the block operation 167 * @q: queue holding the block operation
168 * @bio: block operation 168 * @bio: block operation
169 * 169 *
170 * A bounce buffer was used to handle the block operation @bio in @q. 170 * A bounce buffer was used to handle the block operation @bio in @q.
171 * This occurs when hardware limitations prevent a direct transfer of 171 * This occurs when hardware limitations prevent a direct transfer of
172 * data between the @bio data memory area and the IO device. Use of a 172 * data between the @bio data memory area and the IO device. Use of a
173 * bounce buffer requires extra copying of data and decreases 173 * bounce buffer requires extra copying of data and decreases
174 * performance. 174 * performance.
175 */ 175 */
176 TRACE_EVENT(block_bio_bounce, 176 TRACE_EVENT(block_bio_bounce,
177 177
178 TP_PROTO(struct request_queue *q, struct bio *bio), 178 TP_PROTO(struct request_queue *q, struct bio *bio),
179 179
180 TP_ARGS(q, bio), 180 TP_ARGS(q, bio),
181 181
182 TP_STRUCT__entry( 182 TP_STRUCT__entry(
183 __field( dev_t, dev ) 183 __field( dev_t, dev )
184 __field( sector_t, sector ) 184 __field( sector_t, sector )
185 __field( unsigned int, nr_sector ) 185 __field( unsigned int, nr_sector )
186 __array( char, rwbs, 6 ) 186 __array( char, rwbs, 6 )
187 __array( char, comm, TASK_COMM_LEN ) 187 __array( char, comm, TASK_COMM_LEN )
188 ), 188 ),
189 189
190 TP_fast_assign( 190 TP_fast_assign(
191 __entry->dev = bio->bi_bdev ? 191 __entry->dev = bio->bi_bdev ?
192 bio->bi_bdev->bd_dev : 0; 192 bio->bi_bdev->bd_dev : 0;
193 __entry->sector = bio->bi_sector; 193 __entry->sector = bio->bi_sector;
194 __entry->nr_sector = bio->bi_size >> 9; 194 __entry->nr_sector = bio->bi_size >> 9;
195 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); 195 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
196 memcpy(__entry->comm, current->comm, TASK_COMM_LEN); 196 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
197 ), 197 ),
198 198
199 TP_printk("%d,%d %s %llu + %u [%s]", 199 TP_printk("%d,%d %s %llu + %u [%s]",
200 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, 200 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
201 (unsigned long long)__entry->sector, 201 (unsigned long long)__entry->sector,
202 __entry->nr_sector, __entry->comm) 202 __entry->nr_sector, __entry->comm)
203 ); 203 );
204 204
205 /** 205 /**
206 * block_bio_complete - completed all work on the block operation 206 * block_bio_complete - completed all work on the block operation
207 * @q: queue holding the block operation 207 * @q: queue holding the block operation
208 * @bio: block operation completed 208 * @bio: block operation completed
209 * @error: io error value 209 * @error: io error value
210 * 210 *
211 * This tracepoint indicates there is no further work to do on this 211 * This tracepoint indicates there is no further work to do on this
212 * block IO operation @bio. 212 * block IO operation @bio.
213 */ 213 */
214 TRACE_EVENT(block_bio_complete, 214 TRACE_EVENT(block_bio_complete,
215 215
216 TP_PROTO(struct request_queue *q, struct bio *bio, int error), 216 TP_PROTO(struct request_queue *q, struct bio *bio, int error),
217 217
218 TP_ARGS(q, bio, error), 218 TP_ARGS(q, bio, error),
219 219
220 TP_STRUCT__entry( 220 TP_STRUCT__entry(
221 __field( dev_t, dev ) 221 __field( dev_t, dev )
222 __field( sector_t, sector ) 222 __field( sector_t, sector )
223 __field( unsigned, nr_sector ) 223 __field( unsigned, nr_sector )
224 __field( int, error ) 224 __field( int, error )
225 __array( char, rwbs, 6 ) 225 __array( char, rwbs, 6 )
226 ), 226 ),
227 227
228 TP_fast_assign( 228 TP_fast_assign(
229 __entry->dev = bio->bi_bdev->bd_dev; 229 __entry->dev = bio->bi_bdev->bd_dev;
230 __entry->sector = bio->bi_sector; 230 __entry->sector = bio->bi_sector;
231 __entry->nr_sector = bio->bi_size >> 9; 231 __entry->nr_sector = bio->bi_size >> 9;
232 __entry->error = error; 232 __entry->error = error;
233 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); 233 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
234 ), 234 ),
235 235
236 TP_printk("%d,%d %s %llu + %u [%d]", 236 TP_printk("%d,%d %s %llu + %u [%d]",
237 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, 237 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
238 (unsigned long long)__entry->sector, 238 (unsigned long long)__entry->sector,
239 __entry->nr_sector, __entry->error) 239 __entry->nr_sector, __entry->error)
240 ); 240 );
241 241
242 DECLARE_EVENT_CLASS(block_bio, 242 DECLARE_EVENT_CLASS(block_bio,
243 243
244 TP_PROTO(struct request_queue *q, struct bio *bio), 244 TP_PROTO(struct request_queue *q, struct bio *bio),
245 245
246 TP_ARGS(q, bio), 246 TP_ARGS(q, bio),
247 247
248 TP_STRUCT__entry( 248 TP_STRUCT__entry(
249 __field( dev_t, dev ) 249 __field( dev_t, dev )
250 __field( sector_t, sector ) 250 __field( sector_t, sector )
251 __field( unsigned int, nr_sector ) 251 __field( unsigned int, nr_sector )
252 __array( char, rwbs, 6 ) 252 __array( char, rwbs, 6 )
253 __array( char, comm, TASK_COMM_LEN ) 253 __array( char, comm, TASK_COMM_LEN )
254 ), 254 ),
255 255
256 TP_fast_assign( 256 TP_fast_assign(
257 __entry->dev = bio->bi_bdev->bd_dev; 257 __entry->dev = bio->bi_bdev->bd_dev;
258 __entry->sector = bio->bi_sector; 258 __entry->sector = bio->bi_sector;
259 __entry->nr_sector = bio->bi_size >> 9; 259 __entry->nr_sector = bio->bi_size >> 9;
260 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); 260 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
261 memcpy(__entry->comm, current->comm, TASK_COMM_LEN); 261 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
262 ), 262 ),
263 263
264 TP_printk("%d,%d %s %llu + %u [%s]", 264 TP_printk("%d,%d %s %llu + %u [%s]",
265 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, 265 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
266 (unsigned long long)__entry->sector, 266 (unsigned long long)__entry->sector,
267 __entry->nr_sector, __entry->comm) 267 __entry->nr_sector, __entry->comm)
268 ); 268 );
269 269
270 /** 270 /**
271 * block_bio_backmerge - merging block operation to the end of an existing operation 271 * block_bio_backmerge - merging block operation to the end of an existing operation
272 * @q: queue holding operation 272 * @q: queue holding operation
273 * @bio: new block operation to merge 273 * @bio: new block operation to merge
274 * 274 *
275 * Merging block request @bio to the end of an existing block request 275 * Merging block request @bio to the end of an existing block request
276 * in queue @q. 276 * in queue @q.
277 */ 277 */
278 DEFINE_EVENT(block_bio, block_bio_backmerge, 278 DEFINE_EVENT(block_bio, block_bio_backmerge,
279 279
280 TP_PROTO(struct request_queue *q, struct bio *bio), 280 TP_PROTO(struct request_queue *q, struct bio *bio),
281 281
282 TP_ARGS(q, bio) 282 TP_ARGS(q, bio)
283 ); 283 );
284 284
285 /** 285 /**
286 * block_bio_frontmerge - merging block operation to the beginning of an existing operation 286 * block_bio_frontmerge - merging block operation to the beginning of an existing operation
287 * @q: queue holding operation 287 * @q: queue holding operation
288 * @bio: new block operation to merge 288 * @bio: new block operation to merge
289 * 289 *
290 * Merging block IO operation @bio to the beginning of an existing block 290 * Merging block IO operation @bio to the beginning of an existing block
291 * operation in queue @q. 291 * operation in queue @q.
292 */ 292 */
293 DEFINE_EVENT(block_bio, block_bio_frontmerge, 293 DEFINE_EVENT(block_bio, block_bio_frontmerge,
294 294
295 TP_PROTO(struct request_queue *q, struct bio *bio), 295 TP_PROTO(struct request_queue *q, struct bio *bio),
296 296
297 TP_ARGS(q, bio) 297 TP_ARGS(q, bio)
298 ); 298 );
299 299
300 /** 300 /**
301 * block_bio_queue - putting new block IO operation in queue 301 * block_bio_queue - putting new block IO operation in queue
302 * @q: queue holding operation 302 * @q: queue holding operation
303 * @bio: new block operation 303 * @bio: new block operation
304 * 304 *
305 * About to place the block IO operation @bio into queue @q. 305 * About to place the block IO operation @bio into queue @q.
306 */ 306 */
307 DEFINE_EVENT(block_bio, block_bio_queue, 307 DEFINE_EVENT(block_bio, block_bio_queue,
308 308
309 TP_PROTO(struct request_queue *q, struct bio *bio), 309 TP_PROTO(struct request_queue *q, struct bio *bio),
310 310
311 TP_ARGS(q, bio) 311 TP_ARGS(q, bio)
312 ); 312 );
313 313
314 DECLARE_EVENT_CLASS(block_get_rq, 314 DECLARE_EVENT_CLASS(block_get_rq,
315 315
316 TP_PROTO(struct request_queue *q, struct bio *bio, int rw), 316 TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
317 317
318 TP_ARGS(q, bio, rw), 318 TP_ARGS(q, bio, rw),
319 319
320 TP_STRUCT__entry( 320 TP_STRUCT__entry(
321 __field( dev_t, dev ) 321 __field( dev_t, dev )
322 __field( sector_t, sector ) 322 __field( sector_t, sector )
323 __field( unsigned int, nr_sector ) 323 __field( unsigned int, nr_sector )
324 __array( char, rwbs, 6 ) 324 __array( char, rwbs, 6 )
325 __array( char, comm, TASK_COMM_LEN ) 325 __array( char, comm, TASK_COMM_LEN )
326 ), 326 ),
327 327
328 TP_fast_assign( 328 TP_fast_assign(
329 __entry->dev = bio ? bio->bi_bdev->bd_dev : 0; 329 __entry->dev = bio ? bio->bi_bdev->bd_dev : 0;
330 __entry->sector = bio ? bio->bi_sector : 0; 330 __entry->sector = bio ? bio->bi_sector : 0;
331 __entry->nr_sector = bio ? bio->bi_size >> 9 : 0; 331 __entry->nr_sector = bio ? bio->bi_size >> 9 : 0;
332 blk_fill_rwbs(__entry->rwbs, 332 blk_fill_rwbs(__entry->rwbs,
333 bio ? bio->bi_rw : 0, __entry->nr_sector); 333 bio ? bio->bi_rw : 0, __entry->nr_sector);
334 memcpy(__entry->comm, current->comm, TASK_COMM_LEN); 334 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
335 ), 335 ),
336 336
337 TP_printk("%d,%d %s %llu + %u [%s]", 337 TP_printk("%d,%d %s %llu + %u [%s]",
338 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, 338 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
339 (unsigned long long)__entry->sector, 339 (unsigned long long)__entry->sector,
340 __entry->nr_sector, __entry->comm) 340 __entry->nr_sector, __entry->comm)
341 ); 341 );
342 342
343 /** 343 /**
344 * block_getrq - get a free request entry in queue for block IO operations 344 * block_getrq - get a free request entry in queue for block IO operations
345 * @q: queue for operations 345 * @q: queue for operations
346 * @bio: pending block IO operation 346 * @bio: pending block IO operation
347 * @rw: low bit indicates a read (%0) or a write (%1) 347 * @rw: low bit indicates a read (%0) or a write (%1)
348 * 348 *
349 * A request struct for queue @q has been allocated to handle the 349 * A request struct for queue @q has been allocated to handle the
350 * block IO operation @bio. 350 * block IO operation @bio.
351 */ 351 */
352 DEFINE_EVENT(block_get_rq, block_getrq, 352 DEFINE_EVENT(block_get_rq, block_getrq,
353 353
354 TP_PROTO(struct request_queue *q, struct bio *bio, int rw), 354 TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
355 355
356 TP_ARGS(q, bio, rw) 356 TP_ARGS(q, bio, rw)
357 ); 357 );
358 358
359 /** 359 /**
360 * block_sleeprq - waiting to get a free request entry in queue for block IO operation 360 * block_sleeprq - waiting to get a free request entry in queue for block IO operation
361 * @q: queue for operation 361 * @q: queue for operation
362 * @bio: pending block IO operation 362 * @bio: pending block IO operation
363 * @rw: low bit indicates a read (%0) or a write (%1) 363 * @rw: low bit indicates a read (%0) or a write (%1)
364 * 364 *
365 * In the case where a request struct cannot be provided for queue @q 365 * In the case where a request struct cannot be provided for queue @q
 366 * the process needs to wait for a request struct to become 366 * the process needs to wait for a request struct to become
 367 * available. This tracepoint event is generated each time the 367 * available. This tracepoint event is generated each time the
 368 * process goes to sleep waiting for a request struct to become available. 368 * process goes to sleep waiting for a request struct to become available.
369 */ 369 */
370 DEFINE_EVENT(block_get_rq, block_sleeprq, 370 DEFINE_EVENT(block_get_rq, block_sleeprq,
371 371
372 TP_PROTO(struct request_queue *q, struct bio *bio, int rw), 372 TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
373 373
374 TP_ARGS(q, bio, rw) 374 TP_ARGS(q, bio, rw)
375 ); 375 );
376 376
377 /** 377 /**
 378 * block_plug - keep operation requests in request queue 378 * block_plug - keep operation requests in request queue
379 * @q: request queue to plug 379 * @q: request queue to plug
380 * 380 *
381 * Plug the request queue @q. Do not allow block operation requests 381 * Plug the request queue @q. Do not allow block operation requests
382 * to be sent to the device driver. Instead, accumulate requests in 382 * to be sent to the device driver. Instead, accumulate requests in
383 * the queue to improve throughput performance of the block device. 383 * the queue to improve throughput performance of the block device.
384 */ 384 */
385 TRACE_EVENT(block_plug, 385 TRACE_EVENT(block_plug,
386 386
387 TP_PROTO(struct request_queue *q), 387 TP_PROTO(struct request_queue *q),
388 388
389 TP_ARGS(q), 389 TP_ARGS(q),
390 390
391 TP_STRUCT__entry( 391 TP_STRUCT__entry(
392 __array( char, comm, TASK_COMM_LEN ) 392 __array( char, comm, TASK_COMM_LEN )
393 ), 393 ),
394 394
395 TP_fast_assign( 395 TP_fast_assign(
396 memcpy(__entry->comm, current->comm, TASK_COMM_LEN); 396 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
397 ), 397 ),
398 398
399 TP_printk("[%s]", __entry->comm) 399 TP_printk("[%s]", __entry->comm)
400 ); 400 );
401 401
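The block_plug/block_unplug pair above corresponds to the on-stack plugging that this commit's accounting is built around. A minimal, illustrative submitter sketch, assuming the blk_start_plug()/blk_finish_plug() interface from the plugging rework (the caller-side hunks are not part of the excerpt shown here):

/*
 * Sketch only: batch a few bios under one plug. The unplug is explicit
 * when blk_finish_plug() runs; if the task schedules with IO still
 * pending, the flush happens implicitly and block_unplug reports it as
 * an implicit unplug.
 */
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/fs.h>

static void submit_read_batch(struct bio **bios, int nr)
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);
	for (i = 0; i < nr; i++)
		submit_bio(READ, bios[i]);
	blk_finish_plug(&plug);		/* explicit unplug */
}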
402 DECLARE_EVENT_CLASS(block_unplug, 402 DECLARE_EVENT_CLASS(block_unplug,
403 403
404 TP_PROTO(struct request_queue *q, unsigned int depth), 404 TP_PROTO(struct request_queue *q, unsigned int depth, bool explicit),
405 405
406 TP_ARGS(q, depth), 406 TP_ARGS(q, depth, explicit),
407 407
408 TP_STRUCT__entry( 408 TP_STRUCT__entry(
409 __field( int, nr_rq ) 409 __field( int, nr_rq )
410 __array( char, comm, TASK_COMM_LEN ) 410 __array( char, comm, TASK_COMM_LEN )
411 ), 411 ),
412 412
413 TP_fast_assign( 413 TP_fast_assign(
414 __entry->nr_rq = depth; 414 __entry->nr_rq = depth;
415 memcpy(__entry->comm, current->comm, TASK_COMM_LEN); 415 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
416 ), 416 ),
417 417
418 TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) 418 TP_printk("[%s] %d", __entry->comm, __entry->nr_rq)
419 ); 419 );
420 420
421 /** 421 /**
 421 * block_unplug_io - release of operation requests in request queue 422 * block_unplug - release of operation requests in request queue
423 * @q: request queue to unplug 423 * @q: request queue to unplug
424 * @depth: number of requests just added to the queue 424 * @depth: number of requests just added to the queue
425 * @explicit: whether this was an explicit unplug, or one from schedule()
425 * 426 *
426 * Unplug request queue @q because device driver is scheduled to work 427 * Unplug request queue @q because device driver is scheduled to work
427 * on elements in the request queue. 428 * on elements in the request queue.
428 */ 429 */
429 DEFINE_EVENT(block_unplug, block_unplug_io, 430 DEFINE_EVENT(block_unplug, block_unplug,
430 431
431 TP_PROTO(struct request_queue *q, unsigned int depth), 432 TP_PROTO(struct request_queue *q, unsigned int depth, bool explicit),
432 433
433 TP_ARGS(q, depth) 434 TP_ARGS(q, depth, explicit)
434 ); 435 );
435 436
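Because the tracepoint prototype gained an argument, any attached probe must take it as well. A hypothetical out-of-tree consumer (the module scaffolding and names are illustrative; the in-tree blktrace probes are brought to the same signature elsewhere in this series):

/*
 * Illustrative probe: report whether each unplug was explicit
 * (blk_finish_plug) or implicit (task scheduled with IO pending).
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <trace/events/block.h>

static void demo_unplug_probe(void *ignore, struct request_queue *q,
			      unsigned int depth, bool explicit)
{
	pr_info("block unplug: depth=%u (%s)\n", depth,
		explicit ? "explicit" : "schedule");
}

static int __init demo_init(void)
{
	return register_trace_block_unplug(demo_unplug_probe, NULL);
}

static void __exit demo_exit(void)
{
	unregister_trace_block_unplug(demo_unplug_probe, NULL);
	tracepoint_synchronize_unregister();
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");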
436 /** 437 /**
437 * block_split - split a single bio struct into two bio structs 438 * block_split - split a single bio struct into two bio structs
438 * @q: queue containing the bio 439 * @q: queue containing the bio
439 * @bio: block operation being split 440 * @bio: block operation being split
440 * @new_sector: The starting sector for the new bio 441 * @new_sector: The starting sector for the new bio
441 * 442 *
442 * The bio request @bio in request queue @q needs to be split into two 443 * The bio request @bio in request queue @q needs to be split into two
443 * bio requests. The newly created @bio request starts at 444 * bio requests. The newly created @bio request starts at
444 * @new_sector. This split may be required due to hardware limitation 445 * @new_sector. This split may be required due to hardware limitation
445 * such as operation crossing device boundaries in a RAID system. 446 * such as operation crossing device boundaries in a RAID system.
446 */ 447 */
447 TRACE_EVENT(block_split, 448 TRACE_EVENT(block_split,
448 449
449 TP_PROTO(struct request_queue *q, struct bio *bio, 450 TP_PROTO(struct request_queue *q, struct bio *bio,
450 unsigned int new_sector), 451 unsigned int new_sector),
451 452
452 TP_ARGS(q, bio, new_sector), 453 TP_ARGS(q, bio, new_sector),
453 454
454 TP_STRUCT__entry( 455 TP_STRUCT__entry(
455 __field( dev_t, dev ) 456 __field( dev_t, dev )
456 __field( sector_t, sector ) 457 __field( sector_t, sector )
457 __field( sector_t, new_sector ) 458 __field( sector_t, new_sector )
458 __array( char, rwbs, 6 ) 459 __array( char, rwbs, 6 )
459 __array( char, comm, TASK_COMM_LEN ) 460 __array( char, comm, TASK_COMM_LEN )
460 ), 461 ),
461 462
462 TP_fast_assign( 463 TP_fast_assign(
463 __entry->dev = bio->bi_bdev->bd_dev; 464 __entry->dev = bio->bi_bdev->bd_dev;
464 __entry->sector = bio->bi_sector; 465 __entry->sector = bio->bi_sector;
465 __entry->new_sector = new_sector; 466 __entry->new_sector = new_sector;
466 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); 467 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
467 memcpy(__entry->comm, current->comm, TASK_COMM_LEN); 468 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
468 ), 469 ),
469 470
470 TP_printk("%d,%d %s %llu / %llu [%s]", 471 TP_printk("%d,%d %s %llu / %llu [%s]",
471 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, 472 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
472 (unsigned long long)__entry->sector, 473 (unsigned long long)__entry->sector,
473 (unsigned long long)__entry->new_sector, 474 (unsigned long long)__entry->new_sector,
474 __entry->comm) 475 __entry->comm)
475 ); 476 );
476 477
477 /** 478 /**
478 * block_bio_remap - map request for a logical device to the raw device 479 * block_bio_remap - map request for a logical device to the raw device
479 * @q: queue holding the operation 480 * @q: queue holding the operation
480 * @bio: revised operation 481 * @bio: revised operation
481 * @dev: device for the operation 482 * @dev: device for the operation
482 * @from: original sector for the operation 483 * @from: original sector for the operation
483 * 484 *
484 * An operation for a logical device has been mapped to the 485 * An operation for a logical device has been mapped to the
485 * raw block device. 486 * raw block device.
486 */ 487 */
487 TRACE_EVENT(block_bio_remap, 488 TRACE_EVENT(block_bio_remap,
488 489
489 TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, 490 TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev,
490 sector_t from), 491 sector_t from),
491 492
492 TP_ARGS(q, bio, dev, from), 493 TP_ARGS(q, bio, dev, from),
493 494
494 TP_STRUCT__entry( 495 TP_STRUCT__entry(
495 __field( dev_t, dev ) 496 __field( dev_t, dev )
496 __field( sector_t, sector ) 497 __field( sector_t, sector )
497 __field( unsigned int, nr_sector ) 498 __field( unsigned int, nr_sector )
498 __field( dev_t, old_dev ) 499 __field( dev_t, old_dev )
499 __field( sector_t, old_sector ) 500 __field( sector_t, old_sector )
500 __array( char, rwbs, 6 ) 501 __array( char, rwbs, 6 )
501 ), 502 ),
502 503
503 TP_fast_assign( 504 TP_fast_assign(
504 __entry->dev = bio->bi_bdev->bd_dev; 505 __entry->dev = bio->bi_bdev->bd_dev;
505 __entry->sector = bio->bi_sector; 506 __entry->sector = bio->bi_sector;
506 __entry->nr_sector = bio->bi_size >> 9; 507 __entry->nr_sector = bio->bi_size >> 9;
507 __entry->old_dev = dev; 508 __entry->old_dev = dev;
508 __entry->old_sector = from; 509 __entry->old_sector = from;
509 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); 510 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
510 ), 511 ),
511 512
512 TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu", 513 TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu",
513 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, 514 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
514 (unsigned long long)__entry->sector, 515 (unsigned long long)__entry->sector,
515 __entry->nr_sector, 516 __entry->nr_sector,
516 MAJOR(__entry->old_dev), MINOR(__entry->old_dev), 517 MAJOR(__entry->old_dev), MINOR(__entry->old_dev),
517 (unsigned long long)__entry->old_sector) 518 (unsigned long long)__entry->old_sector)
518 ); 519 );
519 520
520 /** 521 /**
521 * block_rq_remap - map request for a block operation request 522 * block_rq_remap - map request for a block operation request
522 * @q: queue holding the operation 523 * @q: queue holding the operation
523 * @rq: block IO operation request 524 * @rq: block IO operation request
524 * @dev: device for the operation 525 * @dev: device for the operation
525 * @from: original sector for the operation 526 * @from: original sector for the operation
526 * 527 *
527 * The block operation request @rq in @q has been remapped. The block 528 * The block operation request @rq in @q has been remapped. The block
 528 * operation request @rq holds the current information and @from holds 529 * operation request @rq holds the current information and @from holds
529 * the original sector. 530 * the original sector.
530 */ 531 */
531 TRACE_EVENT(block_rq_remap, 532 TRACE_EVENT(block_rq_remap,
532 533
533 TP_PROTO(struct request_queue *q, struct request *rq, dev_t dev, 534 TP_PROTO(struct request_queue *q, struct request *rq, dev_t dev,
534 sector_t from), 535 sector_t from),
535 536
536 TP_ARGS(q, rq, dev, from), 537 TP_ARGS(q, rq, dev, from),
537 538
538 TP_STRUCT__entry( 539 TP_STRUCT__entry(
539 __field( dev_t, dev ) 540 __field( dev_t, dev )
540 __field( sector_t, sector ) 541 __field( sector_t, sector )
541 __field( unsigned int, nr_sector ) 542 __field( unsigned int, nr_sector )
542 __field( dev_t, old_dev ) 543 __field( dev_t, old_dev )
543 __field( sector_t, old_sector ) 544 __field( sector_t, old_sector )
544 __array( char, rwbs, 6 ) 545 __array( char, rwbs, 6 )
545 ), 546 ),
546 547
547 TP_fast_assign( 548 TP_fast_assign(
548 __entry->dev = disk_devt(rq->rq_disk); 549 __entry->dev = disk_devt(rq->rq_disk);
549 __entry->sector = blk_rq_pos(rq); 550 __entry->sector = blk_rq_pos(rq);
550 __entry->nr_sector = blk_rq_sectors(rq); 551 __entry->nr_sector = blk_rq_sectors(rq);
551 __entry->old_dev = dev; 552 __entry->old_dev = dev;
552 __entry->old_sector = from; 553 __entry->old_sector = from;
553 blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq)); 554 blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
554 ), 555 ),
555 556
556 TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu", 557 TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu",
557 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, 558 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
558 (unsigned long long)__entry->sector, 559 (unsigned long long)__entry->sector,
559 __entry->nr_sector, 560 __entry->nr_sector,
560 MAJOR(__entry->old_dev), MINOR(__entry->old_dev), 561 MAJOR(__entry->old_dev), MINOR(__entry->old_dev),
561 (unsigned long long)__entry->old_sector) 562 (unsigned long long)__entry->old_sector)
562 ); 563 );
563 564
564 #endif /* _TRACE_BLOCK_H */ 565 #endif /* _TRACE_BLOCK_H */
565 566
566 /* This part must be outside protection */ 567 /* This part must be outside protection */
567 #include <trace/define_trace.h> 568 #include <trace/define_trace.h>
568 569
569 570
kernel/trace/blktrace.c
1 /* 1 /*
2 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk> 2 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation. 6 * published by the Free Software Foundation.
7 * 7 *
8 * This program is distributed in the hope that it will be useful, 8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details. 11 * GNU General Public License for more details.
12 * 12 *
13 * You should have received a copy of the GNU General Public License 13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software 14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 15 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
16 * 16 *
17 */ 17 */
18 #include <linux/kernel.h> 18 #include <linux/kernel.h>
19 #include <linux/blkdev.h> 19 #include <linux/blkdev.h>
20 #include <linux/blktrace_api.h> 20 #include <linux/blktrace_api.h>
21 #include <linux/percpu.h> 21 #include <linux/percpu.h>
22 #include <linux/init.h> 22 #include <linux/init.h>
23 #include <linux/mutex.h> 23 #include <linux/mutex.h>
24 #include <linux/slab.h> 24 #include <linux/slab.h>
25 #include <linux/debugfs.h> 25 #include <linux/debugfs.h>
26 #include <linux/time.h> 26 #include <linux/time.h>
27 #include <linux/uaccess.h> 27 #include <linux/uaccess.h>
28 28
29 #include <trace/events/block.h> 29 #include <trace/events/block.h>
30 30
31 #include "trace_output.h" 31 #include "trace_output.h"
32 32
33 #ifdef CONFIG_BLK_DEV_IO_TRACE 33 #ifdef CONFIG_BLK_DEV_IO_TRACE
34 34
35 static unsigned int blktrace_seq __read_mostly = 1; 35 static unsigned int blktrace_seq __read_mostly = 1;
36 36
37 static struct trace_array *blk_tr; 37 static struct trace_array *blk_tr;
38 static bool blk_tracer_enabled __read_mostly; 38 static bool blk_tracer_enabled __read_mostly;
39 39
 40 /* Select an alternative, minimalistic output instead of the original one */ 40 /* Select an alternative, minimalistic output instead of the original one */
41 #define TRACE_BLK_OPT_CLASSIC 0x1 41 #define TRACE_BLK_OPT_CLASSIC 0x1
42 42
43 static struct tracer_opt blk_tracer_opts[] = { 43 static struct tracer_opt blk_tracer_opts[] = {
44 /* Default disable the minimalistic output */ 44 /* Default disable the minimalistic output */
45 { TRACER_OPT(blk_classic, TRACE_BLK_OPT_CLASSIC) }, 45 { TRACER_OPT(blk_classic, TRACE_BLK_OPT_CLASSIC) },
46 { } 46 { }
47 }; 47 };
48 48
49 static struct tracer_flags blk_tracer_flags = { 49 static struct tracer_flags blk_tracer_flags = {
50 .val = 0, 50 .val = 0,
51 .opts = blk_tracer_opts, 51 .opts = blk_tracer_opts,
52 }; 52 };
53 53
54 /* Global reference count of probes */ 54 /* Global reference count of probes */
55 static atomic_t blk_probes_ref = ATOMIC_INIT(0); 55 static atomic_t blk_probes_ref = ATOMIC_INIT(0);
56 56
57 static void blk_register_tracepoints(void); 57 static void blk_register_tracepoints(void);
58 static void blk_unregister_tracepoints(void); 58 static void blk_unregister_tracepoints(void);
59 59
60 /* 60 /*
61 * Send out a notify message. 61 * Send out a notify message.
62 */ 62 */
63 static void trace_note(struct blk_trace *bt, pid_t pid, int action, 63 static void trace_note(struct blk_trace *bt, pid_t pid, int action,
64 const void *data, size_t len) 64 const void *data, size_t len)
65 { 65 {
66 struct blk_io_trace *t; 66 struct blk_io_trace *t;
67 struct ring_buffer_event *event = NULL; 67 struct ring_buffer_event *event = NULL;
68 struct ring_buffer *buffer = NULL; 68 struct ring_buffer *buffer = NULL;
69 int pc = 0; 69 int pc = 0;
70 int cpu = smp_processor_id(); 70 int cpu = smp_processor_id();
71 bool blk_tracer = blk_tracer_enabled; 71 bool blk_tracer = blk_tracer_enabled;
72 72
73 if (blk_tracer) { 73 if (blk_tracer) {
74 buffer = blk_tr->buffer; 74 buffer = blk_tr->buffer;
75 pc = preempt_count(); 75 pc = preempt_count();
76 event = trace_buffer_lock_reserve(buffer, TRACE_BLK, 76 event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
77 sizeof(*t) + len, 77 sizeof(*t) + len,
78 0, pc); 78 0, pc);
79 if (!event) 79 if (!event)
80 return; 80 return;
81 t = ring_buffer_event_data(event); 81 t = ring_buffer_event_data(event);
82 goto record_it; 82 goto record_it;
83 } 83 }
84 84
85 if (!bt->rchan) 85 if (!bt->rchan)
86 return; 86 return;
87 87
88 t = relay_reserve(bt->rchan, sizeof(*t) + len); 88 t = relay_reserve(bt->rchan, sizeof(*t) + len);
89 if (t) { 89 if (t) {
90 t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION; 90 t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
91 t->time = ktime_to_ns(ktime_get()); 91 t->time = ktime_to_ns(ktime_get());
92 record_it: 92 record_it:
93 t->device = bt->dev; 93 t->device = bt->dev;
94 t->action = action; 94 t->action = action;
95 t->pid = pid; 95 t->pid = pid;
96 t->cpu = cpu; 96 t->cpu = cpu;
97 t->pdu_len = len; 97 t->pdu_len = len;
98 memcpy((void *) t + sizeof(*t), data, len); 98 memcpy((void *) t + sizeof(*t), data, len);
99 99
100 if (blk_tracer) 100 if (blk_tracer)
101 trace_buffer_unlock_commit(buffer, event, 0, pc); 101 trace_buffer_unlock_commit(buffer, event, 0, pc);
102 } 102 }
103 } 103 }
104 104
105 /* 105 /*
106 * Send out a notify for this process, if we haven't done so since a trace 106 * Send out a notify for this process, if we haven't done so since a trace
107 * started 107 * started
108 */ 108 */
109 static void trace_note_tsk(struct blk_trace *bt, struct task_struct *tsk) 109 static void trace_note_tsk(struct blk_trace *bt, struct task_struct *tsk)
110 { 110 {
111 tsk->btrace_seq = blktrace_seq; 111 tsk->btrace_seq = blktrace_seq;
112 trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm, sizeof(tsk->comm)); 112 trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm, sizeof(tsk->comm));
113 } 113 }
114 114
115 static void trace_note_time(struct blk_trace *bt) 115 static void trace_note_time(struct blk_trace *bt)
116 { 116 {
117 struct timespec now; 117 struct timespec now;
118 unsigned long flags; 118 unsigned long flags;
119 u32 words[2]; 119 u32 words[2];
120 120
121 getnstimeofday(&now); 121 getnstimeofday(&now);
122 words[0] = now.tv_sec; 122 words[0] = now.tv_sec;
123 words[1] = now.tv_nsec; 123 words[1] = now.tv_nsec;
124 124
125 local_irq_save(flags); 125 local_irq_save(flags);
126 trace_note(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words)); 126 trace_note(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words));
127 local_irq_restore(flags); 127 local_irq_restore(flags);
128 } 128 }
129 129
130 void __trace_note_message(struct blk_trace *bt, const char *fmt, ...) 130 void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
131 { 131 {
132 int n; 132 int n;
133 va_list args; 133 va_list args;
134 unsigned long flags; 134 unsigned long flags;
135 char *buf; 135 char *buf;
136 136
137 if (unlikely(bt->trace_state != Blktrace_running && 137 if (unlikely(bt->trace_state != Blktrace_running &&
138 !blk_tracer_enabled)) 138 !blk_tracer_enabled))
139 return; 139 return;
140 140
141 /* 141 /*
142 * If the BLK_TC_NOTIFY action mask isn't set, don't send any note 142 * If the BLK_TC_NOTIFY action mask isn't set, don't send any note
143 * message to the trace. 143 * message to the trace.
144 */ 144 */
145 if (!(bt->act_mask & BLK_TC_NOTIFY)) 145 if (!(bt->act_mask & BLK_TC_NOTIFY))
146 return; 146 return;
147 147
148 local_irq_save(flags); 148 local_irq_save(flags);
149 buf = per_cpu_ptr(bt->msg_data, smp_processor_id()); 149 buf = per_cpu_ptr(bt->msg_data, smp_processor_id());
150 va_start(args, fmt); 150 va_start(args, fmt);
151 n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args); 151 n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
152 va_end(args); 152 va_end(args);
153 153
154 trace_note(bt, 0, BLK_TN_MESSAGE, buf, n); 154 trace_note(bt, 0, BLK_TN_MESSAGE, buf, n);
155 local_irq_restore(flags); 155 local_irq_restore(flags);
156 } 156 }
157 EXPORT_SYMBOL_GPL(__trace_note_message); 157 EXPORT_SYMBOL_GPL(__trace_note_message);
158 158
159 static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, 159 static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
160 pid_t pid) 160 pid_t pid)
161 { 161 {
162 if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0) 162 if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0)
163 return 1; 163 return 1;
164 if (sector && (sector < bt->start_lba || sector > bt->end_lba)) 164 if (sector && (sector < bt->start_lba || sector > bt->end_lba))
165 return 1; 165 return 1;
166 if (bt->pid && pid != bt->pid) 166 if (bt->pid && pid != bt->pid)
167 return 1; 167 return 1;
168 168
169 return 0; 169 return 0;
170 } 170 }
171 171
172 /* 172 /*
173 * Data direction bit lookup 173 * Data direction bit lookup
174 */ 174 */
175 static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), 175 static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
176 BLK_TC_ACT(BLK_TC_WRITE) }; 176 BLK_TC_ACT(BLK_TC_WRITE) };
177 177
178 #define BLK_TC_RAHEAD BLK_TC_AHEAD 178 #define BLK_TC_RAHEAD BLK_TC_AHEAD
179 179
180 /* The ilog2() calls fall out because they're constant */ 180 /* The ilog2() calls fall out because they're constant */
181 #define MASK_TC_BIT(rw, __name) ((rw & REQ_ ## __name) << \ 181 #define MASK_TC_BIT(rw, __name) ((rw & REQ_ ## __name) << \
182 (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - __REQ_ ## __name)) 182 (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - __REQ_ ## __name))
183 183
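Expanded once, the MASK_TC_BIT() trick is easier to see. If the request flag REQ_<name> lives at bit a (so __REQ_<name> == a) and the matching category bit BLK_TC_<name> is bit b of the field that starts at BLK_TC_SHIFT, the macro is

	(rw & (1 << a)) << (b + BLK_TC_SHIFT - a)

which masks out the single flag bit and shifts it so it lands on bit (b + BLK_TC_SHIFT) of the action word, matching how ddir_act[] uses BLK_TC_ACT(). Every operand is a compile-time constant, which is why the ilog2() calls cost nothing at run time (a and b here are symbolic placeholders, not the real header values).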
184 /* 184 /*
185 * The worker for the various blk_add_trace*() types. Fills out a 185 * The worker for the various blk_add_trace*() types. Fills out a
186 * blk_io_trace structure and places it in a per-cpu subbuffer. 186 * blk_io_trace structure and places it in a per-cpu subbuffer.
187 */ 187 */
188 static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, 188 static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
189 int rw, u32 what, int error, int pdu_len, void *pdu_data) 189 int rw, u32 what, int error, int pdu_len, void *pdu_data)
190 { 190 {
191 struct task_struct *tsk = current; 191 struct task_struct *tsk = current;
192 struct ring_buffer_event *event = NULL; 192 struct ring_buffer_event *event = NULL;
193 struct ring_buffer *buffer = NULL; 193 struct ring_buffer *buffer = NULL;
194 struct blk_io_trace *t; 194 struct blk_io_trace *t;
195 unsigned long flags = 0; 195 unsigned long flags = 0;
196 unsigned long *sequence; 196 unsigned long *sequence;
197 pid_t pid; 197 pid_t pid;
198 int cpu, pc = 0; 198 int cpu, pc = 0;
199 bool blk_tracer = blk_tracer_enabled; 199 bool blk_tracer = blk_tracer_enabled;
200 200
201 if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer)) 201 if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer))
202 return; 202 return;
203 203
204 what |= ddir_act[rw & WRITE]; 204 what |= ddir_act[rw & WRITE];
205 what |= MASK_TC_BIT(rw, SYNC); 205 what |= MASK_TC_BIT(rw, SYNC);
206 what |= MASK_TC_BIT(rw, RAHEAD); 206 what |= MASK_TC_BIT(rw, RAHEAD);
207 what |= MASK_TC_BIT(rw, META); 207 what |= MASK_TC_BIT(rw, META);
208 what |= MASK_TC_BIT(rw, DISCARD); 208 what |= MASK_TC_BIT(rw, DISCARD);
209 209
210 pid = tsk->pid; 210 pid = tsk->pid;
211 if (act_log_check(bt, what, sector, pid)) 211 if (act_log_check(bt, what, sector, pid))
212 return; 212 return;
213 cpu = raw_smp_processor_id(); 213 cpu = raw_smp_processor_id();
214 214
215 if (blk_tracer) { 215 if (blk_tracer) {
216 tracing_record_cmdline(current); 216 tracing_record_cmdline(current);
217 217
218 buffer = blk_tr->buffer; 218 buffer = blk_tr->buffer;
219 pc = preempt_count(); 219 pc = preempt_count();
220 event = trace_buffer_lock_reserve(buffer, TRACE_BLK, 220 event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
221 sizeof(*t) + pdu_len, 221 sizeof(*t) + pdu_len,
222 0, pc); 222 0, pc);
223 if (!event) 223 if (!event)
224 return; 224 return;
225 t = ring_buffer_event_data(event); 225 t = ring_buffer_event_data(event);
226 goto record_it; 226 goto record_it;
227 } 227 }
228 228
229 /* 229 /*
230 * A word about the locking here - we disable interrupts to reserve 230 * A word about the locking here - we disable interrupts to reserve
231 * some space in the relay per-cpu buffer, to prevent an irq 231 * some space in the relay per-cpu buffer, to prevent an irq
232 * from coming in and stepping on our toes. 232 * from coming in and stepping on our toes.
233 */ 233 */
234 local_irq_save(flags); 234 local_irq_save(flags);
235 235
236 if (unlikely(tsk->btrace_seq != blktrace_seq)) 236 if (unlikely(tsk->btrace_seq != blktrace_seq))
237 trace_note_tsk(bt, tsk); 237 trace_note_tsk(bt, tsk);
238 238
239 t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len); 239 t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len);
240 if (t) { 240 if (t) {
241 sequence = per_cpu_ptr(bt->sequence, cpu); 241 sequence = per_cpu_ptr(bt->sequence, cpu);
242 242
243 t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION; 243 t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
244 t->sequence = ++(*sequence); 244 t->sequence = ++(*sequence);
245 t->time = ktime_to_ns(ktime_get()); 245 t->time = ktime_to_ns(ktime_get());
246 record_it: 246 record_it:
247 /* 247 /*
248 * These two are not needed in ftrace as they are in the 248 * These two are not needed in ftrace as they are in the
249 * generic trace_entry, filled by tracing_generic_entry_update, 249 * generic trace_entry, filled by tracing_generic_entry_update,
250 * but for the trace_event->bin() synthesizer benefit we do it 250 * but for the trace_event->bin() synthesizer benefit we do it
251 * here too. 251 * here too.
252 */ 252 */
253 t->cpu = cpu; 253 t->cpu = cpu;
254 t->pid = pid; 254 t->pid = pid;
255 255
256 t->sector = sector; 256 t->sector = sector;
257 t->bytes = bytes; 257 t->bytes = bytes;
258 t->action = what; 258 t->action = what;
259 t->device = bt->dev; 259 t->device = bt->dev;
260 t->error = error; 260 t->error = error;
261 t->pdu_len = pdu_len; 261 t->pdu_len = pdu_len;
262 262
263 if (pdu_len) 263 if (pdu_len)
264 memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); 264 memcpy((void *) t + sizeof(*t), pdu_data, pdu_len);
265 265
266 if (blk_tracer) { 266 if (blk_tracer) {
267 trace_buffer_unlock_commit(buffer, event, 0, pc); 267 trace_buffer_unlock_commit(buffer, event, 0, pc);
268 return; 268 return;
269 } 269 }
270 } 270 }
271 271
272 local_irq_restore(flags); 272 local_irq_restore(flags);
273 } 273 }
274 274
275 static struct dentry *blk_tree_root; 275 static struct dentry *blk_tree_root;
276 static DEFINE_MUTEX(blk_tree_mutex); 276 static DEFINE_MUTEX(blk_tree_mutex);
277 277
278 static void blk_trace_free(struct blk_trace *bt) 278 static void blk_trace_free(struct blk_trace *bt)
279 { 279 {
280 debugfs_remove(bt->msg_file); 280 debugfs_remove(bt->msg_file);
281 debugfs_remove(bt->dropped_file); 281 debugfs_remove(bt->dropped_file);
282 relay_close(bt->rchan); 282 relay_close(bt->rchan);
283 debugfs_remove(bt->dir); 283 debugfs_remove(bt->dir);
284 free_percpu(bt->sequence); 284 free_percpu(bt->sequence);
285 free_percpu(bt->msg_data); 285 free_percpu(bt->msg_data);
286 kfree(bt); 286 kfree(bt);
287 } 287 }
288 288
289 static void blk_trace_cleanup(struct blk_trace *bt) 289 static void blk_trace_cleanup(struct blk_trace *bt)
290 { 290 {
291 blk_trace_free(bt); 291 blk_trace_free(bt);
292 if (atomic_dec_and_test(&blk_probes_ref)) 292 if (atomic_dec_and_test(&blk_probes_ref))
293 blk_unregister_tracepoints(); 293 blk_unregister_tracepoints();
294 } 294 }
295 295
296 int blk_trace_remove(struct request_queue *q) 296 int blk_trace_remove(struct request_queue *q)
297 { 297 {
298 struct blk_trace *bt; 298 struct blk_trace *bt;
299 299
300 bt = xchg(&q->blk_trace, NULL); 300 bt = xchg(&q->blk_trace, NULL);
301 if (!bt) 301 if (!bt)
302 return -EINVAL; 302 return -EINVAL;
303 303
304 if (bt->trace_state != Blktrace_running) 304 if (bt->trace_state != Blktrace_running)
305 blk_trace_cleanup(bt); 305 blk_trace_cleanup(bt);
306 306
307 return 0; 307 return 0;
308 } 308 }
309 EXPORT_SYMBOL_GPL(blk_trace_remove); 309 EXPORT_SYMBOL_GPL(blk_trace_remove);
310 310
311 static int blk_dropped_open(struct inode *inode, struct file *filp) 311 static int blk_dropped_open(struct inode *inode, struct file *filp)
312 { 312 {
313 filp->private_data = inode->i_private; 313 filp->private_data = inode->i_private;
314 314
315 return 0; 315 return 0;
316 } 316 }
317 317
318 static ssize_t blk_dropped_read(struct file *filp, char __user *buffer, 318 static ssize_t blk_dropped_read(struct file *filp, char __user *buffer,
319 size_t count, loff_t *ppos) 319 size_t count, loff_t *ppos)
320 { 320 {
321 struct blk_trace *bt = filp->private_data; 321 struct blk_trace *bt = filp->private_data;
322 char buf[16]; 322 char buf[16];
323 323
324 snprintf(buf, sizeof(buf), "%u\n", atomic_read(&bt->dropped)); 324 snprintf(buf, sizeof(buf), "%u\n", atomic_read(&bt->dropped));
325 325
326 return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf)); 326 return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
327 } 327 }
328 328
329 static const struct file_operations blk_dropped_fops = { 329 static const struct file_operations blk_dropped_fops = {
330 .owner = THIS_MODULE, 330 .owner = THIS_MODULE,
331 .open = blk_dropped_open, 331 .open = blk_dropped_open,
332 .read = blk_dropped_read, 332 .read = blk_dropped_read,
333 .llseek = default_llseek, 333 .llseek = default_llseek,
334 }; 334 };
335 335
336 static int blk_msg_open(struct inode *inode, struct file *filp) 336 static int blk_msg_open(struct inode *inode, struct file *filp)
337 { 337 {
338 filp->private_data = inode->i_private; 338 filp->private_data = inode->i_private;
339 339
340 return 0; 340 return 0;
341 } 341 }
342 342
343 static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, 343 static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
344 size_t count, loff_t *ppos) 344 size_t count, loff_t *ppos)
345 { 345 {
346 char *msg; 346 char *msg;
347 struct blk_trace *bt; 347 struct blk_trace *bt;
348 348
349 if (count >= BLK_TN_MAX_MSG) 349 if (count >= BLK_TN_MAX_MSG)
350 return -EINVAL; 350 return -EINVAL;
351 351
352 msg = kmalloc(count + 1, GFP_KERNEL); 352 msg = kmalloc(count + 1, GFP_KERNEL);
353 if (msg == NULL) 353 if (msg == NULL)
354 return -ENOMEM; 354 return -ENOMEM;
355 355
356 if (copy_from_user(msg, buffer, count)) { 356 if (copy_from_user(msg, buffer, count)) {
357 kfree(msg); 357 kfree(msg);
358 return -EFAULT; 358 return -EFAULT;
359 } 359 }
360 360
361 msg[count] = '\0'; 361 msg[count] = '\0';
362 bt = filp->private_data; 362 bt = filp->private_data;
363 __trace_note_message(bt, "%s", msg); 363 __trace_note_message(bt, "%s", msg);
364 kfree(msg); 364 kfree(msg);
365 365
366 return count; 366 return count;
367 } 367 }
368 368
369 static const struct file_operations blk_msg_fops = { 369 static const struct file_operations blk_msg_fops = {
370 .owner = THIS_MODULE, 370 .owner = THIS_MODULE,
371 .open = blk_msg_open, 371 .open = blk_msg_open,
372 .write = blk_msg_write, 372 .write = blk_msg_write,
373 .llseek = noop_llseek, 373 .llseek = noop_llseek,
374 }; 374 };
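The msg file lets user space inject free-form markers into a running trace through __trace_note_message(). A hypothetical user-space helper, assuming debugfs is mounted at /sys/kernel/debug and a trace was set up for a device named sda (both the mount point and the name are assumptions; the directory layout comes from do_blk_trace_setup() below):

/*
 * Hypothetical helper: write a marker into an active blktrace session.
 * The message must be shorter than BLK_TN_MAX_MSG or blk_msg_write()
 * rejects it with -EINVAL.
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int blktrace_mark(const char *msg)
{
	ssize_t ret;
	int fd = open("/sys/kernel/debug/block/sda/msg", O_WRONLY);

	if (fd < 0)
		return -1;
	ret = write(fd, msg, strlen(msg));
	close(fd);
	return ret < 0 ? -1 : 0;
}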
375 375
376 /* 376 /*
377 * Keep track of how many times we encountered a full subbuffer, to aid 377 * Keep track of how many times we encountered a full subbuffer, to aid
378 * the user space app in telling how many lost events there were. 378 * the user space app in telling how many lost events there were.
379 */ 379 */
380 static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf, 380 static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
381 void *prev_subbuf, size_t prev_padding) 381 void *prev_subbuf, size_t prev_padding)
382 { 382 {
383 struct blk_trace *bt; 383 struct blk_trace *bt;
384 384
385 if (!relay_buf_full(buf)) 385 if (!relay_buf_full(buf))
386 return 1; 386 return 1;
387 387
388 bt = buf->chan->private_data; 388 bt = buf->chan->private_data;
389 atomic_inc(&bt->dropped); 389 atomic_inc(&bt->dropped);
390 return 0; 390 return 0;
391 } 391 }
392 392
393 static int blk_remove_buf_file_callback(struct dentry *dentry) 393 static int blk_remove_buf_file_callback(struct dentry *dentry)
394 { 394 {
395 debugfs_remove(dentry); 395 debugfs_remove(dentry);
396 396
397 return 0; 397 return 0;
398 } 398 }
399 399
400 static struct dentry *blk_create_buf_file_callback(const char *filename, 400 static struct dentry *blk_create_buf_file_callback(const char *filename,
401 struct dentry *parent, 401 struct dentry *parent,
402 int mode, 402 int mode,
403 struct rchan_buf *buf, 403 struct rchan_buf *buf,
404 int *is_global) 404 int *is_global)
405 { 405 {
406 return debugfs_create_file(filename, mode, parent, buf, 406 return debugfs_create_file(filename, mode, parent, buf,
407 &relay_file_operations); 407 &relay_file_operations);
408 } 408 }
409 409
410 static struct rchan_callbacks blk_relay_callbacks = { 410 static struct rchan_callbacks blk_relay_callbacks = {
411 .subbuf_start = blk_subbuf_start_callback, 411 .subbuf_start = blk_subbuf_start_callback,
412 .create_buf_file = blk_create_buf_file_callback, 412 .create_buf_file = blk_create_buf_file_callback,
413 .remove_buf_file = blk_remove_buf_file_callback, 413 .remove_buf_file = blk_remove_buf_file_callback,
414 }; 414 };
415 415
416 static void blk_trace_setup_lba(struct blk_trace *bt, 416 static void blk_trace_setup_lba(struct blk_trace *bt,
417 struct block_device *bdev) 417 struct block_device *bdev)
418 { 418 {
419 struct hd_struct *part = NULL; 419 struct hd_struct *part = NULL;
420 420
421 if (bdev) 421 if (bdev)
422 part = bdev->bd_part; 422 part = bdev->bd_part;
423 423
424 if (part) { 424 if (part) {
425 bt->start_lba = part->start_sect; 425 bt->start_lba = part->start_sect;
426 bt->end_lba = part->start_sect + part->nr_sects; 426 bt->end_lba = part->start_sect + part->nr_sects;
427 } else { 427 } else {
428 bt->start_lba = 0; 428 bt->start_lba = 0;
429 bt->end_lba = -1ULL; 429 bt->end_lba = -1ULL;
430 } 430 }
431 } 431 }
432 432
433 /* 433 /*
434 * Setup everything required to start tracing 434 * Setup everything required to start tracing
435 */ 435 */
436 int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, 436 int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
437 struct block_device *bdev, 437 struct block_device *bdev,
438 struct blk_user_trace_setup *buts) 438 struct blk_user_trace_setup *buts)
439 { 439 {
440 struct blk_trace *old_bt, *bt = NULL; 440 struct blk_trace *old_bt, *bt = NULL;
441 struct dentry *dir = NULL; 441 struct dentry *dir = NULL;
442 int ret, i; 442 int ret, i;
443 443
444 if (!buts->buf_size || !buts->buf_nr) 444 if (!buts->buf_size || !buts->buf_nr)
445 return -EINVAL; 445 return -EINVAL;
446 446
447 strncpy(buts->name, name, BLKTRACE_BDEV_SIZE); 447 strncpy(buts->name, name, BLKTRACE_BDEV_SIZE);
448 buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0'; 448 buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0';
449 449
450 /* 450 /*
451 * some device names have larger paths - convert the slashes 451 * some device names have larger paths - convert the slashes
452 * to underscores for this to work as expected 452 * to underscores for this to work as expected
453 */ 453 */
454 for (i = 0; i < strlen(buts->name); i++) 454 for (i = 0; i < strlen(buts->name); i++)
455 if (buts->name[i] == '/') 455 if (buts->name[i] == '/')
456 buts->name[i] = '_'; 456 buts->name[i] = '_';
457 457
458 bt = kzalloc(sizeof(*bt), GFP_KERNEL); 458 bt = kzalloc(sizeof(*bt), GFP_KERNEL);
459 if (!bt) 459 if (!bt)
460 return -ENOMEM; 460 return -ENOMEM;
461 461
462 ret = -ENOMEM; 462 ret = -ENOMEM;
463 bt->sequence = alloc_percpu(unsigned long); 463 bt->sequence = alloc_percpu(unsigned long);
464 if (!bt->sequence) 464 if (!bt->sequence)
465 goto err; 465 goto err;
466 466
467 bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char)); 467 bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char));
468 if (!bt->msg_data) 468 if (!bt->msg_data)
469 goto err; 469 goto err;
470 470
471 ret = -ENOENT; 471 ret = -ENOENT;
472 472
473 mutex_lock(&blk_tree_mutex); 473 mutex_lock(&blk_tree_mutex);
474 if (!blk_tree_root) { 474 if (!blk_tree_root) {
475 blk_tree_root = debugfs_create_dir("block", NULL); 475 blk_tree_root = debugfs_create_dir("block", NULL);
476 if (!blk_tree_root) { 476 if (!blk_tree_root) {
477 mutex_unlock(&blk_tree_mutex); 477 mutex_unlock(&blk_tree_mutex);
478 goto err; 478 goto err;
479 } 479 }
480 } 480 }
481 mutex_unlock(&blk_tree_mutex); 481 mutex_unlock(&blk_tree_mutex);
482 482
483 dir = debugfs_create_dir(buts->name, blk_tree_root); 483 dir = debugfs_create_dir(buts->name, blk_tree_root);
484 484
485 if (!dir) 485 if (!dir)
486 goto err; 486 goto err;
487 487
488 bt->dir = dir; 488 bt->dir = dir;
489 bt->dev = dev; 489 bt->dev = dev;
490 atomic_set(&bt->dropped, 0); 490 atomic_set(&bt->dropped, 0);
491 491
492 ret = -EIO; 492 ret = -EIO;
493 bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt, 493 bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt,
494 &blk_dropped_fops); 494 &blk_dropped_fops);
495 if (!bt->dropped_file) 495 if (!bt->dropped_file)
496 goto err; 496 goto err;
497 497
498 bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops); 498 bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops);
499 if (!bt->msg_file) 499 if (!bt->msg_file)
500 goto err; 500 goto err;
501 501
502 bt->rchan = relay_open("trace", dir, buts->buf_size, 502 bt->rchan = relay_open("trace", dir, buts->buf_size,
503 buts->buf_nr, &blk_relay_callbacks, bt); 503 buts->buf_nr, &blk_relay_callbacks, bt);
504 if (!bt->rchan) 504 if (!bt->rchan)
505 goto err; 505 goto err;
506 506
507 bt->act_mask = buts->act_mask; 507 bt->act_mask = buts->act_mask;
508 if (!bt->act_mask) 508 if (!bt->act_mask)
509 bt->act_mask = (u16) -1; 509 bt->act_mask = (u16) -1;
510 510
511 blk_trace_setup_lba(bt, bdev); 511 blk_trace_setup_lba(bt, bdev);
512 512
513 /* overwrite with user settings */ 513 /* overwrite with user settings */
514 if (buts->start_lba) 514 if (buts->start_lba)
515 bt->start_lba = buts->start_lba; 515 bt->start_lba = buts->start_lba;
516 if (buts->end_lba) 516 if (buts->end_lba)
517 bt->end_lba = buts->end_lba; 517 bt->end_lba = buts->end_lba;
518 518
519 bt->pid = buts->pid; 519 bt->pid = buts->pid;
520 bt->trace_state = Blktrace_setup; 520 bt->trace_state = Blktrace_setup;
521 521
522 ret = -EBUSY; 522 ret = -EBUSY;
523 old_bt = xchg(&q->blk_trace, bt); 523 old_bt = xchg(&q->blk_trace, bt);
524 if (old_bt) { 524 if (old_bt) {
525 (void) xchg(&q->blk_trace, old_bt); 525 (void) xchg(&q->blk_trace, old_bt);
526 goto err; 526 goto err;
527 } 527 }
528 528
529 if (atomic_inc_return(&blk_probes_ref) == 1) 529 if (atomic_inc_return(&blk_probes_ref) == 1)
530 blk_register_tracepoints(); 530 blk_register_tracepoints();
531 531
532 return 0; 532 return 0;
533 err: 533 err:
534 blk_trace_free(bt); 534 blk_trace_free(bt);
535 return ret; 535 return ret;
536 } 536 }
537 537
538 int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, 538 int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
539 struct block_device *bdev, 539 struct block_device *bdev,
540 char __user *arg) 540 char __user *arg)
541 { 541 {
542 struct blk_user_trace_setup buts; 542 struct blk_user_trace_setup buts;
543 int ret; 543 int ret;
544 544
545 ret = copy_from_user(&buts, arg, sizeof(buts)); 545 ret = copy_from_user(&buts, arg, sizeof(buts));
546 if (ret) 546 if (ret)
547 return -EFAULT; 547 return -EFAULT;
548 548
549 ret = do_blk_trace_setup(q, name, dev, bdev, &buts); 549 ret = do_blk_trace_setup(q, name, dev, bdev, &buts);
550 if (ret) 550 if (ret)
551 return ret; 551 return ret;
552 552
553 if (copy_to_user(arg, &buts, sizeof(buts))) { 553 if (copy_to_user(arg, &buts, sizeof(buts))) {
554 blk_trace_remove(q); 554 blk_trace_remove(q);
555 return -EFAULT; 555 return -EFAULT;
556 } 556 }
557 return 0; 557 return 0;
558 } 558 }
559 EXPORT_SYMBOL_GPL(blk_trace_setup); 559 EXPORT_SYMBOL_GPL(blk_trace_setup);
560 560
561 #if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64) 561 #if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64)
562 static int compat_blk_trace_setup(struct request_queue *q, char *name, 562 static int compat_blk_trace_setup(struct request_queue *q, char *name,
563 dev_t dev, struct block_device *bdev, 563 dev_t dev, struct block_device *bdev,
564 char __user *arg) 564 char __user *arg)
565 { 565 {
566 struct blk_user_trace_setup buts; 566 struct blk_user_trace_setup buts;
567 struct compat_blk_user_trace_setup cbuts; 567 struct compat_blk_user_trace_setup cbuts;
568 int ret; 568 int ret;
569 569
570 if (copy_from_user(&cbuts, arg, sizeof(cbuts))) 570 if (copy_from_user(&cbuts, arg, sizeof(cbuts)))
571 return -EFAULT; 571 return -EFAULT;
572 572
573 buts = (struct blk_user_trace_setup) { 573 buts = (struct blk_user_trace_setup) {
574 .act_mask = cbuts.act_mask, 574 .act_mask = cbuts.act_mask,
575 .buf_size = cbuts.buf_size, 575 .buf_size = cbuts.buf_size,
576 .buf_nr = cbuts.buf_nr, 576 .buf_nr = cbuts.buf_nr,
577 .start_lba = cbuts.start_lba, 577 .start_lba = cbuts.start_lba,
578 .end_lba = cbuts.end_lba, 578 .end_lba = cbuts.end_lba,
579 .pid = cbuts.pid, 579 .pid = cbuts.pid,
580 }; 580 };
581 memcpy(&buts.name, &cbuts.name, 32); 581 memcpy(&buts.name, &cbuts.name, 32);
582 582
583 ret = do_blk_trace_setup(q, name, dev, bdev, &buts); 583 ret = do_blk_trace_setup(q, name, dev, bdev, &buts);
584 if (ret) 584 if (ret)
585 return ret; 585 return ret;
586 586
587 if (copy_to_user(arg, &buts.name, 32)) { 587 if (copy_to_user(arg, &buts.name, 32)) {
588 blk_trace_remove(q); 588 blk_trace_remove(q);
589 return -EFAULT; 589 return -EFAULT;
590 } 590 }
591 591
592 return 0; 592 return 0;
593 } 593 }
594 #endif 594 #endif
595 595
596 int blk_trace_startstop(struct request_queue *q, int start) 596 int blk_trace_startstop(struct request_queue *q, int start)
597 { 597 {
598 int ret; 598 int ret;
599 struct blk_trace *bt = q->blk_trace; 599 struct blk_trace *bt = q->blk_trace;
600 600
601 if (bt == NULL) 601 if (bt == NULL)
602 return -EINVAL; 602 return -EINVAL;
603 603
604 /* 604 /*
605 * For starting a trace, we can transition from a setup or stopped 605 * For starting a trace, we can transition from a setup or stopped
606 * trace. For stopping a trace, the state must be running 606 * trace. For stopping a trace, the state must be running
607 */ 607 */
608 ret = -EINVAL; 608 ret = -EINVAL;
609 if (start) { 609 if (start) {
610 if (bt->trace_state == Blktrace_setup || 610 if (bt->trace_state == Blktrace_setup ||
611 bt->trace_state == Blktrace_stopped) { 611 bt->trace_state == Blktrace_stopped) {
612 blktrace_seq++; 612 blktrace_seq++;
613 smp_mb(); 613 smp_mb();
614 bt->trace_state = Blktrace_running; 614 bt->trace_state = Blktrace_running;
615 615
616 trace_note_time(bt); 616 trace_note_time(bt);
617 ret = 0; 617 ret = 0;
618 } 618 }
619 } else { 619 } else {
620 if (bt->trace_state == Blktrace_running) { 620 if (bt->trace_state == Blktrace_running) {
621 bt->trace_state = Blktrace_stopped; 621 bt->trace_state = Blktrace_stopped;
622 relay_flush(bt->rchan); 622 relay_flush(bt->rchan);
623 ret = 0; 623 ret = 0;
624 } 624 }
625 } 625 }
626 626
627 return ret; 627 return ret;
628 } 628 }
629 EXPORT_SYMBOL_GPL(blk_trace_startstop); 629 EXPORT_SYMBOL_GPL(blk_trace_startstop);
630 630
631 /** 631 /**
632 * blk_trace_ioctl: - handle the ioctls associated with tracing 632 * blk_trace_ioctl: - handle the ioctls associated with tracing
633 * @bdev: the block device 633 * @bdev: the block device
634 * @cmd: the ioctl cmd 634 * @cmd: the ioctl cmd
635 * @arg: the argument data, if any 635 * @arg: the argument data, if any
636 * 636 *
637 **/ 637 **/
638 int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) 638 int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
639 { 639 {
640 struct request_queue *q; 640 struct request_queue *q;
641 int ret, start = 0; 641 int ret, start = 0;
642 char b[BDEVNAME_SIZE]; 642 char b[BDEVNAME_SIZE];
643 643
644 q = bdev_get_queue(bdev); 644 q = bdev_get_queue(bdev);
645 if (!q) 645 if (!q)
646 return -ENXIO; 646 return -ENXIO;
647 647
648 mutex_lock(&bdev->bd_mutex); 648 mutex_lock(&bdev->bd_mutex);
649 649
650 switch (cmd) { 650 switch (cmd) {
651 case BLKTRACESETUP: 651 case BLKTRACESETUP:
652 bdevname(bdev, b); 652 bdevname(bdev, b);
653 ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); 653 ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
654 break; 654 break;
655 #if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64) 655 #if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64)
656 case BLKTRACESETUP32: 656 case BLKTRACESETUP32:
657 bdevname(bdev, b); 657 bdevname(bdev, b);
658 ret = compat_blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); 658 ret = compat_blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
659 break; 659 break;
660 #endif 660 #endif
661 case BLKTRACESTART: 661 case BLKTRACESTART:
662 start = 1; 662 start = 1;
663 case BLKTRACESTOP: 663 case BLKTRACESTOP:
664 ret = blk_trace_startstop(q, start); 664 ret = blk_trace_startstop(q, start);
665 break; 665 break;
666 case BLKTRACETEARDOWN: 666 case BLKTRACETEARDOWN:
667 ret = blk_trace_remove(q); 667 ret = blk_trace_remove(q);
668 break; 668 break;
669 default: 669 default:
670 ret = -ENOTTY; 670 ret = -ENOTTY;
671 break; 671 break;
672 } 672 }
673 673
674 mutex_unlock(&bdev->bd_mutex); 674 mutex_unlock(&bdev->bd_mutex);
675 return ret; 675 return ret;
676 } 676 }
677 677
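/*
 * Editorial note (not part of blktrace.c): the ioctls handled above are
 * normally issued from userspace by the blktrace(8) utility. The sketch
 * below is illustrative only; it assumes the UAPI definitions of
 * BLKTRACESETUP/BLKTRACESTART and struct blk_user_trace_setup exported via
 * <linux/fs.h> and <linux/blktrace_api.h>, and a device path such as
 * /dev/sda supplied by the caller.
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/blktrace_api.h>

static int start_blktrace(const char *devpath)
{
	struct blk_user_trace_setup buts;
	int fd, ret;

	fd = open(devpath, O_RDONLY);
	if (fd < 0)
		return -1;

	memset(&buts, 0, sizeof(buts));
	buts.buf_size = 512 * 1024;	/* bytes per relay sub-buffer */
	buts.buf_nr   = 4;		/* number of sub-buffers */
	buts.act_mask = 0;		/* 0 is widened to (u16)-1: trace everything */

	/* BLKTRACESETUP creates the relay "trace" files under debugfs block/<name>/ */
	ret = ioctl(fd, BLKTRACESETUP, &buts);
	if (!ret)
		/* Blktrace_setup -> Blktrace_running */
		ret = ioctl(fd, BLKTRACESTART);

	close(fd);
	return ret;
}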
678 /** 678 /**
679 * blk_trace_shutdown: - stop and cleanup trace structures 679 * blk_trace_shutdown: - stop and cleanup trace structures
680 * @q: the request queue associated with the device 680 * @q: the request queue associated with the device
681 * 681 *
682 **/ 682 **/
683 void blk_trace_shutdown(struct request_queue *q) 683 void blk_trace_shutdown(struct request_queue *q)
684 { 684 {
685 if (q->blk_trace) { 685 if (q->blk_trace) {
686 blk_trace_startstop(q, 0); 686 blk_trace_startstop(q, 0);
687 blk_trace_remove(q); 687 blk_trace_remove(q);
688 } 688 }
689 } 689 }
690 690
691 /* 691 /*
692 * blktrace probes 692 * blktrace probes
693 */ 693 */
694 694
695 /** 695 /**
696 * blk_add_trace_rq - Add a trace for a request oriented action 696 * blk_add_trace_rq - Add a trace for a request oriented action
697 * @q: queue the io is for 697 * @q: queue the io is for
698 * @rq: the source request 698 * @rq: the source request
699 * @what: the action 699 * @what: the action
700 * 700 *
701 * Description: 701 * Description:
702 * Records an action against a request. Will log the bio offset + size. 702 * Records an action against a request. Will log the bio offset + size.
703 * 703 *
704 **/ 704 **/
705 static void blk_add_trace_rq(struct request_queue *q, struct request *rq, 705 static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
706 u32 what) 706 u32 what)
707 { 707 {
708 struct blk_trace *bt = q->blk_trace; 708 struct blk_trace *bt = q->blk_trace;
709 709
710 if (likely(!bt)) 710 if (likely(!bt))
711 return; 711 return;
712 712
713 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { 713 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
714 what |= BLK_TC_ACT(BLK_TC_PC); 714 what |= BLK_TC_ACT(BLK_TC_PC);
715 __blk_add_trace(bt, 0, blk_rq_bytes(rq), rq->cmd_flags, 715 __blk_add_trace(bt, 0, blk_rq_bytes(rq), rq->cmd_flags,
716 what, rq->errors, rq->cmd_len, rq->cmd); 716 what, rq->errors, rq->cmd_len, rq->cmd);
717 } else { 717 } else {
718 what |= BLK_TC_ACT(BLK_TC_FS); 718 what |= BLK_TC_ACT(BLK_TC_FS);
719 __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), 719 __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
720 rq->cmd_flags, what, rq->errors, 0, NULL); 720 rq->cmd_flags, what, rq->errors, 0, NULL);
721 } 721 }
722 } 722 }
723 723
724 static void blk_add_trace_rq_abort(void *ignore, 724 static void blk_add_trace_rq_abort(void *ignore,
725 struct request_queue *q, struct request *rq) 725 struct request_queue *q, struct request *rq)
726 { 726 {
727 blk_add_trace_rq(q, rq, BLK_TA_ABORT); 727 blk_add_trace_rq(q, rq, BLK_TA_ABORT);
728 } 728 }
729 729
730 static void blk_add_trace_rq_insert(void *ignore, 730 static void blk_add_trace_rq_insert(void *ignore,
731 struct request_queue *q, struct request *rq) 731 struct request_queue *q, struct request *rq)
732 { 732 {
733 blk_add_trace_rq(q, rq, BLK_TA_INSERT); 733 blk_add_trace_rq(q, rq, BLK_TA_INSERT);
734 } 734 }
735 735
736 static void blk_add_trace_rq_issue(void *ignore, 736 static void blk_add_trace_rq_issue(void *ignore,
737 struct request_queue *q, struct request *rq) 737 struct request_queue *q, struct request *rq)
738 { 738 {
739 blk_add_trace_rq(q, rq, BLK_TA_ISSUE); 739 blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
740 } 740 }
741 741
742 static void blk_add_trace_rq_requeue(void *ignore, 742 static void blk_add_trace_rq_requeue(void *ignore,
743 struct request_queue *q, 743 struct request_queue *q,
744 struct request *rq) 744 struct request *rq)
745 { 745 {
746 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); 746 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
747 } 747 }
748 748
749 static void blk_add_trace_rq_complete(void *ignore, 749 static void blk_add_trace_rq_complete(void *ignore,
750 struct request_queue *q, 750 struct request_queue *q,
751 struct request *rq) 751 struct request *rq)
752 { 752 {
753 blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); 753 blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
754 } 754 }
755 755
756 /** 756 /**
757 * blk_add_trace_bio - Add a trace for a bio oriented action 757 * blk_add_trace_bio - Add a trace for a bio oriented action
758 * @q: queue the io is for 758 * @q: queue the io is for
759 * @bio: the source bio 759 * @bio: the source bio
760 * @what: the action 760 * @what: the action
761 * @error: error, if any 761 * @error: error, if any
762 * 762 *
763 * Description: 763 * Description:
764 * Records an action against a bio. Will log the bio offset + size. 764 * Records an action against a bio. Will log the bio offset + size.
765 * 765 *
766 **/ 766 **/
767 static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, 767 static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
768 u32 what, int error) 768 u32 what, int error)
769 { 769 {
770 struct blk_trace *bt = q->blk_trace; 770 struct blk_trace *bt = q->blk_trace;
771 771
772 if (likely(!bt)) 772 if (likely(!bt))
773 return; 773 return;
774 774
775 if (!error && !bio_flagged(bio, BIO_UPTODATE)) 775 if (!error && !bio_flagged(bio, BIO_UPTODATE))
776 error = EIO; 776 error = EIO;
777 777
778 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, 778 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what,
779 error, 0, NULL); 779 error, 0, NULL);
780 } 780 }
781 781
782 static void blk_add_trace_bio_bounce(void *ignore, 782 static void blk_add_trace_bio_bounce(void *ignore,
783 struct request_queue *q, struct bio *bio) 783 struct request_queue *q, struct bio *bio)
784 { 784 {
785 blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0); 785 blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0);
786 } 786 }
787 787
788 static void blk_add_trace_bio_complete(void *ignore, 788 static void blk_add_trace_bio_complete(void *ignore,
789 struct request_queue *q, struct bio *bio, 789 struct request_queue *q, struct bio *bio,
790 int error) 790 int error)
791 { 791 {
792 blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error); 792 blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error);
793 } 793 }
794 794
795 static void blk_add_trace_bio_backmerge(void *ignore, 795 static void blk_add_trace_bio_backmerge(void *ignore,
796 struct request_queue *q, 796 struct request_queue *q,
797 struct bio *bio) 797 struct bio *bio)
798 { 798 {
799 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0); 799 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0);
800 } 800 }
801 801
802 static void blk_add_trace_bio_frontmerge(void *ignore, 802 static void blk_add_trace_bio_frontmerge(void *ignore,
803 struct request_queue *q, 803 struct request_queue *q,
804 struct bio *bio) 804 struct bio *bio)
805 { 805 {
806 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0); 806 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0);
807 } 807 }
808 808
809 static void blk_add_trace_bio_queue(void *ignore, 809 static void blk_add_trace_bio_queue(void *ignore,
810 struct request_queue *q, struct bio *bio) 810 struct request_queue *q, struct bio *bio)
811 { 811 {
812 blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0); 812 blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0);
813 } 813 }
814 814
815 static void blk_add_trace_getrq(void *ignore, 815 static void blk_add_trace_getrq(void *ignore,
816 struct request_queue *q, 816 struct request_queue *q,
817 struct bio *bio, int rw) 817 struct bio *bio, int rw)
818 { 818 {
819 if (bio) 819 if (bio)
820 blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0); 820 blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0);
821 else { 821 else {
822 struct blk_trace *bt = q->blk_trace; 822 struct blk_trace *bt = q->blk_trace;
823 823
824 if (bt) 824 if (bt)
825 __blk_add_trace(bt, 0, 0, rw, BLK_TA_GETRQ, 0, 0, NULL); 825 __blk_add_trace(bt, 0, 0, rw, BLK_TA_GETRQ, 0, 0, NULL);
826 } 826 }
827 } 827 }
828 828
829 829
830 static void blk_add_trace_sleeprq(void *ignore, 830 static void blk_add_trace_sleeprq(void *ignore,
831 struct request_queue *q, 831 struct request_queue *q,
832 struct bio *bio, int rw) 832 struct bio *bio, int rw)
833 { 833 {
834 if (bio) 834 if (bio)
835 blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0); 835 blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0);
836 else { 836 else {
837 struct blk_trace *bt = q->blk_trace; 837 struct blk_trace *bt = q->blk_trace;
838 838
839 if (bt) 839 if (bt)
840 __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ, 840 __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ,
841 0, 0, NULL); 841 0, 0, NULL);
842 } 842 }
843 } 843 }
844 844
845 static void blk_add_trace_plug(void *ignore, struct request_queue *q) 845 static void blk_add_trace_plug(void *ignore, struct request_queue *q)
846 { 846 {
847 struct blk_trace *bt = q->blk_trace; 847 struct blk_trace *bt = q->blk_trace;
848 848
849 if (bt) 849 if (bt)
850 __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL); 850 __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL);
851 } 851 }
852 852
853 static void blk_add_trace_unplug_io(void *ignore, struct request_queue *q, 853 static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
854 unsigned int depth) 854 unsigned int depth, bool explicit)
855 { 855 {
856 struct blk_trace *bt = q->blk_trace; 856 struct blk_trace *bt = q->blk_trace;
857 857
858 if (bt) { 858 if (bt) {
859 __be64 rpdu = cpu_to_be64(depth); 859 __be64 rpdu = cpu_to_be64(depth);
860 u32 what;
860 861
861 __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0, 862 if (explicit)
862 sizeof(rpdu), &rpdu); 863 what = BLK_TA_UNPLUG_IO;
864 else
865 what = BLK_TA_UNPLUG_TIMER;
866
867 __blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu);
863 } 868 }
864 } 869 }
865 870
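/*
 * Editorial note (not part of blktrace.c): with this commit the separate
 * block_unplug_io/block_unplug_timer probes collapse into the single
 * block_unplug probe above, which carries an "explicit" flag instead.
 * An explicit unplug is logged as BLK_TA_UNPLUG_IO ("U"/"unplug_io"); an
 * implicit one, the schedule()-time flush of pending IO described in the
 * commit message, is logged as BLK_TA_UNPLUG_TIMER ("UT"/"unplug_timer").
 * The call site lives in block/blk-core.c (one of the other files in this
 * commit, not shown here); a plausible sketch of that invocation is
 *
 *	trace_block_unplug(q, depth, !from_schedule);
 *
 * where "from_schedule" is an assumed parameter name. In both cases the
 * queue depth travels as a big-endian PDU and is printed by blk_log_unplug().
 */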
866 static void blk_add_trace_split(void *ignore, 871 static void blk_add_trace_split(void *ignore,
867 struct request_queue *q, struct bio *bio, 872 struct request_queue *q, struct bio *bio,
868 unsigned int pdu) 873 unsigned int pdu)
869 { 874 {
870 struct blk_trace *bt = q->blk_trace; 875 struct blk_trace *bt = q->blk_trace;
871 876
872 if (bt) { 877 if (bt) {
873 __be64 rpdu = cpu_to_be64(pdu); 878 __be64 rpdu = cpu_to_be64(pdu);
874 879
875 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, 880 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw,
876 BLK_TA_SPLIT, !bio_flagged(bio, BIO_UPTODATE), 881 BLK_TA_SPLIT, !bio_flagged(bio, BIO_UPTODATE),
877 sizeof(rpdu), &rpdu); 882 sizeof(rpdu), &rpdu);
878 } 883 }
879 } 884 }
880 885
881 /** 886 /**
882 * blk_add_trace_bio_remap - Add a trace for a bio-remap operation 887 * blk_add_trace_bio_remap - Add a trace for a bio-remap operation
883 * @ignore: trace callback data parameter (not used) 888 * @ignore: trace callback data parameter (not used)
884 * @q: queue the io is for 889 * @q: queue the io is for
885 * @bio: the source bio 890 * @bio: the source bio
886 * @dev: target device 891 * @dev: target device
887 * @from: source sector 892 * @from: source sector
888 * 893 *
889 * Description: 894 * Description:
890 * Device mapper or raid target sometimes need to split a bio because 895 * Device mapper or raid target sometimes need to split a bio because
891 * it spans a stripe (or similar). Add a trace for that action. 896 * it spans a stripe (or similar). Add a trace for that action.
892 * 897 *
893 **/ 898 **/
894 static void blk_add_trace_bio_remap(void *ignore, 899 static void blk_add_trace_bio_remap(void *ignore,
895 struct request_queue *q, struct bio *bio, 900 struct request_queue *q, struct bio *bio,
896 dev_t dev, sector_t from) 901 dev_t dev, sector_t from)
897 { 902 {
898 struct blk_trace *bt = q->blk_trace; 903 struct blk_trace *bt = q->blk_trace;
899 struct blk_io_trace_remap r; 904 struct blk_io_trace_remap r;
900 905
901 if (likely(!bt)) 906 if (likely(!bt))
902 return; 907 return;
903 908
904 r.device_from = cpu_to_be32(dev); 909 r.device_from = cpu_to_be32(dev);
905 r.device_to = cpu_to_be32(bio->bi_bdev->bd_dev); 910 r.device_to = cpu_to_be32(bio->bi_bdev->bd_dev);
906 r.sector_from = cpu_to_be64(from); 911 r.sector_from = cpu_to_be64(from);
907 912
908 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, 913 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw,
909 BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), 914 BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE),
910 sizeof(r), &r); 915 sizeof(r), &r);
911 } 916 }
912 917
913 /** 918 /**
914 * blk_add_trace_rq_remap - Add a trace for a request-remap operation 919 * blk_add_trace_rq_remap - Add a trace for a request-remap operation
915 * @ignore: trace callback data parameter (not used) 920 * @ignore: trace callback data parameter (not used)
916 * @q: queue the io is for 921 * @q: queue the io is for
917 * @rq: the source request 922 * @rq: the source request
918 * @dev: target device 923 * @dev: target device
919 * @from: source sector 924 * @from: source sector
920 * 925 *
921 * Description: 926 * Description:
922 * Device mapper remaps request to other devices. 927 * Device mapper remaps request to other devices.
923 * Add a trace for that action. 928 * Add a trace for that action.
924 * 929 *
925 **/ 930 **/
926 static void blk_add_trace_rq_remap(void *ignore, 931 static void blk_add_trace_rq_remap(void *ignore,
927 struct request_queue *q, 932 struct request_queue *q,
928 struct request *rq, dev_t dev, 933 struct request *rq, dev_t dev,
929 sector_t from) 934 sector_t from)
930 { 935 {
931 struct blk_trace *bt = q->blk_trace; 936 struct blk_trace *bt = q->blk_trace;
932 struct blk_io_trace_remap r; 937 struct blk_io_trace_remap r;
933 938
934 if (likely(!bt)) 939 if (likely(!bt))
935 return; 940 return;
936 941
937 r.device_from = cpu_to_be32(dev); 942 r.device_from = cpu_to_be32(dev);
938 r.device_to = cpu_to_be32(disk_devt(rq->rq_disk)); 943 r.device_to = cpu_to_be32(disk_devt(rq->rq_disk));
939 r.sector_from = cpu_to_be64(from); 944 r.sector_from = cpu_to_be64(from);
940 945
941 __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), 946 __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
942 rq_data_dir(rq), BLK_TA_REMAP, !!rq->errors, 947 rq_data_dir(rq), BLK_TA_REMAP, !!rq->errors,
943 sizeof(r), &r); 948 sizeof(r), &r);
944 } 949 }
945 950
946 /** 951 /**
947 * blk_add_driver_data - Add binary message with driver-specific data 952 * blk_add_driver_data - Add binary message with driver-specific data
948 * @q: queue the io is for 953 * @q: queue the io is for
949 * @rq: io request 954 * @rq: io request
950 * @data: driver-specific data 955 * @data: driver-specific data
951 * @len: length of driver-specific data 956 * @len: length of driver-specific data
952 * 957 *
953 * Description: 958 * Description:
954 * Some drivers might want to write driver-specific data per request. 959 * Some drivers might want to write driver-specific data per request.
955 * 960 *
956 **/ 961 **/
957 void blk_add_driver_data(struct request_queue *q, 962 void blk_add_driver_data(struct request_queue *q,
958 struct request *rq, 963 struct request *rq,
959 void *data, size_t len) 964 void *data, size_t len)
960 { 965 {
961 struct blk_trace *bt = q->blk_trace; 966 struct blk_trace *bt = q->blk_trace;
962 967
963 if (likely(!bt)) 968 if (likely(!bt))
964 return; 969 return;
965 970
966 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) 971 if (rq->cmd_type == REQ_TYPE_BLOCK_PC)
967 __blk_add_trace(bt, 0, blk_rq_bytes(rq), 0, 972 __blk_add_trace(bt, 0, blk_rq_bytes(rq), 0,
968 BLK_TA_DRV_DATA, rq->errors, len, data); 973 BLK_TA_DRV_DATA, rq->errors, len, data);
969 else 974 else
970 __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), 0, 975 __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), 0,
971 BLK_TA_DRV_DATA, rq->errors, len, data); 976 BLK_TA_DRV_DATA, rq->errors, len, data);
972 } 977 }
973 EXPORT_SYMBOL_GPL(blk_add_driver_data); 978 EXPORT_SYMBOL_GPL(blk_add_driver_data);
974 979
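/*
 * Editorial note (not part of blktrace.c): an illustrative use of the
 * exported helper above. A low-level driver that wants its own per-request
 * details in the trace stream can emit them as a BLK_TA_DRV_DATA payload;
 * the struct and function names below are hypothetical driver-side names,
 * only blk_add_driver_data() itself comes from this file.
 */
#include <linux/blkdev.h>
#include <linux/blktrace_api.h>

struct mydrv_trace_info {		/* hypothetical payload layout */
	u32 tag;
	u32 hw_queue;
};

static void mydrv_trace_request(struct request *rq, u32 tag, u32 hw_queue)
{
	struct mydrv_trace_info info = { .tag = tag, .hw_queue = hw_queue };

	/* emitted as a BLK_TA_DRV_DATA event against rq's queue */
	blk_add_driver_data(rq->q, rq, &info, sizeof(info));
}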
975 static void blk_register_tracepoints(void) 980 static void blk_register_tracepoints(void)
976 { 981 {
977 int ret; 982 int ret;
978 983
979 ret = register_trace_block_rq_abort(blk_add_trace_rq_abort, NULL); 984 ret = register_trace_block_rq_abort(blk_add_trace_rq_abort, NULL);
980 WARN_ON(ret); 985 WARN_ON(ret);
981 ret = register_trace_block_rq_insert(blk_add_trace_rq_insert, NULL); 986 ret = register_trace_block_rq_insert(blk_add_trace_rq_insert, NULL);
982 WARN_ON(ret); 987 WARN_ON(ret);
983 ret = register_trace_block_rq_issue(blk_add_trace_rq_issue, NULL); 988 ret = register_trace_block_rq_issue(blk_add_trace_rq_issue, NULL);
984 WARN_ON(ret); 989 WARN_ON(ret);
985 ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL); 990 ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL);
986 WARN_ON(ret); 991 WARN_ON(ret);
987 ret = register_trace_block_rq_complete(blk_add_trace_rq_complete, NULL); 992 ret = register_trace_block_rq_complete(blk_add_trace_rq_complete, NULL);
988 WARN_ON(ret); 993 WARN_ON(ret);
989 ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL); 994 ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL);
990 WARN_ON(ret); 995 WARN_ON(ret);
991 ret = register_trace_block_bio_complete(blk_add_trace_bio_complete, NULL); 996 ret = register_trace_block_bio_complete(blk_add_trace_bio_complete, NULL);
992 WARN_ON(ret); 997 WARN_ON(ret);
993 ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge, NULL); 998 ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge, NULL);
994 WARN_ON(ret); 999 WARN_ON(ret);
995 ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL); 1000 ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL);
996 WARN_ON(ret); 1001 WARN_ON(ret);
997 ret = register_trace_block_bio_queue(blk_add_trace_bio_queue, NULL); 1002 ret = register_trace_block_bio_queue(blk_add_trace_bio_queue, NULL);
998 WARN_ON(ret); 1003 WARN_ON(ret);
999 ret = register_trace_block_getrq(blk_add_trace_getrq, NULL); 1004 ret = register_trace_block_getrq(blk_add_trace_getrq, NULL);
1000 WARN_ON(ret); 1005 WARN_ON(ret);
1001 ret = register_trace_block_sleeprq(blk_add_trace_sleeprq, NULL); 1006 ret = register_trace_block_sleeprq(blk_add_trace_sleeprq, NULL);
1002 WARN_ON(ret); 1007 WARN_ON(ret);
1003 ret = register_trace_block_plug(blk_add_trace_plug, NULL); 1008 ret = register_trace_block_plug(blk_add_trace_plug, NULL);
1004 WARN_ON(ret); 1009 WARN_ON(ret);
1005 ret = register_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); 1010 ret = register_trace_block_unplug(blk_add_trace_unplug, NULL);
1006 WARN_ON(ret); 1011 WARN_ON(ret);
1007 ret = register_trace_block_split(blk_add_trace_split, NULL); 1012 ret = register_trace_block_split(blk_add_trace_split, NULL);
1008 WARN_ON(ret); 1013 WARN_ON(ret);
1009 ret = register_trace_block_bio_remap(blk_add_trace_bio_remap, NULL); 1014 ret = register_trace_block_bio_remap(blk_add_trace_bio_remap, NULL);
1010 WARN_ON(ret); 1015 WARN_ON(ret);
1011 ret = register_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); 1016 ret = register_trace_block_rq_remap(blk_add_trace_rq_remap, NULL);
1012 WARN_ON(ret); 1017 WARN_ON(ret);
1013 } 1018 }
1014 1019
1015 static void blk_unregister_tracepoints(void) 1020 static void blk_unregister_tracepoints(void)
1016 { 1021 {
1017 unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); 1022 unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL);
1018 unregister_trace_block_bio_remap(blk_add_trace_bio_remap, NULL); 1023 unregister_trace_block_bio_remap(blk_add_trace_bio_remap, NULL);
1019 unregister_trace_block_split(blk_add_trace_split, NULL); 1024 unregister_trace_block_split(blk_add_trace_split, NULL);
1020 unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); 1025 unregister_trace_block_unplug(blk_add_trace_unplug, NULL);
1021 unregister_trace_block_plug(blk_add_trace_plug, NULL); 1026 unregister_trace_block_plug(blk_add_trace_plug, NULL);
1022 unregister_trace_block_sleeprq(blk_add_trace_sleeprq, NULL); 1027 unregister_trace_block_sleeprq(blk_add_trace_sleeprq, NULL);
1023 unregister_trace_block_getrq(blk_add_trace_getrq, NULL); 1028 unregister_trace_block_getrq(blk_add_trace_getrq, NULL);
1024 unregister_trace_block_bio_queue(blk_add_trace_bio_queue, NULL); 1029 unregister_trace_block_bio_queue(blk_add_trace_bio_queue, NULL);
1025 unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL); 1030 unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL);
1026 unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge, NULL); 1031 unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge, NULL);
1027 unregister_trace_block_bio_complete(blk_add_trace_bio_complete, NULL); 1032 unregister_trace_block_bio_complete(blk_add_trace_bio_complete, NULL);
1028 unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL); 1033 unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL);
1029 unregister_trace_block_rq_complete(blk_add_trace_rq_complete, NULL); 1034 unregister_trace_block_rq_complete(blk_add_trace_rq_complete, NULL);
1030 unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL); 1035 unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL);
1031 unregister_trace_block_rq_issue(blk_add_trace_rq_issue, NULL); 1036 unregister_trace_block_rq_issue(blk_add_trace_rq_issue, NULL);
1032 unregister_trace_block_rq_insert(blk_add_trace_rq_insert, NULL); 1037 unregister_trace_block_rq_insert(blk_add_trace_rq_insert, NULL);
1033 unregister_trace_block_rq_abort(blk_add_trace_rq_abort, NULL); 1038 unregister_trace_block_rq_abort(blk_add_trace_rq_abort, NULL);
1034 1039
1035 tracepoint_synchronize_unregister(); 1040 tracepoint_synchronize_unregister();
1036 } 1041 }
1037 1042
1038 /* 1043 /*
1039 * struct blk_io_tracer formatting routines 1044 * struct blk_io_tracer formatting routines
1040 */ 1045 */
1041 1046
1042 static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) 1047 static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
1043 { 1048 {
1044 int i = 0; 1049 int i = 0;
1045 int tc = t->action >> BLK_TC_SHIFT; 1050 int tc = t->action >> BLK_TC_SHIFT;
1046 1051
1047 if (t->action == BLK_TN_MESSAGE) { 1052 if (t->action == BLK_TN_MESSAGE) {
1048 rwbs[i++] = 'N'; 1053 rwbs[i++] = 'N';
1049 goto out; 1054 goto out;
1050 } 1055 }
1051 1056
1052 if (tc & BLK_TC_DISCARD) 1057 if (tc & BLK_TC_DISCARD)
1053 rwbs[i++] = 'D'; 1058 rwbs[i++] = 'D';
1054 else if (tc & BLK_TC_WRITE) 1059 else if (tc & BLK_TC_WRITE)
1055 rwbs[i++] = 'W'; 1060 rwbs[i++] = 'W';
1056 else if (t->bytes) 1061 else if (t->bytes)
1057 rwbs[i++] = 'R'; 1062 rwbs[i++] = 'R';
1058 else 1063 else
1059 rwbs[i++] = 'N'; 1064 rwbs[i++] = 'N';
1060 1065
1061 if (tc & BLK_TC_AHEAD) 1066 if (tc & BLK_TC_AHEAD)
1062 rwbs[i++] = 'A'; 1067 rwbs[i++] = 'A';
1063 if (tc & BLK_TC_BARRIER) 1068 if (tc & BLK_TC_BARRIER)
1064 rwbs[i++] = 'B'; 1069 rwbs[i++] = 'B';
1065 if (tc & BLK_TC_SYNC) 1070 if (tc & BLK_TC_SYNC)
1066 rwbs[i++] = 'S'; 1071 rwbs[i++] = 'S';
1067 if (tc & BLK_TC_META) 1072 if (tc & BLK_TC_META)
1068 rwbs[i++] = 'M'; 1073 rwbs[i++] = 'M';
1069 out: 1074 out:
1070 rwbs[i] = '\0'; 1075 rwbs[i] = '\0';
1071 } 1076 }
1072 1077
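/*
 * Editorial note (not part of blktrace.c): fill_rwbs() condenses the trace
 * category bits into the short flag column of the output, e.g. "WS" for a
 * synchronous write, "RA" for a read-ahead, "D" for a discard and "N" for a
 * message or an action that moved no data.
 */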
1073 static inline 1078 static inline
1074 const struct blk_io_trace *te_blk_io_trace(const struct trace_entry *ent) 1079 const struct blk_io_trace *te_blk_io_trace(const struct trace_entry *ent)
1075 { 1080 {
1076 return (const struct blk_io_trace *)ent; 1081 return (const struct blk_io_trace *)ent;
1077 } 1082 }
1078 1083
1079 static inline const void *pdu_start(const struct trace_entry *ent) 1084 static inline const void *pdu_start(const struct trace_entry *ent)
1080 { 1085 {
1081 return te_blk_io_trace(ent) + 1; 1086 return te_blk_io_trace(ent) + 1;
1082 } 1087 }
1083 1088
1084 static inline u32 t_action(const struct trace_entry *ent) 1089 static inline u32 t_action(const struct trace_entry *ent)
1085 { 1090 {
1086 return te_blk_io_trace(ent)->action; 1091 return te_blk_io_trace(ent)->action;
1087 } 1092 }
1088 1093
1089 static inline u32 t_bytes(const struct trace_entry *ent) 1094 static inline u32 t_bytes(const struct trace_entry *ent)
1090 { 1095 {
1091 return te_blk_io_trace(ent)->bytes; 1096 return te_blk_io_trace(ent)->bytes;
1092 } 1097 }
1093 1098
1094 static inline u32 t_sec(const struct trace_entry *ent) 1099 static inline u32 t_sec(const struct trace_entry *ent)
1095 { 1100 {
1096 return te_blk_io_trace(ent)->bytes >> 9; 1101 return te_blk_io_trace(ent)->bytes >> 9;
1097 } 1102 }
1098 1103
1099 static inline unsigned long long t_sector(const struct trace_entry *ent) 1104 static inline unsigned long long t_sector(const struct trace_entry *ent)
1100 { 1105 {
1101 return te_blk_io_trace(ent)->sector; 1106 return te_blk_io_trace(ent)->sector;
1102 } 1107 }
1103 1108
1104 static inline __u16 t_error(const struct trace_entry *ent) 1109 static inline __u16 t_error(const struct trace_entry *ent)
1105 { 1110 {
1106 return te_blk_io_trace(ent)->error; 1111 return te_blk_io_trace(ent)->error;
1107 } 1112 }
1108 1113
1109 static __u64 get_pdu_int(const struct trace_entry *ent) 1114 static __u64 get_pdu_int(const struct trace_entry *ent)
1110 { 1115 {
1111 const __u64 *val = pdu_start(ent); 1116 const __u64 *val = pdu_start(ent);
1112 return be64_to_cpu(*val); 1117 return be64_to_cpu(*val);
1113 } 1118 }
1114 1119
1115 static void get_pdu_remap(const struct trace_entry *ent, 1120 static void get_pdu_remap(const struct trace_entry *ent,
1116 struct blk_io_trace_remap *r) 1121 struct blk_io_trace_remap *r)
1117 { 1122 {
1118 const struct blk_io_trace_remap *__r = pdu_start(ent); 1123 const struct blk_io_trace_remap *__r = pdu_start(ent);
1119 __u64 sector_from = __r->sector_from; 1124 __u64 sector_from = __r->sector_from;
1120 1125
1121 r->device_from = be32_to_cpu(__r->device_from); 1126 r->device_from = be32_to_cpu(__r->device_from);
1122 r->device_to = be32_to_cpu(__r->device_to); 1127 r->device_to = be32_to_cpu(__r->device_to);
1123 r->sector_from = be64_to_cpu(sector_from); 1128 r->sector_from = be64_to_cpu(sector_from);
1124 } 1129 }
1125 1130
1126 typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act); 1131 typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act);
1127 1132
1128 static int blk_log_action_classic(struct trace_iterator *iter, const char *act) 1133 static int blk_log_action_classic(struct trace_iterator *iter, const char *act)
1129 { 1134 {
1130 char rwbs[6]; 1135 char rwbs[6];
1131 unsigned long long ts = iter->ts; 1136 unsigned long long ts = iter->ts;
1132 unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC); 1137 unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC);
1133 unsigned secs = (unsigned long)ts; 1138 unsigned secs = (unsigned long)ts;
1134 const struct blk_io_trace *t = te_blk_io_trace(iter->ent); 1139 const struct blk_io_trace *t = te_blk_io_trace(iter->ent);
1135 1140
1136 fill_rwbs(rwbs, t); 1141 fill_rwbs(rwbs, t);
1137 1142
1138 return trace_seq_printf(&iter->seq, 1143 return trace_seq_printf(&iter->seq,
1139 "%3d,%-3d %2d %5d.%09lu %5u %2s %3s ", 1144 "%3d,%-3d %2d %5d.%09lu %5u %2s %3s ",
1140 MAJOR(t->device), MINOR(t->device), iter->cpu, 1145 MAJOR(t->device), MINOR(t->device), iter->cpu,
1141 secs, nsec_rem, iter->ent->pid, act, rwbs); 1146 secs, nsec_rem, iter->ent->pid, act, rwbs);
1142 } 1147 }
1143 1148
1144 static int blk_log_action(struct trace_iterator *iter, const char *act) 1149 static int blk_log_action(struct trace_iterator *iter, const char *act)
1145 { 1150 {
1146 char rwbs[6]; 1151 char rwbs[6];
1147 const struct blk_io_trace *t = te_blk_io_trace(iter->ent); 1152 const struct blk_io_trace *t = te_blk_io_trace(iter->ent);
1148 1153
1149 fill_rwbs(rwbs, t); 1154 fill_rwbs(rwbs, t);
1150 return trace_seq_printf(&iter->seq, "%3d,%-3d %2s %3s ", 1155 return trace_seq_printf(&iter->seq, "%3d,%-3d %2s %3s ",
1151 MAJOR(t->device), MINOR(t->device), act, rwbs); 1156 MAJOR(t->device), MINOR(t->device), act, rwbs);
1152 } 1157 }
1153 1158
1154 static int blk_log_dump_pdu(struct trace_seq *s, const struct trace_entry *ent) 1159 static int blk_log_dump_pdu(struct trace_seq *s, const struct trace_entry *ent)
1155 { 1160 {
1156 const unsigned char *pdu_buf; 1161 const unsigned char *pdu_buf;
1157 int pdu_len; 1162 int pdu_len;
1158 int i, end, ret; 1163 int i, end, ret;
1159 1164
1160 pdu_buf = pdu_start(ent); 1165 pdu_buf = pdu_start(ent);
1161 pdu_len = te_blk_io_trace(ent)->pdu_len; 1166 pdu_len = te_blk_io_trace(ent)->pdu_len;
1162 1167
1163 if (!pdu_len) 1168 if (!pdu_len)
1164 return 1; 1169 return 1;
1165 1170
1166 /* find the last zero that needs to be printed */ 1171 /* find the last zero that needs to be printed */
1167 for (end = pdu_len - 1; end >= 0; end--) 1172 for (end = pdu_len - 1; end >= 0; end--)
1168 if (pdu_buf[end]) 1173 if (pdu_buf[end])
1169 break; 1174 break;
1170 end++; 1175 end++;
1171 1176
1172 if (!trace_seq_putc(s, '(')) 1177 if (!trace_seq_putc(s, '('))
1173 return 0; 1178 return 0;
1174 1179
1175 for (i = 0; i < pdu_len; i++) { 1180 for (i = 0; i < pdu_len; i++) {
1176 1181
1177 ret = trace_seq_printf(s, "%s%02x", 1182 ret = trace_seq_printf(s, "%s%02x",
1178 i == 0 ? "" : " ", pdu_buf[i]); 1183 i == 0 ? "" : " ", pdu_buf[i]);
1179 if (!ret) 1184 if (!ret)
1180 return ret; 1185 return ret;
1181 1186
1182 /* 1187 /*
1183 * stop when the rest is just zeroes and indicate so 1188 * stop when the rest is just zeroes and indicate so
1184 * with a ".." appended 1189 * with a ".." appended
1185 */ 1190 */
1186 if (i == end && end != pdu_len - 1) 1191 if (i == end && end != pdu_len - 1)
1187 return trace_seq_puts(s, " ..) "); 1192 return trace_seq_puts(s, " ..) ");
1188 } 1193 }
1189 1194
1190 return trace_seq_puts(s, ") "); 1195 return trace_seq_puts(s, ") ");
1191 } 1196 }
1192 1197
1193 static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent) 1198 static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent)
1194 { 1199 {
1195 char cmd[TASK_COMM_LEN]; 1200 char cmd[TASK_COMM_LEN];
1196 1201
1197 trace_find_cmdline(ent->pid, cmd); 1202 trace_find_cmdline(ent->pid, cmd);
1198 1203
1199 if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) { 1204 if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) {
1200 int ret; 1205 int ret;
1201 1206
1202 ret = trace_seq_printf(s, "%u ", t_bytes(ent)); 1207 ret = trace_seq_printf(s, "%u ", t_bytes(ent));
1203 if (!ret) 1208 if (!ret)
1204 return 0; 1209 return 0;
1205 ret = blk_log_dump_pdu(s, ent); 1210 ret = blk_log_dump_pdu(s, ent);
1206 if (!ret) 1211 if (!ret)
1207 return 0; 1212 return 0;
1208 return trace_seq_printf(s, "[%s]\n", cmd); 1213 return trace_seq_printf(s, "[%s]\n", cmd);
1209 } else { 1214 } else {
1210 if (t_sec(ent)) 1215 if (t_sec(ent))
1211 return trace_seq_printf(s, "%llu + %u [%s]\n", 1216 return trace_seq_printf(s, "%llu + %u [%s]\n",
1212 t_sector(ent), t_sec(ent), cmd); 1217 t_sector(ent), t_sec(ent), cmd);
1213 return trace_seq_printf(s, "[%s]\n", cmd); 1218 return trace_seq_printf(s, "[%s]\n", cmd);
1214 } 1219 }
1215 } 1220 }
1216 1221
1217 static int blk_log_with_error(struct trace_seq *s, 1222 static int blk_log_with_error(struct trace_seq *s,
1218 const struct trace_entry *ent) 1223 const struct trace_entry *ent)
1219 { 1224 {
1220 if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) { 1225 if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) {
1221 int ret; 1226 int ret;
1222 1227
1223 ret = blk_log_dump_pdu(s, ent); 1228 ret = blk_log_dump_pdu(s, ent);
1224 if (ret) 1229 if (ret)
1225 return trace_seq_printf(s, "[%d]\n", t_error(ent)); 1230 return trace_seq_printf(s, "[%d]\n", t_error(ent));
1226 return 0; 1231 return 0;
1227 } else { 1232 } else {
1228 if (t_sec(ent)) 1233 if (t_sec(ent))
1229 return trace_seq_printf(s, "%llu + %u [%d]\n", 1234 return trace_seq_printf(s, "%llu + %u [%d]\n",
1230 t_sector(ent), 1235 t_sector(ent),
1231 t_sec(ent), t_error(ent)); 1236 t_sec(ent), t_error(ent));
1232 return trace_seq_printf(s, "%llu [%d]\n", 1237 return trace_seq_printf(s, "%llu [%d]\n",
1233 t_sector(ent), t_error(ent)); 1238 t_sector(ent), t_error(ent));
1234 } 1239 }
1235 } 1240 }
1236 1241
1237 static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent) 1242 static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent)
1238 { 1243 {
1239 struct blk_io_trace_remap r = { .device_from = 0, }; 1244 struct blk_io_trace_remap r = { .device_from = 0, };
1240 1245
1241 get_pdu_remap(ent, &r); 1246 get_pdu_remap(ent, &r);
1242 return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n", 1247 return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n",
1243 t_sector(ent), t_sec(ent), 1248 t_sector(ent), t_sec(ent),
1244 MAJOR(r.device_from), MINOR(r.device_from), 1249 MAJOR(r.device_from), MINOR(r.device_from),
1245 (unsigned long long)r.sector_from); 1250 (unsigned long long)r.sector_from);
1246 } 1251 }
1247 1252
1248 static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent) 1253 static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent)
1249 { 1254 {
1250 char cmd[TASK_COMM_LEN]; 1255 char cmd[TASK_COMM_LEN];
1251 1256
1252 trace_find_cmdline(ent->pid, cmd); 1257 trace_find_cmdline(ent->pid, cmd);
1253 1258
1254 return trace_seq_printf(s, "[%s]\n", cmd); 1259 return trace_seq_printf(s, "[%s]\n", cmd);
1255 } 1260 }
1256 1261
1257 static int blk_log_unplug(struct trace_seq *s, const struct trace_entry *ent) 1262 static int blk_log_unplug(struct trace_seq *s, const struct trace_entry *ent)
1258 { 1263 {
1259 char cmd[TASK_COMM_LEN]; 1264 char cmd[TASK_COMM_LEN];
1260 1265
1261 trace_find_cmdline(ent->pid, cmd); 1266 trace_find_cmdline(ent->pid, cmd);
1262 1267
1263 return trace_seq_printf(s, "[%s] %llu\n", cmd, get_pdu_int(ent)); 1268 return trace_seq_printf(s, "[%s] %llu\n", cmd, get_pdu_int(ent));
1264 } 1269 }
1265 1270
1266 static int blk_log_split(struct trace_seq *s, const struct trace_entry *ent) 1271 static int blk_log_split(struct trace_seq *s, const struct trace_entry *ent)
1267 { 1272 {
1268 char cmd[TASK_COMM_LEN]; 1273 char cmd[TASK_COMM_LEN];
1269 1274
1270 trace_find_cmdline(ent->pid, cmd); 1275 trace_find_cmdline(ent->pid, cmd);
1271 1276
1272 return trace_seq_printf(s, "%llu / %llu [%s]\n", t_sector(ent), 1277 return trace_seq_printf(s, "%llu / %llu [%s]\n", t_sector(ent),
1273 get_pdu_int(ent), cmd); 1278 get_pdu_int(ent), cmd);
1274 } 1279 }
1275 1280
1276 static int blk_log_msg(struct trace_seq *s, const struct trace_entry *ent) 1281 static int blk_log_msg(struct trace_seq *s, const struct trace_entry *ent)
1277 { 1282 {
1278 int ret; 1283 int ret;
1279 const struct blk_io_trace *t = te_blk_io_trace(ent); 1284 const struct blk_io_trace *t = te_blk_io_trace(ent);
1280 1285
1281 ret = trace_seq_putmem(s, t + 1, t->pdu_len); 1286 ret = trace_seq_putmem(s, t + 1, t->pdu_len);
1282 if (ret) 1287 if (ret)
1283 return trace_seq_putc(s, '\n'); 1288 return trace_seq_putc(s, '\n');
1284 return ret; 1289 return ret;
1285 } 1290 }
1286 1291
1287 /* 1292 /*
1288 * struct tracer operations 1293 * struct tracer operations
1289 */ 1294 */
1290 1295
1291 static void blk_tracer_print_header(struct seq_file *m) 1296 static void blk_tracer_print_header(struct seq_file *m)
1292 { 1297 {
1293 if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) 1298 if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC))
1294 return; 1299 return;
1295 seq_puts(m, "# DEV CPU TIMESTAMP PID ACT FLG\n" 1300 seq_puts(m, "# DEV CPU TIMESTAMP PID ACT FLG\n"
1296 "# | | | | | |\n"); 1301 "# | | | | | |\n");
1297 } 1302 }
1298 1303
1299 static void blk_tracer_start(struct trace_array *tr) 1304 static void blk_tracer_start(struct trace_array *tr)
1300 { 1305 {
1301 blk_tracer_enabled = true; 1306 blk_tracer_enabled = true;
1302 } 1307 }
1303 1308
1304 static int blk_tracer_init(struct trace_array *tr) 1309 static int blk_tracer_init(struct trace_array *tr)
1305 { 1310 {
1306 blk_tr = tr; 1311 blk_tr = tr;
1307 blk_tracer_start(tr); 1312 blk_tracer_start(tr);
1308 return 0; 1313 return 0;
1309 } 1314 }
1310 1315
1311 static void blk_tracer_stop(struct trace_array *tr) 1316 static void blk_tracer_stop(struct trace_array *tr)
1312 { 1317 {
1313 blk_tracer_enabled = false; 1318 blk_tracer_enabled = false;
1314 } 1319 }
1315 1320
1316 static void blk_tracer_reset(struct trace_array *tr) 1321 static void blk_tracer_reset(struct trace_array *tr)
1317 { 1322 {
1318 blk_tracer_stop(tr); 1323 blk_tracer_stop(tr);
1319 } 1324 }
1320 1325
1321 static const struct { 1326 static const struct {
1322 const char *act[2]; 1327 const char *act[2];
1323 int (*print)(struct trace_seq *s, const struct trace_entry *ent); 1328 int (*print)(struct trace_seq *s, const struct trace_entry *ent);
1324 } what2act[] = { 1329 } what2act[] = {
1325 [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic }, 1330 [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic },
1326 [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic }, 1331 [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic },
1327 [__BLK_TA_FRONTMERGE] = {{ "F", "frontmerge" }, blk_log_generic }, 1332 [__BLK_TA_FRONTMERGE] = {{ "F", "frontmerge" }, blk_log_generic },
1328 [__BLK_TA_GETRQ] = {{ "G", "getrq" }, blk_log_generic }, 1333 [__BLK_TA_GETRQ] = {{ "G", "getrq" }, blk_log_generic },
1329 [__BLK_TA_SLEEPRQ] = {{ "S", "sleeprq" }, blk_log_generic }, 1334 [__BLK_TA_SLEEPRQ] = {{ "S", "sleeprq" }, blk_log_generic },
1330 [__BLK_TA_REQUEUE] = {{ "R", "requeue" }, blk_log_with_error }, 1335 [__BLK_TA_REQUEUE] = {{ "R", "requeue" }, blk_log_with_error },
1331 [__BLK_TA_ISSUE] = {{ "D", "issue" }, blk_log_generic }, 1336 [__BLK_TA_ISSUE] = {{ "D", "issue" }, blk_log_generic },
1332 [__BLK_TA_COMPLETE] = {{ "C", "complete" }, blk_log_with_error }, 1337 [__BLK_TA_COMPLETE] = {{ "C", "complete" }, blk_log_with_error },
1333 [__BLK_TA_PLUG] = {{ "P", "plug" }, blk_log_plug }, 1338 [__BLK_TA_PLUG] = {{ "P", "plug" }, blk_log_plug },
1334 [__BLK_TA_UNPLUG_IO] = {{ "U", "unplug_io" }, blk_log_unplug }, 1339 [__BLK_TA_UNPLUG_IO] = {{ "U", "unplug_io" }, blk_log_unplug },
1340 [__BLK_TA_UNPLUG_TIMER] = {{ "UT", "unplug_timer" }, blk_log_unplug },
1335 [__BLK_TA_INSERT] = {{ "I", "insert" }, blk_log_generic }, 1341 [__BLK_TA_INSERT] = {{ "I", "insert" }, blk_log_generic },
1336 [__BLK_TA_SPLIT] = {{ "X", "split" }, blk_log_split }, 1342 [__BLK_TA_SPLIT] = {{ "X", "split" }, blk_log_split },
1337 [__BLK_TA_BOUNCE] = {{ "B", "bounce" }, blk_log_generic }, 1343 [__BLK_TA_BOUNCE] = {{ "B", "bounce" }, blk_log_generic },
1338 [__BLK_TA_REMAP] = {{ "A", "remap" }, blk_log_remap }, 1344 [__BLK_TA_REMAP] = {{ "A", "remap" }, blk_log_remap },
1339 }; 1345 };
1340 1346
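/*
 * Editorial note (not part of blktrace.c): what2act[] is indexed by the low
 * action bits, t->action & ((1 << BLK_TC_SHIFT) - 1), in print_one_line()
 * below; act[0] is the terse code used by the classic format and act[1] the
 * verbose name printed with TRACE_ITER_VERBOSE. The restored
 * __BLK_TA_UNPLUG_TIMER entry keeps implicit (schedule-time) unplug events
 * printable now that blk_add_trace_unplug() can emit them again.
 */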
1341 static enum print_line_t print_one_line(struct trace_iterator *iter, 1347 static enum print_line_t print_one_line(struct trace_iterator *iter,
1342 bool classic) 1348 bool classic)
1343 { 1349 {
1344 struct trace_seq *s = &iter->seq; 1350 struct trace_seq *s = &iter->seq;
1345 const struct blk_io_trace *t; 1351 const struct blk_io_trace *t;
1346 u16 what; 1352 u16 what;
1347 int ret; 1353 int ret;
1348 bool long_act; 1354 bool long_act;
1349 blk_log_action_t *log_action; 1355 blk_log_action_t *log_action;
1350 1356
1351 t = te_blk_io_trace(iter->ent); 1357 t = te_blk_io_trace(iter->ent);
1352 what = t->action & ((1 << BLK_TC_SHIFT) - 1); 1358 what = t->action & ((1 << BLK_TC_SHIFT) - 1);
1353 long_act = !!(trace_flags & TRACE_ITER_VERBOSE); 1359 long_act = !!(trace_flags & TRACE_ITER_VERBOSE);
1354 log_action = classic ? &blk_log_action_classic : &blk_log_action; 1360 log_action = classic ? &blk_log_action_classic : &blk_log_action;
1355 1361
1356 if (t->action == BLK_TN_MESSAGE) { 1362 if (t->action == BLK_TN_MESSAGE) {
1357 ret = log_action(iter, long_act ? "message" : "m"); 1363 ret = log_action(iter, long_act ? "message" : "m");
1358 if (ret) 1364 if (ret)
1359 ret = blk_log_msg(s, iter->ent); 1365 ret = blk_log_msg(s, iter->ent);
1360 goto out; 1366 goto out;
1361 } 1367 }
1362 1368
1363 if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act))) 1369 if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act)))
1364 ret = trace_seq_printf(s, "Unknown action %x\n", what); 1370 ret = trace_seq_printf(s, "Unknown action %x\n", what);
1365 else { 1371 else {
1366 ret = log_action(iter, what2act[what].act[long_act]); 1372 ret = log_action(iter, what2act[what].act[long_act]);
1367 if (ret) 1373 if (ret)
1368 ret = what2act[what].print(s, iter->ent); 1374 ret = what2act[what].print(s, iter->ent);
1369 } 1375 }
1370 out: 1376 out:
1371 return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; 1377 return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
1372 } 1378 }
1373 1379
1374 static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, 1380 static enum print_line_t blk_trace_event_print(struct trace_iterator *iter,
1375 int flags, struct trace_event *event) 1381 int flags, struct trace_event *event)
1376 { 1382 {
1377 return print_one_line(iter, false); 1383 return print_one_line(iter, false);
1378 } 1384 }
1379 1385
1380 static int blk_trace_synthesize_old_trace(struct trace_iterator *iter) 1386 static int blk_trace_synthesize_old_trace(struct trace_iterator *iter)
1381 { 1387 {
1382 struct trace_seq *s = &iter->seq; 1388 struct trace_seq *s = &iter->seq;
1383 struct blk_io_trace *t = (struct blk_io_trace *)iter->ent; 1389 struct blk_io_trace *t = (struct blk_io_trace *)iter->ent;
1384 const int offset = offsetof(struct blk_io_trace, sector); 1390 const int offset = offsetof(struct blk_io_trace, sector);
1385 struct blk_io_trace old = { 1391 struct blk_io_trace old = {
1386 .magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION, 1392 .magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION,
1387 .time = iter->ts, 1393 .time = iter->ts,
1388 }; 1394 };
1389 1395
1390 if (!trace_seq_putmem(s, &old, offset)) 1396 if (!trace_seq_putmem(s, &old, offset))
1391 return 0; 1397 return 0;
1392 return trace_seq_putmem(s, &t->sector, 1398 return trace_seq_putmem(s, &t->sector,
1393 sizeof(old) - offset + t->pdu_len); 1399 sizeof(old) - offset + t->pdu_len);
1394 } 1400 }
1395 1401
1396 static enum print_line_t 1402 static enum print_line_t
1397 blk_trace_event_print_binary(struct trace_iterator *iter, int flags, 1403 blk_trace_event_print_binary(struct trace_iterator *iter, int flags,
1398 struct trace_event *event) 1404 struct trace_event *event)
1399 { 1405 {
1400 return blk_trace_synthesize_old_trace(iter) ? 1406 return blk_trace_synthesize_old_trace(iter) ?
1401 TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; 1407 TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
1402 } 1408 }
1403 1409
1404 static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter) 1410 static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter)
1405 { 1411 {
1406 if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) 1412 if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC))
1407 return TRACE_TYPE_UNHANDLED; 1413 return TRACE_TYPE_UNHANDLED;
1408 1414
1409 return print_one_line(iter, true); 1415 return print_one_line(iter, true);
1410 } 1416 }
1411 1417
1412 static int blk_tracer_set_flag(u32 old_flags, u32 bit, int set) 1418 static int blk_tracer_set_flag(u32 old_flags, u32 bit, int set)
1413 { 1419 {
1414 /* don't output context-info for blk_classic output */ 1420 /* don't output context-info for blk_classic output */
1415 if (bit == TRACE_BLK_OPT_CLASSIC) { 1421 if (bit == TRACE_BLK_OPT_CLASSIC) {
1416 if (set) 1422 if (set)
1417 trace_flags &= ~TRACE_ITER_CONTEXT_INFO; 1423 trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
1418 else 1424 else
1419 trace_flags |= TRACE_ITER_CONTEXT_INFO; 1425 trace_flags |= TRACE_ITER_CONTEXT_INFO;
1420 } 1426 }
1421 return 0; 1427 return 0;
1422 } 1428 }
1423 1429
1424 static struct tracer blk_tracer __read_mostly = { 1430 static struct tracer blk_tracer __read_mostly = {
1425 .name = "blk", 1431 .name = "blk",
1426 .init = blk_tracer_init, 1432 .init = blk_tracer_init,
1427 .reset = blk_tracer_reset, 1433 .reset = blk_tracer_reset,
1428 .start = blk_tracer_start, 1434 .start = blk_tracer_start,
1429 .stop = blk_tracer_stop, 1435 .stop = blk_tracer_stop,
1430 .print_header = blk_tracer_print_header, 1436 .print_header = blk_tracer_print_header,
1431 .print_line = blk_tracer_print_line, 1437 .print_line = blk_tracer_print_line,
1432 .flags = &blk_tracer_flags, 1438 .flags = &blk_tracer_flags,
1433 .set_flag = blk_tracer_set_flag, 1439 .set_flag = blk_tracer_set_flag,
1434 }; 1440 };
1435 1441
1436 static struct trace_event_functions trace_blk_event_funcs = { 1442 static struct trace_event_functions trace_blk_event_funcs = {
1437 .trace = blk_trace_event_print, 1443 .trace = blk_trace_event_print,
1438 .binary = blk_trace_event_print_binary, 1444 .binary = blk_trace_event_print_binary,
1439 }; 1445 };
1440 1446
1441 static struct trace_event trace_blk_event = { 1447 static struct trace_event trace_blk_event = {
1442 .type = TRACE_BLK, 1448 .type = TRACE_BLK,
1443 .funcs = &trace_blk_event_funcs, 1449 .funcs = &trace_blk_event_funcs,
1444 }; 1450 };
1445 1451
1446 static int __init init_blk_tracer(void) 1452 static int __init init_blk_tracer(void)
1447 { 1453 {
1448 if (!register_ftrace_event(&trace_blk_event)) { 1454 if (!register_ftrace_event(&trace_blk_event)) {
1449 pr_warning("Warning: could not register block events\n"); 1455 pr_warning("Warning: could not register block events\n");
1450 return 1; 1456 return 1;
1451 } 1457 }
1452 1458
1453 if (register_tracer(&blk_tracer) != 0) { 1459 if (register_tracer(&blk_tracer) != 0) {
1454 pr_warning("Warning: could not register the block tracer\n"); 1460 pr_warning("Warning: could not register the block tracer\n");
1455 unregister_ftrace_event(&trace_blk_event); 1461 unregister_ftrace_event(&trace_blk_event);
1456 return 1; 1462 return 1;
1457 } 1463 }
1458 1464
1459 return 0; 1465 return 0;
1460 } 1466 }
1461 1467
1462 device_initcall(init_blk_tracer); 1468 device_initcall(init_blk_tracer);
1463 1469
1464 static int blk_trace_remove_queue(struct request_queue *q) 1470 static int blk_trace_remove_queue(struct request_queue *q)
1465 { 1471 {
1466 struct blk_trace *bt; 1472 struct blk_trace *bt;
1467 1473
1468 bt = xchg(&q->blk_trace, NULL); 1474 bt = xchg(&q->blk_trace, NULL);
1469 if (bt == NULL) 1475 if (bt == NULL)
1470 return -EINVAL; 1476 return -EINVAL;
1471 1477
1472 if (atomic_dec_and_test(&blk_probes_ref)) 1478 if (atomic_dec_and_test(&blk_probes_ref))
1473 blk_unregister_tracepoints(); 1479 blk_unregister_tracepoints();
1474 1480
1475 blk_trace_free(bt); 1481 blk_trace_free(bt);
1476 return 0; 1482 return 0;
1477 } 1483 }
1478 1484
1479 /* 1485 /*
1480 * Setup everything required to start tracing 1486 * Setup everything required to start tracing
1481 */ 1487 */
1482 static int blk_trace_setup_queue(struct request_queue *q, 1488 static int blk_trace_setup_queue(struct request_queue *q,
1483 struct block_device *bdev) 1489 struct block_device *bdev)
1484 { 1490 {
1485 struct blk_trace *old_bt, *bt = NULL; 1491 struct blk_trace *old_bt, *bt = NULL;
1486 int ret = -ENOMEM; 1492 int ret = -ENOMEM;
1487 1493
1488 bt = kzalloc(sizeof(*bt), GFP_KERNEL); 1494 bt = kzalloc(sizeof(*bt), GFP_KERNEL);
1489 if (!bt) 1495 if (!bt)
1490 return -ENOMEM; 1496 return -ENOMEM;
1491 1497
1492 bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char)); 1498 bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char));
1493 if (!bt->msg_data) 1499 if (!bt->msg_data)
1494 goto free_bt; 1500 goto free_bt;
1495 1501
1496 bt->dev = bdev->bd_dev; 1502 bt->dev = bdev->bd_dev;
1497 bt->act_mask = (u16)-1; 1503 bt->act_mask = (u16)-1;
1498 1504
1499 blk_trace_setup_lba(bt, bdev); 1505 blk_trace_setup_lba(bt, bdev);
1500 1506
1501 old_bt = xchg(&q->blk_trace, bt); 1507 old_bt = xchg(&q->blk_trace, bt);
1502 if (old_bt != NULL) { 1508 if (old_bt != NULL) {
1503 (void)xchg(&q->blk_trace, old_bt); 1509 (void)xchg(&q->blk_trace, old_bt);
1504 ret = -EBUSY; 1510 ret = -EBUSY;
1505 goto free_bt; 1511 goto free_bt;
1506 } 1512 }
1507 1513
1508 if (atomic_inc_return(&blk_probes_ref) == 1) 1514 if (atomic_inc_return(&blk_probes_ref) == 1)
1509 blk_register_tracepoints(); 1515 blk_register_tracepoints();
1510 return 0; 1516 return 0;
1511 1517
1512 free_bt: 1518 free_bt:
1513 blk_trace_free(bt); 1519 blk_trace_free(bt);
1514 return ret; 1520 return ret;
1515 } 1521 }
1516 1522
1517 /* 1523 /*
1518 * sysfs interface to enable and configure tracing 1524 * sysfs interface to enable and configure tracing
1519 */ 1525 */
1520 1526
1521 static ssize_t sysfs_blk_trace_attr_show(struct device *dev, 1527 static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
1522 struct device_attribute *attr, 1528 struct device_attribute *attr,
1523 char *buf); 1529 char *buf);
1524 static ssize_t sysfs_blk_trace_attr_store(struct device *dev, 1530 static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
1525 struct device_attribute *attr, 1531 struct device_attribute *attr,
1526 const char *buf, size_t count); 1532 const char *buf, size_t count);
1527 #define BLK_TRACE_DEVICE_ATTR(_name) \ 1533 #define BLK_TRACE_DEVICE_ATTR(_name) \
1528 DEVICE_ATTR(_name, S_IRUGO | S_IWUSR, \ 1534 DEVICE_ATTR(_name, S_IRUGO | S_IWUSR, \
1529 sysfs_blk_trace_attr_show, \ 1535 sysfs_blk_trace_attr_show, \
1530 sysfs_blk_trace_attr_store) 1536 sysfs_blk_trace_attr_store)
1531 1537
1532 static BLK_TRACE_DEVICE_ATTR(enable); 1538 static BLK_TRACE_DEVICE_ATTR(enable);
1533 static BLK_TRACE_DEVICE_ATTR(act_mask); 1539 static BLK_TRACE_DEVICE_ATTR(act_mask);
1534 static BLK_TRACE_DEVICE_ATTR(pid); 1540 static BLK_TRACE_DEVICE_ATTR(pid);
1535 static BLK_TRACE_DEVICE_ATTR(start_lba); 1541 static BLK_TRACE_DEVICE_ATTR(start_lba);
1536 static BLK_TRACE_DEVICE_ATTR(end_lba); 1542 static BLK_TRACE_DEVICE_ATTR(end_lba);
1537 1543
1538 static struct attribute *blk_trace_attrs[] = { 1544 static struct attribute *blk_trace_attrs[] = {
1539 &dev_attr_enable.attr, 1545 &dev_attr_enable.attr,
1540 &dev_attr_act_mask.attr, 1546 &dev_attr_act_mask.attr,
1541 &dev_attr_pid.attr, 1547 &dev_attr_pid.attr,
1542 &dev_attr_start_lba.attr, 1548 &dev_attr_start_lba.attr,
1543 &dev_attr_end_lba.attr, 1549 &dev_attr_end_lba.attr,
1544 NULL 1550 NULL
1545 }; 1551 };
1546 1552
1547 struct attribute_group blk_trace_attr_group = { 1553 struct attribute_group blk_trace_attr_group = {
1548 .name = "trace", 1554 .name = "trace",
1549 .attrs = blk_trace_attrs, 1555 .attrs = blk_trace_attrs,
1550 }; 1556 };
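
Because the attribute group is named "trace", registering it via blk_trace_init_sysfs() (further down) makes the five files appear under the block device's sysfs directory, e.g. /sys/block/<disk>/trace/. A hedged usage sketch follows, assuming a placeholder disk "sda", root privileges and CONFIG_BLK_DEV_IO_TRACE=y.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_attr(const char *path, const char *val)
{
        int fd = open(path, O_WRONLY);

        if (fd < 0) {
                perror(path);
                return -1;
        }
        if (write(fd, val, strlen(val)) < 0)
                perror("write");
        close(fd);
        return 0;
}

int main(void)
{
        /* Writing "1" reaches sysfs_blk_trace_attr_store() and, from there,
         * blk_trace_setup_queue(); writing "0" tears the tracer back down. */
        return write_attr("/sys/block/sda/trace/enable", "1");
}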
1551 1557
1552 static const struct { 1558 static const struct {
1553 int mask; 1559 int mask;
1554 const char *str; 1560 const char *str;
1555 } mask_maps[] = { 1561 } mask_maps[] = {
1556 { BLK_TC_READ, "read" }, 1562 { BLK_TC_READ, "read" },
1557 { BLK_TC_WRITE, "write" }, 1563 { BLK_TC_WRITE, "write" },
1558 { BLK_TC_BARRIER, "barrier" }, 1564 { BLK_TC_BARRIER, "barrier" },
1559 { BLK_TC_SYNC, "sync" }, 1565 { BLK_TC_SYNC, "sync" },
1560 { BLK_TC_QUEUE, "queue" }, 1566 { BLK_TC_QUEUE, "queue" },
1561 { BLK_TC_REQUEUE, "requeue" }, 1567 { BLK_TC_REQUEUE, "requeue" },
1562 { BLK_TC_ISSUE, "issue" }, 1568 { BLK_TC_ISSUE, "issue" },
1563 { BLK_TC_COMPLETE, "complete" }, 1569 { BLK_TC_COMPLETE, "complete" },
1564 { BLK_TC_FS, "fs" }, 1570 { BLK_TC_FS, "fs" },
1565 { BLK_TC_PC, "pc" }, 1571 { BLK_TC_PC, "pc" },
1566 { BLK_TC_AHEAD, "ahead" }, 1572 { BLK_TC_AHEAD, "ahead" },
1567 { BLK_TC_META, "meta" }, 1573 { BLK_TC_META, "meta" },
1568 { BLK_TC_DISCARD, "discard" }, 1574 { BLK_TC_DISCARD, "discard" },
1569 { BLK_TC_DRV_DATA, "drv_data" }, 1575 { BLK_TC_DRV_DATA, "drv_data" },
1570 }; 1576 };
1571 1577
1572 static int blk_trace_str2mask(const char *str) 1578 static int blk_trace_str2mask(const char *str)
1573 { 1579 {
1574 int i; 1580 int i;
1575 int mask = 0; 1581 int mask = 0;
1576 char *buf, *s, *token; 1582 char *buf, *s, *token;
1577 1583
1578 buf = kstrdup(str, GFP_KERNEL); 1584 buf = kstrdup(str, GFP_KERNEL);
1579 if (buf == NULL) 1585 if (buf == NULL)
1580 return -ENOMEM; 1586 return -ENOMEM;
1581 s = strstrip(buf); 1587 s = strstrip(buf);
1582 1588
1583 while (1) { 1589 while (1) {
1584 token = strsep(&s, ","); 1590 token = strsep(&s, ",");
1585 if (token == NULL) 1591 if (token == NULL)
1586 break; 1592 break;
1587 1593
1588 if (*token == '\0') 1594 if (*token == '\0')
1589 continue; 1595 continue;
1590 1596
1591 for (i = 0; i < ARRAY_SIZE(mask_maps); i++) { 1597 for (i = 0; i < ARRAY_SIZE(mask_maps); i++) {
1592 if (strcasecmp(token, mask_maps[i].str) == 0) { 1598 if (strcasecmp(token, mask_maps[i].str) == 0) {
1593 mask |= mask_maps[i].mask; 1599 mask |= mask_maps[i].mask;
1594 break; 1600 break;
1595 } 1601 }
1596 } 1602 }
1597 if (i == ARRAY_SIZE(mask_maps)) { 1603 if (i == ARRAY_SIZE(mask_maps)) {
1598 mask = -EINVAL; 1604 mask = -EINVAL;
1599 break; 1605 break;
1600 } 1606 }
1601 } 1607 }
1602 kfree(buf); 1608 kfree(buf);
1603 1609
1604 return mask; 1610 return mask;
1605 } 1611 }
1606 1612
1607 static ssize_t blk_trace_mask2str(char *buf, int mask) 1613 static ssize_t blk_trace_mask2str(char *buf, int mask)
1608 { 1614 {
1609 int i; 1615 int i;
1610 char *p = buf; 1616 char *p = buf;
1611 1617
1612 for (i = 0; i < ARRAY_SIZE(mask_maps); i++) { 1618 for (i = 0; i < ARRAY_SIZE(mask_maps); i++) {
1613 if (mask & mask_maps[i].mask) { 1619 if (mask & mask_maps[i].mask) {
1614 p += sprintf(p, "%s%s", 1620 p += sprintf(p, "%s%s",
1615 (p == buf) ? "" : ",", mask_maps[i].str); 1621 (p == buf) ? "" : ",", mask_maps[i].str);
1616 } 1622 }
1617 } 1623 }
1618 *p++ = '\n'; 1624 *p++ = '\n';
1619 1625
1620 return p - buf; 1626 return p - buf;
1621 } 1627 }
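
blk_trace_str2mask() and blk_trace_mask2str() are inverses over the mask_maps[] table: the first strsep()s a comma-separated, case-insensitive list of category names into an OR of BLK_TC_* bits (unknown names yield -EINVAL), the second prints the set bits back as a comma-separated list. The following is a small userspace round-trip mirror with a trimmed-down table; the bit values are arbitrary stand-ins, not the real BLK_TC_* constants, and the kernel version additionally strstrip()s the whole input and terminates the output with '\n' rather than '\0'.

#define _DEFAULT_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

static const struct {
        int mask;
        const char *str;
} maps[] = {
        { 1 << 0, "read"  },
        { 1 << 1, "write" },
        { 1 << 2, "sync"  },
};
#define NMAPS ((int)(sizeof(maps) / sizeof(maps[0])))

static int str2mask(const char *str)
{
        char *buf = strdup(str), *s, *token;
        int i, mask = 0;

        if (buf == NULL)
                return -1;
        s = buf;

        while ((token = strsep(&s, ",")) != NULL) {
                if (*token == '\0')             /* tolerate empty items */
                        continue;
                for (i = 0; i < NMAPS; i++) {
                        if (strcasecmp(token, maps[i].str) == 0) {
                                mask |= maps[i].mask;
                                break;
                        }
                }
                if (i == NMAPS) {               /* unknown category name */
                        mask = -1;
                        break;
                }
        }
        free(buf);
        return mask;
}

static void mask2str(char *p, int mask)
{
        char *start = p;
        int i;

        for (i = 0; i < NMAPS; i++) {
                if (mask & maps[i].mask)
                        p += sprintf(p, "%s%s", p == start ? "" : ",",
                                     maps[i].str);
        }
        *p = '\0';                              /* kernel appends '\n' here */
}

int main(void)
{
        char out[64];
        int mask = str2mask("Read,SYNC");       /* case-insensitive, as above */

        mask2str(out, mask);
        printf("mask=%d -> \"%s\"\n", mask, out);   /* mask=5 -> "read,sync" */
        return 0;
}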
1622 1628
1623 static struct request_queue *blk_trace_get_queue(struct block_device *bdev) 1629 static struct request_queue *blk_trace_get_queue(struct block_device *bdev)
1624 { 1630 {
1625 if (bdev->bd_disk == NULL) 1631 if (bdev->bd_disk == NULL)
1626 return NULL; 1632 return NULL;
1627 1633
1628 return bdev_get_queue(bdev); 1634 return bdev_get_queue(bdev);
1629 } 1635 }
1630 1636
1631 static ssize_t sysfs_blk_trace_attr_show(struct device *dev, 1637 static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
1632 struct device_attribute *attr, 1638 struct device_attribute *attr,
1633 char *buf) 1639 char *buf)
1634 { 1640 {
1635 struct hd_struct *p = dev_to_part(dev); 1641 struct hd_struct *p = dev_to_part(dev);
1636 struct request_queue *q; 1642 struct request_queue *q;
1637 struct block_device *bdev; 1643 struct block_device *bdev;
1638 ssize_t ret = -ENXIO; 1644 ssize_t ret = -ENXIO;
1639 1645
1640 bdev = bdget(part_devt(p)); 1646 bdev = bdget(part_devt(p));
1641 if (bdev == NULL) 1647 if (bdev == NULL)
1642 goto out; 1648 goto out;
1643 1649
1644 q = blk_trace_get_queue(bdev); 1650 q = blk_trace_get_queue(bdev);
1645 if (q == NULL) 1651 if (q == NULL)
1646 goto out_bdput; 1652 goto out_bdput;
1647 1653
1648 mutex_lock(&bdev->bd_mutex); 1654 mutex_lock(&bdev->bd_mutex);
1649 1655
1650 if (attr == &dev_attr_enable) { 1656 if (attr == &dev_attr_enable) {
1651 ret = sprintf(buf, "%u\n", !!q->blk_trace); 1657 ret = sprintf(buf, "%u\n", !!q->blk_trace);
1652 goto out_unlock_bdev; 1658 goto out_unlock_bdev;
1653 } 1659 }
1654 1660
1655 if (q->blk_trace == NULL) 1661 if (q->blk_trace == NULL)
1656 ret = sprintf(buf, "disabled\n"); 1662 ret = sprintf(buf, "disabled\n");
1657 else if (attr == &dev_attr_act_mask) 1663 else if (attr == &dev_attr_act_mask)
1658 ret = blk_trace_mask2str(buf, q->blk_trace->act_mask); 1664 ret = blk_trace_mask2str(buf, q->blk_trace->act_mask);
1659 else if (attr == &dev_attr_pid) 1665 else if (attr == &dev_attr_pid)
1660 ret = sprintf(buf, "%u\n", q->blk_trace->pid); 1666 ret = sprintf(buf, "%u\n", q->blk_trace->pid);
1661 else if (attr == &dev_attr_start_lba) 1667 else if (attr == &dev_attr_start_lba)
1662 ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba); 1668 ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba);
1663 else if (attr == &dev_attr_end_lba) 1669 else if (attr == &dev_attr_end_lba)
1664 ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba); 1670 ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
1665 1671
1666 out_unlock_bdev: 1672 out_unlock_bdev:
1667 mutex_unlock(&bdev->bd_mutex); 1673 mutex_unlock(&bdev->bd_mutex);
1668 out_bdput: 1674 out_bdput:
1669 bdput(bdev); 1675 bdput(bdev);
1670 out: 1676 out:
1671 return ret; 1677 return ret;
1672 } 1678 }
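
Read-side companion sketch: the show() handler prints "1"/"0" for enable, "disabled" when no tracer is installed, and otherwise the per-attribute value (act_mask is rendered through blk_trace_mask2str()). A minimal reader for the act_mask file of the same placeholder disk:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char buf[256];
        ssize_t n;
        int fd = open("/sys/block/sda/trace/act_mask", O_RDONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        n = read(fd, buf, sizeof(buf) - 1);
        if (n > 0) {
                buf[n] = '\0';
                /* Either "disabled\n" or a list such as "read,write,sync\n". */
                fputs(buf, stdout);
        }
        close(fd);
        return 0;
}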
1673 1679
1674 static ssize_t sysfs_blk_trace_attr_store(struct device *dev, 1680 static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
1675 struct device_attribute *attr, 1681 struct device_attribute *attr,
1676 const char *buf, size_t count) 1682 const char *buf, size_t count)
1677 { 1683 {
1678 struct block_device *bdev; 1684 struct block_device *bdev;
1679 struct request_queue *q; 1685 struct request_queue *q;
1680 struct hd_struct *p; 1686 struct hd_struct *p;
1681 u64 value; 1687 u64 value;
1682 ssize_t ret = -EINVAL; 1688 ssize_t ret = -EINVAL;
1683 1689
1684 if (count == 0) 1690 if (count == 0)
1685 goto out; 1691 goto out;
1686 1692
1687 if (attr == &dev_attr_act_mask) { 1693 if (attr == &dev_attr_act_mask) {
1688 if (sscanf(buf, "%llx", &value) != 1) { 1694 if (sscanf(buf, "%llx", &value) != 1) {
1689 /* Assume it is a list of trace category names */ 1695 /* Assume it is a list of trace category names */
1690 ret = blk_trace_str2mask(buf); 1696 ret = blk_trace_str2mask(buf);
1691 if (ret < 0) 1697 if (ret < 0)
1692 goto out; 1698 goto out;
1693 value = ret; 1699 value = ret;
1694 } 1700 }
1695 } else if (sscanf(buf, "%llu", &value) != 1) 1701 } else if (sscanf(buf, "%llu", &value) != 1)
1696 goto out; 1702 goto out;
1697 1703
1698 ret = -ENXIO; 1704 ret = -ENXIO;
1699 1705
1700 p = dev_to_part(dev); 1706 p = dev_to_part(dev);
1701 bdev = bdget(part_devt(p)); 1707 bdev = bdget(part_devt(p));
1702 if (bdev == NULL) 1708 if (bdev == NULL)
1703 goto out; 1709 goto out;
1704 1710
1705 q = blk_trace_get_queue(bdev); 1711 q = blk_trace_get_queue(bdev);
1706 if (q == NULL) 1712 if (q == NULL)
1707 goto out_bdput; 1713 goto out_bdput;
1708 1714
1709 mutex_lock(&bdev->bd_mutex); 1715 mutex_lock(&bdev->bd_mutex);
1710 1716
1711 if (attr == &dev_attr_enable) { 1717 if (attr == &dev_attr_enable) {
1712 if (value) 1718 if (value)
1713 ret = blk_trace_setup_queue(q, bdev); 1719 ret = blk_trace_setup_queue(q, bdev);
1714 else 1720 else
1715 ret = blk_trace_remove_queue(q); 1721 ret = blk_trace_remove_queue(q);
1716 goto out_unlock_bdev; 1722 goto out_unlock_bdev;
1717 } 1723 }
1718 1724
1719 ret = 0; 1725 ret = 0;
1720 if (q->blk_trace == NULL) 1726 if (q->blk_trace == NULL)
1721 ret = blk_trace_setup_queue(q, bdev); 1727 ret = blk_trace_setup_queue(q, bdev);
1722 1728
1723 if (ret == 0) { 1729 if (ret == 0) {
1724 if (attr == &dev_attr_act_mask) 1730 if (attr == &dev_attr_act_mask)
1725 q->blk_trace->act_mask = value; 1731 q->blk_trace->act_mask = value;
1726 else if (attr == &dev_attr_pid) 1732 else if (attr == &dev_attr_pid)
1727 q->blk_trace->pid = value; 1733 q->blk_trace->pid = value;
1728 else if (attr == &dev_attr_start_lba) 1734 else if (attr == &dev_attr_start_lba)
1729 q->blk_trace->start_lba = value; 1735 q->blk_trace->start_lba = value;
1730 else if (attr == &dev_attr_end_lba) 1736 else if (attr == &dev_attr_end_lba)
1731 q->blk_trace->end_lba = value; 1737 q->blk_trace->end_lba = value;
1732 } 1738 }
1733 1739
1734 out_unlock_bdev: 1740 out_unlock_bdev:
1735 mutex_unlock(&bdev->bd_mutex); 1741 mutex_unlock(&bdev->bd_mutex);
1736 out_bdput: 1742 out_bdput:
1737 bdput(bdev); 1743 bdput(bdev);
1738 out: 1744 out:
1739 return ret ? ret : count; 1745 return ret ? ret : count;
1740 } 1746 }
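
Write-side sketch: for act_mask the store() handler first tries a hex number ("%llx") and falls back to blk_trace_str2mask() for a name list, and writing any attribute other than enable implicitly sets up a tracer if none exists yet. "sda" is again only a placeholder device.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int store_attr(const char *attr, const char *val)
{
        char path[128];
        int fd;

        snprintf(path, sizeof(path), "/sys/block/sda/trace/%s", attr);
        fd = open(path, O_WRONLY);
        if (fd < 0) {
                perror(path);
                return -1;
        }
        if (write(fd, val, strlen(val)) < 0)
                perror("write");
        close(fd);
        return 0;
}

int main(void)
{
        /* Name list: the failed hex parse falls through to blk_trace_str2mask(). */
        store_attr("act_mask", "read,write,sync");
        /* Plain number: parsed with "%llu" in the handler above. */
        store_attr("pid", "0");
        return 0;
}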
1741 1747
1742 int blk_trace_init_sysfs(struct device *dev) 1748 int blk_trace_init_sysfs(struct device *dev)
1743 { 1749 {
1744 return sysfs_create_group(&dev->kobj, &blk_trace_attr_group); 1750 return sysfs_create_group(&dev->kobj, &blk_trace_attr_group);
1745 } 1751 }
1746 1752
1747 void blk_trace_remove_sysfs(struct device *dev) 1753 void blk_trace_remove_sysfs(struct device *dev)
1748 { 1754 {
1749 sysfs_remove_group(&dev->kobj, &blk_trace_attr_group); 1755 sysfs_remove_group(&dev->kobj, &blk_trace_attr_group);
1750 } 1756 }
1751 1757
1752 #endif /* CONFIG_BLK_DEV_IO_TRACE */ 1758 #endif /* CONFIG_BLK_DEV_IO_TRACE */
1753 1759
1754 #ifdef CONFIG_EVENT_TRACING 1760 #ifdef CONFIG_EVENT_TRACING
1755 1761
1756 void blk_dump_cmd(char *buf, struct request *rq) 1762 void blk_dump_cmd(char *buf, struct request *rq)
1757 { 1763 {
1758 int i, end; 1764 int i, end;
1759 int len = rq->cmd_len; 1765 int len = rq->cmd_len;
1760 unsigned char *cmd = rq->cmd; 1766 unsigned char *cmd = rq->cmd;
1761 1767
1762 if (rq->cmd_type != REQ_TYPE_BLOCK_PC) { 1768 if (rq->cmd_type != REQ_TYPE_BLOCK_PC) {
1763 buf[0] = '\0'; 1769 buf[0] = '\0';
1764 return; 1770 return;
1765 } 1771 }
1766 1772
1767 for (end = len - 1; end >= 0; end--) 1773 for (end = len - 1; end >= 0; end--)
1768 if (cmd[end]) 1774 if (cmd[end])
1769 break; 1775 break;
1770 end++; 1776 end++;
1771 1777
1772 for (i = 0; i < len; i++) { 1778 for (i = 0; i < len; i++) {
1773 buf += sprintf(buf, "%s%02x", i == 0 ? "" : " ", cmd[i]); 1779 buf += sprintf(buf, "%s%02x", i == 0 ? "" : " ", cmd[i]);
1774 if (i == end && end != len - 1) { 1780 if (i == end && end != len - 1) {
1775 sprintf(buf, " .."); 1781 sprintf(buf, " ..");
1776 break; 1782 break;
1777 } 1783 }
1778 } 1784 }
1779 } 1785 }
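
blk_dump_cmd() hex-dumps the raw command bytes of a BLOCK_PC request and collapses all but the first of the trailing zero bytes into " ..". A hedged userspace mirror of the same formatting, fed an arbitrary zero-padded demo buffer rather than a real request:

#include <stdio.h>

static void dump_cmd(char *buf, const unsigned char *cmd, int len)
{
        int i, end;

        /* Find the last non-zero byte; zeros after the next one are elided. */
        for (end = len - 1; end >= 0; end--)
                if (cmd[end])
                        break;
        end++;

        for (i = 0; i < len; i++) {
                buf += sprintf(buf, "%s%02x", i == 0 ? "" : " ", cmd[i]);
                if (i == end && end != len - 1) {
                        sprintf(buf, " ..");
                        break;
                }
        }
}

int main(void)
{
        /* Arbitrary demo bytes, zero-padded to 10 entries. */
        unsigned char cmd[10] = { 0x12, 0x00, 0x00, 0x00, 0x24, 0x00 };
        char line[64];

        dump_cmd(line, cmd, (int)sizeof(cmd));
        printf("%s\n", line);   /* "12 00 00 00 24 00 .." */
        return 0;
}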
1780 1786
1781 void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) 1787 void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
1782 { 1788 {
1783 int i = 0; 1789 int i = 0;
1784 1790
1785 if (rw & WRITE) 1791 if (rw & WRITE)
1786 rwbs[i++] = 'W'; 1792 rwbs[i++] = 'W';
1787 else if (rw & REQ_DISCARD) 1793 else if (rw & REQ_DISCARD)
1788 rwbs[i++] = 'D'; 1794 rwbs[i++] = 'D';
1789 else if (bytes) 1795 else if (bytes)
1790 rwbs[i++] = 'R'; 1796 rwbs[i++] = 'R';
1791 else 1797 else
1792 rwbs[i++] = 'N'; 1798 rwbs[i++] = 'N';
1793 1799
1794 if (rw & REQ_RAHEAD) 1800 if (rw & REQ_RAHEAD)
1795 rwbs[i++] = 'A'; 1801 rwbs[i++] = 'A';
1796 if (rw & REQ_SYNC) 1802 if (rw & REQ_SYNC)
1797 rwbs[i++] = 'S'; 1803 rwbs[i++] = 'S';
1798 if (rw & REQ_META) 1804 if (rw & REQ_META)
1799 rwbs[i++] = 'M'; 1805 rwbs[i++] = 'M';
1800 if (rw & REQ_SECURE) 1806 if (rw & REQ_SECURE)
1801 rwbs[i++] = 'E'; 1807 rwbs[i++] = 'E';
1802 1808
1803 rwbs[i] = '\0'; 1809 rwbs[i] = '\0';
1804 } 1810 }
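
blk_fill_rwbs() condenses a request's direction and flags into the short "rwbs" string that the block trace events print (e.g. "WS" for a synchronous write). Below is a userspace mirror of the same logic; the *_FLAG constants are arbitrary stand-in bits, not the real WRITE/REQ_* values.

#include <stdio.h>

#define WRITE_FLAG    (1u << 0)
#define DISCARD_FLAG  (1u << 1)
#define RAHEAD_FLAG   (1u << 2)
#define SYNC_FLAG     (1u << 3)
#define META_FLAG     (1u << 4)
#define SECURE_FLAG   (1u << 5)

static void fill_rwbs(char *rwbs, unsigned int rw, int bytes)
{
        int i = 0;

        /* Primary action: write, discard, read (if any payload), or none. */
        if (rw & WRITE_FLAG)
                rwbs[i++] = 'W';
        else if (rw & DISCARD_FLAG)
                rwbs[i++] = 'D';
        else if (bytes)
                rwbs[i++] = 'R';
        else
                rwbs[i++] = 'N';

        /* Modifiers, appended in the same order as the kernel function. */
        if (rw & RAHEAD_FLAG)
                rwbs[i++] = 'A';
        if (rw & SYNC_FLAG)
                rwbs[i++] = 'S';
        if (rw & META_FLAG)
                rwbs[i++] = 'M';
        if (rw & SECURE_FLAG)
                rwbs[i++] = 'E';

        rwbs[i] = '\0';
}

int main(void)
{
        char rwbs[8];

        fill_rwbs(rwbs, WRITE_FLAG | SYNC_FLAG, 4096);
        printf("%s\n", rwbs);   /* "WS" */
        return 0;
}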
1805 1811
1806 #endif /* CONFIG_EVENT_TRACING */ 1812 #endif /* CONFIG_EVENT_TRACING */
1807 1813
1808 1814