Commit e2e1a148bc45855816ae6b4692ce29d0020fa22e

Authored by Jens Axboe
1 parent 841fdffdd3

block: add sysfs knob for turning off disk entropy contributions

There are two reasons for doing this:

- On SSD disks, the completion times aren't as random as they
  are for rotational drives. So it's questionable whether they
  should contribute to the random pool in the first place.

- Calling add_disk_randomness() has a lot of overhead.

This adds /sys/block/<dev>/queue/add_random, which lets you switch off
the entropy contribution on a per-device basis. The default setting is
on, so there should be no functional change from this patch.
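
As a rough illustration of the knob (not part of the patch itself), the
attribute can be flipped from user space like any other sysfs file; the
device name "sda" below is only a placeholder:

/*
 * Minimal sketch: disable the entropy contribution for one disk by
 * writing '0' to the new sysfs attribute. "sda" is an assumed name.
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/block/sda/queue/add_random", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fputs("0\n", f);	/* "0" = off, "1" = on (the default) */
	return fclose(f) ? 1 : 0;
}

The same effect can be had from a shell with
echo 0 > /sys/block/sda/queue/add_random; reading the file back returns
the current setting.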

Signed-off-by: Jens Axboe <jaxboe@fusionio.com>

Showing 3 changed files with 34 additions and 2 deletions

block/blk-core.c

/*
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 1994, Karl Keyte: Added support for disk statistics
 * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au>
 * - July2000
 * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001
 */

/*
 * This handles all read/write requests to block devices
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/completion.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/fault-inject.h>

#define CREATE_TRACE_POINTS
#include <trace/events/block.h>

#include "blk.h"

EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);

static int __make_request(struct request_queue *q, struct bio *bio);

/*
 * For the allocated request tables
 */
static struct kmem_cache *request_cachep;

/*
 * For queue allocation
 */
struct kmem_cache *blk_requestq_cachep;

/*
 * Controlling structure to kblockd
 */
static struct workqueue_struct *kblockd_workqueue;

static void drive_stat_acct(struct request *rq, int new_io)
{
	struct hd_struct *part;
	int rw = rq_data_dir(rq);
	int cpu;

	if (!blk_do_io_stat(rq))
		return;

	cpu = part_stat_lock();
	part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));

	if (!new_io)
		part_stat_inc(cpu, part, merges[rw]);
	else {
		part_round_stats(cpu, part);
		part_inc_in_flight(part, rw);
	}

	part_stat_unlock();
}

void blk_queue_congestion_threshold(struct request_queue *q)
{
	int nr;

	nr = q->nr_requests - (q->nr_requests / 8) + 1;
	if (nr > q->nr_requests)
		nr = q->nr_requests;
	q->nr_congestion_on = nr;

	nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;
	if (nr < 1)
		nr = 1;
	q->nr_congestion_off = nr;
}

/**
 * blk_get_backing_dev_info - get the address of a queue's backing_dev_info
 * @bdev: device
 *
 * Locates the passed device's request queue and returns the address of its
 * backing_dev_info
 *
 * Will return NULL if the request queue cannot be located.
 */
struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
{
	struct backing_dev_info *ret = NULL;
	struct request_queue *q = bdev_get_queue(bdev);

	if (q)
		ret = &q->backing_dev_info;
	return ret;
}
EXPORT_SYMBOL(blk_get_backing_dev_info);

void blk_rq_init(struct request_queue *q, struct request *rq)
{
	memset(rq, 0, sizeof(*rq));

	INIT_LIST_HEAD(&rq->queuelist);
	INIT_LIST_HEAD(&rq->timeout_list);
	rq->cpu = -1;
	rq->q = q;
	rq->__sector = (sector_t) -1;
	INIT_HLIST_NODE(&rq->hash);
	RB_CLEAR_NODE(&rq->rb_node);
	rq->cmd = rq->__cmd;
	rq->cmd_len = BLK_MAX_CDB;
	rq->tag = -1;
	rq->ref_count = 1;
	rq->start_time = jiffies;
	set_start_time_ns(rq);
}
EXPORT_SYMBOL(blk_rq_init);

static void req_bio_endio(struct request *rq, struct bio *bio,
			  unsigned int nbytes, int error)
{
	struct request_queue *q = rq->q;

	if (&q->bar_rq != rq) {
		if (error)
			clear_bit(BIO_UPTODATE, &bio->bi_flags);
		else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
			error = -EIO;

		if (unlikely(nbytes > bio->bi_size)) {
			printk(KERN_ERR "%s: want %u bytes done, %u left\n",
			       __func__, nbytes, bio->bi_size);
			nbytes = bio->bi_size;
		}

		if (unlikely(rq->cmd_flags & REQ_QUIET))
			set_bit(BIO_QUIET, &bio->bi_flags);

		bio->bi_size -= nbytes;
		bio->bi_sector += (nbytes >> 9);

		if (bio_integrity(bio))
			bio_integrity_advance(bio, nbytes);

		if (bio->bi_size == 0)
			bio_endio(bio, error);
	} else {

		/*
		 * Okay, this is the barrier request in progress, just
		 * record the error;
		 */
		if (error && !q->orderr)
			q->orderr = error;
	}
}

void blk_dump_rq_flags(struct request *rq, char *msg)
{
	int bit;

	printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg,
		rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
		rq->cmd_flags);

	printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n",
	       (unsigned long long)blk_rq_pos(rq),
	       blk_rq_sectors(rq), blk_rq_cur_sectors(rq));
	printk(KERN_INFO " bio %p, biotail %p, buffer %p, len %u\n",
	       rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq));

	if (blk_pc_request(rq)) {
		printk(KERN_INFO " cdb: ");
		for (bit = 0; bit < BLK_MAX_CDB; bit++)
			printk("%02x ", rq->cmd[bit]);
		printk("\n");
	}
}
EXPORT_SYMBOL(blk_dump_rq_flags);

/*
 * "plug" the device if there are no outstanding requests: this will
 * force the transfer to start only after we have put all the requests
 * on the list.
 *
 * This is called with interrupts off and no requests on the queue and
 * with the queue lock held.
 */
void blk_plug_device(struct request_queue *q)
{
	WARN_ON(!irqs_disabled());

	/*
	 * don't plug a stopped queue, it must be paired with blk_start_queue()
	 * which will restart the queueing
	 */
	if (blk_queue_stopped(q))
		return;

	if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) {
		mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
		trace_block_plug(q);
	}
}
EXPORT_SYMBOL(blk_plug_device);

/**
 * blk_plug_device_unlocked - plug a device without queue lock held
 * @q: The &struct request_queue to plug
 *
 * Description:
 * Like @blk_plug_device(), but grabs the queue lock and disables
 * interrupts.
 **/
void blk_plug_device_unlocked(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	blk_plug_device(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(blk_plug_device_unlocked);

/*
 * remove the queue from the plugged list, if present. called with
 * queue lock held and interrupts disabled.
 */
int blk_remove_plug(struct request_queue *q)
{
	WARN_ON(!irqs_disabled());

	if (!queue_flag_test_and_clear(QUEUE_FLAG_PLUGGED, q))
		return 0;

	del_timer(&q->unplug_timer);
	return 1;
}
EXPORT_SYMBOL(blk_remove_plug);

/*
 * remove the plug and let it rip..
 */
void __generic_unplug_device(struct request_queue *q)
{
	if (unlikely(blk_queue_stopped(q)))
		return;
	if (!blk_remove_plug(q) && !blk_queue_nonrot(q))
		return;

	q->request_fn(q);
}

/**
 * generic_unplug_device - fire a request queue
 * @q: The &struct request_queue in question
 *
 * Description:
 * Linux uses plugging to build bigger requests queues before letting
 * the device have at them. If a queue is plugged, the I/O scheduler
 * is still adding and merging requests on the queue. Once the queue
 * gets unplugged, the request_fn defined for the queue is invoked and
 * transfers started.
 **/
void generic_unplug_device(struct request_queue *q)
{
	if (blk_queue_plugged(q)) {
		spin_lock_irq(q->queue_lock);
		__generic_unplug_device(q);
		spin_unlock_irq(q->queue_lock);
	}
}
EXPORT_SYMBOL(generic_unplug_device);

static void blk_backing_dev_unplug(struct backing_dev_info *bdi,
				   struct page *page)
{
	struct request_queue *q = bdi->unplug_io_data;

	blk_unplug(q);
}

void blk_unplug_work(struct work_struct *work)
{
	struct request_queue *q =
		container_of(work, struct request_queue, unplug_work);

	trace_block_unplug_io(q);
	q->unplug_fn(q);
}

void blk_unplug_timeout(unsigned long data)
{
	struct request_queue *q = (struct request_queue *)data;

	trace_block_unplug_timer(q);
	kblockd_schedule_work(q, &q->unplug_work);
}

void blk_unplug(struct request_queue *q)
{
	/*
	 * devices don't necessarily have an ->unplug_fn defined
	 */
	if (q->unplug_fn) {
		trace_block_unplug_io(q);
		q->unplug_fn(q);
	}
}
EXPORT_SYMBOL(blk_unplug);

/**
 * blk_start_queue - restart a previously stopped queue
 * @q: The &struct request_queue in question
 *
 * Description:
 * blk_start_queue() will clear the stop flag on the queue, and call
 * the request_fn for the queue if it was in a stopped state when
 * entered. Also see blk_stop_queue(). Queue lock must be held.
 **/
void blk_start_queue(struct request_queue *q)
{
	WARN_ON(!irqs_disabled());

	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
	__blk_run_queue(q);
}
EXPORT_SYMBOL(blk_start_queue);

/**
 * blk_stop_queue - stop a queue
 * @q: The &struct request_queue in question
 *
 * Description:
 * The Linux block layer assumes that a block driver will consume all
 * entries on the request queue when the request_fn strategy is called.
 * Often this will not happen, because of hardware limitations (queue
 * depth settings). If a device driver gets a 'queue full' response,
 * or if it simply chooses not to queue more I/O at one point, it can
 * call this function to prevent the request_fn from being called until
 * the driver has signalled it's ready to go again. This happens by calling
 * blk_start_queue() to restart queue operations. Queue lock must be held.
 **/
void blk_stop_queue(struct request_queue *q)
{
	blk_remove_plug(q);
	queue_flag_set(QUEUE_FLAG_STOPPED, q);
}
EXPORT_SYMBOL(blk_stop_queue);

/**
 * blk_sync_queue - cancel any pending callbacks on a queue
 * @q: the queue
 *
 * Description:
 * The block layer may perform asynchronous callback activity
 * on a queue, such as calling the unplug function after a timeout.
 * A block device may call blk_sync_queue to ensure that any
 * such activity is cancelled, thus allowing it to release resources
 * that the callbacks might use. The caller must already have made sure
 * that its ->make_request_fn will not re-add plugging prior to calling
 * this function.
 *
 */
void blk_sync_queue(struct request_queue *q)
{
	del_timer_sync(&q->unplug_timer);
	del_timer_sync(&q->timeout);
	cancel_work_sync(&q->unplug_work);
}
EXPORT_SYMBOL(blk_sync_queue);

/**
 * __blk_run_queue - run a single device queue
 * @q: The queue to run
 *
 * Description:
 * See @blk_run_queue. This variant must be called with the queue lock
 * held and interrupts disabled.
 *
 */
void __blk_run_queue(struct request_queue *q)
{
	blk_remove_plug(q);

	if (unlikely(blk_queue_stopped(q)))
		return;

	if (elv_queue_empty(q))
		return;

	/*
	 * Only recurse once to avoid overrunning the stack, let the unplug
	 * handling reinvoke the handler shortly if we already got there.
	 */
	if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
		q->request_fn(q);
		queue_flag_clear(QUEUE_FLAG_REENTER, q);
	} else {
		queue_flag_set(QUEUE_FLAG_PLUGGED, q);
		kblockd_schedule_work(q, &q->unplug_work);
	}
}
EXPORT_SYMBOL(__blk_run_queue);

/**
 * blk_run_queue - run a single device queue
 * @q: The queue to run
 *
 * Description:
 * Invoke request handling on this queue, if it has pending work to do.
 * May be used to restart queueing when a request has completed.
 */
void blk_run_queue(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	__blk_run_queue(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(blk_run_queue);

void blk_put_queue(struct request_queue *q)
{
	kobject_put(&q->kobj);
}

void blk_cleanup_queue(struct request_queue *q)
{
	/*
	 * We know we have process context here, so we can be a little
	 * cautious and ensure that pending block actions on this device
	 * are done before moving on. Going into this function, we should
	 * not have processes doing IO to this device.
	 */
	blk_sync_queue(q);

	del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
	mutex_lock(&q->sysfs_lock);
	queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
	mutex_unlock(&q->sysfs_lock);

	if (q->elevator)
		elevator_exit(q->elevator);

	blk_put_queue(q);
}
EXPORT_SYMBOL(blk_cleanup_queue);

static int blk_init_free_list(struct request_queue *q)
{
	struct request_list *rl = &q->rq;

	if (unlikely(rl->rq_pool))
		return 0;

	rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
	rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
	rl->elvpriv = 0;
	init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
	init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);

	rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
				mempool_free_slab, request_cachep, q->node);

	if (!rl->rq_pool)
		return -ENOMEM;

	return 0;
}

struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
{
	return blk_alloc_queue_node(gfp_mask, -1);
}
EXPORT_SYMBOL(blk_alloc_queue);

struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
{
	struct request_queue *q;
	int err;

	q = kmem_cache_alloc_node(blk_requestq_cachep,
				gfp_mask | __GFP_ZERO, node_id);
	if (!q)
		return NULL;

	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
	q->backing_dev_info.unplug_io_data = q;
	q->backing_dev_info.ra_pages =
			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
	q->backing_dev_info.state = 0;
	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
	q->backing_dev_info.name = "block";

	err = bdi_init(&q->backing_dev_info);
	if (err) {
		kmem_cache_free(blk_requestq_cachep, q);
		return NULL;
	}

	setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
		    laptop_mode_timer_fn, (unsigned long) q);
	init_timer(&q->unplug_timer);
	setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
	INIT_LIST_HEAD(&q->timeout_list);
	INIT_WORK(&q->unplug_work, blk_unplug_work);

	kobject_init(&q->kobj, &blk_queue_ktype);

	mutex_init(&q->sysfs_lock);
	spin_lock_init(&q->__queue_lock);

	return q;
}
EXPORT_SYMBOL(blk_alloc_queue_node);

/**
 * blk_init_queue - prepare a request queue for use with a block device
 * @rfn: The function to be called to process requests that have been
 * placed on the queue.
 * @lock: Request queue spin lock
 *
 * Description:
 * If a block device wishes to use the standard request handling procedures,
 * which sorts requests and coalesces adjacent requests, then it must
 * call blk_init_queue(). The function @rfn will be called when there
 * are requests on the queue that need to be processed. If the device
 * supports plugging, then @rfn may not be called immediately when requests
 * are available on the queue, but may be called at some time later instead.
 * Plugged queues are generally unplugged when a buffer belonging to one
 * of the requests on the queue is needed, or due to memory pressure.
 *
 * @rfn is not required, or even expected, to remove all requests off the
 * queue, but only as many as it can handle at a time. If it does leave
 * requests on the queue, it is responsible for arranging that the requests
 * get dealt with eventually.
 *
 * The queue spin lock must be held while manipulating the requests on the
 * request queue; this lock will be taken also from interrupt context, so irq
 * disabling is needed for it.
 *
 * Function returns a pointer to the initialized request queue, or %NULL if
 * it didn't succeed.
 *
 * Note:
 * blk_init_queue() must be paired with a blk_cleanup_queue() call
 * when the block device is deactivated (such as at module unload).
 **/

struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
{
	return blk_init_queue_node(rfn, lock, -1);
}
EXPORT_SYMBOL(blk_init_queue);

struct request_queue *
blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
{
	struct request_queue *uninit_q, *q;

	uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id);
	if (!uninit_q)
		return NULL;

	q = blk_init_allocated_queue_node(uninit_q, rfn, lock, node_id);
	if (!q)
		blk_cleanup_queue(uninit_q);

	return q;
}
EXPORT_SYMBOL(blk_init_queue_node);

struct request_queue *
blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
			 spinlock_t *lock)
{
	return blk_init_allocated_queue_node(q, rfn, lock, -1);
}
EXPORT_SYMBOL(blk_init_allocated_queue);

struct request_queue *
blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
			      spinlock_t *lock, int node_id)
{
	if (!q)
		return NULL;

	q->node = node_id;
	if (blk_init_free_list(q))
		return NULL;

	q->request_fn = rfn;
	q->prep_rq_fn = NULL;
	q->unplug_fn = generic_unplug_device;
	q->queue_flags = QUEUE_FLAG_DEFAULT;
	q->queue_lock = lock;

	/*
	 * This also sets hw/phys segments, boundary and size
	 */
	blk_queue_make_request(q, __make_request);

	q->sg_reserved_size = INT_MAX;

	/*
	 * all done
	 */
	if (!elevator_init(q, NULL)) {
		blk_queue_congestion_threshold(q);
		return q;
	}

	return NULL;
}
EXPORT_SYMBOL(blk_init_allocated_queue_node);

int blk_get_queue(struct request_queue *q)
{
	if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
		kobject_get(&q->kobj);
		return 0;
	}

	return 1;
}

static inline void blk_free_request(struct request_queue *q, struct request *rq)
{
	if (rq->cmd_flags & REQ_ELVPRIV)
		elv_put_request(q, rq);
	mempool_free(rq, q->rq.rq_pool);
}

static struct request *
blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask)
{
	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);

	if (!rq)
		return NULL;

	blk_rq_init(q, rq);

	rq->cmd_flags = flags | REQ_ALLOCED;

	if (priv) {
		if (unlikely(elv_set_request(q, rq, gfp_mask))) {
			mempool_free(rq, q->rq.rq_pool);
			return NULL;
		}
		rq->cmd_flags |= REQ_ELVPRIV;
	}

	return rq;
}

/*
 * ioc_batching returns true if the ioc is a valid batching request and
 * should be given priority access to a request.
 */
static inline int ioc_batching(struct request_queue *q, struct io_context *ioc)
{
	if (!ioc)
		return 0;

	/*
	 * Make sure the process is able to allocate at least 1 request
	 * even if the batch times out, otherwise we could theoretically
	 * lose wakeups.
	 */
	return ioc->nr_batch_requests == q->nr_batching ||
		(ioc->nr_batch_requests > 0
		&& time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
}

/*
 * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This
 * will cause the process to be a "batcher" on all queues in the system. This
 * is the behaviour we want though - once it gets a wakeup it should be given
 * a nice run.
 */
static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
{
	if (!ioc || ioc_batching(q, ioc))
		return;

	ioc->nr_batch_requests = q->nr_batching;
	ioc->last_waited = jiffies;
}

static void __freed_request(struct request_queue *q, int sync)
{
	struct request_list *rl = &q->rq;

	if (rl->count[sync] < queue_congestion_off_threshold(q))
		blk_clear_queue_congested(q, sync);

	if (rl->count[sync] + 1 <= q->nr_requests) {
		if (waitqueue_active(&rl->wait[sync]))
			wake_up(&rl->wait[sync]);

		blk_clear_queue_full(q, sync);
	}
}

/*
 * A request has just been released. Account for it, update the full and
 * congestion status, wake up any waiters. Called under q->queue_lock.
 */
static void freed_request(struct request_queue *q, int sync, int priv)
{
	struct request_list *rl = &q->rq;

	rl->count[sync]--;
	if (priv)
		rl->elvpriv--;

	__freed_request(q, sync);

	if (unlikely(rl->starved[sync ^ 1]))
		__freed_request(q, sync ^ 1);
}

/*
 * Get a free request, queue_lock must be held.
 * Returns NULL on failure, with queue_lock held.
 * Returns !NULL on success, with queue_lock *not held*.
 */
static struct request *get_request(struct request_queue *q, int rw_flags,
				   struct bio *bio, gfp_t gfp_mask)
{
	struct request *rq = NULL;
	struct request_list *rl = &q->rq;
	struct io_context *ioc = NULL;
	const bool is_sync = rw_is_sync(rw_flags) != 0;
	int may_queue, priv;

	may_queue = elv_may_queue(q, rw_flags);
	if (may_queue == ELV_MQUEUE_NO)
		goto rq_starved;

	if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
		if (rl->count[is_sync]+1 >= q->nr_requests) {
			ioc = current_io_context(GFP_ATOMIC, q->node);
			/*
			 * The queue will fill after this allocation, so set
			 * it as full, and mark this process as "batching".
			 * This process will be allowed to complete a batch of
			 * requests, others will be blocked.
			 */
			if (!blk_queue_full(q, is_sync)) {
				ioc_set_batching(q, ioc);
				blk_set_queue_full(q, is_sync);
			} else {
				if (may_queue != ELV_MQUEUE_MUST
						&& !ioc_batching(q, ioc)) {
					/*
					 * The queue is full and the allocating
					 * process is not a "batcher", and not
					 * exempted by the IO scheduler
					 */
					goto out;
				}
			}
		}
		blk_set_queue_congested(q, is_sync);
	}

	/*
	 * Only allow batching queuers to allocate up to 50% over the defined
	 * limit of requests, otherwise we could have thousands of requests
	 * allocated with any setting of ->nr_requests
	 */
	if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
		goto out;

	rl->count[is_sync]++;
	rl->starved[is_sync] = 0;

	priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
	if (priv)
		rl->elvpriv++;

	if (blk_queue_io_stat(q))
		rw_flags |= REQ_IO_STAT;
	spin_unlock_irq(q->queue_lock);

	rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);
	if (unlikely(!rq)) {
		/*
		 * Allocation failed presumably due to memory. Undo anything
		 * we might have messed up.
		 *
		 * Allocating task should really be put onto the front of the
		 * wait queue, but this is pretty rare.
		 */
		spin_lock_irq(q->queue_lock);
		freed_request(q, is_sync, priv);

		/*
		 * in the very unlikely event that allocation failed and no
		 * requests for this direction was pending, mark us starved
		 * so that freeing of a request in the other direction will
		 * notice us. another possible fix would be to split the
		 * rq mempool into READ and WRITE
		 */
rq_starved:
		if (unlikely(rl->count[is_sync] == 0))
			rl->starved[is_sync] = 1;

		goto out;
	}

	/*
	 * ioc may be NULL here, and ioc_batching will be false. That's
	 * OK, if the queue is under the request limit then requests need
	 * not count toward the nr_batch_requests limit. There will always
	 * be some limit enforced by BLK_BATCH_TIME.
	 */
	if (ioc_batching(q, ioc))
		ioc->nr_batch_requests--;

	trace_block_getrq(q, bio, rw_flags & 1);
out:
	return rq;
}

/*
 * No available requests for this queue, unplug the device and wait for some
 * requests to become available.
 *
 * Called with q->queue_lock held, and returns with it unlocked.
 */
static struct request *get_request_wait(struct request_queue *q, int rw_flags,
					struct bio *bio)
{
	const bool is_sync = rw_is_sync(rw_flags) != 0;
	struct request *rq;

	rq = get_request(q, rw_flags, bio, GFP_NOIO);
	while (!rq) {
		DEFINE_WAIT(wait);
		struct io_context *ioc;
		struct request_list *rl = &q->rq;

		prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
				TASK_UNINTERRUPTIBLE);

		trace_block_sleeprq(q, bio, rw_flags & 1);

		__generic_unplug_device(q);
		spin_unlock_irq(q->queue_lock);
		io_schedule();

		/*
		 * After sleeping, we become a "batching" process and
		 * will be able to allocate at least one request, and
		 * up to a big batch of them for a small period time.
		 * See ioc_batching, ioc_set_batching
		 */
		ioc = current_io_context(GFP_NOIO, q->node);
		ioc_set_batching(q, ioc);

		spin_lock_irq(q->queue_lock);
		finish_wait(&rl->wait[is_sync], &wait);

		rq = get_request(q, rw_flags, bio, GFP_NOIO);
	};

	return rq;
}

struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
{
	struct request *rq;

	BUG_ON(rw != READ && rw != WRITE);

	spin_lock_irq(q->queue_lock);
	if (gfp_mask & __GFP_WAIT) {
		rq = get_request_wait(q, rw, NULL);
	} else {
		rq = get_request(q, rw, NULL, gfp_mask);
		if (!rq)
			spin_unlock_irq(q->queue_lock);
	}
	/* q->queue_lock is unlocked at this point */

	return rq;
}
EXPORT_SYMBOL(blk_get_request);
909 909
910 /** 910 /**
911 * blk_make_request - given a bio, allocate a corresponding struct request. 911 * blk_make_request - given a bio, allocate a corresponding struct request.
912 * @q: target request queue 912 * @q: target request queue
913 * @bio: The bio describing the memory mappings that will be submitted for IO. 913 * @bio: The bio describing the memory mappings that will be submitted for IO.
914 * It may be a chained-bio properly constructed by block/bio layer. 914 * It may be a chained-bio properly constructed by block/bio layer.
915 * @gfp_mask: gfp flags to be used for memory allocation 915 * @gfp_mask: gfp flags to be used for memory allocation
916 * 916 *
917 * blk_make_request is the parallel of generic_make_request for BLOCK_PC 917 * blk_make_request is the parallel of generic_make_request for BLOCK_PC
918 * type commands. Where the struct request needs to be farther initialized by 918 * type commands. Where the struct request needs to be farther initialized by
919 * the caller. It is passed a &struct bio, which describes the memory info of 919 * the caller. It is passed a &struct bio, which describes the memory info of
920 * the I/O transfer. 920 * the I/O transfer.
921 * 921 *
922 * The caller of blk_make_request must make sure that bi_io_vec 922 * The caller of blk_make_request must make sure that bi_io_vec
923 * are set to describe the memory buffers. That bio_data_dir() will return 923 * are set to describe the memory buffers. That bio_data_dir() will return
924 * the needed direction of the request. (And all bio's in the passed bio-chain 924 * the needed direction of the request. (And all bio's in the passed bio-chain
925 * are properly set accordingly) 925 * are properly set accordingly)
926 * 926 *
927 * If called under none-sleepable conditions, mapped bio buffers must not 927 * If called under none-sleepable conditions, mapped bio buffers must not
928 * need bouncing, by calling the appropriate masked or flagged allocator, 928 * need bouncing, by calling the appropriate masked or flagged allocator,
929 * suitable for the target device. Otherwise the call to blk_queue_bounce will 929 * suitable for the target device. Otherwise the call to blk_queue_bounce will
930 * BUG. 930 * BUG.
931 * 931 *
932 * WARNING: When allocating/cloning a bio-chain, careful consideration should be 932 * WARNING: When allocating/cloning a bio-chain, careful consideration should be
933 * given to how you allocate bios. In particular, you cannot use __GFP_WAIT for 933 * given to how you allocate bios. In particular, you cannot use __GFP_WAIT for
934 * anything but the first bio in the chain. Otherwise you risk waiting for IO 934 * anything but the first bio in the chain. Otherwise you risk waiting for IO
935 * completion of a bio that hasn't been submitted yet, thus resulting in a 935 * completion of a bio that hasn't been submitted yet, thus resulting in a
936 * deadlock. Alternatively bios should be allocated using bio_kmalloc() instead 936 * deadlock. Alternatively bios should be allocated using bio_kmalloc() instead
937 * of bio_alloc(), as that avoids the mempool deadlock. 937 * of bio_alloc(), as that avoids the mempool deadlock.
938 * If possible a big IO should be split into smaller parts when allocation 938 * If possible a big IO should be split into smaller parts when allocation
939 * fails. Partial allocation should not be an error, or you risk a live-lock. 939 * fails. Partial allocation should not be an error, or you risk a live-lock.
940 */ 940 */
941 struct request *blk_make_request(struct request_queue *q, struct bio *bio, 941 struct request *blk_make_request(struct request_queue *q, struct bio *bio,
942 gfp_t gfp_mask) 942 gfp_t gfp_mask)
943 { 943 {
944 struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask); 944 struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask);
945 945
946 if (unlikely(!rq)) 946 if (unlikely(!rq))
947 return ERR_PTR(-ENOMEM); 947 return ERR_PTR(-ENOMEM);
948 948
949 for_each_bio(bio) { 949 for_each_bio(bio) {
950 struct bio *bounce_bio = bio; 950 struct bio *bounce_bio = bio;
951 int ret; 951 int ret;
952 952
953 blk_queue_bounce(q, &bounce_bio); 953 blk_queue_bounce(q, &bounce_bio);
954 ret = blk_rq_append_bio(q, rq, bounce_bio); 954 ret = blk_rq_append_bio(q, rq, bounce_bio);
955 if (unlikely(ret)) { 955 if (unlikely(ret)) {
956 blk_put_request(rq); 956 blk_put_request(rq);
957 return ERR_PTR(ret); 957 return ERR_PTR(ret);
958 } 958 }
959 } 959 }
960 960
961 return rq; 961 return rq;
962 } 962 }
963 EXPORT_SYMBOL(blk_make_request); 963 EXPORT_SYMBOL(blk_make_request);
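
For illustration only, a minimal sketch of a caller of blk_make_request() as described above; build_pc_rq() and its setup are assumptions of the example, not part of this file.

/* Hedged sketch: wrap a pre-built bio chain in a BLOCK_PC request. */
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/err.h>

static struct request *build_pc_rq(struct request_queue *q, struct bio *bio)
{
	struct request *rq;

	/* bio (and any chained bios) must already have bi_io_vec set up */
	rq = blk_make_request(q, bio, GFP_KERNEL);
	if (IS_ERR(rq))
		return rq;

	/* the caller finishes initialization, e.g. cmd[], cmd_len, timeout */
	rq->cmd_type = REQ_TYPE_BLOCK_PC;
	return rq;
}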
964 964
965 /** 965 /**
966 * blk_requeue_request - put a request back on queue 966 * blk_requeue_request - put a request back on queue
967 * @q: request queue where request should be inserted 967 * @q: request queue where request should be inserted
968 * @rq: request to be inserted 968 * @rq: request to be inserted
969 * 969 *
970 * Description: 970 * Description:
971 * Drivers often keep queueing requests until the hardware cannot accept 971 * Drivers often keep queueing requests until the hardware cannot accept
972 * more; when that condition happens we need to put the request back 972 * more; when that condition happens we need to put the request back
973 * on the queue. Must be called with queue lock held. 973 * on the queue. Must be called with queue lock held.
974 */ 974 */
975 void blk_requeue_request(struct request_queue *q, struct request *rq) 975 void blk_requeue_request(struct request_queue *q, struct request *rq)
976 { 976 {
977 blk_delete_timer(rq); 977 blk_delete_timer(rq);
978 blk_clear_rq_complete(rq); 978 blk_clear_rq_complete(rq);
979 trace_block_rq_requeue(q, rq); 979 trace_block_rq_requeue(q, rq);
980 980
981 if (blk_rq_tagged(rq)) 981 if (blk_rq_tagged(rq))
982 blk_queue_end_tag(q, rq); 982 blk_queue_end_tag(q, rq);
983 983
984 BUG_ON(blk_queued_rq(rq)); 984 BUG_ON(blk_queued_rq(rq));
985 985
986 elv_requeue_request(q, rq); 986 elv_requeue_request(q, rq);
987 } 987 }
988 EXPORT_SYMBOL(blk_requeue_request); 988 EXPORT_SYMBOL(blk_requeue_request);
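
As a hedged illustration of the requeue pattern described above: a driver that fetches requests and finds the hardware full puts the request back and stops the queue until the hardware drains. my_hw_submit() is a hypothetical helper, not kernel API.

/* Illustrative request_fn fragment; my_hw_submit() is hypothetical. */
static int my_hw_submit(struct request *rq);	/* returns -EBUSY when full */

static void my_request_fn(struct request_queue *q)
{
	struct request *rq;

	/* request_fn runs with the queue lock held, as blk_requeue_request()
	 * requires */
	while ((rq = blk_fetch_request(q)) != NULL) {
		if (my_hw_submit(rq) == -EBUSY) {
			blk_requeue_request(q, rq);
			blk_stop_queue(q);	/* restart when hardware drains */
			break;
		}
	}
}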
989 989
990 /** 990 /**
991 * blk_insert_request - insert a special request into a request queue 991 * blk_insert_request - insert a special request into a request queue
992 * @q: request queue where request should be inserted 992 * @q: request queue where request should be inserted
993 * @rq: request to be inserted 993 * @rq: request to be inserted
994 * @at_head: insert request at head or tail of queue 994 * @at_head: insert request at head or tail of queue
995 * @data: private data 995 * @data: private data
996 * 996 *
997 * Description: 997 * Description:
998 * Many block devices need to execute commands asynchronously, so they don't 998 * Many block devices need to execute commands asynchronously, so they don't
999 * block the whole kernel from preemption during request execution. This is 999 * block the whole kernel from preemption during request execution. This is
1000 * accomplished normally by inserting artificial requests tagged as 1000 * accomplished normally by inserting artificial requests tagged as
1001 * REQ_TYPE_SPECIAL into the corresponding request queue, and letting them 1001 * REQ_TYPE_SPECIAL into the corresponding request queue, and letting them
1002 * be scheduled for actual execution by the request queue. 1002 * be scheduled for actual execution by the request queue.
1003 * 1003 *
1004 * We have the option of inserting at the head or the tail of the queue. 1004 * We have the option of inserting at the head or the tail of the queue.
1005 * Typically we use the tail for new ioctls and so forth. We use the head 1005 * Typically we use the tail for new ioctls and so forth. We use the head
1006 * of the queue for things like a QUEUE_FULL message from a device, or a 1006 * of the queue for things like a QUEUE_FULL message from a device, or a
1007 * host that is unable to accept a particular command. 1007 * host that is unable to accept a particular command.
1008 */ 1008 */
1009 void blk_insert_request(struct request_queue *q, struct request *rq, 1009 void blk_insert_request(struct request_queue *q, struct request *rq,
1010 int at_head, void *data) 1010 int at_head, void *data)
1011 { 1011 {
1012 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; 1012 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
1013 unsigned long flags; 1013 unsigned long flags;
1014 1014
1015 /* 1015 /*
1016 * tell I/O scheduler that this isn't a regular read/write (ie it 1016 * tell I/O scheduler that this isn't a regular read/write (ie it
1017 * must not attempt merges on this) and that it acts as a soft 1017 * must not attempt merges on this) and that it acts as a soft
1018 * barrier 1018 * barrier
1019 */ 1019 */
1020 rq->cmd_type = REQ_TYPE_SPECIAL; 1020 rq->cmd_type = REQ_TYPE_SPECIAL;
1021 1021
1022 rq->special = data; 1022 rq->special = data;
1023 1023
1024 spin_lock_irqsave(q->queue_lock, flags); 1024 spin_lock_irqsave(q->queue_lock, flags);
1025 1025
1026 /* 1026 /*
1027 * If command is tagged, release the tag 1027 * If command is tagged, release the tag
1028 */ 1028 */
1029 if (blk_rq_tagged(rq)) 1029 if (blk_rq_tagged(rq))
1030 blk_queue_end_tag(q, rq); 1030 blk_queue_end_tag(q, rq);
1031 1031
1032 drive_stat_acct(rq, 1); 1032 drive_stat_acct(rq, 1);
1033 __elv_add_request(q, rq, where, 0); 1033 __elv_add_request(q, rq, where, 0);
1034 __blk_run_queue(q); 1034 __blk_run_queue(q);
1035 spin_unlock_irqrestore(q->queue_lock, flags); 1035 spin_unlock_irqrestore(q->queue_lock, flags);
1036 } 1036 }
1037 EXPORT_SYMBOL(blk_insert_request); 1037 EXPORT_SYMBOL(blk_insert_request);
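
A minimal sketch of the call described above, assuming a driver wants its private command scheduled ahead of normal I/O; my_issue_special() and drv_data are illustrative names only.

/* Hedged sketch: queue a driver-private command at the head of the queue. */
static int my_issue_special(struct request_queue *q, void *drv_data)
{
	struct request *rq = blk_get_request(q, WRITE, GFP_KERNEL);

	if (!rq)
		return -ENOMEM;

	/* blk_insert_request() tags it REQ_TYPE_SPECIAL, stores drv_data in
	 * rq->special and runs the queue */
	blk_insert_request(q, rq, 1, drv_data);
	return 0;
}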
1038 1038
1039 /* 1039 /*
1040 * add-request adds a request to the linked list. 1040 * add-request adds a request to the linked list.
1041 * queue lock is held and interrupts disabled, as we muck with the 1041 * queue lock is held and interrupts disabled, as we muck with the
1042 * request queue list. 1042 * request queue list.
1043 */ 1043 */
1044 static inline void add_request(struct request_queue *q, struct request *req) 1044 static inline void add_request(struct request_queue *q, struct request *req)
1045 { 1045 {
1046 drive_stat_acct(req, 1); 1046 drive_stat_acct(req, 1);
1047 1047
1048 /* 1048 /*
1049 * elevator indicated where it wants this request to be 1049 * elevator indicated where it wants this request to be
1050 * inserted at elevator_merge time 1050 * inserted at elevator_merge time
1051 */ 1051 */
1052 __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); 1052 __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
1053 } 1053 }
1054 1054
1055 static void part_round_stats_single(int cpu, struct hd_struct *part, 1055 static void part_round_stats_single(int cpu, struct hd_struct *part,
1056 unsigned long now) 1056 unsigned long now)
1057 { 1057 {
1058 if (now == part->stamp) 1058 if (now == part->stamp)
1059 return; 1059 return;
1060 1060
1061 if (part_in_flight(part)) { 1061 if (part_in_flight(part)) {
1062 __part_stat_add(cpu, part, time_in_queue, 1062 __part_stat_add(cpu, part, time_in_queue,
1063 part_in_flight(part) * (now - part->stamp)); 1063 part_in_flight(part) * (now - part->stamp));
1064 __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); 1064 __part_stat_add(cpu, part, io_ticks, (now - part->stamp));
1065 } 1065 }
1066 part->stamp = now; 1066 part->stamp = now;
1067 } 1067 }
1068 1068
1069 /** 1069 /**
1070 * part_round_stats() - Round off the performance stats on a struct disk_stats. 1070 * part_round_stats() - Round off the performance stats on a struct disk_stats.
1071 * @cpu: cpu number for stats access 1071 * @cpu: cpu number for stats access
1072 * @part: target partition 1072 * @part: target partition
1073 * 1073 *
1074 * The average IO queue length and utilisation statistics are maintained 1074 * The average IO queue length and utilisation statistics are maintained
1075 * by observing the current state of the queue length and the amount of 1075 * by observing the current state of the queue length and the amount of
1076 * time it has been in this state for. 1076 * time it has been in this state for.
1077 * 1077 *
1078 * Normally, that accounting is done on IO completion, but that can result 1078 * Normally, that accounting is done on IO completion, but that can result
1079 * in more than a second's worth of IO being accounted for within any one 1079 * in more than a second's worth of IO being accounted for within any one
1080 * second, leading to >100% utilisation. To deal with that, we call this 1080 * second, leading to >100% utilisation. To deal with that, we call this
1081 * function to do a round-off before returning the results when reading 1081 * function to do a round-off before returning the results when reading
1082 * /proc/diskstats. This accounts immediately for all queue usage up to 1082 * /proc/diskstats. This accounts immediately for all queue usage up to
1083 * the current jiffies and restarts the counters again. 1083 * the current jiffies and restarts the counters again.
1084 */ 1084 */
1085 void part_round_stats(int cpu, struct hd_struct *part) 1085 void part_round_stats(int cpu, struct hd_struct *part)
1086 { 1086 {
1087 unsigned long now = jiffies; 1087 unsigned long now = jiffies;
1088 1088
1089 if (part->partno) 1089 if (part->partno)
1090 part_round_stats_single(cpu, &part_to_disk(part)->part0, now); 1090 part_round_stats_single(cpu, &part_to_disk(part)->part0, now);
1091 part_round_stats_single(cpu, part, now); 1091 part_round_stats_single(cpu, part, now);
1092 } 1092 }
1093 EXPORT_SYMBOL_GPL(part_round_stats); 1093 EXPORT_SYMBOL_GPL(part_round_stats);
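
As a worked example of the rounding arithmetic above (numbers assumed for illustration): if a partition last had its stats stamped 10 jiffies ago and currently has 3 requests in flight, part_round_stats() adds 3 * 10 = 30 to time_in_queue and 10 to io_ticks, then resets part->stamp to the current jiffies so the next interval starts from zero.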
1094 1094
1095 /* 1095 /*
1096 * queue lock must be held 1096 * queue lock must be held
1097 */ 1097 */
1098 void __blk_put_request(struct request_queue *q, struct request *req) 1098 void __blk_put_request(struct request_queue *q, struct request *req)
1099 { 1099 {
1100 if (unlikely(!q)) 1100 if (unlikely(!q))
1101 return; 1101 return;
1102 if (unlikely(--req->ref_count)) 1102 if (unlikely(--req->ref_count))
1103 return; 1103 return;
1104 1104
1105 elv_completed_request(q, req); 1105 elv_completed_request(q, req);
1106 1106
1107 /* this is a bio leak */ 1107 /* this is a bio leak */
1108 WARN_ON(req->bio != NULL); 1108 WARN_ON(req->bio != NULL);
1109 1109
1110 /* 1110 /*
1111 * Request may not have originated from ll_rw_blk. If not, 1111 * Request may not have originated from ll_rw_blk. If not,
1112 * it didn't come out of our reserved rq pools 1112 * it didn't come out of our reserved rq pools
1113 */ 1113 */
1114 if (req->cmd_flags & REQ_ALLOCED) { 1114 if (req->cmd_flags & REQ_ALLOCED) {
1115 int is_sync = rq_is_sync(req) != 0; 1115 int is_sync = rq_is_sync(req) != 0;
1116 int priv = req->cmd_flags & REQ_ELVPRIV; 1116 int priv = req->cmd_flags & REQ_ELVPRIV;
1117 1117
1118 BUG_ON(!list_empty(&req->queuelist)); 1118 BUG_ON(!list_empty(&req->queuelist));
1119 BUG_ON(!hlist_unhashed(&req->hash)); 1119 BUG_ON(!hlist_unhashed(&req->hash));
1120 1120
1121 blk_free_request(q, req); 1121 blk_free_request(q, req);
1122 freed_request(q, is_sync, priv); 1122 freed_request(q, is_sync, priv);
1123 } 1123 }
1124 } 1124 }
1125 EXPORT_SYMBOL_GPL(__blk_put_request); 1125 EXPORT_SYMBOL_GPL(__blk_put_request);
1126 1126
1127 void blk_put_request(struct request *req) 1127 void blk_put_request(struct request *req)
1128 { 1128 {
1129 unsigned long flags; 1129 unsigned long flags;
1130 struct request_queue *q = req->q; 1130 struct request_queue *q = req->q;
1131 1131
1132 spin_lock_irqsave(q->queue_lock, flags); 1132 spin_lock_irqsave(q->queue_lock, flags);
1133 __blk_put_request(q, req); 1133 __blk_put_request(q, req);
1134 spin_unlock_irqrestore(q->queue_lock, flags); 1134 spin_unlock_irqrestore(q->queue_lock, flags);
1135 } 1135 }
1136 EXPORT_SYMBOL(blk_put_request); 1136 EXPORT_SYMBOL(blk_put_request);
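
A hedged sketch of the usual get/put pairing for the reference counting above: the request obtained from blk_get_request() is dropped with blk_put_request() once the (here synchronous) execution finishes. The command setup is elided and my_send_cmd() is an illustrative name.

/* Illustrative only: synchronous use of the request get/put pairing. */
static int my_send_cmd(struct request_queue *q, struct gendisk *disk)
{
	struct request *rq = blk_get_request(q, READ, GFP_KERNEL);
	int err;

	if (!rq)
		return -ENOMEM;

	rq->cmd_type = REQ_TYPE_BLOCK_PC;
	rq->timeout = 30 * HZ;
	/* rq->cmd[], rq->cmd_len etc. would be filled in here */

	err = blk_execute_rq(q, disk, rq, 0);	/* waits for completion */
	blk_put_request(rq);			/* drops the allocation ref */
	return err;
}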
1137 1137
1138 void init_request_from_bio(struct request *req, struct bio *bio) 1138 void init_request_from_bio(struct request *req, struct bio *bio)
1139 { 1139 {
1140 req->cpu = bio->bi_comp_cpu; 1140 req->cpu = bio->bi_comp_cpu;
1141 req->cmd_type = REQ_TYPE_FS; 1141 req->cmd_type = REQ_TYPE_FS;
1142 1142
1143 /* 1143 /*
1144 * Inherit FAILFAST from bio (for read-ahead, and explicit 1144 * Inherit FAILFAST from bio (for read-ahead, and explicit
1145 * FAILFAST). FAILFAST flags are identical for req and bio. 1145 * FAILFAST). FAILFAST flags are identical for req and bio.
1146 */ 1146 */
1147 if (bio_rw_flagged(bio, BIO_RW_AHEAD)) 1147 if (bio_rw_flagged(bio, BIO_RW_AHEAD))
1148 req->cmd_flags |= REQ_FAILFAST_MASK; 1148 req->cmd_flags |= REQ_FAILFAST_MASK;
1149 else 1149 else
1150 req->cmd_flags |= bio->bi_rw & REQ_FAILFAST_MASK; 1150 req->cmd_flags |= bio->bi_rw & REQ_FAILFAST_MASK;
1151 1151
1152 if (bio_rw_flagged(bio, BIO_RW_DISCARD)) 1152 if (bio_rw_flagged(bio, BIO_RW_DISCARD))
1153 req->cmd_flags |= REQ_DISCARD; 1153 req->cmd_flags |= REQ_DISCARD;
1154 if (bio_rw_flagged(bio, BIO_RW_BARRIER)) 1154 if (bio_rw_flagged(bio, BIO_RW_BARRIER))
1155 req->cmd_flags |= REQ_HARDBARRIER; 1155 req->cmd_flags |= REQ_HARDBARRIER;
1156 if (bio_rw_flagged(bio, BIO_RW_SYNCIO)) 1156 if (bio_rw_flagged(bio, BIO_RW_SYNCIO))
1157 req->cmd_flags |= REQ_RW_SYNC; 1157 req->cmd_flags |= REQ_RW_SYNC;
1158 if (bio_rw_flagged(bio, BIO_RW_META)) 1158 if (bio_rw_flagged(bio, BIO_RW_META))
1159 req->cmd_flags |= REQ_RW_META; 1159 req->cmd_flags |= REQ_RW_META;
1160 if (bio_rw_flagged(bio, BIO_RW_NOIDLE)) 1160 if (bio_rw_flagged(bio, BIO_RW_NOIDLE))
1161 req->cmd_flags |= REQ_NOIDLE; 1161 req->cmd_flags |= REQ_NOIDLE;
1162 1162
1163 req->errors = 0; 1163 req->errors = 0;
1164 req->__sector = bio->bi_sector; 1164 req->__sector = bio->bi_sector;
1165 req->ioprio = bio_prio(bio); 1165 req->ioprio = bio_prio(bio);
1166 blk_rq_bio_prep(req->q, req, bio); 1166 blk_rq_bio_prep(req->q, req, bio);
1167 } 1167 }
1168 1168
1169 /* 1169 /*
1170 * Only disable plugging for non-rotational devices if the device does 1170 * Only disable plugging for non-rotational devices if the device does
1171 * tagging as well, otherwise we do need the proper merging 1171 * tagging as well, otherwise we do need the proper merging
1172 */ 1172 */
1173 static inline bool queue_should_plug(struct request_queue *q) 1173 static inline bool queue_should_plug(struct request_queue *q)
1174 { 1174 {
1175 return !(blk_queue_nonrot(q) && blk_queue_tagged(q)); 1175 return !(blk_queue_nonrot(q) && blk_queue_tagged(q));
1176 } 1176 }
1177 1177
1178 static int __make_request(struct request_queue *q, struct bio *bio) 1178 static int __make_request(struct request_queue *q, struct bio *bio)
1179 { 1179 {
1180 struct request *req; 1180 struct request *req;
1181 int el_ret; 1181 int el_ret;
1182 unsigned int bytes = bio->bi_size; 1182 unsigned int bytes = bio->bi_size;
1183 const unsigned short prio = bio_prio(bio); 1183 const unsigned short prio = bio_prio(bio);
1184 const bool sync = bio_rw_flagged(bio, BIO_RW_SYNCIO); 1184 const bool sync = bio_rw_flagged(bio, BIO_RW_SYNCIO);
1185 const bool unplug = bio_rw_flagged(bio, BIO_RW_UNPLUG); 1185 const bool unplug = bio_rw_flagged(bio, BIO_RW_UNPLUG);
1186 const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK; 1186 const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK;
1187 int rw_flags; 1187 int rw_flags;
1188 1188
1189 if (bio_rw_flagged(bio, BIO_RW_BARRIER) && 1189 if (bio_rw_flagged(bio, BIO_RW_BARRIER) &&
1190 (q->next_ordered == QUEUE_ORDERED_NONE)) { 1190 (q->next_ordered == QUEUE_ORDERED_NONE)) {
1191 bio_endio(bio, -EOPNOTSUPP); 1191 bio_endio(bio, -EOPNOTSUPP);
1192 return 0; 1192 return 0;
1193 } 1193 }
1194 /* 1194 /*
1195 * low level driver can indicate that it wants pages above a 1195 * low level driver can indicate that it wants pages above a
1196 * certain limit bounced to low memory (ie for highmem, or even 1196 * certain limit bounced to low memory (ie for highmem, or even
1197 * ISA dma in theory) 1197 * ISA dma in theory)
1198 */ 1198 */
1199 blk_queue_bounce(q, &bio); 1199 blk_queue_bounce(q, &bio);
1200 1200
1201 spin_lock_irq(q->queue_lock); 1201 spin_lock_irq(q->queue_lock);
1202 1202
1203 if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER)) || elv_queue_empty(q)) 1203 if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER)) || elv_queue_empty(q))
1204 goto get_rq; 1204 goto get_rq;
1205 1205
1206 el_ret = elv_merge(q, &req, bio); 1206 el_ret = elv_merge(q, &req, bio);
1207 switch (el_ret) { 1207 switch (el_ret) {
1208 case ELEVATOR_BACK_MERGE: 1208 case ELEVATOR_BACK_MERGE:
1209 BUG_ON(!rq_mergeable(req)); 1209 BUG_ON(!rq_mergeable(req));
1210 1210
1211 if (!ll_back_merge_fn(q, req, bio)) 1211 if (!ll_back_merge_fn(q, req, bio))
1212 break; 1212 break;
1213 1213
1214 trace_block_bio_backmerge(q, bio); 1214 trace_block_bio_backmerge(q, bio);
1215 1215
1216 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) 1216 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
1217 blk_rq_set_mixed_merge(req); 1217 blk_rq_set_mixed_merge(req);
1218 1218
1219 req->biotail->bi_next = bio; 1219 req->biotail->bi_next = bio;
1220 req->biotail = bio; 1220 req->biotail = bio;
1221 req->__data_len += bytes; 1221 req->__data_len += bytes;
1222 req->ioprio = ioprio_best(req->ioprio, prio); 1222 req->ioprio = ioprio_best(req->ioprio, prio);
1223 if (!blk_rq_cpu_valid(req)) 1223 if (!blk_rq_cpu_valid(req))
1224 req->cpu = bio->bi_comp_cpu; 1224 req->cpu = bio->bi_comp_cpu;
1225 drive_stat_acct(req, 0); 1225 drive_stat_acct(req, 0);
1226 elv_bio_merged(q, req, bio); 1226 elv_bio_merged(q, req, bio);
1227 if (!attempt_back_merge(q, req)) 1227 if (!attempt_back_merge(q, req))
1228 elv_merged_request(q, req, el_ret); 1228 elv_merged_request(q, req, el_ret);
1229 goto out; 1229 goto out;
1230 1230
1231 case ELEVATOR_FRONT_MERGE: 1231 case ELEVATOR_FRONT_MERGE:
1232 BUG_ON(!rq_mergeable(req)); 1232 BUG_ON(!rq_mergeable(req));
1233 1233
1234 if (!ll_front_merge_fn(q, req, bio)) 1234 if (!ll_front_merge_fn(q, req, bio))
1235 break; 1235 break;
1236 1236
1237 trace_block_bio_frontmerge(q, bio); 1237 trace_block_bio_frontmerge(q, bio);
1238 1238
1239 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) { 1239 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) {
1240 blk_rq_set_mixed_merge(req); 1240 blk_rq_set_mixed_merge(req);
1241 req->cmd_flags &= ~REQ_FAILFAST_MASK; 1241 req->cmd_flags &= ~REQ_FAILFAST_MASK;
1242 req->cmd_flags |= ff; 1242 req->cmd_flags |= ff;
1243 } 1243 }
1244 1244
1245 bio->bi_next = req->bio; 1245 bio->bi_next = req->bio;
1246 req->bio = bio; 1246 req->bio = bio;
1247 1247
1248 /* 1248 /*
1249 * may not be valid. If the low level driver said 1249 * may not be valid. If the low level driver said
1250 * it didn't need a bounce buffer then it better 1250 * it didn't need a bounce buffer then it better
1251 * not touch req->buffer either... 1251 * not touch req->buffer either...
1252 */ 1252 */
1253 req->buffer = bio_data(bio); 1253 req->buffer = bio_data(bio);
1254 req->__sector = bio->bi_sector; 1254 req->__sector = bio->bi_sector;
1255 req->__data_len += bytes; 1255 req->__data_len += bytes;
1256 req->ioprio = ioprio_best(req->ioprio, prio); 1256 req->ioprio = ioprio_best(req->ioprio, prio);
1257 if (!blk_rq_cpu_valid(req)) 1257 if (!blk_rq_cpu_valid(req))
1258 req->cpu = bio->bi_comp_cpu; 1258 req->cpu = bio->bi_comp_cpu;
1259 drive_stat_acct(req, 0); 1259 drive_stat_acct(req, 0);
1260 elv_bio_merged(q, req, bio); 1260 elv_bio_merged(q, req, bio);
1261 if (!attempt_front_merge(q, req)) 1261 if (!attempt_front_merge(q, req))
1262 elv_merged_request(q, req, el_ret); 1262 elv_merged_request(q, req, el_ret);
1263 goto out; 1263 goto out;
1264 1264
1265 /* ELV_NO_MERGE: elevator says don't/can't merge. */ 1265 /* ELV_NO_MERGE: elevator says don't/can't merge. */
1266 default: 1266 default:
1267 ; 1267 ;
1268 } 1268 }
1269 1269
1270 get_rq: 1270 get_rq:
1271 /* 1271 /*
1272 * This sync check and mask will be re-done in init_request_from_bio(), 1272 * This sync check and mask will be re-done in init_request_from_bio(),
1273 * but we need to set it earlier to expose the sync flag to the 1273 * but we need to set it earlier to expose the sync flag to the
1274 * rq allocator and io schedulers. 1274 * rq allocator and io schedulers.
1275 */ 1275 */
1276 rw_flags = bio_data_dir(bio); 1276 rw_flags = bio_data_dir(bio);
1277 if (sync) 1277 if (sync)
1278 rw_flags |= REQ_RW_SYNC; 1278 rw_flags |= REQ_RW_SYNC;
1279 1279
1280 /* 1280 /*
1281 * Grab a free request. This might sleep but cannot fail. 1281 * Grab a free request. This might sleep but cannot fail.
1282 * Returns with the queue unlocked. 1282 * Returns with the queue unlocked.
1283 */ 1283 */
1284 req = get_request_wait(q, rw_flags, bio); 1284 req = get_request_wait(q, rw_flags, bio);
1285 1285
1286 /* 1286 /*
1287 * After dropping the lock and possibly sleeping here, our request 1287 * After dropping the lock and possibly sleeping here, our request
1288 * may now be mergeable after it had proven unmergeable (above). 1288 * may now be mergeable after it had proven unmergeable (above).
1289 * We don't worry about that case for efficiency. It won't happen 1289 * We don't worry about that case for efficiency. It won't happen
1290 * often, and the elevators are able to handle it. 1290 * often, and the elevators are able to handle it.
1291 */ 1291 */
1292 init_request_from_bio(req, bio); 1292 init_request_from_bio(req, bio);
1293 1293
1294 spin_lock_irq(q->queue_lock); 1294 spin_lock_irq(q->queue_lock);
1295 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || 1295 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
1296 bio_flagged(bio, BIO_CPU_AFFINE)) 1296 bio_flagged(bio, BIO_CPU_AFFINE))
1297 req->cpu = blk_cpu_to_group(smp_processor_id()); 1297 req->cpu = blk_cpu_to_group(smp_processor_id());
1298 if (queue_should_plug(q) && elv_queue_empty(q)) 1298 if (queue_should_plug(q) && elv_queue_empty(q))
1299 blk_plug_device(q); 1299 blk_plug_device(q);
1300 add_request(q, req); 1300 add_request(q, req);
1301 out: 1301 out:
1302 if (unplug || !queue_should_plug(q)) 1302 if (unplug || !queue_should_plug(q))
1303 __generic_unplug_device(q); 1303 __generic_unplug_device(q);
1304 spin_unlock_irq(q->queue_lock); 1304 spin_unlock_irq(q->queue_lock);
1305 return 0; 1305 return 0;
1306 } 1306 }
1307 1307
1308 /* 1308 /*
1309 * If bio->bi_bdev is a partition, remap the location 1309 * If bio->bi_bdev is a partition, remap the location
1310 */ 1310 */
1311 static inline void blk_partition_remap(struct bio *bio) 1311 static inline void blk_partition_remap(struct bio *bio)
1312 { 1312 {
1313 struct block_device *bdev = bio->bi_bdev; 1313 struct block_device *bdev = bio->bi_bdev;
1314 1314
1315 if (bio_sectors(bio) && bdev != bdev->bd_contains) { 1315 if (bio_sectors(bio) && bdev != bdev->bd_contains) {
1316 struct hd_struct *p = bdev->bd_part; 1316 struct hd_struct *p = bdev->bd_part;
1317 1317
1318 bio->bi_sector += p->start_sect; 1318 bio->bi_sector += p->start_sect;
1319 bio->bi_bdev = bdev->bd_contains; 1319 bio->bi_bdev = bdev->bd_contains;
1320 1320
1321 trace_block_remap(bdev_get_queue(bio->bi_bdev), bio, 1321 trace_block_remap(bdev_get_queue(bio->bi_bdev), bio,
1322 bdev->bd_dev, 1322 bdev->bd_dev,
1323 bio->bi_sector - p->start_sect); 1323 bio->bi_sector - p->start_sect);
1324 } 1324 }
1325 } 1325 }
1326 1326
1327 static void handle_bad_sector(struct bio *bio) 1327 static void handle_bad_sector(struct bio *bio)
1328 { 1328 {
1329 char b[BDEVNAME_SIZE]; 1329 char b[BDEVNAME_SIZE];
1330 1330
1331 printk(KERN_INFO "attempt to access beyond end of device\n"); 1331 printk(KERN_INFO "attempt to access beyond end of device\n");
1332 printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", 1332 printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",
1333 bdevname(bio->bi_bdev, b), 1333 bdevname(bio->bi_bdev, b),
1334 bio->bi_rw, 1334 bio->bi_rw,
1335 (unsigned long long)bio->bi_sector + bio_sectors(bio), 1335 (unsigned long long)bio->bi_sector + bio_sectors(bio),
1336 (long long)(bio->bi_bdev->bd_inode->i_size >> 9)); 1336 (long long)(bio->bi_bdev->bd_inode->i_size >> 9));
1337 1337
1338 set_bit(BIO_EOF, &bio->bi_flags); 1338 set_bit(BIO_EOF, &bio->bi_flags);
1339 } 1339 }
1340 1340
1341 #ifdef CONFIG_FAIL_MAKE_REQUEST 1341 #ifdef CONFIG_FAIL_MAKE_REQUEST
1342 1342
1343 static DECLARE_FAULT_ATTR(fail_make_request); 1343 static DECLARE_FAULT_ATTR(fail_make_request);
1344 1344
1345 static int __init setup_fail_make_request(char *str) 1345 static int __init setup_fail_make_request(char *str)
1346 { 1346 {
1347 return setup_fault_attr(&fail_make_request, str); 1347 return setup_fault_attr(&fail_make_request, str);
1348 } 1348 }
1349 __setup("fail_make_request=", setup_fail_make_request); 1349 __setup("fail_make_request=", setup_fail_make_request);
1350 1350
1351 static int should_fail_request(struct bio *bio) 1351 static int should_fail_request(struct bio *bio)
1352 { 1352 {
1353 struct hd_struct *part = bio->bi_bdev->bd_part; 1353 struct hd_struct *part = bio->bi_bdev->bd_part;
1354 1354
1355 if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail) 1355 if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail)
1356 return should_fail(&fail_make_request, bio->bi_size); 1356 return should_fail(&fail_make_request, bio->bi_size);
1357 1357
1358 return 0; 1358 return 0;
1359 } 1359 }
1360 1360
1361 static int __init fail_make_request_debugfs(void) 1361 static int __init fail_make_request_debugfs(void)
1362 { 1362 {
1363 return init_fault_attr_dentries(&fail_make_request, 1363 return init_fault_attr_dentries(&fail_make_request,
1364 "fail_make_request"); 1364 "fail_make_request");
1365 } 1365 }
1366 1366
1367 late_initcall(fail_make_request_debugfs); 1367 late_initcall(fail_make_request_debugfs);
1368 1368
1369 #else /* CONFIG_FAIL_MAKE_REQUEST */ 1369 #else /* CONFIG_FAIL_MAKE_REQUEST */
1370 1370
1371 static inline int should_fail_request(struct bio *bio) 1371 static inline int should_fail_request(struct bio *bio)
1372 { 1372 {
1373 return 0; 1373 return 0;
1374 } 1374 }
1375 1375
1376 #endif /* CONFIG_FAIL_MAKE_REQUEST */ 1376 #endif /* CONFIG_FAIL_MAKE_REQUEST */
1377 1377
1378 /* 1378 /*
1379 * Check whether this bio extends beyond the end of the device. 1379 * Check whether this bio extends beyond the end of the device.
1380 */ 1380 */
1381 static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors) 1381 static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
1382 { 1382 {
1383 sector_t maxsector; 1383 sector_t maxsector;
1384 1384
1385 if (!nr_sectors) 1385 if (!nr_sectors)
1386 return 0; 1386 return 0;
1387 1387
1388 /* Test device or partition size, when known. */ 1388 /* Test device or partition size, when known. */
1389 maxsector = bio->bi_bdev->bd_inode->i_size >> 9; 1389 maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
1390 if (maxsector) { 1390 if (maxsector) {
1391 sector_t sector = bio->bi_sector; 1391 sector_t sector = bio->bi_sector;
1392 1392
1393 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { 1393 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
1394 /* 1394 /*
1395 * This may well happen - the kernel calls bread() 1395 * This may well happen - the kernel calls bread()
1396 * without checking the size of the device, e.g., when 1396 * without checking the size of the device, e.g., when
1397 * mounting a device. 1397 * mounting a device.
1398 */ 1398 */
1399 handle_bad_sector(bio); 1399 handle_bad_sector(bio);
1400 return 1; 1400 return 1;
1401 } 1401 }
1402 } 1402 }
1403 1403
1404 return 0; 1404 return 0;
1405 } 1405 }
1406 1406
1407 /** 1407 /**
1408 * generic_make_request - hand a buffer to its device driver for I/O 1408 * generic_make_request - hand a buffer to its device driver for I/O
1409 * @bio: The bio describing the location in memory and on the device. 1409 * @bio: The bio describing the location in memory and on the device.
1410 * 1410 *
1411 * generic_make_request() is used to make I/O requests of block 1411 * generic_make_request() is used to make I/O requests of block
1412 * devices. It is passed a &struct bio, which describes the I/O that needs 1412 * devices. It is passed a &struct bio, which describes the I/O that needs
1413 * to be done. 1413 * to be done.
1414 * 1414 *
1415 * generic_make_request() does not return any status. The 1415 * generic_make_request() does not return any status. The
1416 * success/failure status of the request, along with notification of 1416 * success/failure status of the request, along with notification of
1417 * completion, is delivered asynchronously through the bio->bi_end_io 1417 * completion, is delivered asynchronously through the bio->bi_end_io
1418 * function described (one day) elsewhere. 1418 * function described (one day) elsewhere.
1419 * 1419 *
1420 * The caller of generic_make_request must make sure that bi_io_vec 1420 * The caller of generic_make_request must make sure that bi_io_vec
1421 * are set to describe the memory buffer, and that bi_dev and bi_sector are 1421 * are set to describe the memory buffer, and that bi_dev and bi_sector are
1422 * set to describe the device address, and the 1422 * set to describe the device address, and the
1423 * bi_end_io and optionally bi_private are set to describe how 1423 * bi_end_io and optionally bi_private are set to describe how
1424 * completion notification should be signaled. 1424 * completion notification should be signaled.
1425 * 1425 *
1426 * generic_make_request and the drivers it calls may use bi_next if this 1426 * generic_make_request and the drivers it calls may use bi_next if this
1427 * bio happens to be merged with someone else, and may change bi_dev and 1427 * bio happens to be merged with someone else, and may change bi_dev and
1428 * bi_sector for remaps as it sees fit. So the values of these fields 1428 * bi_sector for remaps as it sees fit. So the values of these fields
1429 * should NOT be depended on after the call to generic_make_request. 1429 * should NOT be depended on after the call to generic_make_request.
1430 */ 1430 */
1431 static inline void __generic_make_request(struct bio *bio) 1431 static inline void __generic_make_request(struct bio *bio)
1432 { 1432 {
1433 struct request_queue *q; 1433 struct request_queue *q;
1434 sector_t old_sector; 1434 sector_t old_sector;
1435 int ret, nr_sectors = bio_sectors(bio); 1435 int ret, nr_sectors = bio_sectors(bio);
1436 dev_t old_dev; 1436 dev_t old_dev;
1437 int err = -EIO; 1437 int err = -EIO;
1438 1438
1439 might_sleep(); 1439 might_sleep();
1440 1440
1441 if (bio_check_eod(bio, nr_sectors)) 1441 if (bio_check_eod(bio, nr_sectors))
1442 goto end_io; 1442 goto end_io;
1443 1443
1444 /* 1444 /*
1445 * Resolve the mapping until finished. (drivers are 1445 * Resolve the mapping until finished. (drivers are
1446 * still free to implement/resolve their own stacking 1446 * still free to implement/resolve their own stacking
1447 * by explicitly returning 0) 1447 * by explicitly returning 0)
1448 * 1448 *
1449 * NOTE: we don't repeat the blk_size check for each new device. 1449 * NOTE: we don't repeat the blk_size check for each new device.
1450 * Stacking drivers are expected to know what they are doing. 1450 * Stacking drivers are expected to know what they are doing.
1451 */ 1451 */
1452 old_sector = -1; 1452 old_sector = -1;
1453 old_dev = 0; 1453 old_dev = 0;
1454 do { 1454 do {
1455 char b[BDEVNAME_SIZE]; 1455 char b[BDEVNAME_SIZE];
1456 1456
1457 q = bdev_get_queue(bio->bi_bdev); 1457 q = bdev_get_queue(bio->bi_bdev);
1458 if (unlikely(!q)) { 1458 if (unlikely(!q)) {
1459 printk(KERN_ERR 1459 printk(KERN_ERR
1460 "generic_make_request: Trying to access " 1460 "generic_make_request: Trying to access "
1461 "nonexistent block-device %s (%Lu)\n", 1461 "nonexistent block-device %s (%Lu)\n",
1462 bdevname(bio->bi_bdev, b), 1462 bdevname(bio->bi_bdev, b),
1463 (long long) bio->bi_sector); 1463 (long long) bio->bi_sector);
1464 goto end_io; 1464 goto end_io;
1465 } 1465 }
1466 1466
1467 if (unlikely(!bio_rw_flagged(bio, BIO_RW_DISCARD) && 1467 if (unlikely(!bio_rw_flagged(bio, BIO_RW_DISCARD) &&
1468 nr_sectors > queue_max_hw_sectors(q))) { 1468 nr_sectors > queue_max_hw_sectors(q))) {
1469 printk(KERN_ERR "bio too big device %s (%u > %u)\n", 1469 printk(KERN_ERR "bio too big device %s (%u > %u)\n",
1470 bdevname(bio->bi_bdev, b), 1470 bdevname(bio->bi_bdev, b),
1471 bio_sectors(bio), 1471 bio_sectors(bio),
1472 queue_max_hw_sectors(q)); 1472 queue_max_hw_sectors(q));
1473 goto end_io; 1473 goto end_io;
1474 } 1474 }
1475 1475
1476 if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) 1476 if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
1477 goto end_io; 1477 goto end_io;
1478 1478
1479 if (should_fail_request(bio)) 1479 if (should_fail_request(bio))
1480 goto end_io; 1480 goto end_io;
1481 1481
1482 /* 1482 /*
1483 * If this device has partitions, remap block n 1483 * If this device has partitions, remap block n
1484 * of partition p to block n+start(p) of the disk. 1484 * of partition p to block n+start(p) of the disk.
1485 */ 1485 */
1486 blk_partition_remap(bio); 1486 blk_partition_remap(bio);
1487 1487
1488 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) 1488 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio))
1489 goto end_io; 1489 goto end_io;
1490 1490
1491 if (old_sector != -1) 1491 if (old_sector != -1)
1492 trace_block_remap(q, bio, old_dev, old_sector); 1492 trace_block_remap(q, bio, old_dev, old_sector);
1493 1493
1494 old_sector = bio->bi_sector; 1494 old_sector = bio->bi_sector;
1495 old_dev = bio->bi_bdev->bd_dev; 1495 old_dev = bio->bi_bdev->bd_dev;
1496 1496
1497 if (bio_check_eod(bio, nr_sectors)) 1497 if (bio_check_eod(bio, nr_sectors))
1498 goto end_io; 1498 goto end_io;
1499 1499
1500 if (bio_rw_flagged(bio, BIO_RW_DISCARD) && 1500 if (bio_rw_flagged(bio, BIO_RW_DISCARD) &&
1501 !blk_queue_discard(q)) { 1501 !blk_queue_discard(q)) {
1502 err = -EOPNOTSUPP; 1502 err = -EOPNOTSUPP;
1503 goto end_io; 1503 goto end_io;
1504 } 1504 }
1505 1505
1506 trace_block_bio_queue(q, bio); 1506 trace_block_bio_queue(q, bio);
1507 1507
1508 ret = q->make_request_fn(q, bio); 1508 ret = q->make_request_fn(q, bio);
1509 } while (ret); 1509 } while (ret);
1510 1510
1511 return; 1511 return;
1512 1512
1513 end_io: 1513 end_io:
1514 bio_endio(bio, err); 1514 bio_endio(bio, err);
1515 } 1515 }
1516 1516
1517 /* 1517 /*
1518 * We only want one ->make_request_fn to be active at a time, 1518 * We only want one ->make_request_fn to be active at a time,
1519 * else stack usage with stacked devices could be a problem. 1519 * else stack usage with stacked devices could be a problem.
1520 * So use current->bio_list to keep a list of requests 1520 * So use current->bio_list to keep a list of requests
1521 * submitted by a make_request_fn function. 1521 * submitted by a make_request_fn function.
1522 * current->bio_list is also used as a flag to say if 1522 * current->bio_list is also used as a flag to say if
1523 * generic_make_request is currently active in this task or not. 1523 * generic_make_request is currently active in this task or not.
1524 * If it is NULL, then no make_request is active. If it is non-NULL, 1524 * If it is NULL, then no make_request is active. If it is non-NULL,
1525 * then a make_request is active, and new requests should be added 1525 * then a make_request is active, and new requests should be added
1526 * at the tail 1526 * at the tail
1527 */ 1527 */
1528 void generic_make_request(struct bio *bio) 1528 void generic_make_request(struct bio *bio)
1529 { 1529 {
1530 struct bio_list bio_list_on_stack; 1530 struct bio_list bio_list_on_stack;
1531 1531
1532 if (current->bio_list) { 1532 if (current->bio_list) {
1533 /* make_request is active */ 1533 /* make_request is active */
1534 bio_list_add(current->bio_list, bio); 1534 bio_list_add(current->bio_list, bio);
1535 return; 1535 return;
1536 } 1536 }
1537 /* following loop may be a bit non-obvious, and so deserves some 1537 /* following loop may be a bit non-obvious, and so deserves some
1538 * explanation. 1538 * explanation.
1539 * Before entering the loop, bio->bi_next is NULL (as all callers 1539 * Before entering the loop, bio->bi_next is NULL (as all callers
1540 * ensure that) so we have a list with a single bio. 1540 * ensure that) so we have a list with a single bio.
1541 * We pretend that we have just taken it off a longer list, so 1541 * We pretend that we have just taken it off a longer list, so
1542 * we assign bio_list to a pointer to the bio_list_on_stack, 1542 * we assign bio_list to a pointer to the bio_list_on_stack,
1543 * thus initialising the bio_list of new bios to be 1543 * thus initialising the bio_list of new bios to be
1544 * added. __generic_make_request may indeed add some more bios 1544 * added. __generic_make_request may indeed add some more bios
1545 * through a recursive call to generic_make_request. If it 1545 * through a recursive call to generic_make_request. If it
1546 * did, we find a non-NULL value in bio_list and re-enter the loop 1546 * did, we find a non-NULL value in bio_list and re-enter the loop
1547 * from the top. In this case we really did just take the bio 1547 * from the top. In this case we really did just take the bio
1548 * off the top of the list (no pretending) and so remove it from 1548 * off the top of the list (no pretending) and so remove it from
1549 * bio_list, and call into __generic_make_request again. 1549 * bio_list, and call into __generic_make_request again.
1550 * 1550 *
1551 * The loop was structured like this to make only one call to 1551 * The loop was structured like this to make only one call to
1552 * __generic_make_request (which is important as it is large and 1552 * __generic_make_request (which is important as it is large and
1553 * inlined) and to keep the structure simple. 1553 * inlined) and to keep the structure simple.
1554 */ 1554 */
1555 BUG_ON(bio->bi_next); 1555 BUG_ON(bio->bi_next);
1556 bio_list_init(&bio_list_on_stack); 1556 bio_list_init(&bio_list_on_stack);
1557 current->bio_list = &bio_list_on_stack; 1557 current->bio_list = &bio_list_on_stack;
1558 do { 1558 do {
1559 __generic_make_request(bio); 1559 __generic_make_request(bio);
1560 bio = bio_list_pop(current->bio_list); 1560 bio = bio_list_pop(current->bio_list);
1561 } while (bio); 1561 } while (bio);
1562 current->bio_list = NULL; /* deactivate */ 1562 current->bio_list = NULL; /* deactivate */
1563 } 1563 }
1564 EXPORT_SYMBOL(generic_make_request); 1564 EXPORT_SYMBOL(generic_make_request);
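
To make the caller contract above concrete, a minimal sketch that reads one page through generic_make_request(); the completion-based waiting and the my_* names are assumptions of the example, and error-status checking is elided.

/* Hedged sketch: submit a one-page read and wait for it to complete. */
static void my_end_io(struct bio *bio, int error)
{
	complete(bio->bi_private);	/* wake the submitter */
	bio_put(bio);
}

static int my_read_page(struct block_device *bdev, sector_t sector,
			struct page *page)
{
	DECLARE_COMPLETION_ONSTACK(done);
	struct bio *bio = bio_alloc(GFP_NOIO, 1);

	if (!bio)
		return -ENOMEM;

	bio->bi_bdev = bdev;			/* device address ... */
	bio->bi_sector = sector;		/* ... and starting sector */
	bio_add_page(bio, page, PAGE_SIZE, 0);	/* bi_io_vec setup */
	bio->bi_end_io = my_end_io;
	bio->bi_private = &done;
	bio->bi_rw = READ;			/* direction; READ is 0 */

	generic_make_request(bio);
	wait_for_completion(&done);		/* signalled from bi_end_io */
	return 0;
}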
1565 1565
1566 /** 1566 /**
1567 * submit_bio - submit a bio to the block device layer for I/O 1567 * submit_bio - submit a bio to the block device layer for I/O
1568 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) 1568 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
1569 * @bio: The &struct bio which describes the I/O 1569 * @bio: The &struct bio which describes the I/O
1570 * 1570 *
1571 * submit_bio() is very similar in purpose to generic_make_request(), and 1571 * submit_bio() is very similar in purpose to generic_make_request(), and
1572 * uses that function to do most of the work. Both are fairly rough 1572 * uses that function to do most of the work. Both are fairly rough
1573 * interfaces; @bio must be set up and ready for I/O. 1573 * interfaces; @bio must be set up and ready for I/O.
1574 * 1574 *
1575 */ 1575 */
1576 void submit_bio(int rw, struct bio *bio) 1576 void submit_bio(int rw, struct bio *bio)
1577 { 1577 {
1578 int count = bio_sectors(bio); 1578 int count = bio_sectors(bio);
1579 1579
1580 bio->bi_rw |= rw; 1580 bio->bi_rw |= rw;
1581 1581
1582 /* 1582 /*
1583 * If it's a regular read/write or a barrier with data attached, 1583 * If it's a regular read/write or a barrier with data attached,
1584 * go through the normal accounting stuff before submission. 1584 * go through the normal accounting stuff before submission.
1585 */ 1585 */
1586 if (bio_has_data(bio) && !(rw & (1 << BIO_RW_DISCARD))) { 1586 if (bio_has_data(bio) && !(rw & (1 << BIO_RW_DISCARD))) {
1587 if (rw & WRITE) { 1587 if (rw & WRITE) {
1588 count_vm_events(PGPGOUT, count); 1588 count_vm_events(PGPGOUT, count);
1589 } else { 1589 } else {
1590 task_io_account_read(bio->bi_size); 1590 task_io_account_read(bio->bi_size);
1591 count_vm_events(PGPGIN, count); 1591 count_vm_events(PGPGIN, count);
1592 } 1592 }
1593 1593
1594 if (unlikely(block_dump)) { 1594 if (unlikely(block_dump)) {
1595 char b[BDEVNAME_SIZE]; 1595 char b[BDEVNAME_SIZE];
1596 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n", 1596 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
1597 current->comm, task_pid_nr(current), 1597 current->comm, task_pid_nr(current),
1598 (rw & WRITE) ? "WRITE" : "READ", 1598 (rw & WRITE) ? "WRITE" : "READ",
1599 (unsigned long long)bio->bi_sector, 1599 (unsigned long long)bio->bi_sector,
1600 bdevname(bio->bi_bdev, b)); 1600 bdevname(bio->bi_bdev, b));
1601 } 1601 }
1602 } 1602 }
1603 1603
1604 generic_make_request(bio); 1604 generic_make_request(bio);
1605 } 1605 }
1606 EXPORT_SYMBOL(submit_bio); 1606 EXPORT_SYMBOL(submit_bio);
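
A hedged fragment showing the @rw argument described above; the bios are assumed to be fully initialized as for generic_make_request(), and the wrapper function exists only for illustration.

/* Illustrative only: the rw argument carries direction and hint bits. */
static void my_submit_examples(struct bio *rd, struct bio *ra, struct bio *wr)
{
	submit_bio(READ, rd);				/* plain read */
	submit_bio(READA, ra);				/* read-ahead */
	submit_bio(WRITE | (1 << BIO_RW_SYNCIO), wr);	/* sync write */
}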
1607 1607
1608 /** 1608 /**
1609 * blk_rq_check_limits - Helper function to check a request for the queue limit 1609 * blk_rq_check_limits - Helper function to check a request for the queue limit
1610 * @q: the queue 1610 * @q: the queue
1611 * @rq: the request being checked 1611 * @rq: the request being checked
1612 * 1612 *
1613 * Description: 1613 * Description:
1614 * @rq may have been made based on weaker limitations of upper-level queues 1614 * @rq may have been made based on weaker limitations of upper-level queues
1615 * in request stacking drivers, and it may violate the limitation of @q. 1615 * in request stacking drivers, and it may violate the limitation of @q.
1616 * Since the block layer and the underlying device driver trust @rq 1616 * Since the block layer and the underlying device driver trust @rq
1617 * after it is inserted to @q, it should be checked against @q before 1617 * after it is inserted to @q, it should be checked against @q before
1618 * the insertion using this generic function. 1618 * the insertion using this generic function.
1619 * 1619 *
1620 * This function should also be useful for request stacking drivers 1620 * This function should also be useful for request stacking drivers
1621 * in some cases below, so export this function. 1621 * in some cases below, so export this function.
1622 * Request stacking drivers like request-based dm may change the queue 1622 * Request stacking drivers like request-based dm may change the queue
1623 * limits while requests are in the queue (e.g. dm's table swapping). 1623 * limits while requests are in the queue (e.g. dm's table swapping).
1624 * Such request stacking drivers should check those requests against 1624 * Such request stacking drivers should check those requests against
1625 * the new queue limits again when they dispatch those requests, 1625 * the new queue limits again when they dispatch those requests,
1626 * although such checks are also done against the old queue limits 1626 * although such checks are also done against the old queue limits
1627 * when submitting requests. 1627 * when submitting requests.
1628 */ 1628 */
1629 int blk_rq_check_limits(struct request_queue *q, struct request *rq) 1629 int blk_rq_check_limits(struct request_queue *q, struct request *rq)
1630 { 1630 {
1631 if (blk_rq_sectors(rq) > queue_max_sectors(q) || 1631 if (blk_rq_sectors(rq) > queue_max_sectors(q) ||
1632 blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) { 1632 blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) {
1633 printk(KERN_ERR "%s: over max size limit.\n", __func__); 1633 printk(KERN_ERR "%s: over max size limit.\n", __func__);
1634 return -EIO; 1634 return -EIO;
1635 } 1635 }
1636 1636
1637 /* 1637 /*
1638 * queue's settings related to segment counting like q->bounce_pfn 1638 * queue's settings related to segment counting like q->bounce_pfn
1639 * may differ from that of other stacking queues. 1639 * may differ from that of other stacking queues.
1640 * Recalculate it to check the request correctly on this queue's 1640 * Recalculate it to check the request correctly on this queue's
1641 * limitation. 1641 * limitation.
1642 */ 1642 */
1643 blk_recalc_rq_segments(rq); 1643 blk_recalc_rq_segments(rq);
1644 if (rq->nr_phys_segments > queue_max_segments(q)) { 1644 if (rq->nr_phys_segments > queue_max_segments(q)) {
1645 printk(KERN_ERR "%s: over max segments limit.\n", __func__); 1645 printk(KERN_ERR "%s: over max segments limit.\n", __func__);
1646 return -EIO; 1646 return -EIO;
1647 } 1647 }
1648 1648
1649 return 0; 1649 return 0;
1650 } 1650 }
1651 EXPORT_SYMBOL_GPL(blk_rq_check_limits); 1651 EXPORT_SYMBOL_GPL(blk_rq_check_limits);
1652 1652
1653 /** 1653 /**
1654 * blk_insert_cloned_request - Helper for stacking drivers to submit a request 1654 * blk_insert_cloned_request - Helper for stacking drivers to submit a request
1655 * @q: the queue to submit the request 1655 * @q: the queue to submit the request
1656 * @rq: the request being queued 1656 * @rq: the request being queued
1657 */ 1657 */
1658 int blk_insert_cloned_request(struct request_queue *q, struct request *rq) 1658 int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
1659 { 1659 {
1660 unsigned long flags; 1660 unsigned long flags;
1661 1661
1662 if (blk_rq_check_limits(q, rq)) 1662 if (blk_rq_check_limits(q, rq))
1663 return -EIO; 1663 return -EIO;
1664 1664
1665 #ifdef CONFIG_FAIL_MAKE_REQUEST 1665 #ifdef CONFIG_FAIL_MAKE_REQUEST
1666 if (rq->rq_disk && rq->rq_disk->part0.make_it_fail && 1666 if (rq->rq_disk && rq->rq_disk->part0.make_it_fail &&
1667 should_fail(&fail_make_request, blk_rq_bytes(rq))) 1667 should_fail(&fail_make_request, blk_rq_bytes(rq)))
1668 return -EIO; 1668 return -EIO;
1669 #endif 1669 #endif
1670 1670
1671 spin_lock_irqsave(q->queue_lock, flags); 1671 spin_lock_irqsave(q->queue_lock, flags);
1672 1672
1673 /* 1673 /*
1674 * Submitting request must be dequeued before calling this function 1674 * Submitting request must be dequeued before calling this function
1675 * because it will be linked to another request_queue 1675 * because it will be linked to another request_queue
1676 */ 1676 */
1677 BUG_ON(blk_queued_rq(rq)); 1677 BUG_ON(blk_queued_rq(rq));
1678 1678
1679 drive_stat_acct(rq, 1); 1679 drive_stat_acct(rq, 1);
1680 __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0); 1680 __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
1681 1681
1682 spin_unlock_irqrestore(q->queue_lock, flags); 1682 spin_unlock_irqrestore(q->queue_lock, flags);
1683 1683
1684 return 0; 1684 return 0;
1685 } 1685 }
1686 EXPORT_SYMBOL_GPL(blk_insert_cloned_request); 1686 EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
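
A hedged sketch of the intended caller, a request-based stacking driver dispatching a clone to an underlying queue; how the clone was prepared (e.g. with blk_rq_prep_clone()) is assumed, and the failure path is abbreviated.

/* Illustrative only: push a prepared clone down to the lower queue. */
static int my_dispatch_clone(struct request_queue *lower_q,
			     struct request *orig, struct request *clone)
{
	int ret;

	clone->rq_disk = orig->rq_disk;

	/* checks the clone against lower_q's limits before inserting it */
	ret = blk_insert_cloned_request(lower_q, clone);
	if (ret)
		return ret;	/* caller would fail/retry the original */
	return 0;
}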
1687 1687
1688 /** 1688 /**
1689 * blk_rq_err_bytes - determine number of bytes till the next failure boundary 1689 * blk_rq_err_bytes - determine number of bytes till the next failure boundary
1690 * @rq: request to examine 1690 * @rq: request to examine
1691 * 1691 *
1692 * Description: 1692 * Description:
1693 * A request could be a merge of IOs which require different failure 1693 * A request could be a merge of IOs which require different failure
1694 * handling. This function determines the number of bytes which 1694 * handling. This function determines the number of bytes which
1695 * can be failed from the beginning of the request without 1695 * can be failed from the beginning of the request without
1696 * crossing into an area which needs to be retried further. 1696 * crossing into an area which needs to be retried further.
1697 * 1697 *
1698 * Return: 1698 * Return:
1699 * The number of bytes to fail. 1699 * The number of bytes to fail.
1700 * 1700 *
1701 * Context: 1701 * Context:
1702 * queue_lock must be held. 1702 * queue_lock must be held.
1703 */ 1703 */
1704 unsigned int blk_rq_err_bytes(const struct request *rq) 1704 unsigned int blk_rq_err_bytes(const struct request *rq)
1705 { 1705 {
1706 unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK; 1706 unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
1707 unsigned int bytes = 0; 1707 unsigned int bytes = 0;
1708 struct bio *bio; 1708 struct bio *bio;
1709 1709
1710 if (!(rq->cmd_flags & REQ_MIXED_MERGE)) 1710 if (!(rq->cmd_flags & REQ_MIXED_MERGE))
1711 return blk_rq_bytes(rq); 1711 return blk_rq_bytes(rq);
1712 1712
1713 /* 1713 /*
1714 * Currently the only 'mixing' which can happen is between 1714 * Currently the only 'mixing' which can happen is between
1715 * different failfast types. We can safely fail portions 1715 * different failfast types. We can safely fail portions
1716 * which have all the failfast bits that the first one has - 1716 * which have all the failfast bits that the first one has -
1717 * the ones which are at least as eager to fail as the first 1717 * the ones which are at least as eager to fail as the first
1718 * one. 1718 * one.
1719 */ 1719 */
1720 for (bio = rq->bio; bio; bio = bio->bi_next) { 1720 for (bio = rq->bio; bio; bio = bio->bi_next) {
1721 if ((bio->bi_rw & ff) != ff) 1721 if ((bio->bi_rw & ff) != ff)
1722 break; 1722 break;
1723 bytes += bio->bi_size; 1723 bytes += bio->bi_size;
1724 } 1724 }
1725 1725
1726 /* this could lead to infinite loop */ 1726 /* this could lead to infinite loop */
1727 BUG_ON(blk_rq_bytes(rq) && !bytes); 1727 BUG_ON(blk_rq_bytes(rq) && !bytes);
1728 return bytes; 1728 return bytes;
1729 } 1729 }
1730 EXPORT_SYMBOL_GPL(blk_rq_err_bytes); 1730 EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
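
A hedged sketch of using the return value above: fail only the leading bytes whose bios share the first bio's failfast policy, and requeue whatever remains. This is illustrative only and assumes the queue lock is held, as the context note requires.

/* Illustrative only: partial failure of a mixed-merge request. */
static void my_fail_failfast_part(struct request_queue *q, struct request *rq)
{
	unsigned int nr_bytes = blk_rq_err_bytes(rq);

	/* __blk_end_request() returns true while bytes remain unfinished */
	if (__blk_end_request(rq, -EIO, nr_bytes))
		blk_requeue_request(q, rq);	/* retry the rest later */
}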
1731 1731
1732 static void blk_account_io_completion(struct request *req, unsigned int bytes) 1732 static void blk_account_io_completion(struct request *req, unsigned int bytes)
1733 { 1733 {
1734 if (blk_do_io_stat(req)) { 1734 if (blk_do_io_stat(req)) {
1735 const int rw = rq_data_dir(req); 1735 const int rw = rq_data_dir(req);
1736 struct hd_struct *part; 1736 struct hd_struct *part;
1737 int cpu; 1737 int cpu;
1738 1738
1739 cpu = part_stat_lock(); 1739 cpu = part_stat_lock();
1740 part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); 1740 part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
1741 part_stat_add(cpu, part, sectors[rw], bytes >> 9); 1741 part_stat_add(cpu, part, sectors[rw], bytes >> 9);
1742 part_stat_unlock(); 1742 part_stat_unlock();
1743 } 1743 }
1744 } 1744 }
1745 1745
1746 static void blk_account_io_done(struct request *req) 1746 static void blk_account_io_done(struct request *req)
1747 { 1747 {
1748 /* 1748 /*
1749 * Account IO completion. bar_rq isn't accounted as a normal 1749 * Account IO completion. bar_rq isn't accounted as a normal
1750 * IO on queueing nor completion. Accounting the containing 1750 * IO on queueing nor completion. Accounting the containing
1751 * request is enough. 1751 * request is enough.
1752 */ 1752 */
1753 if (blk_do_io_stat(req) && req != &req->q->bar_rq) { 1753 if (blk_do_io_stat(req) && req != &req->q->bar_rq) {
1754 unsigned long duration = jiffies - req->start_time; 1754 unsigned long duration = jiffies - req->start_time;
1755 const int rw = rq_data_dir(req); 1755 const int rw = rq_data_dir(req);
1756 struct hd_struct *part; 1756 struct hd_struct *part;
1757 int cpu; 1757 int cpu;
1758 1758
1759 cpu = part_stat_lock(); 1759 cpu = part_stat_lock();
1760 part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); 1760 part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
1761 1761
1762 part_stat_inc(cpu, part, ios[rw]); 1762 part_stat_inc(cpu, part, ios[rw]);
1763 part_stat_add(cpu, part, ticks[rw], duration); 1763 part_stat_add(cpu, part, ticks[rw], duration);
1764 part_round_stats(cpu, part); 1764 part_round_stats(cpu, part);
1765 part_dec_in_flight(part, rw); 1765 part_dec_in_flight(part, rw);
1766 1766
1767 part_stat_unlock(); 1767 part_stat_unlock();
1768 } 1768 }
1769 } 1769 }
1770 1770
1771 /** 1771 /**
1772 * blk_peek_request - peek at the top of a request queue 1772 * blk_peek_request - peek at the top of a request queue
1773 * @q: request queue to peek at 1773 * @q: request queue to peek at
1774 * 1774 *
1775 * Description: 1775 * Description:
1776 * Return the request at the top of @q. The returned request 1776 * Return the request at the top of @q. The returned request
1777 * should be started using blk_start_request() before LLD starts 1777 * should be started using blk_start_request() before LLD starts
1778 * processing it. 1778 * processing it.
1779 * 1779 *
1780 * Return: 1780 * Return:
1781 * Pointer to the request at the top of @q if available. Null 1781 * Pointer to the request at the top of @q if available. Null
1782 * otherwise. 1782 * otherwise.
1783 * 1783 *
1784 * Context: 1784 * Context:
1785 * queue_lock must be held. 1785 * queue_lock must be held.
1786 */ 1786 */
1787 struct request *blk_peek_request(struct request_queue *q) 1787 struct request *blk_peek_request(struct request_queue *q)
1788 { 1788 {
1789 struct request *rq; 1789 struct request *rq;
1790 int ret; 1790 int ret;
1791 1791
1792 while ((rq = __elv_next_request(q)) != NULL) { 1792 while ((rq = __elv_next_request(q)) != NULL) {
1793 if (!(rq->cmd_flags & REQ_STARTED)) { 1793 if (!(rq->cmd_flags & REQ_STARTED)) {
1794 /* 1794 /*
1795 * This is the first time the device driver 1795 * This is the first time the device driver
1796 * sees this request (possibly after 1796 * sees this request (possibly after
1797 * requeueing). Notify IO scheduler. 1797 * requeueing). Notify IO scheduler.
1798 */ 1798 */
1799 if (blk_sorted_rq(rq)) 1799 if (blk_sorted_rq(rq))
1800 elv_activate_rq(q, rq); 1800 elv_activate_rq(q, rq);
1801 1801
1802 /* 1802 /*
1803 * just mark as started even if we don't start 1803 * just mark as started even if we don't start
1804 * it, a request that has been delayed should 1804 * it, a request that has been delayed should
1805 * not be passed by new incoming requests 1805 * not be passed by new incoming requests
1806 */ 1806 */
1807 rq->cmd_flags |= REQ_STARTED; 1807 rq->cmd_flags |= REQ_STARTED;
1808 trace_block_rq_issue(q, rq); 1808 trace_block_rq_issue(q, rq);
1809 } 1809 }
1810 1810
1811 if (!q->boundary_rq || q->boundary_rq == rq) { 1811 if (!q->boundary_rq || q->boundary_rq == rq) {
1812 q->end_sector = rq_end_sector(rq); 1812 q->end_sector = rq_end_sector(rq);
1813 q->boundary_rq = NULL; 1813 q->boundary_rq = NULL;
1814 } 1814 }
1815 1815
1816 if (rq->cmd_flags & REQ_DONTPREP) 1816 if (rq->cmd_flags & REQ_DONTPREP)
1817 break; 1817 break;
1818 1818
1819 if (q->dma_drain_size && blk_rq_bytes(rq)) { 1819 if (q->dma_drain_size && blk_rq_bytes(rq)) {
1820 /* 1820 /*
1821 * make sure space for the drain appears; we 1821 * make sure space for the drain appears; we
1822 * know we can do this because max_hw_segments 1822 * know we can do this because max_hw_segments
1823 * has been adjusted to be one fewer than the 1823 * has been adjusted to be one fewer than the
1824 * device can handle 1824 * device can handle
1825 */ 1825 */
1826 rq->nr_phys_segments++; 1826 rq->nr_phys_segments++;
1827 } 1827 }
1828 1828
1829 if (!q->prep_rq_fn) 1829 if (!q->prep_rq_fn)
1830 break; 1830 break;
1831 1831
1832 ret = q->prep_rq_fn(q, rq); 1832 ret = q->prep_rq_fn(q, rq);
1833 if (ret == BLKPREP_OK) { 1833 if (ret == BLKPREP_OK) {
1834 break; 1834 break;
1835 } else if (ret == BLKPREP_DEFER) { 1835 } else if (ret == BLKPREP_DEFER) {
1836 /* 1836 /*
1837 * the request may have been (partially) prepped. 1837 * the request may have been (partially) prepped.
1838 * we need to keep this request in the front to 1838 * we need to keep this request in the front to
1839 * avoid resource deadlock. REQ_STARTED will 1839 * avoid resource deadlock. REQ_STARTED will
1840 * prevent other fs requests from passing this one. 1840 * prevent other fs requests from passing this one.
1841 */ 1841 */
1842 if (q->dma_drain_size && blk_rq_bytes(rq) && 1842 if (q->dma_drain_size && blk_rq_bytes(rq) &&
1843 !(rq->cmd_flags & REQ_DONTPREP)) { 1843 !(rq->cmd_flags & REQ_DONTPREP)) {
1844 /* 1844 /*
1845 * remove the space for the drain we added 1845 * remove the space for the drain we added
1846 * so that we don't add it again 1846 * so that we don't add it again
1847 */ 1847 */
1848 --rq->nr_phys_segments; 1848 --rq->nr_phys_segments;
1849 } 1849 }
1850 1850
1851 rq = NULL; 1851 rq = NULL;
1852 break; 1852 break;
1853 } else if (ret == BLKPREP_KILL) { 1853 } else if (ret == BLKPREP_KILL) {
1854 rq->cmd_flags |= REQ_QUIET; 1854 rq->cmd_flags |= REQ_QUIET;
1855 /* 1855 /*
1856 * Mark this request as started so we don't trigger 1856 * Mark this request as started so we don't trigger
1857 * any debug logic in the end I/O path. 1857 * any debug logic in the end I/O path.
1858 */ 1858 */
1859 blk_start_request(rq); 1859 blk_start_request(rq);
1860 __blk_end_request_all(rq, -EIO); 1860 __blk_end_request_all(rq, -EIO);
1861 } else { 1861 } else {
1862 printk(KERN_ERR "%s: bad return=%d\n", __func__, ret); 1862 printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
1863 break; 1863 break;
1864 } 1864 }
1865 } 1865 }
1866 1866
1867 return rq; 1867 return rq;
1868 } 1868 }
1869 EXPORT_SYMBOL(blk_peek_request); 1869 EXPORT_SYMBOL(blk_peek_request);
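
A hedged sketch of the peek-then-start pattern implied by the description: look at the head of the queue, and only dequeue with blk_start_request() once the hardware can actually take the request. The my_hw_* helpers are hypothetical.

/* Illustrative request_fn: start a request only when there is room for it. */
static int my_hw_can_take(struct request *rq);	/* hypothetical */
static void my_hw_queue(struct request *rq);	/* hypothetical */

static void my_peek_request_fn(struct request_queue *q)
{
	struct request *rq;

	while ((rq = blk_peek_request(q)) != NULL) {
		if (!my_hw_can_take(rq))
			break;			/* leave it on the queue */

		blk_start_request(rq);		/* dequeue + arm timeout */
		my_hw_queue(rq);		/* hand off to the hardware */
	}
}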
1870 1870
1871 void blk_dequeue_request(struct request *rq) 1871 void blk_dequeue_request(struct request *rq)
1872 { 1872 {
1873 struct request_queue *q = rq->q; 1873 struct request_queue *q = rq->q;
1874 1874
1875 BUG_ON(list_empty(&rq->queuelist)); 1875 BUG_ON(list_empty(&rq->queuelist));
1876 BUG_ON(ELV_ON_HASH(rq)); 1876 BUG_ON(ELV_ON_HASH(rq));
1877 1877
1878 list_del_init(&rq->queuelist); 1878 list_del_init(&rq->queuelist);
1879 1879
1880 /* 1880 /*
1881 * the time frame between a request being removed from the lists 1881 * the time frame between a request being removed from the lists
1882 * and when it is freed is accounted as io that is in progress at 1882 * and when it is freed is accounted as io that is in progress at
1883 * the driver side. 1883 * the driver side.
1884 */ 1884 */
1885 if (blk_account_rq(rq)) { 1885 if (blk_account_rq(rq)) {
1886 q->in_flight[rq_is_sync(rq)]++; 1886 q->in_flight[rq_is_sync(rq)]++;
1887 set_io_start_time_ns(rq); 1887 set_io_start_time_ns(rq);
1888 } 1888 }
1889 } 1889 }
1890 1890
1891 /** 1891 /**
1892 * blk_start_request - start request processing on the driver 1892 * blk_start_request - start request processing on the driver
1893 * @req: request to dequeue 1893 * @req: request to dequeue
1894 * 1894 *
1895 * Description: 1895 * Description:
1896 * Dequeue @req and start timeout timer on it. This hands off the 1896 * Dequeue @req and start timeout timer on it. This hands off the
1897 * request to the driver. 1897 * request to the driver.
1898 * 1898 *
1899 * Block internal functions which don't want to start timer should 1899 * Block internal functions which don't want to start timer should
1900 * call blk_dequeue_request(). 1900 * call blk_dequeue_request().
1901 * 1901 *
1902 * Context: 1902 * Context:
1903 * queue_lock must be held. 1903 * queue_lock must be held.
1904 */ 1904 */
1905 void blk_start_request(struct request *req) 1905 void blk_start_request(struct request *req)
1906 { 1906 {
1907 blk_dequeue_request(req); 1907 blk_dequeue_request(req);
1908 1908
1909 /* 1909 /*
1910 * We are now handing the request to the hardware, initialize 1910 * We are now handing the request to the hardware, initialize
1911 * resid_len to full count and add the timeout handler. 1911 * resid_len to full count and add the timeout handler.
1912 */ 1912 */
1913 req->resid_len = blk_rq_bytes(req); 1913 req->resid_len = blk_rq_bytes(req);
1914 if (unlikely(blk_bidi_rq(req))) 1914 if (unlikely(blk_bidi_rq(req)))
1915 req->next_rq->resid_len = blk_rq_bytes(req->next_rq); 1915 req->next_rq->resid_len = blk_rq_bytes(req->next_rq);
1916 1916
1917 blk_add_timer(req); 1917 blk_add_timer(req);
1918 } 1918 }
1919 EXPORT_SYMBOL(blk_start_request); 1919 EXPORT_SYMBOL(blk_start_request);
1920 1920
1921 /** 1921 /**
1922 * blk_fetch_request - fetch a request from a request queue 1922 * blk_fetch_request - fetch a request from a request queue
1923 * @q: request queue to fetch a request from 1923 * @q: request queue to fetch a request from
1924 * 1924 *
1925 * Description: 1925 * Description:
1926 * Return the request at the top of @q. The request is started on 1926 * Return the request at the top of @q. The request is started on
1927 * return and LLD can start processing it immediately. 1927 * return and LLD can start processing it immediately.
1928 * 1928 *
1929 * Return: 1929 * Return:
1930 * Pointer to the request at the top of @q if available. Null 1930 * Pointer to the request at the top of @q if available. Null
1931 * otherwise. 1931 * otherwise.
1932 * 1932 *
1933 * Context: 1933 * Context:
1934 * queue_lock must be held. 1934 * queue_lock must be held.
1935 */ 1935 */
1936 struct request *blk_fetch_request(struct request_queue *q) 1936 struct request *blk_fetch_request(struct request_queue *q)
1937 { 1937 {
1938 struct request *rq; 1938 struct request *rq;
1939 1939
1940 rq = blk_peek_request(q); 1940 rq = blk_peek_request(q);
1941 if (rq) 1941 if (rq)
1942 blk_start_request(rq); 1942 blk_start_request(rq);
1943 return rq; 1943 return rq;
1944 } 1944 }
1945 EXPORT_SYMBOL(blk_fetch_request); 1945 EXPORT_SYMBOL(blk_fetch_request);
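blk_fetch_request() is the usual entry point for a simple, synchronous ->request_fn. A minimal sketch, assuming a hypothetical mydrv_xfer() that performs the whole transfer while the queue lock is held (drivers with asynchronous hardware would instead drop the lock and complete the request later):

	static void mydrv_request_fn(struct request_queue *q)
	{
		struct request *rq;

		/* ->request_fn is invoked with q->queue_lock already held */
		while ((rq = blk_fetch_request(q)) != NULL) {
			int error = mydrv_xfer(rq);	/* hypothetical: returns 0 or -EIO */

			/* queue lock is still held, so use the __ variant */
			__blk_end_request_all(rq, error);
		}
	}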
1946 1946
1947 /** 1947 /**
1948 * blk_update_request - Special helper function for request stacking drivers 1948 * blk_update_request - Special helper function for request stacking drivers
1949 * @req: the request being processed 1949 * @req: the request being processed
1950 * @error: %0 for success, < %0 for error 1950 * @error: %0 for success, < %0 for error
1951 * @nr_bytes: number of bytes to complete @req 1951 * @nr_bytes: number of bytes to complete @req
1952 * 1952 *
1953 * Description: 1953 * Description:
1954 * Ends I/O on a number of bytes attached to @req, but doesn't complete 1954 * Ends I/O on a number of bytes attached to @req, but doesn't complete
1955 * the request structure even if @req doesn't have leftover. 1955 * the request structure even if @req doesn't have leftover.
1956 * If @req has leftover, sets it up for the next range of segments. 1956 * If @req has leftover, sets it up for the next range of segments.
1957 * 1957 *
1958 * This special helper function is only for request stacking drivers 1958 * This special helper function is only for request stacking drivers
1959 * (e.g. request-based dm) so that they can handle partial completion. 1959 * (e.g. request-based dm) so that they can handle partial completion.
1960 * Actual device drivers should use blk_end_request instead. 1960 * Actual device drivers should use blk_end_request instead.
1961 * 1961 *
1962 * Passing the result of blk_rq_bytes() as @nr_bytes guarantees 1962 * Passing the result of blk_rq_bytes() as @nr_bytes guarantees
1963 * %false return from this function. 1963 * %false return from this function.
1964 * 1964 *
1965 * Return: 1965 * Return:
1966 * %false - this request doesn't have any more data 1966 * %false - this request doesn't have any more data
1967 * %true - this request has more data 1967 * %true - this request has more data
1968 **/ 1968 **/
1969 bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) 1969 bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
1970 { 1970 {
1971 int total_bytes, bio_nbytes, next_idx = 0; 1971 int total_bytes, bio_nbytes, next_idx = 0;
1972 struct bio *bio; 1972 struct bio *bio;
1973 1973
1974 if (!req->bio) 1974 if (!req->bio)
1975 return false; 1975 return false;
1976 1976
1977 trace_block_rq_complete(req->q, req); 1977 trace_block_rq_complete(req->q, req);
1978 1978
1979 /* 1979 /*
1980 * For fs requests, rq is just a carrier of independent bios 1980 * For fs requests, rq is just a carrier of independent bios
1981 * and each partial completion should be handled separately. 1981 * and each partial completion should be handled separately.
1982 * Reset per-request error on each partial completion. 1982 * Reset per-request error on each partial completion.
1983 * 1983 *
1984 * TODO: tj: This is too subtle. It would be better to let 1984 * TODO: tj: This is too subtle. It would be better to let
1985 * low level drivers do what they see fit. 1985 * low level drivers do what they see fit.
1986 */ 1986 */
1987 if (blk_fs_request(req)) 1987 if (blk_fs_request(req))
1988 req->errors = 0; 1988 req->errors = 0;
1989 1989
1990 if (error && (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))) { 1990 if (error && (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))) {
1991 printk(KERN_ERR "end_request: I/O error, dev %s, sector %llu\n", 1991 printk(KERN_ERR "end_request: I/O error, dev %s, sector %llu\n",
1992 req->rq_disk ? req->rq_disk->disk_name : "?", 1992 req->rq_disk ? req->rq_disk->disk_name : "?",
1993 (unsigned long long)blk_rq_pos(req)); 1993 (unsigned long long)blk_rq_pos(req));
1994 } 1994 }
1995 1995
1996 blk_account_io_completion(req, nr_bytes); 1996 blk_account_io_completion(req, nr_bytes);
1997 1997
1998 total_bytes = bio_nbytes = 0; 1998 total_bytes = bio_nbytes = 0;
1999 while ((bio = req->bio) != NULL) { 1999 while ((bio = req->bio) != NULL) {
2000 int nbytes; 2000 int nbytes;
2001 2001
2002 if (nr_bytes >= bio->bi_size) { 2002 if (nr_bytes >= bio->bi_size) {
2003 req->bio = bio->bi_next; 2003 req->bio = bio->bi_next;
2004 nbytes = bio->bi_size; 2004 nbytes = bio->bi_size;
2005 req_bio_endio(req, bio, nbytes, error); 2005 req_bio_endio(req, bio, nbytes, error);
2006 next_idx = 0; 2006 next_idx = 0;
2007 bio_nbytes = 0; 2007 bio_nbytes = 0;
2008 } else { 2008 } else {
2009 int idx = bio->bi_idx + next_idx; 2009 int idx = bio->bi_idx + next_idx;
2010 2010
2011 if (unlikely(idx >= bio->bi_vcnt)) { 2011 if (unlikely(idx >= bio->bi_vcnt)) {
2012 blk_dump_rq_flags(req, "__end_that"); 2012 blk_dump_rq_flags(req, "__end_that");
2013 printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n", 2013 printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n",
2014 __func__, idx, bio->bi_vcnt); 2014 __func__, idx, bio->bi_vcnt);
2015 break; 2015 break;
2016 } 2016 }
2017 2017
2018 nbytes = bio_iovec_idx(bio, idx)->bv_len; 2018 nbytes = bio_iovec_idx(bio, idx)->bv_len;
2019 BIO_BUG_ON(nbytes > bio->bi_size); 2019 BIO_BUG_ON(nbytes > bio->bi_size);
2020 2020
2021 /* 2021 /*
2022 * not a complete bvec done 2022 * not a complete bvec done
2023 */ 2023 */
2024 if (unlikely(nbytes > nr_bytes)) { 2024 if (unlikely(nbytes > nr_bytes)) {
2025 bio_nbytes += nr_bytes; 2025 bio_nbytes += nr_bytes;
2026 total_bytes += nr_bytes; 2026 total_bytes += nr_bytes;
2027 break; 2027 break;
2028 } 2028 }
2029 2029
2030 /* 2030 /*
2031 * advance to the next vector 2031 * advance to the next vector
2032 */ 2032 */
2033 next_idx++; 2033 next_idx++;
2034 bio_nbytes += nbytes; 2034 bio_nbytes += nbytes;
2035 } 2035 }
2036 2036
2037 total_bytes += nbytes; 2037 total_bytes += nbytes;
2038 nr_bytes -= nbytes; 2038 nr_bytes -= nbytes;
2039 2039
2040 bio = req->bio; 2040 bio = req->bio;
2041 if (bio) { 2041 if (bio) {
2042 /* 2042 /*
2043 * end more in this run, or just return 'not-done' 2043 * end more in this run, or just return 'not-done'
2044 */ 2044 */
2045 if (unlikely(nr_bytes <= 0)) 2045 if (unlikely(nr_bytes <= 0))
2046 break; 2046 break;
2047 } 2047 }
2048 } 2048 }
2049 2049
2050 /* 2050 /*
2051 * completely done 2051 * completely done
2052 */ 2052 */
2053 if (!req->bio) { 2053 if (!req->bio) {
2054 /* 2054 /*
2055 * Reset counters so that the request stacking driver 2055 * Reset counters so that the request stacking driver
2056 * can find how many bytes remain in the request 2056 * can find how many bytes remain in the request
2057 * later. 2057 * later.
2058 */ 2058 */
2059 req->__data_len = 0; 2059 req->__data_len = 0;
2060 return false; 2060 return false;
2061 } 2061 }
2062 2062
2063 /* 2063 /*
2064 * if the request wasn't completed, update state 2064 * if the request wasn't completed, update state
2065 */ 2065 */
2066 if (bio_nbytes) { 2066 if (bio_nbytes) {
2067 req_bio_endio(req, bio, bio_nbytes, error); 2067 req_bio_endio(req, bio, bio_nbytes, error);
2068 bio->bi_idx += next_idx; 2068 bio->bi_idx += next_idx;
2069 bio_iovec(bio)->bv_offset += nr_bytes; 2069 bio_iovec(bio)->bv_offset += nr_bytes;
2070 bio_iovec(bio)->bv_len -= nr_bytes; 2070 bio_iovec(bio)->bv_len -= nr_bytes;
2071 } 2071 }
2072 2072
2073 req->__data_len -= total_bytes; 2073 req->__data_len -= total_bytes;
2074 req->buffer = bio_data(req->bio); 2074 req->buffer = bio_data(req->bio);
2075 2075
2076 /* update sector only for requests with clear definition of sector */ 2076 /* update sector only for requests with clear definition of sector */
2077 if (blk_fs_request(req) || blk_discard_rq(req)) 2077 if (blk_fs_request(req) || blk_discard_rq(req))
2078 req->__sector += total_bytes >> 9; 2078 req->__sector += total_bytes >> 9;
2079 2079
2080 /* mixed attributes always follow the first bio */ 2080 /* mixed attributes always follow the first bio */
2081 if (req->cmd_flags & REQ_MIXED_MERGE) { 2081 if (req->cmd_flags & REQ_MIXED_MERGE) {
2082 req->cmd_flags &= ~REQ_FAILFAST_MASK; 2082 req->cmd_flags &= ~REQ_FAILFAST_MASK;
2083 req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK; 2083 req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK;
2084 } 2084 }
2085 2085
2086 /* 2086 /*
2087 * If total number of sectors is less than the first segment 2087 * If total number of sectors is less than the first segment
2088 * size, something has gone terribly wrong. 2088 * size, something has gone terribly wrong.
2089 */ 2089 */
2090 if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) { 2090 if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
2091 printk(KERN_ERR "blk: request botched\n"); 2091 printk(KERN_ERR "blk: request botched\n");
2092 req->__data_len = blk_rq_cur_bytes(req); 2092 req->__data_len = blk_rq_cur_bytes(req);
2093 } 2093 }
2094 2094
2095 /* recalculate the number of segments */ 2095 /* recalculate the number of segments */
2096 blk_recalc_rq_segments(req); 2096 blk_recalc_rq_segments(req);
2097 2097
2098 return true; 2098 return true;
2099 } 2099 }
2100 EXPORT_SYMBOL_GPL(blk_update_request); 2100 EXPORT_SYMBOL_GPL(blk_update_request);
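For a request stacking driver, the usual pattern is to complete the original request piecewise as each cloned bio finishes, and only finish the request structure once nothing is left. A rough sketch under that assumption (not the exact request-based dm code):

	/* called when one cloned bio of @orig has completed @nr_bytes */
	static void stacking_bio_done(struct request *orig, int error,
				      unsigned int nr_bytes)
	{
		if (blk_update_request(orig, error, nr_bytes))
			return;			/* bios remain, keep @orig alive */

		/* nothing left: finish the request (takes the queue lock itself) */
		blk_end_request_all(orig, error);
	}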
2101 2101
2102 static bool blk_update_bidi_request(struct request *rq, int error, 2102 static bool blk_update_bidi_request(struct request *rq, int error,
2103 unsigned int nr_bytes, 2103 unsigned int nr_bytes,
2104 unsigned int bidi_bytes) 2104 unsigned int bidi_bytes)
2105 { 2105 {
2106 if (blk_update_request(rq, error, nr_bytes)) 2106 if (blk_update_request(rq, error, nr_bytes))
2107 return true; 2107 return true;
2108 2108
2109 /* Bidi request must be completed as a whole */ 2109 /* Bidi request must be completed as a whole */
2110 if (unlikely(blk_bidi_rq(rq)) && 2110 if (unlikely(blk_bidi_rq(rq)) &&
2111 blk_update_request(rq->next_rq, error, bidi_bytes)) 2111 blk_update_request(rq->next_rq, error, bidi_bytes))
2112 return true; 2112 return true;
2113 2113
2114 add_disk_randomness(rq->rq_disk); 2114 if (blk_queue_add_random(rq->q))
2115 add_disk_randomness(rq->rq_disk);
2115 2116
2116 return false; 2117 return false;
2117 } 2118 }
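The new blk_queue_add_random() test is what makes the entropy contribution optional per queue. It is presumably a flag helper added next to the other queue-flag accessors in blkdev.h, roughly along these lines (the exact bit number is illustrative, and the flag would have to be set by default so behaviour is unchanged until someone turns it off):

	#define QUEUE_FLAG_ADD_RANDOM	16	/* illustrative bit: contribute to the random pool */

	#define blk_queue_add_random(q)	test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags)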
2118 2119
2119 /* 2120 /*
2120 * queue lock must be held 2121 * queue lock must be held
2121 */ 2122 */
2122 static void blk_finish_request(struct request *req, int error) 2123 static void blk_finish_request(struct request *req, int error)
2123 { 2124 {
2124 if (blk_rq_tagged(req)) 2125 if (blk_rq_tagged(req))
2125 blk_queue_end_tag(req->q, req); 2126 blk_queue_end_tag(req->q, req);
2126 2127
2127 BUG_ON(blk_queued_rq(req)); 2128 BUG_ON(blk_queued_rq(req));
2128 2129
2129 if (unlikely(laptop_mode) && blk_fs_request(req)) 2130 if (unlikely(laptop_mode) && blk_fs_request(req))
2130 laptop_io_completion(&req->q->backing_dev_info); 2131 laptop_io_completion(&req->q->backing_dev_info);
2131 2132
2132 blk_delete_timer(req); 2133 blk_delete_timer(req);
2133 2134
2134 blk_account_io_done(req); 2135 blk_account_io_done(req);
2135 2136
2136 if (req->end_io) 2137 if (req->end_io)
2137 req->end_io(req, error); 2138 req->end_io(req, error);
2138 else { 2139 else {
2139 if (blk_bidi_rq(req)) 2140 if (blk_bidi_rq(req))
2140 __blk_put_request(req->next_rq->q, req->next_rq); 2141 __blk_put_request(req->next_rq->q, req->next_rq);
2141 2142
2142 __blk_put_request(req->q, req); 2143 __blk_put_request(req->q, req);
2143 } 2144 }
2144 } 2145 }
2145 2146
2146 /** 2147 /**
2147 * blk_end_bidi_request - Complete a bidi request 2148 * blk_end_bidi_request - Complete a bidi request
2148 * @rq: the request to complete 2149 * @rq: the request to complete
2149 * @error: %0 for success, < %0 for error 2150 * @error: %0 for success, < %0 for error
2150 * @nr_bytes: number of bytes to complete @rq 2151 * @nr_bytes: number of bytes to complete @rq
2151 * @bidi_bytes: number of bytes to complete @rq->next_rq 2152 * @bidi_bytes: number of bytes to complete @rq->next_rq
2152 * 2153 *
2153 * Description: 2154 * Description:
2154 * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. 2155 * Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
2155 * Drivers that support bidi can safely call this member for any 2156 * Drivers that support bidi can safely call this member for any
2156 * type of request, bidi or uni. In the latter case @bidi_bytes is 2157 * type of request, bidi or uni. In the latter case @bidi_bytes is
2157 * just ignored. 2158 * just ignored.
2158 * 2159 *
2159 * Return: 2160 * Return:
2160 * %false - we are done with this request 2161 * %false - we are done with this request
2161 * %true - still buffers pending for this request 2162 * %true - still buffers pending for this request
2162 **/ 2163 **/
2163 static bool blk_end_bidi_request(struct request *rq, int error, 2164 static bool blk_end_bidi_request(struct request *rq, int error,
2164 unsigned int nr_bytes, unsigned int bidi_bytes) 2165 unsigned int nr_bytes, unsigned int bidi_bytes)
2165 { 2166 {
2166 struct request_queue *q = rq->q; 2167 struct request_queue *q = rq->q;
2167 unsigned long flags; 2168 unsigned long flags;
2168 2169
2169 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) 2170 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
2170 return true; 2171 return true;
2171 2172
2172 spin_lock_irqsave(q->queue_lock, flags); 2173 spin_lock_irqsave(q->queue_lock, flags);
2173 blk_finish_request(rq, error); 2174 blk_finish_request(rq, error);
2174 spin_unlock_irqrestore(q->queue_lock, flags); 2175 spin_unlock_irqrestore(q->queue_lock, flags);
2175 2176
2176 return false; 2177 return false;
2177 } 2178 }
2178 2179
2179 /** 2180 /**
2180 * __blk_end_bidi_request - Complete a bidi request with queue lock held 2181 * __blk_end_bidi_request - Complete a bidi request with queue lock held
2181 * @rq: the request to complete 2182 * @rq: the request to complete
2182 * @error: %0 for success, < %0 for error 2183 * @error: %0 for success, < %0 for error
2183 * @nr_bytes: number of bytes to complete @rq 2184 * @nr_bytes: number of bytes to complete @rq
2184 * @bidi_bytes: number of bytes to complete @rq->next_rq 2185 * @bidi_bytes: number of bytes to complete @rq->next_rq
2185 * 2186 *
2186 * Description: 2187 * Description:
2187 * Identical to blk_end_bidi_request() except that queue lock is 2188 * Identical to blk_end_bidi_request() except that queue lock is
2188 * assumed to be locked on entry and remains so on return. 2189 * assumed to be locked on entry and remains so on return.
2189 * 2190 *
2190 * Return: 2191 * Return:
2191 * %false - we are done with this request 2192 * %false - we are done with this request
2192 * %true - still buffers pending for this request 2193 * %true - still buffers pending for this request
2193 **/ 2194 **/
2194 static bool __blk_end_bidi_request(struct request *rq, int error, 2195 static bool __blk_end_bidi_request(struct request *rq, int error,
2195 unsigned int nr_bytes, unsigned int bidi_bytes) 2196 unsigned int nr_bytes, unsigned int bidi_bytes)
2196 { 2197 {
2197 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) 2198 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
2198 return true; 2199 return true;
2199 2200
2200 blk_finish_request(rq, error); 2201 blk_finish_request(rq, error);
2201 2202
2202 return false; 2203 return false;
2203 } 2204 }
2204 2205
2205 /** 2206 /**
2206 * blk_end_request - Helper function for drivers to complete the request. 2207 * blk_end_request - Helper function for drivers to complete the request.
2207 * @rq: the request being processed 2208 * @rq: the request being processed
2208 * @error: %0 for success, < %0 for error 2209 * @error: %0 for success, < %0 for error
2209 * @nr_bytes: number of bytes to complete 2210 * @nr_bytes: number of bytes to complete
2210 * 2211 *
2211 * Description: 2212 * Description:
2212 * Ends I/O on a number of bytes attached to @rq. 2213 * Ends I/O on a number of bytes attached to @rq.
2213 * If @rq has leftover, sets it up for the next range of segments. 2214 * If @rq has leftover, sets it up for the next range of segments.
2214 * 2215 *
2215 * Return: 2216 * Return:
2216 * %false - we are done with this request 2217 * %false - we are done with this request
2217 * %true - still buffers pending for this request 2218 * %true - still buffers pending for this request
2218 **/ 2219 **/
2219 bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes) 2220 bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
2220 { 2221 {
2221 return blk_end_bidi_request(rq, error, nr_bytes, 0); 2222 return blk_end_bidi_request(rq, error, nr_bytes, 0);
2222 } 2223 }
2223 EXPORT_SYMBOL(blk_end_request); 2224 EXPORT_SYMBOL(blk_end_request);
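In an ordinary (non-stacking) driver, blk_end_request() is typically called from the completion interrupt to retire however many bytes the hardware just finished. A hedged sketch (mydrv_port and the fixed 512-byte step are assumptions for illustration):

	/* runs from the hypothetical mydrv interrupt handler; queue lock not held */
	static void mydrv_sector_done(struct mydrv_port *port, int error)
	{
		/* returns true while buffers are still pending on the request */
		if (!blk_end_request(port->rq, error, 512))
			port->rq = NULL;	/* request fully completed */
	}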
2224 2225
2225 /** 2226 /**
2226 * blk_end_request_all - Helper function for drivers to finish the request. 2227 * blk_end_request_all - Helper function for drivers to finish the request.
2227 * @rq: the request to finish 2228 * @rq: the request to finish
2228 * @error: %0 for success, < %0 for error 2229 * @error: %0 for success, < %0 for error
2229 * 2230 *
2230 * Description: 2231 * Description:
2231 * Completely finish @rq. 2232 * Completely finish @rq.
2232 */ 2233 */
2233 void blk_end_request_all(struct request *rq, int error) 2234 void blk_end_request_all(struct request *rq, int error)
2234 { 2235 {
2235 bool pending; 2236 bool pending;
2236 unsigned int bidi_bytes = 0; 2237 unsigned int bidi_bytes = 0;
2237 2238
2238 if (unlikely(blk_bidi_rq(rq))) 2239 if (unlikely(blk_bidi_rq(rq)))
2239 bidi_bytes = blk_rq_bytes(rq->next_rq); 2240 bidi_bytes = blk_rq_bytes(rq->next_rq);
2240 2241
2241 pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); 2242 pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
2242 BUG_ON(pending); 2243 BUG_ON(pending);
2243 } 2244 }
2244 EXPORT_SYMBOL(blk_end_request_all); 2245 EXPORT_SYMBOL(blk_end_request_all);
2245 2246
2246 /** 2247 /**
2247 * blk_end_request_cur - Helper function to finish the current request chunk. 2248 * blk_end_request_cur - Helper function to finish the current request chunk.
2248 * @rq: the request to finish the current chunk for 2249 * @rq: the request to finish the current chunk for
2249 * @error: %0 for success, < %0 for error 2250 * @error: %0 for success, < %0 for error
2250 * 2251 *
2251 * Description: 2252 * Description:
2252 * Complete the current consecutively mapped chunk from @rq. 2253 * Complete the current consecutively mapped chunk from @rq.
2253 * 2254 *
2254 * Return: 2255 * Return:
2255 * %false - we are done with this request 2256 * %false - we are done with this request
2256 * %true - still buffers pending for this request 2257 * %true - still buffers pending for this request
2257 */ 2258 */
2258 bool blk_end_request_cur(struct request *rq, int error) 2259 bool blk_end_request_cur(struct request *rq, int error)
2259 { 2260 {
2260 return blk_end_request(rq, error, blk_rq_cur_bytes(rq)); 2261 return blk_end_request(rq, error, blk_rq_cur_bytes(rq));
2261 } 2262 }
2262 EXPORT_SYMBOL(blk_end_request_cur); 2263 EXPORT_SYMBOL(blk_end_request_cur);
2263 2264
2264 /** 2265 /**
2265 * blk_end_request_err - Finish a request till the next failure boundary. 2266 * blk_end_request_err - Finish a request till the next failure boundary.
2266 * @rq: the request to finish till the next failure boundary for 2267 * @rq: the request to finish till the next failure boundary for
2267 * @error: must be negative errno 2268 * @error: must be negative errno
2268 * 2269 *
2269 * Description: 2270 * Description:
2270 * Complete @rq till the next failure boundary. 2271 * Complete @rq till the next failure boundary.
2271 * 2272 *
2272 * Return: 2273 * Return:
2273 * %false - we are done with this request 2274 * %false - we are done with this request
2274 * %true - still buffers pending for this request 2275 * %true - still buffers pending for this request
2275 */ 2276 */
2276 bool blk_end_request_err(struct request *rq, int error) 2277 bool blk_end_request_err(struct request *rq, int error)
2277 { 2278 {
2278 WARN_ON(error >= 0); 2279 WARN_ON(error >= 0);
2279 return blk_end_request(rq, error, blk_rq_err_bytes(rq)); 2280 return blk_end_request(rq, error, blk_rq_err_bytes(rq));
2280 } 2281 }
2281 EXPORT_SYMBOL_GPL(blk_end_request_err); 2282 EXPORT_SYMBOL_GPL(blk_end_request_err);
2282 2283
2283 /** 2284 /**
2284 * __blk_end_request - Helper function for drivers to complete the request. 2285 * __blk_end_request - Helper function for drivers to complete the request.
2285 * @rq: the request being processed 2286 * @rq: the request being processed
2286 * @error: %0 for success, < %0 for error 2287 * @error: %0 for success, < %0 for error
2287 * @nr_bytes: number of bytes to complete 2288 * @nr_bytes: number of bytes to complete
2288 * 2289 *
2289 * Description: 2290 * Description:
2290 * Must be called with queue lock held unlike blk_end_request(). 2291 * Must be called with queue lock held unlike blk_end_request().
2291 * 2292 *
2292 * Return: 2293 * Return:
2293 * %false - we are done with this request 2294 * %false - we are done with this request
2294 * %true - still buffers pending for this request 2295 * %true - still buffers pending for this request
2295 **/ 2296 **/
2296 bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) 2297 bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
2297 { 2298 {
2298 return __blk_end_bidi_request(rq, error, nr_bytes, 0); 2299 return __blk_end_bidi_request(rq, error, nr_bytes, 0);
2299 } 2300 }
2300 EXPORT_SYMBOL(__blk_end_request); 2301 EXPORT_SYMBOL(__blk_end_request);
2301 2302
2302 /** 2303 /**
2303 * __blk_end_request_all - Helper function for drivers to finish the request. 2304 * __blk_end_request_all - Helper function for drivers to finish the request.
2304 * @rq: the request to finish 2305 * @rq: the request to finish
2305 * @error: %0 for success, < %0 for error 2306 * @error: %0 for success, < %0 for error
2306 * 2307 *
2307 * Description: 2308 * Description:
2308 * Completely finish @rq. Must be called with queue lock held. 2309 * Completely finish @rq. Must be called with queue lock held.
2309 */ 2310 */
2310 void __blk_end_request_all(struct request *rq, int error) 2311 void __blk_end_request_all(struct request *rq, int error)
2311 { 2312 {
2312 bool pending; 2313 bool pending;
2313 unsigned int bidi_bytes = 0; 2314 unsigned int bidi_bytes = 0;
2314 2315
2315 if (unlikely(blk_bidi_rq(rq))) 2316 if (unlikely(blk_bidi_rq(rq)))
2316 bidi_bytes = blk_rq_bytes(rq->next_rq); 2317 bidi_bytes = blk_rq_bytes(rq->next_rq);
2317 2318
2318 pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); 2319 pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
2319 BUG_ON(pending); 2320 BUG_ON(pending);
2320 } 2321 }
2321 EXPORT_SYMBOL(__blk_end_request_all); 2322 EXPORT_SYMBOL(__blk_end_request_all);
2322 2323
2323 /** 2324 /**
2324 * __blk_end_request_cur - Helper function to finish the current request chunk. 2325 * __blk_end_request_cur - Helper function to finish the current request chunk.
2325 * @rq: the request to finish the current chunk for 2326 * @rq: the request to finish the current chunk for
2326 * @error: %0 for success, < %0 for error 2327 * @error: %0 for success, < %0 for error
2327 * 2328 *
2328 * Description: 2329 * Description:
2329 * Complete the current consecutively mapped chunk from @rq. Must 2330 * Complete the current consecutively mapped chunk from @rq. Must
2330 * be called with queue lock held. 2331 * be called with queue lock held.
2331 * 2332 *
2332 * Return: 2333 * Return:
2333 * %false - we are done with this request 2334 * %false - we are done with this request
2334 * %true - still buffers pending for this request 2335 * %true - still buffers pending for this request
2335 */ 2336 */
2336 bool __blk_end_request_cur(struct request *rq, int error) 2337 bool __blk_end_request_cur(struct request *rq, int error)
2337 { 2338 {
2338 return __blk_end_request(rq, error, blk_rq_cur_bytes(rq)); 2339 return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
2339 } 2340 }
2340 EXPORT_SYMBOL(__blk_end_request_cur); 2341 EXPORT_SYMBOL(__blk_end_request_cur);
2341 2342
2342 /** 2343 /**
2343 * __blk_end_request_err - Finish a request till the next failure boundary. 2344 * __blk_end_request_err - Finish a request till the next failure boundary.
2344 * @rq: the request to finish till the next failure boundary for 2345 * @rq: the request to finish till the next failure boundary for
2345 * @error: must be negative errno 2346 * @error: must be negative errno
2346 * 2347 *
2347 * Description: 2348 * Description:
2348 * Complete @rq till the next failure boundary. Must be called 2349 * Complete @rq till the next failure boundary. Must be called
2349 * with queue lock held. 2350 * with queue lock held.
2350 * 2351 *
2351 * Return: 2352 * Return:
2352 * %false - we are done with this request 2353 * %false - we are done with this request
2353 * %true - still buffers pending for this request 2354 * %true - still buffers pending for this request
2354 */ 2355 */
2355 bool __blk_end_request_err(struct request *rq, int error) 2356 bool __blk_end_request_err(struct request *rq, int error)
2356 { 2357 {
2357 WARN_ON(error >= 0); 2358 WARN_ON(error >= 0);
2358 return __blk_end_request(rq, error, blk_rq_err_bytes(rq)); 2359 return __blk_end_request(rq, error, blk_rq_err_bytes(rq));
2359 } 2360 }
2360 EXPORT_SYMBOL_GPL(__blk_end_request_err); 2361 EXPORT_SYMBOL_GPL(__blk_end_request_err);
2361 2362
2362 void blk_rq_bio_prep(struct request_queue *q, struct request *rq, 2363 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
2363 struct bio *bio) 2364 struct bio *bio)
2364 { 2365 {
2365 /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */ 2366 /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */
2366 rq->cmd_flags |= bio->bi_rw & REQ_RW; 2367 rq->cmd_flags |= bio->bi_rw & REQ_RW;
2367 2368
2368 if (bio_has_data(bio)) { 2369 if (bio_has_data(bio)) {
2369 rq->nr_phys_segments = bio_phys_segments(q, bio); 2370 rq->nr_phys_segments = bio_phys_segments(q, bio);
2370 rq->buffer = bio_data(bio); 2371 rq->buffer = bio_data(bio);
2371 } 2372 }
2372 rq->__data_len = bio->bi_size; 2373 rq->__data_len = bio->bi_size;
2373 rq->bio = rq->biotail = bio; 2374 rq->bio = rq->biotail = bio;
2374 2375
2375 if (bio->bi_bdev) 2376 if (bio->bi_bdev)
2376 rq->rq_disk = bio->bi_bdev->bd_disk; 2377 rq->rq_disk = bio->bi_bdev->bd_disk;
2377 } 2378 }
2378 2379
2379 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 2380 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
2380 /** 2381 /**
2381 * rq_flush_dcache_pages - Helper function to flush all pages in a request 2382 * rq_flush_dcache_pages - Helper function to flush all pages in a request
2382 * @rq: the request to be flushed 2383 * @rq: the request to be flushed
2383 * 2384 *
2384 * Description: 2385 * Description:
2385 * Flush all pages in @rq. 2386 * Flush all pages in @rq.
2386 */ 2387 */
2387 void rq_flush_dcache_pages(struct request *rq) 2388 void rq_flush_dcache_pages(struct request *rq)
2388 { 2389 {
2389 struct req_iterator iter; 2390 struct req_iterator iter;
2390 struct bio_vec *bvec; 2391 struct bio_vec *bvec;
2391 2392
2392 rq_for_each_segment(bvec, rq, iter) 2393 rq_for_each_segment(bvec, rq, iter)
2393 flush_dcache_page(bvec->bv_page); 2394 flush_dcache_page(bvec->bv_page);
2394 } 2395 }
2395 EXPORT_SYMBOL_GPL(rq_flush_dcache_pages); 2396 EXPORT_SYMBOL_GPL(rq_flush_dcache_pages);
2396 #endif 2397 #endif
2397 2398
2398 /** 2399 /**
2399 * blk_lld_busy - Check if underlying low-level drivers of a device are busy 2400 * blk_lld_busy - Check if underlying low-level drivers of a device are busy
2400 * @q : the queue of the device being checked 2401 * @q : the queue of the device being checked
2401 * 2402 *
2402 * Description: 2403 * Description:
2403 * Check if underlying low-level drivers of a device are busy. 2404 * Check if underlying low-level drivers of a device are busy.
2404 * If the drivers want to export their busy state, they must set their own 2405 * If the drivers want to export their busy state, they must set their own
2405 * exporting function using blk_queue_lld_busy() first. 2406 * exporting function using blk_queue_lld_busy() first.
2406 * 2407 *
2407 * Basically, this function is used only by request stacking drivers 2408 * Basically, this function is used only by request stacking drivers
2408 * to stop dispatching requests to underlying devices when underlying 2409 * to stop dispatching requests to underlying devices when underlying
2409 * devices are busy. This behavior allows more I/O merging on the queue 2410 * devices are busy. This behavior allows more I/O merging on the queue
2410 * of the request stacking driver and prevents I/O throughput regression 2411 * of the request stacking driver and prevents I/O throughput regression
2411 * on burst I/O load. 2412 * on burst I/O load.
2412 * 2413 *
2413 * Return: 2414 * Return:
2414 * 0 - Not busy (The request stacking driver should dispatch request) 2415 * 0 - Not busy (The request stacking driver should dispatch request)
2415 * 1 - Busy (The request stacking driver should stop dispatching request) 2416 * 1 - Busy (The request stacking driver should stop dispatching request)
2416 */ 2417 */
2417 int blk_lld_busy(struct request_queue *q) 2418 int blk_lld_busy(struct request_queue *q)
2418 { 2419 {
2419 if (q->lld_busy_fn) 2420 if (q->lld_busy_fn)
2420 return q->lld_busy_fn(q); 2421 return q->lld_busy_fn(q);
2421 2422
2422 return 0; 2423 return 0;
2423 } 2424 }
2424 EXPORT_SYMBOL_GPL(blk_lld_busy); 2425 EXPORT_SYMBOL_GPL(blk_lld_busy);
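A request stacking driver would consult this from its own dispatch path before pushing work to the lower queue. A minimal hedged sketch (bottom_q, the clone, and the -EBUSY convention are assumptions; blk_insert_cloned_request() is the existing submission helper for cloned requests):

	static int stacking_dispatch(struct request_queue *bottom_q,
				     struct request *clone)
	{
		/* back off while the underlying LLD reports itself busy */
		if (blk_lld_busy(bottom_q))
			return -EBUSY;		/* caller requeues and retries later */

		return blk_insert_cloned_request(bottom_q, clone);
	}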
2425 2426
2426 /** 2427 /**
2427 * blk_rq_unprep_clone - Helper function to free all bios in a cloned request 2428 * blk_rq_unprep_clone - Helper function to free all bios in a cloned request
2428 * @rq: the clone request to be cleaned up 2429 * @rq: the clone request to be cleaned up
2429 * 2430 *
2430 * Description: 2431 * Description:
2431 * Free all bios in @rq for a cloned request. 2432 * Free all bios in @rq for a cloned request.
2432 */ 2433 */
2433 void blk_rq_unprep_clone(struct request *rq) 2434 void blk_rq_unprep_clone(struct request *rq)
2434 { 2435 {
2435 struct bio *bio; 2436 struct bio *bio;
2436 2437
2437 while ((bio = rq->bio) != NULL) { 2438 while ((bio = rq->bio) != NULL) {
2438 rq->bio = bio->bi_next; 2439 rq->bio = bio->bi_next;
2439 2440
2440 bio_put(bio); 2441 bio_put(bio);
2441 } 2442 }
2442 } 2443 }
2443 EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); 2444 EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
2444 2445
2445 /* 2446 /*
2446 * Copy attributes of the original request to the clone request. 2447 * Copy attributes of the original request to the clone request.
2447 * The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied. 2448 * The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied.
2448 */ 2449 */
2449 static void __blk_rq_prep_clone(struct request *dst, struct request *src) 2450 static void __blk_rq_prep_clone(struct request *dst, struct request *src)
2450 { 2451 {
2451 dst->cpu = src->cpu; 2452 dst->cpu = src->cpu;
2452 dst->cmd_flags = (rq_data_dir(src) | REQ_NOMERGE); 2453 dst->cmd_flags = (rq_data_dir(src) | REQ_NOMERGE);
2453 dst->cmd_type = src->cmd_type; 2454 dst->cmd_type = src->cmd_type;
2454 dst->__sector = blk_rq_pos(src); 2455 dst->__sector = blk_rq_pos(src);
2455 dst->__data_len = blk_rq_bytes(src); 2456 dst->__data_len = blk_rq_bytes(src);
2456 dst->nr_phys_segments = src->nr_phys_segments; 2457 dst->nr_phys_segments = src->nr_phys_segments;
2457 dst->ioprio = src->ioprio; 2458 dst->ioprio = src->ioprio;
2458 dst->extra_len = src->extra_len; 2459 dst->extra_len = src->extra_len;
2459 } 2460 }
2460 2461
2461 /** 2462 /**
2462 * blk_rq_prep_clone - Helper function to setup clone request 2463 * blk_rq_prep_clone - Helper function to setup clone request
2463 * @rq: the request to be setup 2464 * @rq: the request to be setup
2464 * @rq_src: original request to be cloned 2465 * @rq_src: original request to be cloned
2465 * @bs: bio_set that bios for clone are allocated from 2466 * @bs: bio_set that bios for clone are allocated from
2466 * @gfp_mask: memory allocation mask for bio 2467 * @gfp_mask: memory allocation mask for bio
2467 * @bio_ctr: setup function to be called for each clone bio. 2468 * @bio_ctr: setup function to be called for each clone bio.
2468 * Returns %0 for success, non %0 for failure. 2469 * Returns %0 for success, non %0 for failure.
2469 * @data: private data to be passed to @bio_ctr 2470 * @data: private data to be passed to @bio_ctr
2470 * 2471 *
2471 * Description: 2472 * Description:
2472 * Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq. 2473 * Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.
2473 * The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense) 2474 * The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense)
2474 * are not copied, and copying such parts is the caller's responsibility. 2475 * are not copied, and copying such parts is the caller's responsibility.
2475 * Also, pages which the original bios are pointing to are not copied 2476 * Also, pages which the original bios are pointing to are not copied
2476 * and the cloned bios just point to the same pages. 2477 * and the cloned bios just point to the same pages.
2477 * So cloned bios must be completed before original bios, which means 2478 * So cloned bios must be completed before original bios, which means
2478 * the caller must complete @rq before @rq_src. 2479 * the caller must complete @rq before @rq_src.
2479 */ 2480 */
2480 int blk_rq_prep_clone(struct request *rq, struct request *rq_src, 2481 int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
2481 struct bio_set *bs, gfp_t gfp_mask, 2482 struct bio_set *bs, gfp_t gfp_mask,
2482 int (*bio_ctr)(struct bio *, struct bio *, void *), 2483 int (*bio_ctr)(struct bio *, struct bio *, void *),
2483 void *data) 2484 void *data)
2484 { 2485 {
2485 struct bio *bio, *bio_src; 2486 struct bio *bio, *bio_src;
2486 2487
2487 if (!bs) 2488 if (!bs)
2488 bs = fs_bio_set; 2489 bs = fs_bio_set;
2489 2490
2490 blk_rq_init(NULL, rq); 2491 blk_rq_init(NULL, rq);
2491 2492
2492 __rq_for_each_bio(bio_src, rq_src) { 2493 __rq_for_each_bio(bio_src, rq_src) {
2493 bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs); 2494 bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs);
2494 if (!bio) 2495 if (!bio)
2495 goto free_and_out; 2496 goto free_and_out;
2496 2497
2497 __bio_clone(bio, bio_src); 2498 __bio_clone(bio, bio_src);
2498 2499
2499 if (bio_integrity(bio_src) && 2500 if (bio_integrity(bio_src) &&
2500 bio_integrity_clone(bio, bio_src, gfp_mask, bs)) 2501 bio_integrity_clone(bio, bio_src, gfp_mask, bs))
2501 goto free_and_out; 2502 goto free_and_out;
2502 2503
2503 if (bio_ctr && bio_ctr(bio, bio_src, data)) 2504 if (bio_ctr && bio_ctr(bio, bio_src, data))
2504 goto free_and_out; 2505 goto free_and_out;
2505 2506
2506 if (rq->bio) { 2507 if (rq->bio) {
2507 rq->biotail->bi_next = bio; 2508 rq->biotail->bi_next = bio;
2508 rq->biotail = bio; 2509 rq->biotail = bio;
2509 } else 2510 } else
2510 rq->bio = rq->biotail = bio; 2511 rq->bio = rq->biotail = bio;
2511 } 2512 }
2512 2513
2513 __blk_rq_prep_clone(rq, rq_src); 2514 __blk_rq_prep_clone(rq, rq_src);
2514 2515
2515 return 0; 2516 return 0;
2516 2517
2517 free_and_out: 2518 free_and_out:
2518 if (bio) 2519 if (bio)
2519 bio_free(bio, bs); 2520 bio_free(bio, bs);
2520 blk_rq_unprep_clone(rq); 2521 blk_rq_unprep_clone(rq);
2521 2522
2522 return -ENOMEM; 2523 return -ENOMEM;
2523 } 2524 }
2524 EXPORT_SYMBOL_GPL(blk_rq_prep_clone); 2525 EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
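Usage from a stacking driver is roughly: allocate a clone request, let blk_rq_prep_clone() duplicate the bio chain, then hand the clone to the lower queue. A hedged sketch (the end_io callback name is made up; passing a NULL bio_set falls back to fs_bio_set, as the code above shows):

	static int mydrv_setup_clone(struct request *clone, struct request *orig)
	{
		int r;

		r = blk_rq_prep_clone(clone, orig, NULL, GFP_ATOMIC, NULL, NULL);
		if (r)
			return r;

		clone->end_io = mydrv_end_clone;	/* hypothetical completion callback */
		clone->end_io_data = orig;
		return 0;
	}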
2525 2526
2526 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work) 2527 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
2527 { 2528 {
2528 return queue_work(kblockd_workqueue, work); 2529 return queue_work(kblockd_workqueue, work);
2529 } 2530 }
2530 EXPORT_SYMBOL(kblockd_schedule_work); 2531 EXPORT_SYMBOL(kblockd_schedule_work);
2531 2532
2532 int __init blk_dev_init(void) 2533 int __init blk_dev_init(void)
2533 { 2534 {
2534 BUILD_BUG_ON(__REQ_NR_BITS > 8 * 2535 BUILD_BUG_ON(__REQ_NR_BITS > 8 *
2535 sizeof(((struct request *)0)->cmd_flags)); 2536 sizeof(((struct request *)0)->cmd_flags));
2536 2537
2537 kblockd_workqueue = create_workqueue("kblockd"); 2538 kblockd_workqueue = create_workqueue("kblockd");
2538 if (!kblockd_workqueue) 2539 if (!kblockd_workqueue)
2539 panic("Failed to create kblockd\n"); 2540 panic("Failed to create kblockd\n");
2540 2541
2541 request_cachep = kmem_cache_create("blkdev_requests", 2542 request_cachep = kmem_cache_create("blkdev_requests",
2542 sizeof(struct request), 0, SLAB_PANIC, NULL); 2543 sizeof(struct request), 0, SLAB_PANIC, NULL);
2543 2544
2544 blk_requestq_cachep = kmem_cache_create("blkdev_queue", 2545 blk_requestq_cachep = kmem_cache_create("blkdev_queue",
2545 sizeof(struct request_queue), 0, SLAB_PANIC, NULL); 2546 sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
2546 2547
2547 return 0; 2548 return 0;
2548 } 2549 }
2549 2550
1 /* 1 /*
2 * Functions related to sysfs handling 2 * Functions related to sysfs handling
3 */ 3 */
4 #include <linux/kernel.h> 4 #include <linux/kernel.h>
5 #include <linux/slab.h> 5 #include <linux/slab.h>
6 #include <linux/module.h> 6 #include <linux/module.h>
7 #include <linux/bio.h> 7 #include <linux/bio.h>
8 #include <linux/blkdev.h> 8 #include <linux/blkdev.h>
9 #include <linux/blktrace_api.h> 9 #include <linux/blktrace_api.h>
10 10
11 #include "blk.h" 11 #include "blk.h"
12 12
13 struct queue_sysfs_entry { 13 struct queue_sysfs_entry {
14 struct attribute attr; 14 struct attribute attr;
15 ssize_t (*show)(struct request_queue *, char *); 15 ssize_t (*show)(struct request_queue *, char *);
16 ssize_t (*store)(struct request_queue *, const char *, size_t); 16 ssize_t (*store)(struct request_queue *, const char *, size_t);
17 }; 17 };
18 18
19 static ssize_t 19 static ssize_t
20 queue_var_show(unsigned long var, char *page) 20 queue_var_show(unsigned long var, char *page)
21 { 21 {
22 return sprintf(page, "%lu\n", var); 22 return sprintf(page, "%lu\n", var);
23 } 23 }
24 24
25 static ssize_t 25 static ssize_t
26 queue_var_store(unsigned long *var, const char *page, size_t count) 26 queue_var_store(unsigned long *var, const char *page, size_t count)
27 { 27 {
28 char *p = (char *) page; 28 char *p = (char *) page;
29 29
30 *var = simple_strtoul(p, &p, 10); 30 *var = simple_strtoul(p, &p, 10);
31 return count; 31 return count;
32 } 32 }
33 33
34 static ssize_t queue_requests_show(struct request_queue *q, char *page) 34 static ssize_t queue_requests_show(struct request_queue *q, char *page)
35 { 35 {
36 return queue_var_show(q->nr_requests, (page)); 36 return queue_var_show(q->nr_requests, (page));
37 } 37 }
38 38
39 static ssize_t 39 static ssize_t
40 queue_requests_store(struct request_queue *q, const char *page, size_t count) 40 queue_requests_store(struct request_queue *q, const char *page, size_t count)
41 { 41 {
42 struct request_list *rl = &q->rq; 42 struct request_list *rl = &q->rq;
43 unsigned long nr; 43 unsigned long nr;
44 int ret; 44 int ret;
45 45
46 if (!q->request_fn) 46 if (!q->request_fn)
47 return -EINVAL; 47 return -EINVAL;
48 48
49 ret = queue_var_store(&nr, page, count); 49 ret = queue_var_store(&nr, page, count);
50 if (nr < BLKDEV_MIN_RQ) 50 if (nr < BLKDEV_MIN_RQ)
51 nr = BLKDEV_MIN_RQ; 51 nr = BLKDEV_MIN_RQ;
52 52
53 spin_lock_irq(q->queue_lock); 53 spin_lock_irq(q->queue_lock);
54 q->nr_requests = nr; 54 q->nr_requests = nr;
55 blk_queue_congestion_threshold(q); 55 blk_queue_congestion_threshold(q);
56 56
57 if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q)) 57 if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q))
58 blk_set_queue_congested(q, BLK_RW_SYNC); 58 blk_set_queue_congested(q, BLK_RW_SYNC);
59 else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q)) 59 else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q))
60 blk_clear_queue_congested(q, BLK_RW_SYNC); 60 blk_clear_queue_congested(q, BLK_RW_SYNC);
61 61
62 if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q)) 62 if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q))
63 blk_set_queue_congested(q, BLK_RW_ASYNC); 63 blk_set_queue_congested(q, BLK_RW_ASYNC);
64 else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q)) 64 else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q))
65 blk_clear_queue_congested(q, BLK_RW_ASYNC); 65 blk_clear_queue_congested(q, BLK_RW_ASYNC);
66 66
67 if (rl->count[BLK_RW_SYNC] >= q->nr_requests) { 67 if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
68 blk_set_queue_full(q, BLK_RW_SYNC); 68 blk_set_queue_full(q, BLK_RW_SYNC);
69 } else if (rl->count[BLK_RW_SYNC]+1 <= q->nr_requests) { 69 } else if (rl->count[BLK_RW_SYNC]+1 <= q->nr_requests) {
70 blk_clear_queue_full(q, BLK_RW_SYNC); 70 blk_clear_queue_full(q, BLK_RW_SYNC);
71 wake_up(&rl->wait[BLK_RW_SYNC]); 71 wake_up(&rl->wait[BLK_RW_SYNC]);
72 } 72 }
73 73
74 if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) { 74 if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
75 blk_set_queue_full(q, BLK_RW_ASYNC); 75 blk_set_queue_full(q, BLK_RW_ASYNC);
76 } else if (rl->count[BLK_RW_ASYNC]+1 <= q->nr_requests) { 76 } else if (rl->count[BLK_RW_ASYNC]+1 <= q->nr_requests) {
77 blk_clear_queue_full(q, BLK_RW_ASYNC); 77 blk_clear_queue_full(q, BLK_RW_ASYNC);
78 wake_up(&rl->wait[BLK_RW_ASYNC]); 78 wake_up(&rl->wait[BLK_RW_ASYNC]);
79 } 79 }
80 spin_unlock_irq(q->queue_lock); 80 spin_unlock_irq(q->queue_lock);
81 return ret; 81 return ret;
82 } 82 }
83 83
84 static ssize_t queue_ra_show(struct request_queue *q, char *page) 84 static ssize_t queue_ra_show(struct request_queue *q, char *page)
85 { 85 {
86 unsigned long ra_kb = q->backing_dev_info.ra_pages << 86 unsigned long ra_kb = q->backing_dev_info.ra_pages <<
87 (PAGE_CACHE_SHIFT - 10); 87 (PAGE_CACHE_SHIFT - 10);
88 88
89 return queue_var_show(ra_kb, (page)); 89 return queue_var_show(ra_kb, (page));
90 } 90 }
91 91
92 static ssize_t 92 static ssize_t
93 queue_ra_store(struct request_queue *q, const char *page, size_t count) 93 queue_ra_store(struct request_queue *q, const char *page, size_t count)
94 { 94 {
95 unsigned long ra_kb; 95 unsigned long ra_kb;
96 ssize_t ret = queue_var_store(&ra_kb, page, count); 96 ssize_t ret = queue_var_store(&ra_kb, page, count);
97 97
98 q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10); 98 q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10);
99 99
100 return ret; 100 return ret;
101 } 101 }
102 102
103 static ssize_t queue_max_sectors_show(struct request_queue *q, char *page) 103 static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)
104 { 104 {
105 int max_sectors_kb = queue_max_sectors(q) >> 1; 105 int max_sectors_kb = queue_max_sectors(q) >> 1;
106 106
107 return queue_var_show(max_sectors_kb, (page)); 107 return queue_var_show(max_sectors_kb, (page));
108 } 108 }
109 109
110 static ssize_t queue_max_segments_show(struct request_queue *q, char *page) 110 static ssize_t queue_max_segments_show(struct request_queue *q, char *page)
111 { 111 {
112 return queue_var_show(queue_max_segments(q), (page)); 112 return queue_var_show(queue_max_segments(q), (page));
113 } 113 }
114 114
115 static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page) 115 static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page)
116 { 116 {
117 if (test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags)) 117 if (test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
118 return queue_var_show(queue_max_segment_size(q), (page)); 118 return queue_var_show(queue_max_segment_size(q), (page));
119 119
120 return queue_var_show(PAGE_CACHE_SIZE, (page)); 120 return queue_var_show(PAGE_CACHE_SIZE, (page));
121 } 121 }
122 122
123 static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page) 123 static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page)
124 { 124 {
125 return queue_var_show(queue_logical_block_size(q), page); 125 return queue_var_show(queue_logical_block_size(q), page);
126 } 126 }
127 127
128 static ssize_t queue_physical_block_size_show(struct request_queue *q, char *page) 128 static ssize_t queue_physical_block_size_show(struct request_queue *q, char *page)
129 { 129 {
130 return queue_var_show(queue_physical_block_size(q), page); 130 return queue_var_show(queue_physical_block_size(q), page);
131 } 131 }
132 132
133 static ssize_t queue_io_min_show(struct request_queue *q, char *page) 133 static ssize_t queue_io_min_show(struct request_queue *q, char *page)
134 { 134 {
135 return queue_var_show(queue_io_min(q), page); 135 return queue_var_show(queue_io_min(q), page);
136 } 136 }
137 137
138 static ssize_t queue_io_opt_show(struct request_queue *q, char *page) 138 static ssize_t queue_io_opt_show(struct request_queue *q, char *page)
139 { 139 {
140 return queue_var_show(queue_io_opt(q), page); 140 return queue_var_show(queue_io_opt(q), page);
141 } 141 }
142 142
143 static ssize_t queue_discard_granularity_show(struct request_queue *q, char *page) 143 static ssize_t queue_discard_granularity_show(struct request_queue *q, char *page)
144 { 144 {
145 return queue_var_show(q->limits.discard_granularity, page); 145 return queue_var_show(q->limits.discard_granularity, page);
146 } 146 }
147 147
148 static ssize_t queue_discard_max_show(struct request_queue *q, char *page) 148 static ssize_t queue_discard_max_show(struct request_queue *q, char *page)
149 { 149 {
150 return queue_var_show(q->limits.max_discard_sectors << 9, page); 150 return queue_var_show(q->limits.max_discard_sectors << 9, page);
151 } 151 }
152 152
153 static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page) 153 static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page)
154 { 154 {
155 return queue_var_show(queue_discard_zeroes_data(q), page); 155 return queue_var_show(queue_discard_zeroes_data(q), page);
156 } 156 }
157 157
158 static ssize_t 158 static ssize_t
159 queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) 159 queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
160 { 160 {
161 unsigned long max_sectors_kb, 161 unsigned long max_sectors_kb,
162 max_hw_sectors_kb = queue_max_hw_sectors(q) >> 1, 162 max_hw_sectors_kb = queue_max_hw_sectors(q) >> 1,
163 page_kb = 1 << (PAGE_CACHE_SHIFT - 10); 163 page_kb = 1 << (PAGE_CACHE_SHIFT - 10);
164 ssize_t ret = queue_var_store(&max_sectors_kb, page, count); 164 ssize_t ret = queue_var_store(&max_sectors_kb, page, count);
165 165
166 if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb) 166 if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb)
167 return -EINVAL; 167 return -EINVAL;
168 168
169 spin_lock_irq(q->queue_lock); 169 spin_lock_irq(q->queue_lock);
170 q->limits.max_sectors = max_sectors_kb << 1; 170 q->limits.max_sectors = max_sectors_kb << 1;
171 spin_unlock_irq(q->queue_lock); 171 spin_unlock_irq(q->queue_lock);
172 172
173 return ret; 173 return ret;
174 } 174 }
175 175
176 static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page) 176 static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
177 { 177 {
178 int max_hw_sectors_kb = queue_max_hw_sectors(q) >> 1; 178 int max_hw_sectors_kb = queue_max_hw_sectors(q) >> 1;
179 179
180 return queue_var_show(max_hw_sectors_kb, (page)); 180 return queue_var_show(max_hw_sectors_kb, (page));
181 } 181 }
182 182
183 static ssize_t queue_nonrot_show(struct request_queue *q, char *page) 183 static ssize_t queue_nonrot_show(struct request_queue *q, char *page)
184 { 184 {
185 return queue_var_show(!blk_queue_nonrot(q), page); 185 return queue_var_show(!blk_queue_nonrot(q), page);
186 } 186 }
187 187
188 static ssize_t queue_nonrot_store(struct request_queue *q, const char *page, 188 static ssize_t queue_nonrot_store(struct request_queue *q, const char *page,
189 size_t count) 189 size_t count)
190 { 190 {
191 unsigned long nm; 191 unsigned long nm;
192 ssize_t ret = queue_var_store(&nm, page, count); 192 ssize_t ret = queue_var_store(&nm, page, count);
193 193
194 spin_lock_irq(q->queue_lock); 194 spin_lock_irq(q->queue_lock);
195 if (nm) 195 if (nm)
196 queue_flag_clear(QUEUE_FLAG_NONROT, q); 196 queue_flag_clear(QUEUE_FLAG_NONROT, q);
197 else 197 else
198 queue_flag_set(QUEUE_FLAG_NONROT, q); 198 queue_flag_set(QUEUE_FLAG_NONROT, q);
199 spin_unlock_irq(q->queue_lock); 199 spin_unlock_irq(q->queue_lock);
200 200
201 return ret; 201 return ret;
202 } 202 }
203 203
204 static ssize_t queue_nomerges_show(struct request_queue *q, char *page) 204 static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
205 { 205 {
206 return queue_var_show((blk_queue_nomerges(q) << 1) | 206 return queue_var_show((blk_queue_nomerges(q) << 1) |
207 blk_queue_noxmerges(q), page); 207 blk_queue_noxmerges(q), page);
208 } 208 }
209 209
210 static ssize_t queue_nomerges_store(struct request_queue *q, const char *page, 210 static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
211 size_t count) 211 size_t count)
212 { 212 {
213 unsigned long nm; 213 unsigned long nm;
214 ssize_t ret = queue_var_store(&nm, page, count); 214 ssize_t ret = queue_var_store(&nm, page, count);
215 215
216 spin_lock_irq(q->queue_lock); 216 spin_lock_irq(q->queue_lock);
217 queue_flag_clear(QUEUE_FLAG_NOMERGES, q); 217 queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
218 queue_flag_clear(QUEUE_FLAG_NOXMERGES, q); 218 queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
219 if (nm == 2) 219 if (nm == 2)
220 queue_flag_set(QUEUE_FLAG_NOMERGES, q); 220 queue_flag_set(QUEUE_FLAG_NOMERGES, q);
221 else if (nm) 221 else if (nm)
222 queue_flag_set(QUEUE_FLAG_NOXMERGES, q); 222 queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
223 spin_unlock_irq(q->queue_lock); 223 spin_unlock_irq(q->queue_lock);
224 224
225 return ret; 225 return ret;
226 } 226 }
227 227
228 static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page) 228 static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page)
229 { 229 {
230 bool set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags); 230 bool set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);
231 231
232 return queue_var_show(set, page); 232 return queue_var_show(set, page);
233 } 233 }
234 234
235 static ssize_t 235 static ssize_t
236 queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count) 236 queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
237 { 237 {
238 ssize_t ret = -EINVAL; 238 ssize_t ret = -EINVAL;
239 #if defined(CONFIG_USE_GENERIC_SMP_HELPERS) 239 #if defined(CONFIG_USE_GENERIC_SMP_HELPERS)
240 unsigned long val; 240 unsigned long val;
241 241
242 ret = queue_var_store(&val, page, count); 242 ret = queue_var_store(&val, page, count);
243 spin_lock_irq(q->queue_lock); 243 spin_lock_irq(q->queue_lock);
244 if (val) 244 if (val)
245 queue_flag_set(QUEUE_FLAG_SAME_COMP, q); 245 queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
246 else 246 else
247 queue_flag_clear(QUEUE_FLAG_SAME_COMP, q); 247 queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
248 spin_unlock_irq(q->queue_lock); 248 spin_unlock_irq(q->queue_lock);
249 #endif 249 #endif
250 return ret; 250 return ret;
251 } 251 }
252 252
253 static ssize_t queue_random_show(struct request_queue *q, char *page)
254 {
255 return queue_var_show(blk_queue_add_random(q), page);
256 }
257
258 static ssize_t queue_random_store(struct request_queue *q, const char *page,
259 size_t count)
260 {
261 unsigned long val;
262 ssize_t ret = queue_var_store(&val, page, count);
263
264 spin_lock_irq(q->queue_lock);
265 if (val)
266 queue_flag_set(QUEUE_FLAG_ADD_RANDOM, q);
267 else
268 queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
269 spin_unlock_irq(q->queue_lock);
270
271 return ret;
272 }
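For these handlers to be reachable as an add_random attribute in the queue's sysfs directory, they presumably get a queue_sysfs_entry wired into the queue's default attribute list, following the same pattern as the entries below (the entry name and mode here are an assumption based on that pattern):

	static struct queue_sysfs_entry queue_random_entry = {
		.attr = {.name = "add_random", .mode = S_IRUGO | S_IWUSR },
		.show = queue_random_show,
		.store = queue_random_store,
	};

Writing 0 to the attribute clears QUEUE_FLAG_ADD_RANDOM, so blk_update_bidi_request() above skips add_disk_randomness() for that device; writing a non-zero value sets the flag again.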
273
253 static ssize_t queue_iostats_show(struct request_queue *q, char *page) 274 static ssize_t queue_iostats_show(struct request_queue *q, char *page)
254 { 275 {
255 return queue_var_show(blk_queue_io_stat(q), page); 276 return queue_var_show(blk_queue_io_stat(q), page);
256 } 277 }
257 278
258 static ssize_t queue_iostats_store(struct request_queue *q, const char *page, 279 static ssize_t queue_iostats_store(struct request_queue *q, const char *page,
259 size_t count) 280 size_t count)
260 { 281 {
261 unsigned long stats; 282 unsigned long stats;
262 ssize_t ret = queue_var_store(&stats, page, count); 283 ssize_t ret = queue_var_store(&stats, page, count);
263 284
264 spin_lock_irq(q->queue_lock); 285 spin_lock_irq(q->queue_lock);
265 if (stats) 286 if (stats)
266 queue_flag_set(QUEUE_FLAG_IO_STAT, q); 287 queue_flag_set(QUEUE_FLAG_IO_STAT, q);
267 else 288 else
268 queue_flag_clear(QUEUE_FLAG_IO_STAT, q); 289 queue_flag_clear(QUEUE_FLAG_IO_STAT, q);
269 spin_unlock_irq(q->queue_lock); 290 spin_unlock_irq(q->queue_lock);
270 291
271 return ret; 292 return ret;
272 } 293 }
273 294
274 static struct queue_sysfs_entry queue_requests_entry = { 295 static struct queue_sysfs_entry queue_requests_entry = {
275 .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, 296 .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
276 .show = queue_requests_show, 297 .show = queue_requests_show,
277 .store = queue_requests_store, 298 .store = queue_requests_store,
278 }; 299 };
279 300
280 static struct queue_sysfs_entry queue_ra_entry = { 301 static struct queue_sysfs_entry queue_ra_entry = {
281 .attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR }, 302 .attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR },
282 .show = queue_ra_show, 303 .show = queue_ra_show,
283 .store = queue_ra_store, 304 .store = queue_ra_store,
284 }; 305 };
285 306
286 static struct queue_sysfs_entry queue_max_sectors_entry = { 307 static struct queue_sysfs_entry queue_max_sectors_entry = {
287 .attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR }, 308 .attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR },
288 .show = queue_max_sectors_show, 309 .show = queue_max_sectors_show,
289 .store = queue_max_sectors_store, 310 .store = queue_max_sectors_store,
290 }; 311 };
291 312
292 static struct queue_sysfs_entry queue_max_hw_sectors_entry = { 313 static struct queue_sysfs_entry queue_max_hw_sectors_entry = {
293 .attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO }, 314 .attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO },
294 .show = queue_max_hw_sectors_show, 315 .show = queue_max_hw_sectors_show,
295 }; 316 };
296 317
297 static struct queue_sysfs_entry queue_max_segments_entry = { 318 static struct queue_sysfs_entry queue_max_segments_entry = {
298 .attr = {.name = "max_segments", .mode = S_IRUGO }, 319 .attr = {.name = "max_segments", .mode = S_IRUGO },
299 .show = queue_max_segments_show, 320 .show = queue_max_segments_show,
300 }; 321 };
301 322
302 static struct queue_sysfs_entry queue_max_segment_size_entry = { 323 static struct queue_sysfs_entry queue_max_segment_size_entry = {
303 .attr = {.name = "max_segment_size", .mode = S_IRUGO }, 324 .attr = {.name = "max_segment_size", .mode = S_IRUGO },
304 .show = queue_max_segment_size_show, 325 .show = queue_max_segment_size_show,
305 }; 326 };
306 327
307 static struct queue_sysfs_entry queue_iosched_entry = { 328 static struct queue_sysfs_entry queue_iosched_entry = {
308 .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR }, 329 .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR },
309 .show = elv_iosched_show, 330 .show = elv_iosched_show,
310 .store = elv_iosched_store, 331 .store = elv_iosched_store,
311 }; 332 };
312 333
313 static struct queue_sysfs_entry queue_hw_sector_size_entry = { 334 static struct queue_sysfs_entry queue_hw_sector_size_entry = {
314 .attr = {.name = "hw_sector_size", .mode = S_IRUGO }, 335 .attr = {.name = "hw_sector_size", .mode = S_IRUGO },
315 .show = queue_logical_block_size_show, 336 .show = queue_logical_block_size_show,
316 }; 337 };
317 338
318 static struct queue_sysfs_entry queue_logical_block_size_entry = { 339 static struct queue_sysfs_entry queue_logical_block_size_entry = {
319 .attr = {.name = "logical_block_size", .mode = S_IRUGO }, 340 .attr = {.name = "logical_block_size", .mode = S_IRUGO },
320 .show = queue_logical_block_size_show, 341 .show = queue_logical_block_size_show,
321 }; 342 };
322 343
323 static struct queue_sysfs_entry queue_physical_block_size_entry = { 344 static struct queue_sysfs_entry queue_physical_block_size_entry = {
324 .attr = {.name = "physical_block_size", .mode = S_IRUGO }, 345 .attr = {.name = "physical_block_size", .mode = S_IRUGO },
325 .show = queue_physical_block_size_show, 346 .show = queue_physical_block_size_show,
326 }; 347 };
327 348
328 static struct queue_sysfs_entry queue_io_min_entry = { 349 static struct queue_sysfs_entry queue_io_min_entry = {
329 .attr = {.name = "minimum_io_size", .mode = S_IRUGO }, 350 .attr = {.name = "minimum_io_size", .mode = S_IRUGO },
330 .show = queue_io_min_show, 351 .show = queue_io_min_show,
331 }; 352 };
332 353
333 static struct queue_sysfs_entry queue_io_opt_entry = { 354 static struct queue_sysfs_entry queue_io_opt_entry = {
334 .attr = {.name = "optimal_io_size", .mode = S_IRUGO }, 355 .attr = {.name = "optimal_io_size", .mode = S_IRUGO },
335 .show = queue_io_opt_show, 356 .show = queue_io_opt_show,
336 }; 357 };
337 358
338 static struct queue_sysfs_entry queue_discard_granularity_entry = { 359 static struct queue_sysfs_entry queue_discard_granularity_entry = {
339 .attr = {.name = "discard_granularity", .mode = S_IRUGO }, 360 .attr = {.name = "discard_granularity", .mode = S_IRUGO },
340 .show = queue_discard_granularity_show, 361 .show = queue_discard_granularity_show,
341 }; 362 };
342 363
343 static struct queue_sysfs_entry queue_discard_max_entry = { 364 static struct queue_sysfs_entry queue_discard_max_entry = {
344 .attr = {.name = "discard_max_bytes", .mode = S_IRUGO }, 365 .attr = {.name = "discard_max_bytes", .mode = S_IRUGO },
345 .show = queue_discard_max_show, 366 .show = queue_discard_max_show,
346 }; 367 };
347 368
348 static struct queue_sysfs_entry queue_discard_zeroes_data_entry = { 369 static struct queue_sysfs_entry queue_discard_zeroes_data_entry = {
349 .attr = {.name = "discard_zeroes_data", .mode = S_IRUGO }, 370 .attr = {.name = "discard_zeroes_data", .mode = S_IRUGO },
350 .show = queue_discard_zeroes_data_show, 371 .show = queue_discard_zeroes_data_show,
351 }; 372 };
352 373
353 static struct queue_sysfs_entry queue_nonrot_entry = { 374 static struct queue_sysfs_entry queue_nonrot_entry = {
354 .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR }, 375 .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
355 .show = queue_nonrot_show, 376 .show = queue_nonrot_show,
356 .store = queue_nonrot_store, 377 .store = queue_nonrot_store,
357 }; 378 };
358 379
359 static struct queue_sysfs_entry queue_nomerges_entry = { 380 static struct queue_sysfs_entry queue_nomerges_entry = {
360 .attr = {.name = "nomerges", .mode = S_IRUGO | S_IWUSR }, 381 .attr = {.name = "nomerges", .mode = S_IRUGO | S_IWUSR },
361 .show = queue_nomerges_show, 382 .show = queue_nomerges_show,
362 .store = queue_nomerges_store, 383 .store = queue_nomerges_store,
363 }; 384 };
364 385
365 static struct queue_sysfs_entry queue_rq_affinity_entry = { 386 static struct queue_sysfs_entry queue_rq_affinity_entry = {
366 .attr = {.name = "rq_affinity", .mode = S_IRUGO | S_IWUSR }, 387 .attr = {.name = "rq_affinity", .mode = S_IRUGO | S_IWUSR },
367 .show = queue_rq_affinity_show, 388 .show = queue_rq_affinity_show,
368 .store = queue_rq_affinity_store, 389 .store = queue_rq_affinity_store,
369 }; 390 };
370 391
371 static struct queue_sysfs_entry queue_iostats_entry = { 392 static struct queue_sysfs_entry queue_iostats_entry = {
372 .attr = {.name = "iostats", .mode = S_IRUGO | S_IWUSR }, 393 .attr = {.name = "iostats", .mode = S_IRUGO | S_IWUSR },
373 .show = queue_iostats_show, 394 .show = queue_iostats_show,
374 .store = queue_iostats_store, 395 .store = queue_iostats_store,
375 }; 396 };
376 397
398 static struct queue_sysfs_entry queue_random_entry = {
399 .attr = {.name = "add_random", .mode = S_IRUGO | S_IWUSR },
400 .show = queue_random_show,
401 .store = queue_random_store,
402 };
403
377 static struct attribute *default_attrs[] = { 404 static struct attribute *default_attrs[] = {
378 &queue_requests_entry.attr, 405 &queue_requests_entry.attr,
379 &queue_ra_entry.attr, 406 &queue_ra_entry.attr,
380 &queue_max_hw_sectors_entry.attr, 407 &queue_max_hw_sectors_entry.attr,
381 &queue_max_sectors_entry.attr, 408 &queue_max_sectors_entry.attr,
382 &queue_max_segments_entry.attr, 409 &queue_max_segments_entry.attr,
383 &queue_max_segment_size_entry.attr, 410 &queue_max_segment_size_entry.attr,
384 &queue_iosched_entry.attr, 411 &queue_iosched_entry.attr,
385 &queue_hw_sector_size_entry.attr, 412 &queue_hw_sector_size_entry.attr,
386 &queue_logical_block_size_entry.attr, 413 &queue_logical_block_size_entry.attr,
387 &queue_physical_block_size_entry.attr, 414 &queue_physical_block_size_entry.attr,
388 &queue_io_min_entry.attr, 415 &queue_io_min_entry.attr,
389 &queue_io_opt_entry.attr, 416 &queue_io_opt_entry.attr,
390 &queue_discard_granularity_entry.attr, 417 &queue_discard_granularity_entry.attr,
391 &queue_discard_max_entry.attr, 418 &queue_discard_max_entry.attr,
392 &queue_discard_zeroes_data_entry.attr, 419 &queue_discard_zeroes_data_entry.attr,
393 &queue_nonrot_entry.attr, 420 &queue_nonrot_entry.attr,
394 &queue_nomerges_entry.attr, 421 &queue_nomerges_entry.attr,
395 &queue_rq_affinity_entry.attr, 422 &queue_rq_affinity_entry.attr,
396 &queue_iostats_entry.attr, 423 &queue_iostats_entry.attr,
424 &queue_random_entry.attr,
397 NULL, 425 NULL,
398 }; 426 };
399 427
400 #define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr) 428 #define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr)
401 429
402 static ssize_t 430 static ssize_t
403 queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page) 431 queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
404 { 432 {
405 struct queue_sysfs_entry *entry = to_queue(attr); 433 struct queue_sysfs_entry *entry = to_queue(attr);
406 struct request_queue *q = 434 struct request_queue *q =
407 container_of(kobj, struct request_queue, kobj); 435 container_of(kobj, struct request_queue, kobj);
408 ssize_t res; 436 ssize_t res;
409 437
410 if (!entry->show) 438 if (!entry->show)
411 return -EIO; 439 return -EIO;
412 mutex_lock(&q->sysfs_lock); 440 mutex_lock(&q->sysfs_lock);
413 if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { 441 if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) {
414 mutex_unlock(&q->sysfs_lock); 442 mutex_unlock(&q->sysfs_lock);
415 return -ENOENT; 443 return -ENOENT;
416 } 444 }
417 res = entry->show(q, page); 445 res = entry->show(q, page);
418 mutex_unlock(&q->sysfs_lock); 446 mutex_unlock(&q->sysfs_lock);
419 return res; 447 return res;
420 } 448 }
421 449
422 static ssize_t 450 static ssize_t
423 queue_attr_store(struct kobject *kobj, struct attribute *attr, 451 queue_attr_store(struct kobject *kobj, struct attribute *attr,
424 const char *page, size_t length) 452 const char *page, size_t length)
425 { 453 {
426 struct queue_sysfs_entry *entry = to_queue(attr); 454 struct queue_sysfs_entry *entry = to_queue(attr);
427 struct request_queue *q; 455 struct request_queue *q;
428 ssize_t res; 456 ssize_t res;
429 457
430 if (!entry->store) 458 if (!entry->store)
431 return -EIO; 459 return -EIO;
432 460
433 q = container_of(kobj, struct request_queue, kobj); 461 q = container_of(kobj, struct request_queue, kobj);
434 mutex_lock(&q->sysfs_lock); 462 mutex_lock(&q->sysfs_lock);
435 if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { 463 if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) {
436 mutex_unlock(&q->sysfs_lock); 464 mutex_unlock(&q->sysfs_lock);
437 return -ENOENT; 465 return -ENOENT;
438 } 466 }
439 res = entry->store(q, page, length); 467 res = entry->store(q, page, length);
440 mutex_unlock(&q->sysfs_lock); 468 mutex_unlock(&q->sysfs_lock);
441 return res; 469 return res;
442 } 470 }
443 471
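queue_attr_show() and queue_attr_store() are the only paths sysfs takes into the queue attributes, so every file under queue/ follows the same recipe: a show callback, an optional store callback, and a queue_sysfs_entry tying them to a name. As an illustration only (the attribute below is hypothetical and not part of this patch), a read-only file would look like this, plus an entry in default_attrs[]:

static ssize_t queue_example_show(struct request_queue *q, char *page)
{
	/* queue_var_show() prints an unsigned long followed by a newline */
	return queue_var_show(q->nr_requests, page);
}

static struct queue_sysfs_entry queue_example_entry = {
	.attr	= {.name = "example", .mode = S_IRUGO },
	.show	= queue_example_show,
	/* no .store: writes are rejected with -EIO by queue_attr_store() */
};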
444 /** 472 /**
445 * blk_cleanup_queue: - release a &struct request_queue when it is no longer needed 473 * blk_cleanup_queue: - release a &struct request_queue when it is no longer needed
446 * @kobj: the kobj belonging to the request queue to be released 474 * @kobj: the kobj belonging to the request queue to be released
447 * 475 *
448 * Description: 476 * Description:
449 * blk_cleanup_queue is the pair to blk_init_queue() or 477 * blk_cleanup_queue is the pair to blk_init_queue() or
450 * blk_queue_make_request(). It should be called when a request queue is 478 * blk_queue_make_request(). It should be called when a request queue is
451 * being released; typically when a block device is being de-registered. 479 * being released; typically when a block device is being de-registered.
452 * Currently, its primary task is to free all the &struct request 480 * Currently, its primary task is to free all the &struct request
453 * structures that were allocated to the queue and the queue itself. 481 * structures that were allocated to the queue and the queue itself.
454 * 482 *
455 * Caveat: 483 * Caveat:
456 * Hopefully the low level driver will have finished any 484 * Hopefully the low level driver will have finished any
457 * outstanding requests first... 485 * outstanding requests first...
458 **/ 486 **/
459 static void blk_release_queue(struct kobject *kobj) 487 static void blk_release_queue(struct kobject *kobj)
460 { 488 {
461 struct request_queue *q = 489 struct request_queue *q =
462 container_of(kobj, struct request_queue, kobj); 490 container_of(kobj, struct request_queue, kobj);
463 struct request_list *rl = &q->rq; 491 struct request_list *rl = &q->rq;
464 492
465 blk_sync_queue(q); 493 blk_sync_queue(q);
466 494
467 if (rl->rq_pool) 495 if (rl->rq_pool)
468 mempool_destroy(rl->rq_pool); 496 mempool_destroy(rl->rq_pool);
469 497
470 if (q->queue_tags) 498 if (q->queue_tags)
471 __blk_queue_free_tags(q); 499 __blk_queue_free_tags(q);
472 500
473 blk_trace_shutdown(q); 501 blk_trace_shutdown(q);
474 502
475 bdi_destroy(&q->backing_dev_info); 503 bdi_destroy(&q->backing_dev_info);
476 kmem_cache_free(blk_requestq_cachep, q); 504 kmem_cache_free(blk_requestq_cachep, q);
477 } 505 }
478 506
479 static const struct sysfs_ops queue_sysfs_ops = { 507 static const struct sysfs_ops queue_sysfs_ops = {
480 .show = queue_attr_show, 508 .show = queue_attr_show,
481 .store = queue_attr_store, 509 .store = queue_attr_store,
482 }; 510 };
483 511
484 struct kobj_type blk_queue_ktype = { 512 struct kobj_type blk_queue_ktype = {
485 .sysfs_ops = &queue_sysfs_ops, 513 .sysfs_ops = &queue_sysfs_ops,
486 .default_attrs = default_attrs, 514 .default_attrs = default_attrs,
487 .release = blk_release_queue, 515 .release = blk_release_queue,
488 }; 516 };
489 517
490 int blk_register_queue(struct gendisk *disk) 518 int blk_register_queue(struct gendisk *disk)
491 { 519 {
492 int ret; 520 int ret;
493 struct device *dev = disk_to_dev(disk); 521 struct device *dev = disk_to_dev(disk);
494 522
495 struct request_queue *q = disk->queue; 523 struct request_queue *q = disk->queue;
496 524
497 if (WARN_ON(!q)) 525 if (WARN_ON(!q))
498 return -ENXIO; 526 return -ENXIO;
499 527
500 ret = blk_trace_init_sysfs(dev); 528 ret = blk_trace_init_sysfs(dev);
501 if (ret) 529 if (ret)
502 return ret; 530 return ret;
503 531
504 ret = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue"); 532 ret = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue");
505 if (ret < 0) 533 if (ret < 0)
506 return ret; 534 return ret;
507 535
508 kobject_uevent(&q->kobj, KOBJ_ADD); 536 kobject_uevent(&q->kobj, KOBJ_ADD);
509 537
510 if (!q->request_fn) 538 if (!q->request_fn)
511 return 0; 539 return 0;
512 540
513 ret = elv_register_queue(q); 541 ret = elv_register_queue(q);
514 if (ret) { 542 if (ret) {
515 kobject_uevent(&q->kobj, KOBJ_REMOVE); 543 kobject_uevent(&q->kobj, KOBJ_REMOVE);
516 kobject_del(&q->kobj); 544 kobject_del(&q->kobj);
517 blk_trace_remove_sysfs(disk_to_dev(disk)); 545 blk_trace_remove_sysfs(disk_to_dev(disk));
518 return ret; 546 return ret;
519 } 547 }
520 548
521 return 0; 549 return 0;
522 } 550 }
523 551
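Drivers rarely call blk_register_queue() themselves; it runs as part of add_disk() once the gendisk and its request queue have been wired up. A rough sketch of that driver-side sequence, with hypothetical names (example_*) and the error unwinding omitted:

#include <linux/blkdev.h>
#include <linux/genhd.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);

static void example_request_fn(struct request_queue *q)
{
	/* hypothetical: fetch requests from q and complete them */
}

static int example_attach_disk(void)
{
	struct request_queue *q = blk_init_queue(example_request_fn, &example_lock);
	struct gendisk *disk = alloc_disk(1);

	if (!q || !disk)
		return -ENOMEM;		/* real code would also free the half that did allocate */

	disk->queue = q;
	/* ... fill in major/first_minor, fops, disk_name, capacity ... */
	add_disk(disk);			/* ends up calling blk_register_queue() */
	return 0;
}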
524 void blk_unregister_queue(struct gendisk *disk) 552 void blk_unregister_queue(struct gendisk *disk)
525 { 553 {
526 struct request_queue *q = disk->queue; 554 struct request_queue *q = disk->queue;
527 555
528 if (WARN_ON(!q)) 556 if (WARN_ON(!q))
529 return; 557 return;
530 558
531 if (q->request_fn) 559 if (q->request_fn)
532 elv_unregister_queue(q); 560 elv_unregister_queue(q);
533 561
534 kobject_uevent(&q->kobj, KOBJ_REMOVE); 562 kobject_uevent(&q->kobj, KOBJ_REMOVE);
535 kobject_del(&q->kobj); 563 kobject_del(&q->kobj);
536 blk_trace_remove_sysfs(disk_to_dev(disk)); 564 blk_trace_remove_sysfs(disk_to_dev(disk));
537 kobject_put(&disk_to_dev(disk)->kobj); 565 kobject_put(&disk_to_dev(disk)->kobj);
538 } 566 }
539 567
include/linux/blkdev.h
1 #ifndef _LINUX_BLKDEV_H 1 #ifndef _LINUX_BLKDEV_H
2 #define _LINUX_BLKDEV_H 2 #define _LINUX_BLKDEV_H
3 3
4 #ifdef CONFIG_BLOCK 4 #ifdef CONFIG_BLOCK
5 5
6 #include <linux/sched.h> 6 #include <linux/sched.h>
7 #include <linux/major.h> 7 #include <linux/major.h>
8 #include <linux/genhd.h> 8 #include <linux/genhd.h>
9 #include <linux/list.h> 9 #include <linux/list.h>
10 #include <linux/timer.h> 10 #include <linux/timer.h>
11 #include <linux/workqueue.h> 11 #include <linux/workqueue.h>
12 #include <linux/pagemap.h> 12 #include <linux/pagemap.h>
13 #include <linux/backing-dev.h> 13 #include <linux/backing-dev.h>
14 #include <linux/wait.h> 14 #include <linux/wait.h>
15 #include <linux/mempool.h> 15 #include <linux/mempool.h>
16 #include <linux/bio.h> 16 #include <linux/bio.h>
17 #include <linux/module.h> 17 #include <linux/module.h>
18 #include <linux/stringify.h> 18 #include <linux/stringify.h>
19 #include <linux/gfp.h> 19 #include <linux/gfp.h>
20 #include <linux/bsg.h> 20 #include <linux/bsg.h>
21 #include <linux/smp.h> 21 #include <linux/smp.h>
22 22
23 #include <asm/scatterlist.h> 23 #include <asm/scatterlist.h>
24 24
25 struct scsi_ioctl_command; 25 struct scsi_ioctl_command;
26 26
27 struct request_queue; 27 struct request_queue;
28 struct elevator_queue; 28 struct elevator_queue;
29 struct request_pm_state; 29 struct request_pm_state;
30 struct blk_trace; 30 struct blk_trace;
31 struct request; 31 struct request;
32 struct sg_io_hdr; 32 struct sg_io_hdr;
33 33
34 #define BLKDEV_MIN_RQ 4 34 #define BLKDEV_MIN_RQ 4
35 #define BLKDEV_MAX_RQ 128 /* Default maximum */ 35 #define BLKDEV_MAX_RQ 128 /* Default maximum */
36 36
37 struct request; 37 struct request;
38 typedef void (rq_end_io_fn)(struct request *, int); 38 typedef void (rq_end_io_fn)(struct request *, int);
39 39
40 struct request_list { 40 struct request_list {
41 /* 41 /*
42 * count[], starved[], and wait[] are indexed by 42 * count[], starved[], and wait[] are indexed by
43 * BLK_RW_SYNC/BLK_RW_ASYNC 43 * BLK_RW_SYNC/BLK_RW_ASYNC
44 */ 44 */
45 int count[2]; 45 int count[2];
46 int starved[2]; 46 int starved[2];
47 int elvpriv; 47 int elvpriv;
48 mempool_t *rq_pool; 48 mempool_t *rq_pool;
49 wait_queue_head_t wait[2]; 49 wait_queue_head_t wait[2];
50 }; 50 };
51 51
52 /* 52 /*
53 * request command types 53 * request command types
54 */ 54 */
55 enum rq_cmd_type_bits { 55 enum rq_cmd_type_bits {
56 REQ_TYPE_FS = 1, /* fs request */ 56 REQ_TYPE_FS = 1, /* fs request */
57 REQ_TYPE_BLOCK_PC, /* scsi command */ 57 REQ_TYPE_BLOCK_PC, /* scsi command */
58 REQ_TYPE_SENSE, /* sense request */ 58 REQ_TYPE_SENSE, /* sense request */
59 REQ_TYPE_PM_SUSPEND, /* suspend request */ 59 REQ_TYPE_PM_SUSPEND, /* suspend request */
60 REQ_TYPE_PM_RESUME, /* resume request */ 60 REQ_TYPE_PM_RESUME, /* resume request */
61 REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ 61 REQ_TYPE_PM_SHUTDOWN, /* shutdown request */
62 REQ_TYPE_SPECIAL, /* driver defined type */ 62 REQ_TYPE_SPECIAL, /* driver defined type */
63 REQ_TYPE_LINUX_BLOCK, /* generic block layer message */ 63 REQ_TYPE_LINUX_BLOCK, /* generic block layer message */
64 /* 64 /*
65 * for ATA/ATAPI devices. this really doesn't belong here, ide should 65 * for ATA/ATAPI devices. this really doesn't belong here, ide should
66 * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver 66 * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver
67 * private REQ_LB opcodes to differentiate what type of request this is 67 * private REQ_LB opcodes to differentiate what type of request this is
68 */ 68 */
69 REQ_TYPE_ATA_TASKFILE, 69 REQ_TYPE_ATA_TASKFILE,
70 REQ_TYPE_ATA_PC, 70 REQ_TYPE_ATA_PC,
71 }; 71 };
72 72
73 /* 73 /*
74 * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being 74 * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being
75 * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a 75 * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a
76 * SCSI cdb). 76 * SCSI cdb).
77 * 77 *
78 * 0x00 -> 0x3f are driver private, to be used for whatever purpose they need, 78 * 0x00 -> 0x3f are driver private, to be used for whatever purpose they need,
79 * typically to differentiate REQ_TYPE_SPECIAL requests. 79 * typically to differentiate REQ_TYPE_SPECIAL requests.
80 * 80 *
81 */ 81 */
82 enum { 82 enum {
83 REQ_LB_OP_EJECT = 0x40, /* eject request */ 83 REQ_LB_OP_EJECT = 0x40, /* eject request */
84 REQ_LB_OP_FLUSH = 0x41, /* flush request */ 84 REQ_LB_OP_FLUSH = 0x41, /* flush request */
85 }; 85 };
86 86
87 /* 87 /*
88 * request type modified bits. first four bits match BIO_RW* bits, important 88 * request type modified bits. first four bits match BIO_RW* bits, important
89 */ 89 */
90 enum rq_flag_bits { 90 enum rq_flag_bits {
91 __REQ_RW, /* not set, read. set, write */ 91 __REQ_RW, /* not set, read. set, write */
92 __REQ_FAILFAST_DEV, /* no driver retries of device errors */ 92 __REQ_FAILFAST_DEV, /* no driver retries of device errors */
93 __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */ 93 __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */
94 __REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */ 94 __REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */
95 /* above flags must match BIO_RW_* */ 95 /* above flags must match BIO_RW_* */
96 __REQ_DISCARD, /* request to discard sectors */ 96 __REQ_DISCARD, /* request to discard sectors */
97 __REQ_SORTED, /* elevator knows about this request */ 97 __REQ_SORTED, /* elevator knows about this request */
98 __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ 98 __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */
99 __REQ_HARDBARRIER, /* may not be passed by drive either */ 99 __REQ_HARDBARRIER, /* may not be passed by drive either */
100 __REQ_FUA, /* forced unit access */ 100 __REQ_FUA, /* forced unit access */
101 __REQ_NOMERGE, /* don't touch this for merging */ 101 __REQ_NOMERGE, /* don't touch this for merging */
102 __REQ_STARTED, /* drive already may have started this one */ 102 __REQ_STARTED, /* drive already may have started this one */
103 __REQ_DONTPREP, /* don't call prep for this one */ 103 __REQ_DONTPREP, /* don't call prep for this one */
104 __REQ_QUEUED, /* uses queueing */ 104 __REQ_QUEUED, /* uses queueing */
105 __REQ_ELVPRIV, /* elevator private data attached */ 105 __REQ_ELVPRIV, /* elevator private data attached */
106 __REQ_FAILED, /* set if the request failed */ 106 __REQ_FAILED, /* set if the request failed */
107 __REQ_QUIET, /* don't worry about errors */ 107 __REQ_QUIET, /* don't worry about errors */
108 __REQ_PREEMPT, /* set for "ide_preempt" requests */ 108 __REQ_PREEMPT, /* set for "ide_preempt" requests */
109 __REQ_ORDERED_COLOR, /* is before or after barrier */ 109 __REQ_ORDERED_COLOR, /* is before or after barrier */
110 __REQ_RW_SYNC, /* request is sync (sync write or read) */ 110 __REQ_RW_SYNC, /* request is sync (sync write or read) */
111 __REQ_ALLOCED, /* request came from our alloc pool */ 111 __REQ_ALLOCED, /* request came from our alloc pool */
112 __REQ_RW_META, /* metadata io request */ 112 __REQ_RW_META, /* metadata io request */
113 __REQ_COPY_USER, /* contains copies of user pages */ 113 __REQ_COPY_USER, /* contains copies of user pages */
114 __REQ_INTEGRITY, /* integrity metadata has been remapped */ 114 __REQ_INTEGRITY, /* integrity metadata has been remapped */
115 __REQ_NOIDLE, /* Don't anticipate more IO after this one */ 115 __REQ_NOIDLE, /* Don't anticipate more IO after this one */
116 __REQ_IO_STAT, /* account I/O stat */ 116 __REQ_IO_STAT, /* account I/O stat */
117 __REQ_MIXED_MERGE, /* merge of different types, fail separately */ 117 __REQ_MIXED_MERGE, /* merge of different types, fail separately */
118 __REQ_NR_BITS, /* stops here */ 118 __REQ_NR_BITS, /* stops here */
119 }; 119 };
120 120
121 #define REQ_RW (1 << __REQ_RW) 121 #define REQ_RW (1 << __REQ_RW)
122 #define REQ_FAILFAST_DEV (1 << __REQ_FAILFAST_DEV) 122 #define REQ_FAILFAST_DEV (1 << __REQ_FAILFAST_DEV)
123 #define REQ_FAILFAST_TRANSPORT (1 << __REQ_FAILFAST_TRANSPORT) 123 #define REQ_FAILFAST_TRANSPORT (1 << __REQ_FAILFAST_TRANSPORT)
124 #define REQ_FAILFAST_DRIVER (1 << __REQ_FAILFAST_DRIVER) 124 #define REQ_FAILFAST_DRIVER (1 << __REQ_FAILFAST_DRIVER)
125 #define REQ_DISCARD (1 << __REQ_DISCARD) 125 #define REQ_DISCARD (1 << __REQ_DISCARD)
126 #define REQ_SORTED (1 << __REQ_SORTED) 126 #define REQ_SORTED (1 << __REQ_SORTED)
127 #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) 127 #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER)
128 #define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER) 128 #define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER)
129 #define REQ_FUA (1 << __REQ_FUA) 129 #define REQ_FUA (1 << __REQ_FUA)
130 #define REQ_NOMERGE (1 << __REQ_NOMERGE) 130 #define REQ_NOMERGE (1 << __REQ_NOMERGE)
131 #define REQ_STARTED (1 << __REQ_STARTED) 131 #define REQ_STARTED (1 << __REQ_STARTED)
132 #define REQ_DONTPREP (1 << __REQ_DONTPREP) 132 #define REQ_DONTPREP (1 << __REQ_DONTPREP)
133 #define REQ_QUEUED (1 << __REQ_QUEUED) 133 #define REQ_QUEUED (1 << __REQ_QUEUED)
134 #define REQ_ELVPRIV (1 << __REQ_ELVPRIV) 134 #define REQ_ELVPRIV (1 << __REQ_ELVPRIV)
135 #define REQ_FAILED (1 << __REQ_FAILED) 135 #define REQ_FAILED (1 << __REQ_FAILED)
136 #define REQ_QUIET (1 << __REQ_QUIET) 136 #define REQ_QUIET (1 << __REQ_QUIET)
137 #define REQ_PREEMPT (1 << __REQ_PREEMPT) 137 #define REQ_PREEMPT (1 << __REQ_PREEMPT)
138 #define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) 138 #define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR)
139 #define REQ_RW_SYNC (1 << __REQ_RW_SYNC) 139 #define REQ_RW_SYNC (1 << __REQ_RW_SYNC)
140 #define REQ_ALLOCED (1 << __REQ_ALLOCED) 140 #define REQ_ALLOCED (1 << __REQ_ALLOCED)
141 #define REQ_RW_META (1 << __REQ_RW_META) 141 #define REQ_RW_META (1 << __REQ_RW_META)
142 #define REQ_COPY_USER (1 << __REQ_COPY_USER) 142 #define REQ_COPY_USER (1 << __REQ_COPY_USER)
143 #define REQ_INTEGRITY (1 << __REQ_INTEGRITY) 143 #define REQ_INTEGRITY (1 << __REQ_INTEGRITY)
144 #define REQ_NOIDLE (1 << __REQ_NOIDLE) 144 #define REQ_NOIDLE (1 << __REQ_NOIDLE)
145 #define REQ_IO_STAT (1 << __REQ_IO_STAT) 145 #define REQ_IO_STAT (1 << __REQ_IO_STAT)
146 #define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE) 146 #define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE)
147 147
148 #define REQ_FAILFAST_MASK (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | \ 148 #define REQ_FAILFAST_MASK (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | \
149 REQ_FAILFAST_DRIVER) 149 REQ_FAILFAST_DRIVER)
150 150
151 #define BLK_MAX_CDB 16 151 #define BLK_MAX_CDB 16
152 152
153 /* 153 /*
154 * try to put the fields that are referenced together in the same cacheline. 154 * try to put the fields that are referenced together in the same cacheline.
155 * if you modify this structure, be sure to check block/blk-core.c:rq_init() 155 * if you modify this structure, be sure to check block/blk-core.c:rq_init()
156 * as well! 156 * as well!
157 */ 157 */
158 struct request { 158 struct request {
159 struct list_head queuelist; 159 struct list_head queuelist;
160 struct call_single_data csd; 160 struct call_single_data csd;
161 161
162 struct request_queue *q; 162 struct request_queue *q;
163 163
164 unsigned int cmd_flags; 164 unsigned int cmd_flags;
165 enum rq_cmd_type_bits cmd_type; 165 enum rq_cmd_type_bits cmd_type;
166 unsigned long atomic_flags; 166 unsigned long atomic_flags;
167 167
168 int cpu; 168 int cpu;
169 169
170 /* the following two fields are internal, NEVER access directly */ 170 /* the following two fields are internal, NEVER access directly */
171 unsigned int __data_len; /* total data len */ 171 unsigned int __data_len; /* total data len */
172 sector_t __sector; /* sector cursor */ 172 sector_t __sector; /* sector cursor */
173 173
174 struct bio *bio; 174 struct bio *bio;
175 struct bio *biotail; 175 struct bio *biotail;
176 176
177 struct hlist_node hash; /* merge hash */ 177 struct hlist_node hash; /* merge hash */
178 /* 178 /*
179 * The rb_node is only used inside the io scheduler, requests 179 * The rb_node is only used inside the io scheduler, requests
180 * are pruned when moved to the dispatch queue. So let the 180 * are pruned when moved to the dispatch queue. So let the
181 * completion_data share space with the rb_node. 181 * completion_data share space with the rb_node.
182 */ 182 */
183 union { 183 union {
184 struct rb_node rb_node; /* sort/lookup */ 184 struct rb_node rb_node; /* sort/lookup */
185 void *completion_data; 185 void *completion_data;
186 }; 186 };
187 187
188 /* 188 /*
189 * Three pointers are available for the IO schedulers, if they need 189 * Three pointers are available for the IO schedulers, if they need
190 * more they have to dynamically allocate it. 190 * more they have to dynamically allocate it.
191 */ 191 */
192 void *elevator_private; 192 void *elevator_private;
193 void *elevator_private2; 193 void *elevator_private2;
194 void *elevator_private3; 194 void *elevator_private3;
195 195
196 struct gendisk *rq_disk; 196 struct gendisk *rq_disk;
197 unsigned long start_time; 197 unsigned long start_time;
198 #ifdef CONFIG_BLK_CGROUP 198 #ifdef CONFIG_BLK_CGROUP
199 unsigned long long start_time_ns; 199 unsigned long long start_time_ns;
200 unsigned long long io_start_time_ns; /* when passed to hardware */ 200 unsigned long long io_start_time_ns; /* when passed to hardware */
201 #endif 201 #endif
202 /* Number of scatter-gather DMA addr+len pairs after 202 /* Number of scatter-gather DMA addr+len pairs after
203 * physical address coalescing is performed. 203 * physical address coalescing is performed.
204 */ 204 */
205 unsigned short nr_phys_segments; 205 unsigned short nr_phys_segments;
206 206
207 unsigned short ioprio; 207 unsigned short ioprio;
208 208
209 int ref_count; 209 int ref_count;
210 210
211 void *special; /* opaque pointer available for LLD use */ 211 void *special; /* opaque pointer available for LLD use */
212 char *buffer; /* kaddr of the current segment if available */ 212 char *buffer; /* kaddr of the current segment if available */
213 213
214 int tag; 214 int tag;
215 int errors; 215 int errors;
216 216
217 /* 217 /*
218 * when request is used as a packet command carrier 218 * when request is used as a packet command carrier
219 */ 219 */
220 unsigned char __cmd[BLK_MAX_CDB]; 220 unsigned char __cmd[BLK_MAX_CDB];
221 unsigned char *cmd; 221 unsigned char *cmd;
222 unsigned short cmd_len; 222 unsigned short cmd_len;
223 223
224 unsigned int extra_len; /* length of alignment and padding */ 224 unsigned int extra_len; /* length of alignment and padding */
225 unsigned int sense_len; 225 unsigned int sense_len;
226 unsigned int resid_len; /* residual count */ 226 unsigned int resid_len; /* residual count */
227 void *sense; 227 void *sense;
228 228
229 unsigned long deadline; 229 unsigned long deadline;
230 struct list_head timeout_list; 230 struct list_head timeout_list;
231 unsigned int timeout; 231 unsigned int timeout;
232 int retries; 232 int retries;
233 233
234 /* 234 /*
235 * completion callback. 235 * completion callback.
236 */ 236 */
237 rq_end_io_fn *end_io; 237 rq_end_io_fn *end_io;
238 void *end_io_data; 238 void *end_io_data;
239 239
240 /* for bidi */ 240 /* for bidi */
241 struct request *next_rq; 241 struct request *next_rq;
242 }; 242 };
243 243
244 static inline unsigned short req_get_ioprio(struct request *req) 244 static inline unsigned short req_get_ioprio(struct request *req)
245 { 245 {
246 return req->ioprio; 246 return req->ioprio;
247 } 247 }
248 248
249 /* 249 /*
250 * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME 250 * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME
251 * requests. Some step values could eventually be made generic. 251 * requests. Some step values could eventually be made generic.
252 */ 252 */
253 struct request_pm_state 253 struct request_pm_state
254 { 254 {
255 /* PM state machine step value, currently driver specific */ 255 /* PM state machine step value, currently driver specific */
256 int pm_step; 256 int pm_step;
257 /* requested PM state value (S1, S2, S3, S4, ...) */ 257 /* requested PM state value (S1, S2, S3, S4, ...) */
258 u32 pm_state; 258 u32 pm_state;
259 void* data; /* for driver use */ 259 void* data; /* for driver use */
260 }; 260 };
261 261
262 #include <linux/elevator.h> 262 #include <linux/elevator.h>
263 263
264 typedef void (request_fn_proc) (struct request_queue *q); 264 typedef void (request_fn_proc) (struct request_queue *q);
265 typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); 265 typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
266 typedef int (prep_rq_fn) (struct request_queue *, struct request *); 266 typedef int (prep_rq_fn) (struct request_queue *, struct request *);
267 typedef void (unplug_fn) (struct request_queue *); 267 typedef void (unplug_fn) (struct request_queue *);
268 268
269 struct bio_vec; 269 struct bio_vec;
270 struct bvec_merge_data { 270 struct bvec_merge_data {
271 struct block_device *bi_bdev; 271 struct block_device *bi_bdev;
272 sector_t bi_sector; 272 sector_t bi_sector;
273 unsigned bi_size; 273 unsigned bi_size;
274 unsigned long bi_rw; 274 unsigned long bi_rw;
275 }; 275 };
276 typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *, 276 typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *,
277 struct bio_vec *); 277 struct bio_vec *);
278 typedef void (prepare_flush_fn) (struct request_queue *, struct request *); 278 typedef void (prepare_flush_fn) (struct request_queue *, struct request *);
279 typedef void (softirq_done_fn)(struct request *); 279 typedef void (softirq_done_fn)(struct request *);
280 typedef int (dma_drain_needed_fn)(struct request *); 280 typedef int (dma_drain_needed_fn)(struct request *);
281 typedef int (lld_busy_fn) (struct request_queue *q); 281 typedef int (lld_busy_fn) (struct request_queue *q);
282 282
283 enum blk_eh_timer_return { 283 enum blk_eh_timer_return {
284 BLK_EH_NOT_HANDLED, 284 BLK_EH_NOT_HANDLED,
285 BLK_EH_HANDLED, 285 BLK_EH_HANDLED,
286 BLK_EH_RESET_TIMER, 286 BLK_EH_RESET_TIMER,
287 }; 287 };
288 288
289 typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *); 289 typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *);
290 290
291 enum blk_queue_state { 291 enum blk_queue_state {
292 Queue_down, 292 Queue_down,
293 Queue_up, 293 Queue_up,
294 }; 294 };
295 295
296 struct blk_queue_tag { 296 struct blk_queue_tag {
297 struct request **tag_index; /* map of busy tags */ 297 struct request **tag_index; /* map of busy tags */
298 unsigned long *tag_map; /* bit map of free/busy tags */ 298 unsigned long *tag_map; /* bit map of free/busy tags */
299 int busy; /* current depth */ 299 int busy; /* current depth */
300 int max_depth; /* what we will send to device */ 300 int max_depth; /* what we will send to device */
301 int real_max_depth; /* what the array can hold */ 301 int real_max_depth; /* what the array can hold */
302 atomic_t refcnt; /* map can be shared */ 302 atomic_t refcnt; /* map can be shared */
303 }; 303 };
304 304
305 #define BLK_SCSI_MAX_CMDS (256) 305 #define BLK_SCSI_MAX_CMDS (256)
306 #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) 306 #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
307 307
308 struct queue_limits { 308 struct queue_limits {
309 unsigned long bounce_pfn; 309 unsigned long bounce_pfn;
310 unsigned long seg_boundary_mask; 310 unsigned long seg_boundary_mask;
311 311
312 unsigned int max_hw_sectors; 312 unsigned int max_hw_sectors;
313 unsigned int max_sectors; 313 unsigned int max_sectors;
314 unsigned int max_segment_size; 314 unsigned int max_segment_size;
315 unsigned int physical_block_size; 315 unsigned int physical_block_size;
316 unsigned int alignment_offset; 316 unsigned int alignment_offset;
317 unsigned int io_min; 317 unsigned int io_min;
318 unsigned int io_opt; 318 unsigned int io_opt;
319 unsigned int max_discard_sectors; 319 unsigned int max_discard_sectors;
320 unsigned int discard_granularity; 320 unsigned int discard_granularity;
321 unsigned int discard_alignment; 321 unsigned int discard_alignment;
322 322
323 unsigned short logical_block_size; 323 unsigned short logical_block_size;
324 unsigned short max_segments; 324 unsigned short max_segments;
325 325
326 unsigned char misaligned; 326 unsigned char misaligned;
327 unsigned char discard_misaligned; 327 unsigned char discard_misaligned;
328 unsigned char no_cluster; 328 unsigned char no_cluster;
329 signed char discard_zeroes_data; 329 signed char discard_zeroes_data;
330 }; 330 };
331 331
332 struct request_queue 332 struct request_queue
333 { 333 {
334 /* 334 /*
335 * Together with queue_head for cacheline sharing 335 * Together with queue_head for cacheline sharing
336 */ 336 */
337 struct list_head queue_head; 337 struct list_head queue_head;
338 struct request *last_merge; 338 struct request *last_merge;
339 struct elevator_queue *elevator; 339 struct elevator_queue *elevator;
340 340
341 /* 341 /*
342 * the queue request freelist, one for reads and one for writes 342 * the queue request freelist, one for reads and one for writes
343 */ 343 */
344 struct request_list rq; 344 struct request_list rq;
345 345
346 request_fn_proc *request_fn; 346 request_fn_proc *request_fn;
347 make_request_fn *make_request_fn; 347 make_request_fn *make_request_fn;
348 prep_rq_fn *prep_rq_fn; 348 prep_rq_fn *prep_rq_fn;
349 unplug_fn *unplug_fn; 349 unplug_fn *unplug_fn;
350 merge_bvec_fn *merge_bvec_fn; 350 merge_bvec_fn *merge_bvec_fn;
351 prepare_flush_fn *prepare_flush_fn; 351 prepare_flush_fn *prepare_flush_fn;
352 softirq_done_fn *softirq_done_fn; 352 softirq_done_fn *softirq_done_fn;
353 rq_timed_out_fn *rq_timed_out_fn; 353 rq_timed_out_fn *rq_timed_out_fn;
354 dma_drain_needed_fn *dma_drain_needed; 354 dma_drain_needed_fn *dma_drain_needed;
355 lld_busy_fn *lld_busy_fn; 355 lld_busy_fn *lld_busy_fn;
356 356
357 /* 357 /*
358 * Dispatch queue sorting 358 * Dispatch queue sorting
359 */ 359 */
360 sector_t end_sector; 360 sector_t end_sector;
361 struct request *boundary_rq; 361 struct request *boundary_rq;
362 362
363 /* 363 /*
364 * Auto-unplugging state 364 * Auto-unplugging state
365 */ 365 */
366 struct timer_list unplug_timer; 366 struct timer_list unplug_timer;
367 int unplug_thresh; /* After this many requests */ 367 int unplug_thresh; /* After this many requests */
368 unsigned long unplug_delay; /* After this many jiffies */ 368 unsigned long unplug_delay; /* After this many jiffies */
369 struct work_struct unplug_work; 369 struct work_struct unplug_work;
370 370
371 struct backing_dev_info backing_dev_info; 371 struct backing_dev_info backing_dev_info;
372 372
373 /* 373 /*
374 * The queue owner gets to use this for whatever they like. 374 * The queue owner gets to use this for whatever they like.
375 * ll_rw_blk doesn't touch it. 375 * ll_rw_blk doesn't touch it.
376 */ 376 */
377 void *queuedata; 377 void *queuedata;
378 378
379 /* 379 /*
380 * queue needs bounce pages for pages above this limit 380 * queue needs bounce pages for pages above this limit
381 */ 381 */
382 gfp_t bounce_gfp; 382 gfp_t bounce_gfp;
383 383
384 /* 384 /*
385 * various queue flags, see QUEUE_* below 385 * various queue flags, see QUEUE_* below
386 */ 386 */
387 unsigned long queue_flags; 387 unsigned long queue_flags;
388 388
389 /* 389 /*
390 * protects queue structures from reentrancy. ->__queue_lock should 390 * protects queue structures from reentrancy. ->__queue_lock should
391 * _never_ be used directly, it is queue private. always use 391 * _never_ be used directly, it is queue private. always use
392 * ->queue_lock. 392 * ->queue_lock.
393 */ 393 */
394 spinlock_t __queue_lock; 394 spinlock_t __queue_lock;
395 spinlock_t *queue_lock; 395 spinlock_t *queue_lock;
396 396
397 /* 397 /*
398 * queue kobject 398 * queue kobject
399 */ 399 */
400 struct kobject kobj; 400 struct kobject kobj;
401 401
402 /* 402 /*
403 * queue settings 403 * queue settings
404 */ 404 */
405 unsigned long nr_requests; /* Max # of requests */ 405 unsigned long nr_requests; /* Max # of requests */
406 unsigned int nr_congestion_on; 406 unsigned int nr_congestion_on;
407 unsigned int nr_congestion_off; 407 unsigned int nr_congestion_off;
408 unsigned int nr_batching; 408 unsigned int nr_batching;
409 409
410 void *dma_drain_buffer; 410 void *dma_drain_buffer;
411 unsigned int dma_drain_size; 411 unsigned int dma_drain_size;
412 unsigned int dma_pad_mask; 412 unsigned int dma_pad_mask;
413 unsigned int dma_alignment; 413 unsigned int dma_alignment;
414 414
415 struct blk_queue_tag *queue_tags; 415 struct blk_queue_tag *queue_tags;
416 struct list_head tag_busy_list; 416 struct list_head tag_busy_list;
417 417
418 unsigned int nr_sorted; 418 unsigned int nr_sorted;
419 unsigned int in_flight[2]; 419 unsigned int in_flight[2];
420 420
421 unsigned int rq_timeout; 421 unsigned int rq_timeout;
422 struct timer_list timeout; 422 struct timer_list timeout;
423 struct list_head timeout_list; 423 struct list_head timeout_list;
424 424
425 struct queue_limits limits; 425 struct queue_limits limits;
426 426
427 /* 427 /*
428 * sg stuff 428 * sg stuff
429 */ 429 */
430 unsigned int sg_timeout; 430 unsigned int sg_timeout;
431 unsigned int sg_reserved_size; 431 unsigned int sg_reserved_size;
432 int node; 432 int node;
433 #ifdef CONFIG_BLK_DEV_IO_TRACE 433 #ifdef CONFIG_BLK_DEV_IO_TRACE
434 struct blk_trace *blk_trace; 434 struct blk_trace *blk_trace;
435 #endif 435 #endif
436 /* 436 /*
437 * reserved for flush operations 437 * reserved for flush operations
438 */ 438 */
439 unsigned int ordered, next_ordered, ordseq; 439 unsigned int ordered, next_ordered, ordseq;
440 int orderr, ordcolor; 440 int orderr, ordcolor;
441 struct request pre_flush_rq, bar_rq, post_flush_rq; 441 struct request pre_flush_rq, bar_rq, post_flush_rq;
442 struct request *orig_bar_rq; 442 struct request *orig_bar_rq;
443 443
444 struct mutex sysfs_lock; 444 struct mutex sysfs_lock;
445 445
446 #if defined(CONFIG_BLK_DEV_BSG) 446 #if defined(CONFIG_BLK_DEV_BSG)
447 struct bsg_class_device bsg_dev; 447 struct bsg_class_device bsg_dev;
448 #endif 448 #endif
449 }; 449 };
450 450
451 #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ 451 #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */
452 #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ 452 #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */
453 #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ 453 #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */
454 #define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */ 454 #define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */
455 #define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */ 455 #define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */
456 #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ 456 #define QUEUE_FLAG_DEAD 5 /* queue being torn down */
457 #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ 457 #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */
458 #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ 458 #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */
459 #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ 459 #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */
460 #define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */ 460 #define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */
461 #define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */ 461 #define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */
462 #define QUEUE_FLAG_SAME_COMP 11 /* force complete on same CPU */ 462 #define QUEUE_FLAG_SAME_COMP 11 /* force complete on same CPU */
463 #define QUEUE_FLAG_FAIL_IO 12 /* fake timeout */ 463 #define QUEUE_FLAG_FAIL_IO 12 /* fake timeout */
464 #define QUEUE_FLAG_STACKABLE 13 /* supports request stacking */ 464 #define QUEUE_FLAG_STACKABLE 13 /* supports request stacking */
465 #define QUEUE_FLAG_NONROT 14 /* non-rotational device (SSD) */ 465 #define QUEUE_FLAG_NONROT 14 /* non-rotational device (SSD) */
466 #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ 466 #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */
467 #define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ 467 #define QUEUE_FLAG_IO_STAT 15 /* do IO stats */
468 #define QUEUE_FLAG_DISCARD 16 /* supports DISCARD */ 468 #define QUEUE_FLAG_DISCARD 16 /* supports DISCARD */
469 #define QUEUE_FLAG_NOXMERGES 17 /* No extended merges */ 469 #define QUEUE_FLAG_NOXMERGES 17 /* No extended merges */
470 #define QUEUE_FLAG_ADD_RANDOM 18 /* Contributes to random pool */
470 471
471 #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ 472 #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
472 (1 << QUEUE_FLAG_CLUSTER) | \ 473 (1 << QUEUE_FLAG_CLUSTER) | \
473 (1 << QUEUE_FLAG_STACKABLE) | \ 474 (1 << QUEUE_FLAG_STACKABLE) | \
474 (1 << QUEUE_FLAG_SAME_COMP)) 475 (1 << QUEUE_FLAG_SAME_COMP) | \
476 (1 << QUEUE_FLAG_ADD_RANDOM))
475 477
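Because QUEUE_FLAG_ADD_RANDOM is folded into QUEUE_FLAG_DEFAULT, every newly allocated queue keeps contributing to the entropy pool, so existing behaviour is unchanged. A driver that considers its completion timings a poor entropy source (an SSD, for instance) could opt out while setting the queue up, before it is visible to anyone else; this is an illustrative use of the existing helpers, not something this patch adds:

/* hypothetical setup path of an SSD-style driver */
static void example_setup_queue(struct request_queue *q)
{
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);		/* no rotational latency */
	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q);	/* skip the entropy pool */
}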
476 static inline int queue_is_locked(struct request_queue *q) 478 static inline int queue_is_locked(struct request_queue *q)
477 { 479 {
478 #ifdef CONFIG_SMP 480 #ifdef CONFIG_SMP
479 spinlock_t *lock = q->queue_lock; 481 spinlock_t *lock = q->queue_lock;
480 return lock && spin_is_locked(lock); 482 return lock && spin_is_locked(lock);
481 #else 483 #else
482 return 1; 484 return 1;
483 #endif 485 #endif
484 } 486 }
485 487
486 static inline void queue_flag_set_unlocked(unsigned int flag, 488 static inline void queue_flag_set_unlocked(unsigned int flag,
487 struct request_queue *q) 489 struct request_queue *q)
488 { 490 {
489 __set_bit(flag, &q->queue_flags); 491 __set_bit(flag, &q->queue_flags);
490 } 492 }
491 493
492 static inline int queue_flag_test_and_clear(unsigned int flag, 494 static inline int queue_flag_test_and_clear(unsigned int flag,
493 struct request_queue *q) 495 struct request_queue *q)
494 { 496 {
495 WARN_ON_ONCE(!queue_is_locked(q)); 497 WARN_ON_ONCE(!queue_is_locked(q));
496 498
497 if (test_bit(flag, &q->queue_flags)) { 499 if (test_bit(flag, &q->queue_flags)) {
498 __clear_bit(flag, &q->queue_flags); 500 __clear_bit(flag, &q->queue_flags);
499 return 1; 501 return 1;
500 } 502 }
501 503
502 return 0; 504 return 0;
503 } 505 }
504 506
505 static inline int queue_flag_test_and_set(unsigned int flag, 507 static inline int queue_flag_test_and_set(unsigned int flag,
506 struct request_queue *q) 508 struct request_queue *q)
507 { 509 {
508 WARN_ON_ONCE(!queue_is_locked(q)); 510 WARN_ON_ONCE(!queue_is_locked(q));
509 511
510 if (!test_bit(flag, &q->queue_flags)) { 512 if (!test_bit(flag, &q->queue_flags)) {
511 __set_bit(flag, &q->queue_flags); 513 __set_bit(flag, &q->queue_flags);
512 return 0; 514 return 0;
513 } 515 }
514 516
515 return 1; 517 return 1;
516 } 518 }
517 519
518 static inline void queue_flag_set(unsigned int flag, struct request_queue *q) 520 static inline void queue_flag_set(unsigned int flag, struct request_queue *q)
519 { 521 {
520 WARN_ON_ONCE(!queue_is_locked(q)); 522 WARN_ON_ONCE(!queue_is_locked(q));
521 __set_bit(flag, &q->queue_flags); 523 __set_bit(flag, &q->queue_flags);
522 } 524 }
523 525
524 static inline void queue_flag_clear_unlocked(unsigned int flag, 526 static inline void queue_flag_clear_unlocked(unsigned int flag,
525 struct request_queue *q) 527 struct request_queue *q)
526 { 528 {
527 __clear_bit(flag, &q->queue_flags); 529 __clear_bit(flag, &q->queue_flags);
528 } 530 }
529 531
530 static inline int queue_in_flight(struct request_queue *q) 532 static inline int queue_in_flight(struct request_queue *q)
531 { 533 {
532 return q->in_flight[0] + q->in_flight[1]; 534 return q->in_flight[0] + q->in_flight[1];
533 } 535 }
534 536
535 static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) 537 static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
536 { 538 {
537 WARN_ON_ONCE(!queue_is_locked(q)); 539 WARN_ON_ONCE(!queue_is_locked(q));
538 __clear_bit(flag, &q->queue_flags); 540 __clear_bit(flag, &q->queue_flags);
539 } 541 }
540 542
541 enum { 543 enum {
542 /* 544 /*
543 * Hardbarrier is supported with one of the following methods. 545 * Hardbarrier is supported with one of the following methods.
544 * 546 *
545 * NONE : hardbarrier unsupported 547 * NONE : hardbarrier unsupported
546 * DRAIN : ordering by draining is enough 548 * DRAIN : ordering by draining is enough
547 * DRAIN_FLUSH : ordering by draining w/ pre and post flushes 549 * DRAIN_FLUSH : ordering by draining w/ pre and post flushes
548 * DRAIN_FUA : ordering by draining w/ pre flush and FUA write 550 * DRAIN_FUA : ordering by draining w/ pre flush and FUA write
549 * TAG : ordering by tag is enough 551 * TAG : ordering by tag is enough
550 * TAG_FLUSH : ordering by tag w/ pre and post flushes 552 * TAG_FLUSH : ordering by tag w/ pre and post flushes
551 * TAG_FUA : ordering by tag w/ pre flush and FUA write 553 * TAG_FUA : ordering by tag w/ pre flush and FUA write
552 */ 554 */
553 QUEUE_ORDERED_BY_DRAIN = 0x01, 555 QUEUE_ORDERED_BY_DRAIN = 0x01,
554 QUEUE_ORDERED_BY_TAG = 0x02, 556 QUEUE_ORDERED_BY_TAG = 0x02,
555 QUEUE_ORDERED_DO_PREFLUSH = 0x10, 557 QUEUE_ORDERED_DO_PREFLUSH = 0x10,
556 QUEUE_ORDERED_DO_BAR = 0x20, 558 QUEUE_ORDERED_DO_BAR = 0x20,
557 QUEUE_ORDERED_DO_POSTFLUSH = 0x40, 559 QUEUE_ORDERED_DO_POSTFLUSH = 0x40,
558 QUEUE_ORDERED_DO_FUA = 0x80, 560 QUEUE_ORDERED_DO_FUA = 0x80,
559 561
560 QUEUE_ORDERED_NONE = 0x00, 562 QUEUE_ORDERED_NONE = 0x00,
561 563
562 QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_BY_DRAIN | 564 QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_BY_DRAIN |
563 QUEUE_ORDERED_DO_BAR, 565 QUEUE_ORDERED_DO_BAR,
564 QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | 566 QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN |
565 QUEUE_ORDERED_DO_PREFLUSH | 567 QUEUE_ORDERED_DO_PREFLUSH |
566 QUEUE_ORDERED_DO_POSTFLUSH, 568 QUEUE_ORDERED_DO_POSTFLUSH,
567 QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | 569 QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN |
568 QUEUE_ORDERED_DO_PREFLUSH | 570 QUEUE_ORDERED_DO_PREFLUSH |
569 QUEUE_ORDERED_DO_FUA, 571 QUEUE_ORDERED_DO_FUA,
570 572
571 QUEUE_ORDERED_TAG = QUEUE_ORDERED_BY_TAG | 573 QUEUE_ORDERED_TAG = QUEUE_ORDERED_BY_TAG |
572 QUEUE_ORDERED_DO_BAR, 574 QUEUE_ORDERED_DO_BAR,
573 QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG | 575 QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG |
574 QUEUE_ORDERED_DO_PREFLUSH | 576 QUEUE_ORDERED_DO_PREFLUSH |
575 QUEUE_ORDERED_DO_POSTFLUSH, 577 QUEUE_ORDERED_DO_POSTFLUSH,
576 QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG | 578 QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG |
577 QUEUE_ORDERED_DO_PREFLUSH | 579 QUEUE_ORDERED_DO_PREFLUSH |
578 QUEUE_ORDERED_DO_FUA, 580 QUEUE_ORDERED_DO_FUA,
579 581
580 /* 582 /*
581 * Ordered operation sequence 583 * Ordered operation sequence
582 */ 584 */
583 QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */ 585 QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */
584 QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */ 586 QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */
585 QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */ 587 QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */
586 QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */ 588 QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */
587 QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */ 589 QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */
588 QUEUE_ORDSEQ_DONE = 0x20, 590 QUEUE_ORDSEQ_DONE = 0x20,
589 }; 591 };
590 592
591 #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) 593 #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
592 #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) 594 #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
593 #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) 595 #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
594 #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) 596 #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
595 #define blk_queue_noxmerges(q) \ 597 #define blk_queue_noxmerges(q) \
596 test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) 598 test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
597 #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) 599 #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
598 #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) 600 #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
601 #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags)
599 #define blk_queue_flushing(q) ((q)->ordseq) 602 #define blk_queue_flushing(q) ((q)->ordseq)
600 #define blk_queue_stackable(q) \ 603 #define blk_queue_stackable(q) \
601 test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) 604 test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags)
602 #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) 605 #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)
603 606
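blk_queue_add_random() is the test the request completion path can use to decide whether a finished request should still feed the entropy pool; the actual check belongs in the completion code in blk-core.c, and the wrapper function below is only an illustration:

static void example_io_done(struct request *req)
{
	struct request_queue *q = req->q;

	/* only sample completion timing for queues that still opt in */
	if (blk_queue_add_random(q))
		add_disk_randomness(req->rq_disk);
}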
604 #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS) 607 #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS)
605 #define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC) 608 #define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC)
606 #define blk_special_request(rq) ((rq)->cmd_type == REQ_TYPE_SPECIAL) 609 #define blk_special_request(rq) ((rq)->cmd_type == REQ_TYPE_SPECIAL)
607 #define blk_sense_request(rq) ((rq)->cmd_type == REQ_TYPE_SENSE) 610 #define blk_sense_request(rq) ((rq)->cmd_type == REQ_TYPE_SENSE)
608 611
609 #define blk_failfast_dev(rq) ((rq)->cmd_flags & REQ_FAILFAST_DEV) 612 #define blk_failfast_dev(rq) ((rq)->cmd_flags & REQ_FAILFAST_DEV)
610 #define blk_failfast_transport(rq) ((rq)->cmd_flags & REQ_FAILFAST_TRANSPORT) 613 #define blk_failfast_transport(rq) ((rq)->cmd_flags & REQ_FAILFAST_TRANSPORT)
611 #define blk_failfast_driver(rq) ((rq)->cmd_flags & REQ_FAILFAST_DRIVER) 614 #define blk_failfast_driver(rq) ((rq)->cmd_flags & REQ_FAILFAST_DRIVER)
612 #define blk_noretry_request(rq) (blk_failfast_dev(rq) || \ 615 #define blk_noretry_request(rq) (blk_failfast_dev(rq) || \
613 blk_failfast_transport(rq) || \ 616 blk_failfast_transport(rq) || \
614 blk_failfast_driver(rq)) 617 blk_failfast_driver(rq))
615 #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) 618 #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED)
616 #define blk_rq_io_stat(rq) ((rq)->cmd_flags & REQ_IO_STAT) 619 #define blk_rq_io_stat(rq) ((rq)->cmd_flags & REQ_IO_STAT)
617 #define blk_rq_quiet(rq) ((rq)->cmd_flags & REQ_QUIET) 620 #define blk_rq_quiet(rq) ((rq)->cmd_flags & REQ_QUIET)
618 621
619 #define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq))) 622 #define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq)))
620 623
621 #define blk_pm_suspend_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND) 624 #define blk_pm_suspend_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND)
622 #define blk_pm_resume_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_RESUME) 625 #define blk_pm_resume_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_RESUME)
623 #define blk_pm_request(rq) \ 626 #define blk_pm_request(rq) \
624 (blk_pm_suspend_request(rq) || blk_pm_resume_request(rq)) 627 (blk_pm_suspend_request(rq) || blk_pm_resume_request(rq))
625 628
626 #define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) 629 #define blk_rq_cpu_valid(rq) ((rq)->cpu != -1)
627 #define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED) 630 #define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED)
628 #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) 631 #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER)
629 #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) 632 #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA)
630 #define blk_discard_rq(rq) ((rq)->cmd_flags & REQ_DISCARD) 633 #define blk_discard_rq(rq) ((rq)->cmd_flags & REQ_DISCARD)
631 #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) 634 #define blk_bidi_rq(rq) ((rq)->next_rq != NULL)
632 /* rq->queuelist of dequeued request must be list_empty() */ 635 /* rq->queuelist of dequeued request must be list_empty() */
633 #define blk_queued_rq(rq) (!list_empty(&(rq)->queuelist)) 636 #define blk_queued_rq(rq) (!list_empty(&(rq)->queuelist))
634 637
635 #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) 638 #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist)
636 639
637 #define rq_data_dir(rq) ((rq)->cmd_flags & 1) 640 #define rq_data_dir(rq) ((rq)->cmd_flags & 1)
638 641
639 /* 642 /*
640 * We regard a request as sync if it is either a read or a sync write 643 * We regard a request as sync if it is either a read or a sync write
641 */ 644 */
642 static inline bool rw_is_sync(unsigned int rw_flags) 645 static inline bool rw_is_sync(unsigned int rw_flags)
643 { 646 {
644 return !(rw_flags & REQ_RW) || (rw_flags & REQ_RW_SYNC); 647 return !(rw_flags & REQ_RW) || (rw_flags & REQ_RW_SYNC);
645 } 648 }
646 649
647 static inline bool rq_is_sync(struct request *rq) 650 static inline bool rq_is_sync(struct request *rq)
648 { 651 {
649 return rw_is_sync(rq->cmd_flags); 652 return rw_is_sync(rq->cmd_flags);
650 } 653 }
651 654
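
As an illustrative aside, a minimal sketch of how the classification above falls out for the common flag combinations; READ, WRITE and REQ_RW_SYNC are the existing block-layer flags, the local variables are hypothetical:

        bool r  = rw_is_sync(READ);                     /* true: reads always count as sync */
        bool w  = rw_is_sync(WRITE);                    /* false: a plain write is async */
        bool ws = rw_is_sync(WRITE | REQ_RW_SYNC);      /* true: write explicitly marked sync */
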
652 #define rq_is_meta(rq) ((rq)->cmd_flags & REQ_RW_META) 655 #define rq_is_meta(rq) ((rq)->cmd_flags & REQ_RW_META)
653 #define rq_noidle(rq) ((rq)->cmd_flags & REQ_NOIDLE) 656 #define rq_noidle(rq) ((rq)->cmd_flags & REQ_NOIDLE)
654 657
655 static inline int blk_queue_full(struct request_queue *q, int sync) 658 static inline int blk_queue_full(struct request_queue *q, int sync)
656 { 659 {
657 if (sync) 660 if (sync)
658 return test_bit(QUEUE_FLAG_SYNCFULL, &q->queue_flags); 661 return test_bit(QUEUE_FLAG_SYNCFULL, &q->queue_flags);
659 return test_bit(QUEUE_FLAG_ASYNCFULL, &q->queue_flags); 662 return test_bit(QUEUE_FLAG_ASYNCFULL, &q->queue_flags);
660 } 663 }
661 664
662 static inline void blk_set_queue_full(struct request_queue *q, int sync) 665 static inline void blk_set_queue_full(struct request_queue *q, int sync)
663 { 666 {
664 if (sync) 667 if (sync)
665 queue_flag_set(QUEUE_FLAG_SYNCFULL, q); 668 queue_flag_set(QUEUE_FLAG_SYNCFULL, q);
666 else 669 else
667 queue_flag_set(QUEUE_FLAG_ASYNCFULL, q); 670 queue_flag_set(QUEUE_FLAG_ASYNCFULL, q);
668 } 671 }
669 672
670 static inline void blk_clear_queue_full(struct request_queue *q, int sync) 673 static inline void blk_clear_queue_full(struct request_queue *q, int sync)
671 { 674 {
672 if (sync) 675 if (sync)
673 queue_flag_clear(QUEUE_FLAG_SYNCFULL, q); 676 queue_flag_clear(QUEUE_FLAG_SYNCFULL, q);
674 else 677 else
675 queue_flag_clear(QUEUE_FLAG_ASYNCFULL, q); 678 queue_flag_clear(QUEUE_FLAG_ASYNCFULL, q);
676 } 679 }
677 680
678 681
679 /* 682 /*
680 * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may 683 * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may
681 * it already have been started by the driver. 684 * it already have been started by the driver.
682 */ 685 */
683 #define RQ_NOMERGE_FLAGS \ 686 #define RQ_NOMERGE_FLAGS \
684 (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) 687 (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER)
685 #define rq_mergeable(rq) \ 688 #define rq_mergeable(rq) \
686 (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ 689 (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \
687 (blk_discard_rq(rq) || blk_fs_request((rq)))) 690 (blk_discard_rq(rq) || blk_fs_request((rq))))
688 691
689 /* 692 /*
690 * q->prep_rq_fn return values 693 * q->prep_rq_fn return values
691 */ 694 */
692 #define BLKPREP_OK 0 /* serve it */ 695 #define BLKPREP_OK 0 /* serve it */
693 #define BLKPREP_KILL 1 /* fatal error, kill */ 696 #define BLKPREP_KILL 1 /* fatal error, kill */
694 #define BLKPREP_DEFER 2 /* leave on queue */ 697 #define BLKPREP_DEFER 2 /* leave on queue */
695 698
696 extern unsigned long blk_max_low_pfn, blk_max_pfn; 699 extern unsigned long blk_max_low_pfn, blk_max_pfn;
697 700
698 /* 701 /*
699 * standard bounce addresses: 702 * standard bounce addresses:
700 * 703 *
701 * BLK_BOUNCE_HIGH : bounce all highmem pages 704 * BLK_BOUNCE_HIGH : bounce all highmem pages
702 * BLK_BOUNCE_ANY : don't bounce anything 705 * BLK_BOUNCE_ANY : don't bounce anything
703 * BLK_BOUNCE_ISA : bounce pages above ISA DMA boundary 706 * BLK_BOUNCE_ISA : bounce pages above ISA DMA boundary
704 */ 707 */
705 708
706 #if BITS_PER_LONG == 32 709 #if BITS_PER_LONG == 32
707 #define BLK_BOUNCE_HIGH ((u64)blk_max_low_pfn << PAGE_SHIFT) 710 #define BLK_BOUNCE_HIGH ((u64)blk_max_low_pfn << PAGE_SHIFT)
708 #else 711 #else
709 #define BLK_BOUNCE_HIGH -1ULL 712 #define BLK_BOUNCE_HIGH -1ULL
710 #endif 713 #endif
711 #define BLK_BOUNCE_ANY (-1ULL) 714 #define BLK_BOUNCE_ANY (-1ULL)
712 #define BLK_BOUNCE_ISA (ISA_DMA_THRESHOLD) 715 #define BLK_BOUNCE_ISA (ISA_DMA_THRESHOLD)
713 716
714 /* 717 /*
715 * default timeout for SG_IO if none specified 718 * default timeout for SG_IO if none specified
716 */ 719 */
717 #define BLK_DEFAULT_SG_TIMEOUT (60 * HZ) 720 #define BLK_DEFAULT_SG_TIMEOUT (60 * HZ)
718 #define BLK_MIN_SG_TIMEOUT (7 * HZ) 721 #define BLK_MIN_SG_TIMEOUT (7 * HZ)
719 722
720 #ifdef CONFIG_BOUNCE 723 #ifdef CONFIG_BOUNCE
721 extern int init_emergency_isa_pool(void); 724 extern int init_emergency_isa_pool(void);
722 extern void blk_queue_bounce(struct request_queue *q, struct bio **bio); 725 extern void blk_queue_bounce(struct request_queue *q, struct bio **bio);
723 #else 726 #else
724 static inline int init_emergency_isa_pool(void) 727 static inline int init_emergency_isa_pool(void)
725 { 728 {
726 return 0; 729 return 0;
727 } 730 }
728 static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio) 731 static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
729 { 732 {
730 } 733 }
731 #endif /* CONFIG_BOUNCE */ 734 #endif /* CONFIG_BOUNCE */
732 735
733 struct rq_map_data { 736 struct rq_map_data {
734 struct page **pages; 737 struct page **pages;
735 int page_order; 738 int page_order;
736 int nr_entries; 739 int nr_entries;
737 unsigned long offset; 740 unsigned long offset;
738 int null_mapped; 741 int null_mapped;
739 int from_user; 742 int from_user;
740 }; 743 };
741 744
742 struct req_iterator { 745 struct req_iterator {
743 int i; 746 int i;
744 struct bio *bio; 747 struct bio *bio;
745 }; 748 };
746 749
747 /* This should not be used directly - use rq_for_each_segment */ 750 /* This should not be used directly - use rq_for_each_segment */
748 #define for_each_bio(_bio) \ 751 #define for_each_bio(_bio) \
749 for (; _bio; _bio = _bio->bi_next) 752 for (; _bio; _bio = _bio->bi_next)
750 #define __rq_for_each_bio(_bio, rq) \ 753 #define __rq_for_each_bio(_bio, rq) \
751 if ((rq->bio)) \ 754 if ((rq->bio)) \
752 for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) 755 for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next)
753 756
754 #define rq_for_each_segment(bvl, _rq, _iter) \ 757 #define rq_for_each_segment(bvl, _rq, _iter) \
755 __rq_for_each_bio(_iter.bio, _rq) \ 758 __rq_for_each_bio(_iter.bio, _rq) \
756 bio_for_each_segment(bvl, _iter.bio, _iter.i) 759 bio_for_each_segment(bvl, _iter.bio, _iter.i)
757 760
758 #define rq_iter_last(rq, _iter) \ 761 #define rq_iter_last(rq, _iter) \
759 (_iter.bio->bi_next == NULL && _iter.i == _iter.bio->bi_vcnt-1) 762 (_iter.bio->bi_next == NULL && _iter.i == _iter.bio->bi_vcnt-1)
760 763
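
As an illustrative aside, a minimal sketch of a driver walking the data segments of a request with rq_for_each_segment(); 'rq' is a hypothetical struct request *, and the sketch assumes the pages are not in highmem (otherwise kmap_atomic() would be needed before touching them):

        struct req_iterator iter;
        struct bio_vec *bvec;
        unsigned int done = 0;

        rq_for_each_segment(bvec, rq, iter) {
                char *buf = page_address(bvec->bv_page) + bvec->bv_offset;

                /* transfer bvec->bv_len bytes at 'buf' to or from the device */
                done += bvec->bv_len;
        }
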
761 #ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 764 #ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
762 # error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform" 765 # error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform"
763 #endif 766 #endif
764 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 767 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
765 extern void rq_flush_dcache_pages(struct request *rq); 768 extern void rq_flush_dcache_pages(struct request *rq);
766 #else 769 #else
767 static inline void rq_flush_dcache_pages(struct request *rq) 770 static inline void rq_flush_dcache_pages(struct request *rq)
768 { 771 {
769 } 772 }
770 #endif 773 #endif
771 774
772 extern int blk_register_queue(struct gendisk *disk); 775 extern int blk_register_queue(struct gendisk *disk);
773 extern void blk_unregister_queue(struct gendisk *disk); 776 extern void blk_unregister_queue(struct gendisk *disk);
774 extern void register_disk(struct gendisk *dev); 777 extern void register_disk(struct gendisk *dev);
775 extern void generic_make_request(struct bio *bio); 778 extern void generic_make_request(struct bio *bio);
776 extern void blk_rq_init(struct request_queue *q, struct request *rq); 779 extern void blk_rq_init(struct request_queue *q, struct request *rq);
777 extern void blk_put_request(struct request *); 780 extern void blk_put_request(struct request *);
778 extern void __blk_put_request(struct request_queue *, struct request *); 781 extern void __blk_put_request(struct request_queue *, struct request *);
779 extern struct request *blk_get_request(struct request_queue *, int, gfp_t); 782 extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
780 extern struct request *blk_make_request(struct request_queue *, struct bio *, 783 extern struct request *blk_make_request(struct request_queue *, struct bio *,
781 gfp_t); 784 gfp_t);
782 extern void blk_insert_request(struct request_queue *, struct request *, int, void *); 785 extern void blk_insert_request(struct request_queue *, struct request *, int, void *);
783 extern void blk_requeue_request(struct request_queue *, struct request *); 786 extern void blk_requeue_request(struct request_queue *, struct request *);
784 extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); 787 extern int blk_rq_check_limits(struct request_queue *q, struct request *rq);
785 extern int blk_lld_busy(struct request_queue *q); 788 extern int blk_lld_busy(struct request_queue *q);
786 extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, 789 extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
787 struct bio_set *bs, gfp_t gfp_mask, 790 struct bio_set *bs, gfp_t gfp_mask,
788 int (*bio_ctr)(struct bio *, struct bio *, void *), 791 int (*bio_ctr)(struct bio *, struct bio *, void *),
789 void *data); 792 void *data);
790 extern void blk_rq_unprep_clone(struct request *rq); 793 extern void blk_rq_unprep_clone(struct request *rq);
791 extern int blk_insert_cloned_request(struct request_queue *q, 794 extern int blk_insert_cloned_request(struct request_queue *q,
792 struct request *rq); 795 struct request *rq);
793 extern void blk_plug_device(struct request_queue *); 796 extern void blk_plug_device(struct request_queue *);
794 extern void blk_plug_device_unlocked(struct request_queue *); 797 extern void blk_plug_device_unlocked(struct request_queue *);
795 extern int blk_remove_plug(struct request_queue *); 798 extern int blk_remove_plug(struct request_queue *);
796 extern void blk_recount_segments(struct request_queue *, struct bio *); 799 extern void blk_recount_segments(struct request_queue *, struct bio *);
797 extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, 800 extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t,
798 unsigned int, void __user *); 801 unsigned int, void __user *);
799 extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, 802 extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
800 struct scsi_ioctl_command __user *); 803 struct scsi_ioctl_command __user *);
801 804
802 /* 805 /*
803 * A queue has just exited congestion. Note this in the global counter of 806 * A queue has just exited congestion. Note this in the global counter of
804 * congested queues, and wake up anyone who was waiting for requests to be 807 * congested queues, and wake up anyone who was waiting for requests to be
805 * put back. 808 * put back.
806 */ 809 */
807 static inline void blk_clear_queue_congested(struct request_queue *q, int sync) 810 static inline void blk_clear_queue_congested(struct request_queue *q, int sync)
808 { 811 {
809 clear_bdi_congested(&q->backing_dev_info, sync); 812 clear_bdi_congested(&q->backing_dev_info, sync);
810 } 813 }
811 814
812 /* 815 /*
813 * A queue has just entered congestion. Flag that in the queue's VM-visible 816 * A queue has just entered congestion. Flag that in the queue's VM-visible
814 * state flags and increment the global counter of congested queues. 817 * state flags and increment the global counter of congested queues.
815 */ 818 */
816 static inline void blk_set_queue_congested(struct request_queue *q, int sync) 819 static inline void blk_set_queue_congested(struct request_queue *q, int sync)
817 { 820 {
818 set_bdi_congested(&q->backing_dev_info, sync); 821 set_bdi_congested(&q->backing_dev_info, sync);
819 } 822 }
820 823
821 extern void blk_start_queue(struct request_queue *q); 824 extern void blk_start_queue(struct request_queue *q);
822 extern void blk_stop_queue(struct request_queue *q); 825 extern void blk_stop_queue(struct request_queue *q);
823 extern void blk_sync_queue(struct request_queue *q); 826 extern void blk_sync_queue(struct request_queue *q);
824 extern void __blk_stop_queue(struct request_queue *q); 827 extern void __blk_stop_queue(struct request_queue *q);
825 extern void __blk_run_queue(struct request_queue *); 828 extern void __blk_run_queue(struct request_queue *);
826 extern void blk_run_queue(struct request_queue *); 829 extern void blk_run_queue(struct request_queue *);
827 extern int blk_rq_map_user(struct request_queue *, struct request *, 830 extern int blk_rq_map_user(struct request_queue *, struct request *,
828 struct rq_map_data *, void __user *, unsigned long, 831 struct rq_map_data *, void __user *, unsigned long,
829 gfp_t); 832 gfp_t);
830 extern int blk_rq_unmap_user(struct bio *); 833 extern int blk_rq_unmap_user(struct bio *);
831 extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); 834 extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t);
832 extern int blk_rq_map_user_iov(struct request_queue *, struct request *, 835 extern int blk_rq_map_user_iov(struct request_queue *, struct request *,
833 struct rq_map_data *, struct sg_iovec *, int, 836 struct rq_map_data *, struct sg_iovec *, int,
834 unsigned int, gfp_t); 837 unsigned int, gfp_t);
835 extern int blk_execute_rq(struct request_queue *, struct gendisk *, 838 extern int blk_execute_rq(struct request_queue *, struct gendisk *,
836 struct request *, int); 839 struct request *, int);
837 extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, 840 extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
838 struct request *, int, rq_end_io_fn *); 841 struct request *, int, rq_end_io_fn *);
839 extern void blk_unplug(struct request_queue *q); 842 extern void blk_unplug(struct request_queue *q);
840 843
841 static inline struct request_queue *bdev_get_queue(struct block_device *bdev) 844 static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
842 { 845 {
843 return bdev->bd_disk->queue; 846 return bdev->bd_disk->queue;
844 } 847 }
845 848
846 /* 849 /*
847 * blk_rq_pos() : the current sector 850 * blk_rq_pos() : the current sector
848 * blk_rq_bytes() : bytes left in the entire request 851 * blk_rq_bytes() : bytes left in the entire request
849 * blk_rq_cur_bytes() : bytes left in the current segment 852 * blk_rq_cur_bytes() : bytes left in the current segment
850 * blk_rq_err_bytes() : bytes left till the next error boundary 853 * blk_rq_err_bytes() : bytes left till the next error boundary
851 * blk_rq_sectors() : sectors left in the entire request 854 * blk_rq_sectors() : sectors left in the entire request
852 * blk_rq_cur_sectors() : sectors left in the current segment 855 * blk_rq_cur_sectors() : sectors left in the current segment
853 */ 856 */
854 static inline sector_t blk_rq_pos(const struct request *rq) 857 static inline sector_t blk_rq_pos(const struct request *rq)
855 { 858 {
856 return rq->__sector; 859 return rq->__sector;
857 } 860 }
858 861
859 static inline unsigned int blk_rq_bytes(const struct request *rq) 862 static inline unsigned int blk_rq_bytes(const struct request *rq)
860 { 863 {
861 return rq->__data_len; 864 return rq->__data_len;
862 } 865 }
863 866
864 static inline int blk_rq_cur_bytes(const struct request *rq) 867 static inline int blk_rq_cur_bytes(const struct request *rq)
865 { 868 {
866 return rq->bio ? bio_cur_bytes(rq->bio) : 0; 869 return rq->bio ? bio_cur_bytes(rq->bio) : 0;
867 } 870 }
868 871
869 extern unsigned int blk_rq_err_bytes(const struct request *rq); 872 extern unsigned int blk_rq_err_bytes(const struct request *rq);
870 873
871 static inline unsigned int blk_rq_sectors(const struct request *rq) 874 static inline unsigned int blk_rq_sectors(const struct request *rq)
872 { 875 {
873 return blk_rq_bytes(rq) >> 9; 876 return blk_rq_bytes(rq) >> 9;
874 } 877 }
875 878
876 static inline unsigned int blk_rq_cur_sectors(const struct request *rq) 879 static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
877 { 880 {
878 return blk_rq_cur_bytes(rq) >> 9; 881 return blk_rq_cur_bytes(rq) >> 9;
879 } 882 }
880 883
881 /* 884 /*
882 * Request issue related functions. 885 * Request issue related functions.
883 */ 886 */
884 extern struct request *blk_peek_request(struct request_queue *q); 887 extern struct request *blk_peek_request(struct request_queue *q);
885 extern void blk_start_request(struct request *rq); 888 extern void blk_start_request(struct request *rq);
886 extern struct request *blk_fetch_request(struct request_queue *q); 889 extern struct request *blk_fetch_request(struct request_queue *q);
887 890
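
As an illustrative aside, a hedged sketch of how a request-based driver typically combines blk_fetch_request() with the position/size accessors above when it starts the next request; 'q' and the hardware programming step are hypothetical, and blk_fetch_request() must be called with the queue lock held:

        struct request *rq = blk_fetch_request(q);

        if (rq) {
                sector_t start = blk_rq_pos(rq);          /* first sector of the request */
                unsigned int nsect = blk_rq_sectors(rq);  /* sectors left in the request */

                /* program the hardware to transfer nsect sectors starting at 'start' */
        }
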
888 /* 891 /*
889 * Request completion related functions. 892 * Request completion related functions.
890 * 893 *
891 * blk_update_request() completes given number of bytes and updates 894 * blk_update_request() completes given number of bytes and updates
892 * the request without completing it. 895 * the request without completing it.
893 * 896 *
894 * blk_end_request() and friends. __blk_end_request() must be called 897 * blk_end_request() and friends. __blk_end_request() must be called
895 * with the request queue spinlock acquired. 898 * with the request queue spinlock acquired.
896 * 899 *
897 * Several drivers define their own end_request and call 900 * Several drivers define their own end_request and call
898 * blk_end_request() for parts of the original function. 901 * blk_end_request() for parts of the original function.
899 * This prevents code duplication in drivers. 902 * This prevents code duplication in drivers.
900 */ 903 */
901 extern bool blk_update_request(struct request *rq, int error, 904 extern bool blk_update_request(struct request *rq, int error,
902 unsigned int nr_bytes); 905 unsigned int nr_bytes);
903 extern bool blk_end_request(struct request *rq, int error, 906 extern bool blk_end_request(struct request *rq, int error,
904 unsigned int nr_bytes); 907 unsigned int nr_bytes);
905 extern void blk_end_request_all(struct request *rq, int error); 908 extern void blk_end_request_all(struct request *rq, int error);
906 extern bool blk_end_request_cur(struct request *rq, int error); 909 extern bool blk_end_request_cur(struct request *rq, int error);
907 extern bool blk_end_request_err(struct request *rq, int error); 910 extern bool blk_end_request_err(struct request *rq, int error);
908 extern bool __blk_end_request(struct request *rq, int error, 911 extern bool __blk_end_request(struct request *rq, int error,
909 unsigned int nr_bytes); 912 unsigned int nr_bytes);
910 extern void __blk_end_request_all(struct request *rq, int error); 913 extern void __blk_end_request_all(struct request *rq, int error);
911 extern bool __blk_end_request_cur(struct request *rq, int error); 914 extern bool __blk_end_request_cur(struct request *rq, int error);
912 extern bool __blk_end_request_err(struct request *rq, int error); 915 extern bool __blk_end_request_err(struct request *rq, int error);
913 916
914 extern void blk_complete_request(struct request *); 917 extern void blk_complete_request(struct request *);
915 extern void __blk_complete_request(struct request *); 918 extern void __blk_complete_request(struct request *);
916 extern void blk_abort_request(struct request *); 919 extern void blk_abort_request(struct request *);
917 extern void blk_abort_queue(struct request_queue *); 920 extern void blk_abort_queue(struct request_queue *);
918 921
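
As an illustrative aside, a hedged sketch of a driver completion path built on blk_end_request(); the function name and the 'bytes'/'error' parameters are hypothetical:

        static void my_end_io(struct request *rq, unsigned int bytes, int error)
        {
                /* blk_end_request() returns true while buffers are still pending */
                if (blk_end_request(rq, error, bytes))
                        return;         /* partial completion, the remainder stays queued */

                /* rq is now fully completed and released by the block layer */
        }
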
919 /* 922 /*
920 * Access functions for manipulating queue properties 923 * Access functions for manipulating queue properties
921 */ 924 */
922 extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, 925 extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn,
923 spinlock_t *lock, int node_id); 926 spinlock_t *lock, int node_id);
924 extern struct request_queue *blk_init_allocated_queue_node(struct request_queue *, 927 extern struct request_queue *blk_init_allocated_queue_node(struct request_queue *,
925 request_fn_proc *, 928 request_fn_proc *,
926 spinlock_t *, int node_id); 929 spinlock_t *, int node_id);
927 extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); 930 extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *);
928 extern struct request_queue *blk_init_allocated_queue(struct request_queue *, 931 extern struct request_queue *blk_init_allocated_queue(struct request_queue *,
929 request_fn_proc *, spinlock_t *); 932 request_fn_proc *, spinlock_t *);
930 extern void blk_cleanup_queue(struct request_queue *); 933 extern void blk_cleanup_queue(struct request_queue *);
931 extern void blk_queue_make_request(struct request_queue *, make_request_fn *); 934 extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
932 extern void blk_queue_bounce_limit(struct request_queue *, u64); 935 extern void blk_queue_bounce_limit(struct request_queue *, u64);
933 extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); 936 extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
934 extern void blk_queue_max_segments(struct request_queue *, unsigned short); 937 extern void blk_queue_max_segments(struct request_queue *, unsigned short);
935 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); 938 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
936 extern void blk_queue_max_discard_sectors(struct request_queue *q, 939 extern void blk_queue_max_discard_sectors(struct request_queue *q,
937 unsigned int max_discard_sectors); 940 unsigned int max_discard_sectors);
938 extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); 941 extern void blk_queue_logical_block_size(struct request_queue *, unsigned short);
939 extern void blk_queue_physical_block_size(struct request_queue *, unsigned short); 942 extern void blk_queue_physical_block_size(struct request_queue *, unsigned short);
940 extern void blk_queue_alignment_offset(struct request_queue *q, 943 extern void blk_queue_alignment_offset(struct request_queue *q,
941 unsigned int alignment); 944 unsigned int alignment);
942 extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); 945 extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min);
943 extern void blk_queue_io_min(struct request_queue *q, unsigned int min); 946 extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
944 extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); 947 extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
945 extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); 948 extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt);
946 extern void blk_set_default_limits(struct queue_limits *lim); 949 extern void blk_set_default_limits(struct queue_limits *lim);
947 extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, 950 extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
948 sector_t offset); 951 sector_t offset);
949 extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev, 952 extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev,
950 sector_t offset); 953 sector_t offset);
951 extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, 954 extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
952 sector_t offset); 955 sector_t offset);
953 extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); 956 extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b);
954 extern void blk_queue_dma_pad(struct request_queue *, unsigned int); 957 extern void blk_queue_dma_pad(struct request_queue *, unsigned int);
955 extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); 958 extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int);
956 extern int blk_queue_dma_drain(struct request_queue *q, 959 extern int blk_queue_dma_drain(struct request_queue *q,
957 dma_drain_needed_fn *dma_drain_needed, 960 dma_drain_needed_fn *dma_drain_needed,
958 void *buf, unsigned int size); 961 void *buf, unsigned int size);
959 extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn); 962 extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn);
960 extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); 963 extern void blk_queue_segment_boundary(struct request_queue *, unsigned long);
961 extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); 964 extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn);
962 extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); 965 extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *);
963 extern void blk_queue_dma_alignment(struct request_queue *, int); 966 extern void blk_queue_dma_alignment(struct request_queue *, int);
964 extern void blk_queue_update_dma_alignment(struct request_queue *, int); 967 extern void blk_queue_update_dma_alignment(struct request_queue *, int);
965 extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); 968 extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
966 extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); 969 extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
967 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); 970 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
968 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); 971 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
969 extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *); 972 extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *);
970 extern bool blk_do_ordered(struct request_queue *, struct request **); 973 extern bool blk_do_ordered(struct request_queue *, struct request **);
971 extern unsigned blk_ordered_cur_seq(struct request_queue *); 974 extern unsigned blk_ordered_cur_seq(struct request_queue *);
972 extern unsigned blk_ordered_req_seq(struct request *); 975 extern unsigned blk_ordered_req_seq(struct request *);
973 extern bool blk_ordered_complete_seq(struct request_queue *, unsigned, int); 976 extern bool blk_ordered_complete_seq(struct request_queue *, unsigned, int);
974 977
975 extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); 978 extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
976 extern void blk_dump_rq_flags(struct request *, char *); 979 extern void blk_dump_rq_flags(struct request *, char *);
977 extern void generic_unplug_device(struct request_queue *); 980 extern void generic_unplug_device(struct request_queue *);
978 extern long nr_blockdev_pages(void); 981 extern long nr_blockdev_pages(void);
979 982
980 int blk_get_queue(struct request_queue *); 983 int blk_get_queue(struct request_queue *);
981 struct request_queue *blk_alloc_queue(gfp_t); 984 struct request_queue *blk_alloc_queue(gfp_t);
982 struct request_queue *blk_alloc_queue_node(gfp_t, int); 985 struct request_queue *blk_alloc_queue_node(gfp_t, int);
983 extern void blk_put_queue(struct request_queue *); 986 extern void blk_put_queue(struct request_queue *);
984 987
985 /* 988 /*
986 * tag stuff 989 * tag stuff
987 */ 990 */
988 #define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED) 991 #define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED)
989 extern int blk_queue_start_tag(struct request_queue *, struct request *); 992 extern int blk_queue_start_tag(struct request_queue *, struct request *);
990 extern struct request *blk_queue_find_tag(struct request_queue *, int); 993 extern struct request *blk_queue_find_tag(struct request_queue *, int);
991 extern void blk_queue_end_tag(struct request_queue *, struct request *); 994 extern void blk_queue_end_tag(struct request_queue *, struct request *);
992 extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *); 995 extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *);
993 extern void blk_queue_free_tags(struct request_queue *); 996 extern void blk_queue_free_tags(struct request_queue *);
994 extern int blk_queue_resize_tags(struct request_queue *, int); 997 extern int blk_queue_resize_tags(struct request_queue *, int);
995 extern void blk_queue_invalidate_tags(struct request_queue *); 998 extern void blk_queue_invalidate_tags(struct request_queue *);
996 extern struct blk_queue_tag *blk_init_tags(int); 999 extern struct blk_queue_tag *blk_init_tags(int);
997 extern void blk_free_tags(struct blk_queue_tag *); 1000 extern void blk_free_tags(struct blk_queue_tag *);
998 1001
999 static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, 1002 static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
1000 int tag) 1003 int tag)
1001 { 1004 {
1002 if (unlikely(bqt == NULL || tag >= bqt->real_max_depth)) 1005 if (unlikely(bqt == NULL || tag >= bqt->real_max_depth))
1003 return NULL; 1006 return NULL;
1004 return bqt->tag_index[tag]; 1007 return bqt->tag_index[tag];
1005 } 1008 }
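
As an illustrative aside, a hedged sketch of a completion interrupt resolving a hardware tag back to its request via blk_map_queue_find_tag(); it assumes the queue uses block-layer tagging, that 'hw_tag' came from the hardware, and that the queue lock is already held (required by __blk_end_request_all()):

        struct request *rq = blk_map_queue_find_tag(q->queue_tags, hw_tag);

        if (rq) {
                blk_queue_end_tag(q, rq);       /* give the tag back */
                __blk_end_request_all(rq, 0);   /* complete the request, no error */
        }
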
1006 enum { 1009 enum {
1007 BLKDEV_WAIT, /* wait for completion */ 1010 BLKDEV_WAIT, /* wait for completion */
1008 BLKDEV_BARRIER, /* issue request with barrier */ 1011 BLKDEV_BARRIER, /* issue request with barrier */
1009 }; 1012 };
1010 #define BLKDEV_IFL_WAIT (1 << BLKDEV_WAIT) 1013 #define BLKDEV_IFL_WAIT (1 << BLKDEV_WAIT)
1011 #define BLKDEV_IFL_BARRIER (1 << BLKDEV_BARRIER) 1014 #define BLKDEV_IFL_BARRIER (1 << BLKDEV_BARRIER)
1012 extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *, 1015 extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *,
1013 unsigned long); 1016 unsigned long);
1014 extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, 1017 extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
1015 sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); 1018 sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
1016 extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, 1019 extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
1017 sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); 1020 sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
1018 static inline int sb_issue_discard(struct super_block *sb, 1021 static inline int sb_issue_discard(struct super_block *sb,
1019 sector_t block, sector_t nr_blocks) 1022 sector_t block, sector_t nr_blocks)
1020 { 1023 {
1021 block <<= (sb->s_blocksize_bits - 9); 1024 block <<= (sb->s_blocksize_bits - 9);
1022 nr_blocks <<= (sb->s_blocksize_bits - 9); 1025 nr_blocks <<= (sb->s_blocksize_bits - 9);
1023 return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL, 1026 return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL,
1024 BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); 1027 BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
1025 } 1028 }
1026 1029
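
As an illustrative aside, a hedged sketch of a filesystem discarding a freed extent through sb_issue_discard(); 'sb', 'block' and 'count' are assumed to be supplied by the caller, and -EOPNOTSUPP just means the device does not support discard:

        int err = sb_issue_discard(sb, block, count);

        if (err && err != -EOPNOTSUPP)
                printk(KERN_WARNING "discard of %llu+%llu blocks failed: %d\n",
                       (unsigned long long)block, (unsigned long long)count, err);
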
1027 extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); 1030 extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
1028 1031
1029 enum blk_default_limits { 1032 enum blk_default_limits {
1030 BLK_MAX_SEGMENTS = 128, 1033 BLK_MAX_SEGMENTS = 128,
1031 BLK_SAFE_MAX_SECTORS = 255, 1034 BLK_SAFE_MAX_SECTORS = 255,
1032 BLK_DEF_MAX_SECTORS = 1024, 1035 BLK_DEF_MAX_SECTORS = 1024,
1033 BLK_MAX_SEGMENT_SIZE = 65536, 1036 BLK_MAX_SEGMENT_SIZE = 65536,
1034 BLK_SEG_BOUNDARY_MASK = 0xFFFFFFFFUL, 1037 BLK_SEG_BOUNDARY_MASK = 0xFFFFFFFFUL,
1035 }; 1038 };
1036 1039
1037 #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) 1040 #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist)
1038 1041
1039 static inline unsigned long queue_bounce_pfn(struct request_queue *q) 1042 static inline unsigned long queue_bounce_pfn(struct request_queue *q)
1040 { 1043 {
1041 return q->limits.bounce_pfn; 1044 return q->limits.bounce_pfn;
1042 } 1045 }
1043 1046
1044 static inline unsigned long queue_segment_boundary(struct request_queue *q) 1047 static inline unsigned long queue_segment_boundary(struct request_queue *q)
1045 { 1048 {
1046 return q->limits.seg_boundary_mask; 1049 return q->limits.seg_boundary_mask;
1047 } 1050 }
1048 1051
1049 static inline unsigned int queue_max_sectors(struct request_queue *q) 1052 static inline unsigned int queue_max_sectors(struct request_queue *q)
1050 { 1053 {
1051 return q->limits.max_sectors; 1054 return q->limits.max_sectors;
1052 } 1055 }
1053 1056
1054 static inline unsigned int queue_max_hw_sectors(struct request_queue *q) 1057 static inline unsigned int queue_max_hw_sectors(struct request_queue *q)
1055 { 1058 {
1056 return q->limits.max_hw_sectors; 1059 return q->limits.max_hw_sectors;
1057 } 1060 }
1058 1061
1059 static inline unsigned short queue_max_segments(struct request_queue *q) 1062 static inline unsigned short queue_max_segments(struct request_queue *q)
1060 { 1063 {
1061 return q->limits.max_segments; 1064 return q->limits.max_segments;
1062 } 1065 }
1063 1066
1064 static inline unsigned int queue_max_segment_size(struct request_queue *q) 1067 static inline unsigned int queue_max_segment_size(struct request_queue *q)
1065 { 1068 {
1066 return q->limits.max_segment_size; 1069 return q->limits.max_segment_size;
1067 } 1070 }
1068 1071
1069 static inline unsigned short queue_logical_block_size(struct request_queue *q) 1072 static inline unsigned short queue_logical_block_size(struct request_queue *q)
1070 { 1073 {
1071 int retval = 512; 1074 int retval = 512;
1072 1075
1073 if (q && q->limits.logical_block_size) 1076 if (q && q->limits.logical_block_size)
1074 retval = q->limits.logical_block_size; 1077 retval = q->limits.logical_block_size;
1075 1078
1076 return retval; 1079 return retval;
1077 } 1080 }
1078 1081
1079 static inline unsigned short bdev_logical_block_size(struct block_device *bdev) 1082 static inline unsigned short bdev_logical_block_size(struct block_device *bdev)
1080 { 1083 {
1081 return queue_logical_block_size(bdev_get_queue(bdev)); 1084 return queue_logical_block_size(bdev_get_queue(bdev));
1082 } 1085 }
1083 1086
1084 static inline unsigned int queue_physical_block_size(struct request_queue *q) 1087 static inline unsigned int queue_physical_block_size(struct request_queue *q)
1085 { 1088 {
1086 return q->limits.physical_block_size; 1089 return q->limits.physical_block_size;
1087 } 1090 }
1088 1091
1089 static inline int bdev_physical_block_size(struct block_device *bdev) 1092 static inline int bdev_physical_block_size(struct block_device *bdev)
1090 { 1093 {
1091 return queue_physical_block_size(bdev_get_queue(bdev)); 1094 return queue_physical_block_size(bdev_get_queue(bdev));
1092 } 1095 }
1093 1096
1094 static inline unsigned int queue_io_min(struct request_queue *q) 1097 static inline unsigned int queue_io_min(struct request_queue *q)
1095 { 1098 {
1096 return q->limits.io_min; 1099 return q->limits.io_min;
1097 } 1100 }
1098 1101
1099 static inline int bdev_io_min(struct block_device *bdev) 1102 static inline int bdev_io_min(struct block_device *bdev)
1100 { 1103 {
1101 return queue_io_min(bdev_get_queue(bdev)); 1104 return queue_io_min(bdev_get_queue(bdev));
1102 } 1105 }
1103 1106
1104 static inline unsigned int queue_io_opt(struct request_queue *q) 1107 static inline unsigned int queue_io_opt(struct request_queue *q)
1105 { 1108 {
1106 return q->limits.io_opt; 1109 return q->limits.io_opt;
1107 } 1110 }
1108 1111
1109 static inline int bdev_io_opt(struct block_device *bdev) 1112 static inline int bdev_io_opt(struct block_device *bdev)
1110 { 1113 {
1111 return queue_io_opt(bdev_get_queue(bdev)); 1114 return queue_io_opt(bdev_get_queue(bdev));
1112 } 1115 }
1113 1116
1114 static inline int queue_alignment_offset(struct request_queue *q) 1117 static inline int queue_alignment_offset(struct request_queue *q)
1115 { 1118 {
1116 if (q->limits.misaligned) 1119 if (q->limits.misaligned)
1117 return -1; 1120 return -1;
1118 1121
1119 return q->limits.alignment_offset; 1122 return q->limits.alignment_offset;
1120 } 1123 }
1121 1124
1122 static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector) 1125 static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector)
1123 { 1126 {
1124 unsigned int granularity = max(lim->physical_block_size, lim->io_min); 1127 unsigned int granularity = max(lim->physical_block_size, lim->io_min);
1125 unsigned int alignment = (sector << 9) & (granularity - 1); 1128 unsigned int alignment = (sector << 9) & (granularity - 1);
1126 1129
1127 return (granularity + lim->alignment_offset - alignment) 1130 return (granularity + lim->alignment_offset - alignment)
1128 & (granularity - 1); 1131 & (granularity - 1);
1129 } 1132 }
1130 1133
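
Worked example (illustrative): for a device with a 4096-byte physical block size, io_min of 0 and no device alignment_offset, a partition starting at sector 7 sits 7 * 512 = 3584 bytes into a physical block, so queue_limit_alignment_offset() reports (4096 + 0 - 3584) & 4095 = 512, i.e. the first physically aligned boundary lies 512 bytes into the partition.
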
1131 static inline int bdev_alignment_offset(struct block_device *bdev) 1134 static inline int bdev_alignment_offset(struct block_device *bdev)
1132 { 1135 {
1133 struct request_queue *q = bdev_get_queue(bdev); 1136 struct request_queue *q = bdev_get_queue(bdev);
1134 1137
1135 if (q->limits.misaligned) 1138 if (q->limits.misaligned)
1136 return -1; 1139 return -1;
1137 1140
1138 if (bdev != bdev->bd_contains) 1141 if (bdev != bdev->bd_contains)
1139 return bdev->bd_part->alignment_offset; 1142 return bdev->bd_part->alignment_offset;
1140 1143
1141 return q->limits.alignment_offset; 1144 return q->limits.alignment_offset;
1142 } 1145 }
1143 1146
1144 static inline int queue_discard_alignment(struct request_queue *q) 1147 static inline int queue_discard_alignment(struct request_queue *q)
1145 { 1148 {
1146 if (q->limits.discard_misaligned) 1149 if (q->limits.discard_misaligned)
1147 return -1; 1150 return -1;
1148 1151
1149 return q->limits.discard_alignment; 1152 return q->limits.discard_alignment;
1150 } 1153 }
1151 1154
1152 static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector) 1155 static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector)
1153 { 1156 {
1154 unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1); 1157 unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1);
1155 1158
1156 return (lim->discard_granularity + lim->discard_alignment - alignment) 1159 return (lim->discard_granularity + lim->discard_alignment - alignment)
1157 & (lim->discard_granularity - 1); 1160 & (lim->discard_granularity - 1);
1158 } 1161 }
1159 1162
1160 static inline unsigned int queue_discard_zeroes_data(struct request_queue *q) 1163 static inline unsigned int queue_discard_zeroes_data(struct request_queue *q)
1161 { 1164 {
1162 if (q->limits.discard_zeroes_data == 1) 1165 if (q->limits.discard_zeroes_data == 1)
1163 return 1; 1166 return 1;
1164 1167
1165 return 0; 1168 return 0;
1166 } 1169 }
1167 1170
1168 static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev) 1171 static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev)
1169 { 1172 {
1170 return queue_discard_zeroes_data(bdev_get_queue(bdev)); 1173 return queue_discard_zeroes_data(bdev_get_queue(bdev));
1171 } 1174 }
1172 1175
1173 static inline int queue_dma_alignment(struct request_queue *q) 1176 static inline int queue_dma_alignment(struct request_queue *q)
1174 { 1177 {
1175 return q ? q->dma_alignment : 511; 1178 return q ? q->dma_alignment : 511;
1176 } 1179 }
1177 1180
1178 static inline int blk_rq_aligned(struct request_queue *q, void *addr, 1181 static inline int blk_rq_aligned(struct request_queue *q, void *addr,
1179 unsigned int len) 1182 unsigned int len)
1180 { 1183 {
1181 unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask; 1184 unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask;
1182 return !((unsigned long)addr & alignment) && !(len & alignment); 1185 return !((unsigned long)addr & alignment) && !(len & alignment);
1183 } 1186 }
1184 1187
1185 /* assumes size > 256 */ 1188 /* assumes size > 256 */
1186 static inline unsigned int blksize_bits(unsigned int size) 1189 static inline unsigned int blksize_bits(unsigned int size)
1187 { 1190 {
1188 unsigned int bits = 8; 1191 unsigned int bits = 8;
1189 do { 1192 do {
1190 bits++; 1193 bits++;
1191 size >>= 1; 1194 size >>= 1;
1192 } while (size > 256); 1195 } while (size > 256);
1193 return bits; 1196 return bits;
1194 } 1197 }
1195 1198
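
Worked example (illustrative): blksize_bits(4096) halves the size 4096 -> 2048 -> 1024 -> 512 -> 256 while bumping bits from 8 to 12 and returns 12 (2^12 = 4096); for the minimum block size of 512 it returns 9. The "assumes size > 256" comment matters because any size of 256 or less would also return 9.
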
1196 static inline unsigned int block_size(struct block_device *bdev) 1199 static inline unsigned int block_size(struct block_device *bdev)
1197 { 1200 {
1198 return bdev->bd_block_size; 1201 return bdev->bd_block_size;
1199 } 1202 }
1200 1203
1201 typedef struct {struct page *v;} Sector; 1204 typedef struct {struct page *v;} Sector;
1202 1205
1203 unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *); 1206 unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *);
1204 1207
1205 static inline void put_dev_sector(Sector p) 1208 static inline void put_dev_sector(Sector p)
1206 { 1209 {
1207 page_cache_release(p.v); 1210 page_cache_release(p.v);
1208 } 1211 }
1209 1212
1210 struct work_struct; 1213 struct work_struct;
1211 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); 1214 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
1212 1215
1213 #ifdef CONFIG_BLK_CGROUP 1216 #ifdef CONFIG_BLK_CGROUP
1214 /* 1217 /*
1215 * This should not be using sched_clock(). A real patch is in progress 1218 * This should not be using sched_clock(). A real patch is in progress
1216 * to fix this up; until that is in place we need to disable preemption 1219 * to fix this up; until that is in place we need to disable preemption
1217 * around sched_clock() in this function and set_io_start_time_ns(). 1220 * around sched_clock() in this function and set_io_start_time_ns().
1218 */ 1221 */
1219 static inline void set_start_time_ns(struct request *req) 1222 static inline void set_start_time_ns(struct request *req)
1220 { 1223 {
1221 preempt_disable(); 1224 preempt_disable();
1222 req->start_time_ns = sched_clock(); 1225 req->start_time_ns = sched_clock();
1223 preempt_enable(); 1226 preempt_enable();
1224 } 1227 }
1225 1228
1226 static inline void set_io_start_time_ns(struct request *req) 1229 static inline void set_io_start_time_ns(struct request *req)
1227 { 1230 {
1228 preempt_disable(); 1231 preempt_disable();
1229 req->io_start_time_ns = sched_clock(); 1232 req->io_start_time_ns = sched_clock();
1230 preempt_enable(); 1233 preempt_enable();
1231 } 1234 }
1232 1235
1233 static inline uint64_t rq_start_time_ns(struct request *req) 1236 static inline uint64_t rq_start_time_ns(struct request *req)
1234 { 1237 {
1235 return req->start_time_ns; 1238 return req->start_time_ns;
1236 } 1239 }
1237 1240
1238 static inline uint64_t rq_io_start_time_ns(struct request *req) 1241 static inline uint64_t rq_io_start_time_ns(struct request *req)
1239 { 1242 {
1240 return req->io_start_time_ns; 1243 return req->io_start_time_ns;
1241 } 1244 }
1242 #else 1245 #else
1243 static inline void set_start_time_ns(struct request *req) {} 1246 static inline void set_start_time_ns(struct request *req) {}
1244 static inline void set_io_start_time_ns(struct request *req) {} 1247 static inline void set_io_start_time_ns(struct request *req) {}
1245 static inline uint64_t rq_start_time_ns(struct request *req) 1248 static inline uint64_t rq_start_time_ns(struct request *req)
1246 { 1249 {
1247 return 0; 1250 return 0;
1248 } 1251 }
1249 static inline uint64_t rq_io_start_time_ns(struct request *req) 1252 static inline uint64_t rq_io_start_time_ns(struct request *req)
1250 { 1253 {
1251 return 0; 1254 return 0;
1252 } 1255 }
1253 #endif 1256 #endif
1254 1257
1255 #define MODULE_ALIAS_BLOCKDEV(major,minor) \ 1258 #define MODULE_ALIAS_BLOCKDEV(major,minor) \
1256 MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) 1259 MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
1257 #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ 1260 #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \
1258 MODULE_ALIAS("block-major-" __stringify(major) "-*") 1261 MODULE_ALIAS("block-major-" __stringify(major) "-*")
1259 1262
1260 #if defined(CONFIG_BLK_DEV_INTEGRITY) 1263 #if defined(CONFIG_BLK_DEV_INTEGRITY)
1261 1264
1262 #define INTEGRITY_FLAG_READ 2 /* verify data integrity on read */ 1265 #define INTEGRITY_FLAG_READ 2 /* verify data integrity on read */
1263 #define INTEGRITY_FLAG_WRITE 4 /* generate data integrity on write */ 1266 #define INTEGRITY_FLAG_WRITE 4 /* generate data integrity on write */
1264 1267
1265 struct blk_integrity_exchg { 1268 struct blk_integrity_exchg {
1266 void *prot_buf; 1269 void *prot_buf;
1267 void *data_buf; 1270 void *data_buf;
1268 sector_t sector; 1271 sector_t sector;
1269 unsigned int data_size; 1272 unsigned int data_size;
1270 unsigned short sector_size; 1273 unsigned short sector_size;
1271 const char *disk_name; 1274 const char *disk_name;
1272 }; 1275 };
1273 1276
1274 typedef void (integrity_gen_fn) (struct blk_integrity_exchg *); 1277 typedef void (integrity_gen_fn) (struct blk_integrity_exchg *);
1275 typedef int (integrity_vrfy_fn) (struct blk_integrity_exchg *); 1278 typedef int (integrity_vrfy_fn) (struct blk_integrity_exchg *);
1276 typedef void (integrity_set_tag_fn) (void *, void *, unsigned int); 1279 typedef void (integrity_set_tag_fn) (void *, void *, unsigned int);
1277 typedef void (integrity_get_tag_fn) (void *, void *, unsigned int); 1280 typedef void (integrity_get_tag_fn) (void *, void *, unsigned int);
1278 1281
1279 struct blk_integrity { 1282 struct blk_integrity {
1280 integrity_gen_fn *generate_fn; 1283 integrity_gen_fn *generate_fn;
1281 integrity_vrfy_fn *verify_fn; 1284 integrity_vrfy_fn *verify_fn;
1282 integrity_set_tag_fn *set_tag_fn; 1285 integrity_set_tag_fn *set_tag_fn;
1283 integrity_get_tag_fn *get_tag_fn; 1286 integrity_get_tag_fn *get_tag_fn;
1284 1287
1285 unsigned short flags; 1288 unsigned short flags;
1286 unsigned short tuple_size; 1289 unsigned short tuple_size;
1287 unsigned short sector_size; 1290 unsigned short sector_size;
1288 unsigned short tag_size; 1291 unsigned short tag_size;
1289 1292
1290 const char *name; 1293 const char *name;
1291 1294
1292 struct kobject kobj; 1295 struct kobject kobj;
1293 }; 1296 };
1294 1297
1295 extern int blk_integrity_register(struct gendisk *, struct blk_integrity *); 1298 extern int blk_integrity_register(struct gendisk *, struct blk_integrity *);
1296 extern void blk_integrity_unregister(struct gendisk *); 1299 extern void blk_integrity_unregister(struct gendisk *);
1297 extern int blk_integrity_compare(struct gendisk *, struct gendisk *); 1300 extern int blk_integrity_compare(struct gendisk *, struct gendisk *);
1298 extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); 1301 extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *);
1299 extern int blk_rq_count_integrity_sg(struct request *); 1302 extern int blk_rq_count_integrity_sg(struct request *);
1300 1303
1301 static inline 1304 static inline
1302 struct blk_integrity *bdev_get_integrity(struct block_device *bdev) 1305 struct blk_integrity *bdev_get_integrity(struct block_device *bdev)
1303 { 1306 {
1304 return bdev->bd_disk->integrity; 1307 return bdev->bd_disk->integrity;
1305 } 1308 }
1306 1309
1307 static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) 1310 static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
1308 { 1311 {
1309 return disk->integrity; 1312 return disk->integrity;
1310 } 1313 }
1311 1314
1312 static inline int blk_integrity_rq(struct request *rq) 1315 static inline int blk_integrity_rq(struct request *rq)
1313 { 1316 {
1314 if (rq->bio == NULL) 1317 if (rq->bio == NULL)
1315 return 0; 1318 return 0;
1316 1319
1317 return bio_integrity(rq->bio); 1320 return bio_integrity(rq->bio);
1318 } 1321 }
1319 1322
1320 #else /* CONFIG_BLK_DEV_INTEGRITY */ 1323 #else /* CONFIG_BLK_DEV_INTEGRITY */
1321 1324
1322 #define blk_integrity_rq(rq) (0) 1325 #define blk_integrity_rq(rq) (0)
1323 #define blk_rq_count_integrity_sg(a) (0) 1326 #define blk_rq_count_integrity_sg(a) (0)
1324 #define blk_rq_map_integrity_sg(a, b) (0) 1327 #define blk_rq_map_integrity_sg(a, b) (0)
1325 #define bdev_get_integrity(a) (0) 1328 #define bdev_get_integrity(a) (0)
1326 #define blk_get_integrity(a) (0) 1329 #define blk_get_integrity(a) (0)
1327 #define blk_integrity_compare(a, b) (0) 1330 #define blk_integrity_compare(a, b) (0)
1328 #define blk_integrity_register(a, b) (0) 1331 #define blk_integrity_register(a, b) (0)
1329 #define blk_integrity_unregister(a) do { } while (0) 1332 #define blk_integrity_unregister(a) do { } while (0)
1330 1333
1331 #endif /* CONFIG_BLK_DEV_INTEGRITY */ 1334 #endif /* CONFIG_BLK_DEV_INTEGRITY */
1332 1335
1333 struct block_device_operations { 1336 struct block_device_operations {
1334 int (*open) (struct block_device *, fmode_t); 1337 int (*open) (struct block_device *, fmode_t);
1335 int (*release) (struct gendisk *, fmode_t); 1338 int (*release) (struct gendisk *, fmode_t);
1336 int (*locked_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); 1339 int (*locked_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
1337 int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); 1340 int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
1338 int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); 1341 int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
1339 int (*direct_access) (struct block_device *, sector_t, 1342 int (*direct_access) (struct block_device *, sector_t,
1340 void **, unsigned long *); 1343 void **, unsigned long *);
1341 int (*media_changed) (struct gendisk *); 1344 int (*media_changed) (struct gendisk *);
1342 void (*unlock_native_capacity) (struct gendisk *); 1345 void (*unlock_native_capacity) (struct gendisk *);
1343 int (*revalidate_disk) (struct gendisk *); 1346 int (*revalidate_disk) (struct gendisk *);
1344 int (*getgeo)(struct block_device *, struct hd_geometry *); 1347 int (*getgeo)(struct block_device *, struct hd_geometry *);
1345 /* this callback is with swap_lock and sometimes page table lock held */ 1348 /* this callback is with swap_lock and sometimes page table lock held */
1346 void (*swap_slot_free_notify) (struct block_device *, unsigned long); 1349 void (*swap_slot_free_notify) (struct block_device *, unsigned long);
1347 struct module *owner; 1350 struct module *owner;
1348 }; 1351 };
1349 1352
1350 extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, 1353 extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
1351 unsigned long); 1354 unsigned long);
1352 #else /* CONFIG_BLOCK */ 1355 #else /* CONFIG_BLOCK */
1353 /* 1356 /*
1354 * stubs for when the block layer is configured out 1357 * stubs for when the block layer is configured out
1355 */ 1358 */
1356 #define buffer_heads_over_limit 0 1359 #define buffer_heads_over_limit 0
1357 1360
1358 static inline long nr_blockdev_pages(void) 1361 static inline long nr_blockdev_pages(void)
1359 { 1362 {
1360 return 0; 1363 return 0;
1361 } 1364 }
1362 1365
1363 #endif /* CONFIG_BLOCK */ 1366 #endif /* CONFIG_BLOCK */
1364 1367
1365 #endif 1368 #endif
1366 1369