Commit 32fab448e5e86694beade415e750363538ea5f49

Authored by Kiyoshi Ueda
Committed by Jens Axboe
1 parent e3335de940

block: add request update interface

This patch adds blk_update_request(), which completes the data part of
a struct request without completing the struct request itself.
Although it looks like end_that_request_first() of older kernels,
blk_update_request() should be used only by request stacking drivers.

Request-based dm will use it in the bio->bi_end_io callback to update
the original request when a data part of a cloned request completes.
The following is additional background on why request-based dm needs
this interface.

  - Request stacking drivers can't use blk_end_request() directly from
    the lower driver's completion context (bio->bi_end_io or rq->end_io),
    because some device drivers (e.g. ide) may try to complete their
    requests with the queue lock held, which may cause a deadlock.
    See the link below for a detailed description of the possible deadlock:
    <http://marc.info/?l=linux-kernel&m=120311479108569&w=2>

  - To solve that, request-based dm offloads the completion of the
    cloned struct request to softirq context (i.e. it calls
    blk_complete_request() from rq->end_io).

  - Though the same solution could be used from bio->bi_end_io, it would
    delay notifying the original submitter of the bio completion.  It
    would also make partial completion inefficient, because the lower
    driver can no longer process the cloned request, so request-based dm
    would have to requeue it and redispatch it to the lower driver again
    later.  That's not good.

  - So request-based dm needs blk_update_request() to perform the bio
    completion in the lower driver's completion context, which is more
    efficient (see the sketch below).
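
As a rough illustration, here is a minimal sketch of how a request
stacking driver might wire this up.  The clone_info structure and the
helper names are hypothetical, and the blk_update_request() signature
assumed here is (rq, error, nr_bytes); only the block-layer calls come
from the interfaces discussed above.

  /* per-clone-bio bookkeeping a stacking driver might keep (hypothetical) */
  struct clone_info {
          struct request *orig;   /* the original, stacked request */
          struct bio *orig_bio;   /* matching bio of the original request */
  };

  /* bi_end_io of a bio belonging to the cloned request */
  static void clone_bio_end_io(struct bio *clone, int error)
  {
          struct clone_info *info = clone->bi_private;
          unsigned int nr_bytes = info->orig_bio->bi_size;

          /*
           * Complete the data part of the original request right here,
           * in the lower driver's completion context.  The original bios
           * get ended, but the original struct request itself does not.
           */
          blk_update_request(info->orig, error, nr_bytes);
  }

  /* rq->end_io of the cloned request */
  static void clone_rq_end_io(struct request *clone, int error)
  {
          /*
           * The lower driver may be holding its queue lock here, so don't
           * complete anything directly.  Offload the final completion to
           * softirq context; the softirq done handler then finishes the
           * clone and the original request.
           */
          blk_complete_request(clone);
  }

How the clone_info is allocated and torn down, and what the softirq done
handler does with the original request, is left to the stacking driver.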

Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

Showing 2 changed files with 50 additions and 9 deletions

1 /* 1 /*
2 * Copyright (C) 1991, 1992 Linus Torvalds 2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 1994, Karl Keyte: Added support for disk statistics 3 * Copyright (C) 1994, Karl Keyte: Added support for disk statistics
4 * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE 4 * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
5 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de> 5 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
6 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> 6 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au>
7 * - July2000 7 * - July2000
8 * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001 8 * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001
9 */ 9 */
10 10
11 /* 11 /*
12 * This handles all read/write requests to block devices 12 * This handles all read/write requests to block devices
13 */ 13 */
14 #include <linux/kernel.h> 14 #include <linux/kernel.h>
15 #include <linux/module.h> 15 #include <linux/module.h>
16 #include <linux/backing-dev.h> 16 #include <linux/backing-dev.h>
17 #include <linux/bio.h> 17 #include <linux/bio.h>
18 #include <linux/blkdev.h> 18 #include <linux/blkdev.h>
19 #include <linux/highmem.h> 19 #include <linux/highmem.h>
20 #include <linux/mm.h> 20 #include <linux/mm.h>
21 #include <linux/kernel_stat.h> 21 #include <linux/kernel_stat.h>
22 #include <linux/string.h> 22 #include <linux/string.h>
23 #include <linux/init.h> 23 #include <linux/init.h>
24 #include <linux/completion.h> 24 #include <linux/completion.h>
25 #include <linux/slab.h> 25 #include <linux/slab.h>
26 #include <linux/swap.h> 26 #include <linux/swap.h>
27 #include <linux/writeback.h> 27 #include <linux/writeback.h>
28 #include <linux/task_io_accounting_ops.h> 28 #include <linux/task_io_accounting_ops.h>
29 #include <linux/blktrace_api.h> 29 #include <linux/blktrace_api.h>
30 #include <linux/fault-inject.h> 30 #include <linux/fault-inject.h>
31 31
32 #include "blk.h" 32 #include "blk.h"
33 33
34 static int __make_request(struct request_queue *q, struct bio *bio); 34 static int __make_request(struct request_queue *q, struct bio *bio);
35 35
36 /* 36 /*
37 * For the allocated request tables 37 * For the allocated request tables
38 */ 38 */
39 static struct kmem_cache *request_cachep; 39 static struct kmem_cache *request_cachep;
40 40
41 /* 41 /*
42 * For queue allocation 42 * For queue allocation
43 */ 43 */
44 struct kmem_cache *blk_requestq_cachep; 44 struct kmem_cache *blk_requestq_cachep;
45 45
46 /* 46 /*
47 * Controlling structure to kblockd 47 * Controlling structure to kblockd
48 */ 48 */
49 static struct workqueue_struct *kblockd_workqueue; 49 static struct workqueue_struct *kblockd_workqueue;
50 50
51 static void drive_stat_acct(struct request *rq, int new_io) 51 static void drive_stat_acct(struct request *rq, int new_io)
52 { 52 {
53 struct hd_struct *part; 53 struct hd_struct *part;
54 int rw = rq_data_dir(rq); 54 int rw = rq_data_dir(rq);
55 int cpu; 55 int cpu;
56 56
57 if (!blk_fs_request(rq) || !rq->rq_disk) 57 if (!blk_fs_request(rq) || !rq->rq_disk)
58 return; 58 return;
59 59
60 cpu = part_stat_lock(); 60 cpu = part_stat_lock();
61 part = disk_map_sector_rcu(rq->rq_disk, rq->sector); 61 part = disk_map_sector_rcu(rq->rq_disk, rq->sector);
62 62
63 if (!new_io) 63 if (!new_io)
64 part_stat_inc(cpu, part, merges[rw]); 64 part_stat_inc(cpu, part, merges[rw]);
65 else { 65 else {
66 part_round_stats(cpu, part); 66 part_round_stats(cpu, part);
67 part_inc_in_flight(part); 67 part_inc_in_flight(part);
68 } 68 }
69 69
70 part_stat_unlock(); 70 part_stat_unlock();
71 } 71 }
72 72
73 void blk_queue_congestion_threshold(struct request_queue *q) 73 void blk_queue_congestion_threshold(struct request_queue *q)
74 { 74 {
75 int nr; 75 int nr;
76 76
77 nr = q->nr_requests - (q->nr_requests / 8) + 1; 77 nr = q->nr_requests - (q->nr_requests / 8) + 1;
78 if (nr > q->nr_requests) 78 if (nr > q->nr_requests)
79 nr = q->nr_requests; 79 nr = q->nr_requests;
80 q->nr_congestion_on = nr; 80 q->nr_congestion_on = nr;
81 81
82 nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1; 82 nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;
83 if (nr < 1) 83 if (nr < 1)
84 nr = 1; 84 nr = 1;
85 q->nr_congestion_off = nr; 85 q->nr_congestion_off = nr;
86 } 86 }
87 87
88 /** 88 /**
89 * blk_get_backing_dev_info - get the address of a queue's backing_dev_info 89 * blk_get_backing_dev_info - get the address of a queue's backing_dev_info
90 * @bdev: device 90 * @bdev: device
91 * 91 *
92 * Locates the passed device's request queue and returns the address of its 92 * Locates the passed device's request queue and returns the address of its
93 * backing_dev_info 93 * backing_dev_info
94 * 94 *
95 * Will return NULL if the request queue cannot be located. 95 * Will return NULL if the request queue cannot be located.
96 */ 96 */
97 struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) 97 struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
98 { 98 {
99 struct backing_dev_info *ret = NULL; 99 struct backing_dev_info *ret = NULL;
100 struct request_queue *q = bdev_get_queue(bdev); 100 struct request_queue *q = bdev_get_queue(bdev);
101 101
102 if (q) 102 if (q)
103 ret = &q->backing_dev_info; 103 ret = &q->backing_dev_info;
104 return ret; 104 return ret;
105 } 105 }
106 EXPORT_SYMBOL(blk_get_backing_dev_info); 106 EXPORT_SYMBOL(blk_get_backing_dev_info);
107 107
108 void blk_rq_init(struct request_queue *q, struct request *rq) 108 void blk_rq_init(struct request_queue *q, struct request *rq)
109 { 109 {
110 memset(rq, 0, sizeof(*rq)); 110 memset(rq, 0, sizeof(*rq));
111 111
112 INIT_LIST_HEAD(&rq->queuelist); 112 INIT_LIST_HEAD(&rq->queuelist);
113 INIT_LIST_HEAD(&rq->timeout_list); 113 INIT_LIST_HEAD(&rq->timeout_list);
114 rq->cpu = -1; 114 rq->cpu = -1;
115 rq->q = q; 115 rq->q = q;
116 rq->sector = rq->hard_sector = (sector_t) -1; 116 rq->sector = rq->hard_sector = (sector_t) -1;
117 INIT_HLIST_NODE(&rq->hash); 117 INIT_HLIST_NODE(&rq->hash);
118 RB_CLEAR_NODE(&rq->rb_node); 118 RB_CLEAR_NODE(&rq->rb_node);
119 rq->cmd = rq->__cmd; 119 rq->cmd = rq->__cmd;
120 rq->tag = -1; 120 rq->tag = -1;
121 rq->ref_count = 1; 121 rq->ref_count = 1;
122 } 122 }
123 EXPORT_SYMBOL(blk_rq_init); 123 EXPORT_SYMBOL(blk_rq_init);
124 124
125 static void req_bio_endio(struct request *rq, struct bio *bio, 125 static void req_bio_endio(struct request *rq, struct bio *bio,
126 unsigned int nbytes, int error) 126 unsigned int nbytes, int error)
127 { 127 {
128 struct request_queue *q = rq->q; 128 struct request_queue *q = rq->q;
129 129
130 if (&q->bar_rq != rq) { 130 if (&q->bar_rq != rq) {
131 if (error) 131 if (error)
132 clear_bit(BIO_UPTODATE, &bio->bi_flags); 132 clear_bit(BIO_UPTODATE, &bio->bi_flags);
133 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) 133 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
134 error = -EIO; 134 error = -EIO;
135 135
136 if (unlikely(nbytes > bio->bi_size)) { 136 if (unlikely(nbytes > bio->bi_size)) {
137 printk(KERN_ERR "%s: want %u bytes done, %u left\n", 137 printk(KERN_ERR "%s: want %u bytes done, %u left\n",
138 __func__, nbytes, bio->bi_size); 138 __func__, nbytes, bio->bi_size);
139 nbytes = bio->bi_size; 139 nbytes = bio->bi_size;
140 } 140 }
141 141
142 bio->bi_size -= nbytes; 142 bio->bi_size -= nbytes;
143 bio->bi_sector += (nbytes >> 9); 143 bio->bi_sector += (nbytes >> 9);
144 144
145 if (bio_integrity(bio)) 145 if (bio_integrity(bio))
146 bio_integrity_advance(bio, nbytes); 146 bio_integrity_advance(bio, nbytes);
147 147
148 if (bio->bi_size == 0) 148 if (bio->bi_size == 0)
149 bio_endio(bio, error); 149 bio_endio(bio, error);
150 } else { 150 } else {
151 151
152 /* 152 /*
153 * Okay, this is the barrier request in progress, just 153 * Okay, this is the barrier request in progress, just
154 * record the error; 154 * record the error;
155 */ 155 */
156 if (error && !q->orderr) 156 if (error && !q->orderr)
157 q->orderr = error; 157 q->orderr = error;
158 } 158 }
159 } 159 }
160 160
161 void blk_dump_rq_flags(struct request *rq, char *msg) 161 void blk_dump_rq_flags(struct request *rq, char *msg)
162 { 162 {
163 int bit; 163 int bit;
164 164
165 printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg, 165 printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg,
166 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, 166 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
167 rq->cmd_flags); 167 rq->cmd_flags);
168 168
169 printk(KERN_INFO " sector %llu, nr/cnr %lu/%u\n", 169 printk(KERN_INFO " sector %llu, nr/cnr %lu/%u\n",
170 (unsigned long long)rq->sector, 170 (unsigned long long)rq->sector,
171 rq->nr_sectors, 171 rq->nr_sectors,
172 rq->current_nr_sectors); 172 rq->current_nr_sectors);
173 printk(KERN_INFO " bio %p, biotail %p, buffer %p, data %p, len %u\n", 173 printk(KERN_INFO " bio %p, biotail %p, buffer %p, data %p, len %u\n",
174 rq->bio, rq->biotail, 174 rq->bio, rq->biotail,
175 rq->buffer, rq->data, 175 rq->buffer, rq->data,
176 rq->data_len); 176 rq->data_len);
177 177
178 if (blk_pc_request(rq)) { 178 if (blk_pc_request(rq)) {
179 printk(KERN_INFO " cdb: "); 179 printk(KERN_INFO " cdb: ");
180 for (bit = 0; bit < BLK_MAX_CDB; bit++) 180 for (bit = 0; bit < BLK_MAX_CDB; bit++)
181 printk("%02x ", rq->cmd[bit]); 181 printk("%02x ", rq->cmd[bit]);
182 printk("\n"); 182 printk("\n");
183 } 183 }
184 } 184 }
185 EXPORT_SYMBOL(blk_dump_rq_flags); 185 EXPORT_SYMBOL(blk_dump_rq_flags);
186 186
187 /* 187 /*
188 * "plug" the device if there are no outstanding requests: this will 188 * "plug" the device if there are no outstanding requests: this will
189 * force the transfer to start only after we have put all the requests 189 * force the transfer to start only after we have put all the requests
190 * on the list. 190 * on the list.
191 * 191 *
192 * This is called with interrupts off and no requests on the queue and 192 * This is called with interrupts off and no requests on the queue and
193 * with the queue lock held. 193 * with the queue lock held.
194 */ 194 */
195 void blk_plug_device(struct request_queue *q) 195 void blk_plug_device(struct request_queue *q)
196 { 196 {
197 WARN_ON(!irqs_disabled()); 197 WARN_ON(!irqs_disabled());
198 198
199 /* 199 /*
200 * don't plug a stopped queue, it must be paired with blk_start_queue() 200 * don't plug a stopped queue, it must be paired with blk_start_queue()
201 * which will restart the queueing 201 * which will restart the queueing
202 */ 202 */
203 if (blk_queue_stopped(q)) 203 if (blk_queue_stopped(q))
204 return; 204 return;
205 205
206 if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) { 206 if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) {
207 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); 207 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
208 blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG); 208 blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
209 } 209 }
210 } 210 }
211 EXPORT_SYMBOL(blk_plug_device); 211 EXPORT_SYMBOL(blk_plug_device);
212 212
213 /** 213 /**
214 * blk_plug_device_unlocked - plug a device without queue lock held 214 * blk_plug_device_unlocked - plug a device without queue lock held
215 * @q: The &struct request_queue to plug 215 * @q: The &struct request_queue to plug
216 * 216 *
217 * Description: 217 * Description:
218 * Like @blk_plug_device(), but grabs the queue lock and disables 218 * Like @blk_plug_device(), but grabs the queue lock and disables
219 * interrupts. 219 * interrupts.
220 **/ 220 **/
221 void blk_plug_device_unlocked(struct request_queue *q) 221 void blk_plug_device_unlocked(struct request_queue *q)
222 { 222 {
223 unsigned long flags; 223 unsigned long flags;
224 224
225 spin_lock_irqsave(q->queue_lock, flags); 225 spin_lock_irqsave(q->queue_lock, flags);
226 blk_plug_device(q); 226 blk_plug_device(q);
227 spin_unlock_irqrestore(q->queue_lock, flags); 227 spin_unlock_irqrestore(q->queue_lock, flags);
228 } 228 }
229 EXPORT_SYMBOL(blk_plug_device_unlocked); 229 EXPORT_SYMBOL(blk_plug_device_unlocked);
230 230
231 /* 231 /*
232 * remove the queue from the plugged list, if present. called with 232 * remove the queue from the plugged list, if present. called with
233 * queue lock held and interrupts disabled. 233 * queue lock held and interrupts disabled.
234 */ 234 */
235 int blk_remove_plug(struct request_queue *q) 235 int blk_remove_plug(struct request_queue *q)
236 { 236 {
237 WARN_ON(!irqs_disabled()); 237 WARN_ON(!irqs_disabled());
238 238
239 if (!queue_flag_test_and_clear(QUEUE_FLAG_PLUGGED, q)) 239 if (!queue_flag_test_and_clear(QUEUE_FLAG_PLUGGED, q))
240 return 0; 240 return 0;
241 241
242 del_timer(&q->unplug_timer); 242 del_timer(&q->unplug_timer);
243 return 1; 243 return 1;
244 } 244 }
245 EXPORT_SYMBOL(blk_remove_plug); 245 EXPORT_SYMBOL(blk_remove_plug);
246 246
247 /* 247 /*
248 * remove the plug and let it rip.. 248 * remove the plug and let it rip..
249 */ 249 */
250 void __generic_unplug_device(struct request_queue *q) 250 void __generic_unplug_device(struct request_queue *q)
251 { 251 {
252 if (unlikely(blk_queue_stopped(q))) 252 if (unlikely(blk_queue_stopped(q)))
253 return; 253 return;
254 254
255 if (!blk_remove_plug(q)) 255 if (!blk_remove_plug(q))
256 return; 256 return;
257 257
258 q->request_fn(q); 258 q->request_fn(q);
259 } 259 }
260 EXPORT_SYMBOL(__generic_unplug_device); 260 EXPORT_SYMBOL(__generic_unplug_device);
261 261
262 /** 262 /**
263 * generic_unplug_device - fire a request queue 263 * generic_unplug_device - fire a request queue
264 * @q: The &struct request_queue in question 264 * @q: The &struct request_queue in question
265 * 265 *
266 * Description: 266 * Description:
267 * Linux uses plugging to build bigger requests queues before letting 267 * Linux uses plugging to build bigger requests queues before letting
268 * the device have at them. If a queue is plugged, the I/O scheduler 268 * the device have at them. If a queue is plugged, the I/O scheduler
269 * is still adding and merging requests on the queue. Once the queue 269 * is still adding and merging requests on the queue. Once the queue
270 * gets unplugged, the request_fn defined for the queue is invoked and 270 * gets unplugged, the request_fn defined for the queue is invoked and
271 * transfers started. 271 * transfers started.
272 **/ 272 **/
273 void generic_unplug_device(struct request_queue *q) 273 void generic_unplug_device(struct request_queue *q)
274 { 274 {
275 if (blk_queue_plugged(q)) { 275 if (blk_queue_plugged(q)) {
276 spin_lock_irq(q->queue_lock); 276 spin_lock_irq(q->queue_lock);
277 __generic_unplug_device(q); 277 __generic_unplug_device(q);
278 spin_unlock_irq(q->queue_lock); 278 spin_unlock_irq(q->queue_lock);
279 } 279 }
280 } 280 }
281 EXPORT_SYMBOL(generic_unplug_device); 281 EXPORT_SYMBOL(generic_unplug_device);
282 282
283 static void blk_backing_dev_unplug(struct backing_dev_info *bdi, 283 static void blk_backing_dev_unplug(struct backing_dev_info *bdi,
284 struct page *page) 284 struct page *page)
285 { 285 {
286 struct request_queue *q = bdi->unplug_io_data; 286 struct request_queue *q = bdi->unplug_io_data;
287 287
288 blk_unplug(q); 288 blk_unplug(q);
289 } 289 }
290 290
291 void blk_unplug_work(struct work_struct *work) 291 void blk_unplug_work(struct work_struct *work)
292 { 292 {
293 struct request_queue *q = 293 struct request_queue *q =
294 container_of(work, struct request_queue, unplug_work); 294 container_of(work, struct request_queue, unplug_work);
295 295
296 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, 296 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
297 q->rq.count[READ] + q->rq.count[WRITE]); 297 q->rq.count[READ] + q->rq.count[WRITE]);
298 298
299 q->unplug_fn(q); 299 q->unplug_fn(q);
300 } 300 }
301 301
302 void blk_unplug_timeout(unsigned long data) 302 void blk_unplug_timeout(unsigned long data)
303 { 303 {
304 struct request_queue *q = (struct request_queue *)data; 304 struct request_queue *q = (struct request_queue *)data;
305 305
306 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL, 306 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,
307 q->rq.count[READ] + q->rq.count[WRITE]); 307 q->rq.count[READ] + q->rq.count[WRITE]);
308 308
309 kblockd_schedule_work(q, &q->unplug_work); 309 kblockd_schedule_work(q, &q->unplug_work);
310 } 310 }
311 311
312 void blk_unplug(struct request_queue *q) 312 void blk_unplug(struct request_queue *q)
313 { 313 {
314 /* 314 /*
315 * devices don't necessarily have an ->unplug_fn defined 315 * devices don't necessarily have an ->unplug_fn defined
316 */ 316 */
317 if (q->unplug_fn) { 317 if (q->unplug_fn) {
318 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, 318 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
319 q->rq.count[READ] + q->rq.count[WRITE]); 319 q->rq.count[READ] + q->rq.count[WRITE]);
320 320
321 q->unplug_fn(q); 321 q->unplug_fn(q);
322 } 322 }
323 } 323 }
324 EXPORT_SYMBOL(blk_unplug); 324 EXPORT_SYMBOL(blk_unplug);
325 325
326 static void blk_invoke_request_fn(struct request_queue *q) 326 static void blk_invoke_request_fn(struct request_queue *q)
327 { 327 {
328 /* 328 /*
329 * one level of recursion is ok and is much faster than kicking 329 * one level of recursion is ok and is much faster than kicking
330 * the unplug handling 330 * the unplug handling
331 */ 331 */
332 if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { 332 if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
333 q->request_fn(q); 333 q->request_fn(q);
334 queue_flag_clear(QUEUE_FLAG_REENTER, q); 334 queue_flag_clear(QUEUE_FLAG_REENTER, q);
335 } else { 335 } else {
336 queue_flag_set(QUEUE_FLAG_PLUGGED, q); 336 queue_flag_set(QUEUE_FLAG_PLUGGED, q);
337 kblockd_schedule_work(q, &q->unplug_work); 337 kblockd_schedule_work(q, &q->unplug_work);
338 } 338 }
339 } 339 }
340 340
341 /** 341 /**
342 * blk_start_queue - restart a previously stopped queue 342 * blk_start_queue - restart a previously stopped queue
343 * @q: The &struct request_queue in question 343 * @q: The &struct request_queue in question
344 * 344 *
345 * Description: 345 * Description:
346 * blk_start_queue() will clear the stop flag on the queue, and call 346 * blk_start_queue() will clear the stop flag on the queue, and call
347 * the request_fn for the queue if it was in a stopped state when 347 * the request_fn for the queue if it was in a stopped state when
348 * entered. Also see blk_stop_queue(). Queue lock must be held. 348 * entered. Also see blk_stop_queue(). Queue lock must be held.
349 **/ 349 **/
350 void blk_start_queue(struct request_queue *q) 350 void blk_start_queue(struct request_queue *q)
351 { 351 {
352 WARN_ON(!irqs_disabled()); 352 WARN_ON(!irqs_disabled());
353 353
354 queue_flag_clear(QUEUE_FLAG_STOPPED, q); 354 queue_flag_clear(QUEUE_FLAG_STOPPED, q);
355 blk_invoke_request_fn(q); 355 blk_invoke_request_fn(q);
356 } 356 }
357 EXPORT_SYMBOL(blk_start_queue); 357 EXPORT_SYMBOL(blk_start_queue);
358 358
359 /** 359 /**
360 * blk_stop_queue - stop a queue 360 * blk_stop_queue - stop a queue
361 * @q: The &struct request_queue in question 361 * @q: The &struct request_queue in question
362 * 362 *
363 * Description: 363 * Description:
364 * The Linux block layer assumes that a block driver will consume all 364 * The Linux block layer assumes that a block driver will consume all
365 * entries on the request queue when the request_fn strategy is called. 365 * entries on the request queue when the request_fn strategy is called.
366 * Often this will not happen, because of hardware limitations (queue 366 * Often this will not happen, because of hardware limitations (queue
367 * depth settings). If a device driver gets a 'queue full' response, 367 * depth settings). If a device driver gets a 'queue full' response,
368 * or if it simply chooses not to queue more I/O at one point, it can 368 * or if it simply chooses not to queue more I/O at one point, it can
369 * call this function to prevent the request_fn from being called until 369 * call this function to prevent the request_fn from being called until
370 * the driver has signalled it's ready to go again. This happens by calling 370 * the driver has signalled it's ready to go again. This happens by calling
371 * blk_start_queue() to restart queue operations. Queue lock must be held. 371 * blk_start_queue() to restart queue operations. Queue lock must be held.
372 **/ 372 **/
373 void blk_stop_queue(struct request_queue *q) 373 void blk_stop_queue(struct request_queue *q)
374 { 374 {
375 blk_remove_plug(q); 375 blk_remove_plug(q);
376 queue_flag_set(QUEUE_FLAG_STOPPED, q); 376 queue_flag_set(QUEUE_FLAG_STOPPED, q);
377 } 377 }
378 EXPORT_SYMBOL(blk_stop_queue); 378 EXPORT_SYMBOL(blk_stop_queue);
379 379
380 /** 380 /**
381 * blk_sync_queue - cancel any pending callbacks on a queue 381 * blk_sync_queue - cancel any pending callbacks on a queue
382 * @q: the queue 382 * @q: the queue
383 * 383 *
384 * Description: 384 * Description:
385 * The block layer may perform asynchronous callback activity 385 * The block layer may perform asynchronous callback activity
386 * on a queue, such as calling the unplug function after a timeout. 386 * on a queue, such as calling the unplug function after a timeout.
387 * A block device may call blk_sync_queue to ensure that any 387 * A block device may call blk_sync_queue to ensure that any
388 * such activity is cancelled, thus allowing it to release resources 388 * such activity is cancelled, thus allowing it to release resources
389 * that the callbacks might use. The caller must already have made sure 389 * that the callbacks might use. The caller must already have made sure
390 * that its ->make_request_fn will not re-add plugging prior to calling 390 * that its ->make_request_fn will not re-add plugging prior to calling
391 * this function. 391 * this function.
392 * 392 *
393 */ 393 */
394 void blk_sync_queue(struct request_queue *q) 394 void blk_sync_queue(struct request_queue *q)
395 { 395 {
396 del_timer_sync(&q->unplug_timer); 396 del_timer_sync(&q->unplug_timer);
397 kblockd_flush_work(&q->unplug_work); 397 kblockd_flush_work(&q->unplug_work);
398 } 398 }
399 EXPORT_SYMBOL(blk_sync_queue); 399 EXPORT_SYMBOL(blk_sync_queue);
400 400
401 /** 401 /**
402 * blk_run_queue - run a single device queue 402 * blk_run_queue - run a single device queue
403 * @q: The queue to run 403 * @q: The queue to run
404 */ 404 */
405 void __blk_run_queue(struct request_queue *q) 405 void __blk_run_queue(struct request_queue *q)
406 { 406 {
407 blk_remove_plug(q); 407 blk_remove_plug(q);
408 408
409 /* 409 /*
410 * Only recurse once to avoid overrunning the stack, let the unplug 410 * Only recurse once to avoid overrunning the stack, let the unplug
411 * handling reinvoke the handler shortly if we already got there. 411 * handling reinvoke the handler shortly if we already got there.
412 */ 412 */
413 if (!elv_queue_empty(q)) 413 if (!elv_queue_empty(q))
414 blk_invoke_request_fn(q); 414 blk_invoke_request_fn(q);
415 } 415 }
416 EXPORT_SYMBOL(__blk_run_queue); 416 EXPORT_SYMBOL(__blk_run_queue);
417 417
418 /** 418 /**
419 * blk_run_queue - run a single device queue 419 * blk_run_queue - run a single device queue
420 * @q: The queue to run 420 * @q: The queue to run
421 */ 421 */
422 void blk_run_queue(struct request_queue *q) 422 void blk_run_queue(struct request_queue *q)
423 { 423 {
424 unsigned long flags; 424 unsigned long flags;
425 425
426 spin_lock_irqsave(q->queue_lock, flags); 426 spin_lock_irqsave(q->queue_lock, flags);
427 __blk_run_queue(q); 427 __blk_run_queue(q);
428 spin_unlock_irqrestore(q->queue_lock, flags); 428 spin_unlock_irqrestore(q->queue_lock, flags);
429 } 429 }
430 EXPORT_SYMBOL(blk_run_queue); 430 EXPORT_SYMBOL(blk_run_queue);
431 431
432 void blk_put_queue(struct request_queue *q) 432 void blk_put_queue(struct request_queue *q)
433 { 433 {
434 kobject_put(&q->kobj); 434 kobject_put(&q->kobj);
435 } 435 }
436 436
437 void blk_cleanup_queue(struct request_queue *q) 437 void blk_cleanup_queue(struct request_queue *q)
438 { 438 {
439 /* 439 /*
440 * We know we have process context here, so we can be a little 440 * We know we have process context here, so we can be a little
441 * cautious and ensure that pending block actions on this device 441 * cautious and ensure that pending block actions on this device
442 * are done before moving on. Going into this function, we should 442 * are done before moving on. Going into this function, we should
443 * not have processes doing IO to this device. 443 * not have processes doing IO to this device.
444 */ 444 */
445 blk_sync_queue(q); 445 blk_sync_queue(q);
446 446
447 mutex_lock(&q->sysfs_lock); 447 mutex_lock(&q->sysfs_lock);
448 queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); 448 queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
449 mutex_unlock(&q->sysfs_lock); 449 mutex_unlock(&q->sysfs_lock);
450 450
451 if (q->elevator) 451 if (q->elevator)
452 elevator_exit(q->elevator); 452 elevator_exit(q->elevator);
453 453
454 blk_put_queue(q); 454 blk_put_queue(q);
455 } 455 }
456 EXPORT_SYMBOL(blk_cleanup_queue); 456 EXPORT_SYMBOL(blk_cleanup_queue);
457 457
458 static int blk_init_free_list(struct request_queue *q) 458 static int blk_init_free_list(struct request_queue *q)
459 { 459 {
460 struct request_list *rl = &q->rq; 460 struct request_list *rl = &q->rq;
461 461
462 rl->count[READ] = rl->count[WRITE] = 0; 462 rl->count[READ] = rl->count[WRITE] = 0;
463 rl->starved[READ] = rl->starved[WRITE] = 0; 463 rl->starved[READ] = rl->starved[WRITE] = 0;
464 rl->elvpriv = 0; 464 rl->elvpriv = 0;
465 init_waitqueue_head(&rl->wait[READ]); 465 init_waitqueue_head(&rl->wait[READ]);
466 init_waitqueue_head(&rl->wait[WRITE]); 466 init_waitqueue_head(&rl->wait[WRITE]);
467 467
468 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, 468 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
469 mempool_free_slab, request_cachep, q->node); 469 mempool_free_slab, request_cachep, q->node);
470 470
471 if (!rl->rq_pool) 471 if (!rl->rq_pool)
472 return -ENOMEM; 472 return -ENOMEM;
473 473
474 return 0; 474 return 0;
475 } 475 }
476 476
477 struct request_queue *blk_alloc_queue(gfp_t gfp_mask) 477 struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
478 { 478 {
479 return blk_alloc_queue_node(gfp_mask, -1); 479 return blk_alloc_queue_node(gfp_mask, -1);
480 } 480 }
481 EXPORT_SYMBOL(blk_alloc_queue); 481 EXPORT_SYMBOL(blk_alloc_queue);
482 482
483 struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) 483 struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
484 { 484 {
485 struct request_queue *q; 485 struct request_queue *q;
486 int err; 486 int err;
487 487
488 q = kmem_cache_alloc_node(blk_requestq_cachep, 488 q = kmem_cache_alloc_node(blk_requestq_cachep,
489 gfp_mask | __GFP_ZERO, node_id); 489 gfp_mask | __GFP_ZERO, node_id);
490 if (!q) 490 if (!q)
491 return NULL; 491 return NULL;
492 492
493 q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug; 493 q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
494 q->backing_dev_info.unplug_io_data = q; 494 q->backing_dev_info.unplug_io_data = q;
495 err = bdi_init(&q->backing_dev_info); 495 err = bdi_init(&q->backing_dev_info);
496 if (err) { 496 if (err) {
497 kmem_cache_free(blk_requestq_cachep, q); 497 kmem_cache_free(blk_requestq_cachep, q);
498 return NULL; 498 return NULL;
499 } 499 }
500 500
501 init_timer(&q->unplug_timer); 501 init_timer(&q->unplug_timer);
502 setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); 502 setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
503 INIT_LIST_HEAD(&q->timeout_list); 503 INIT_LIST_HEAD(&q->timeout_list);
504 504
505 kobject_init(&q->kobj, &blk_queue_ktype); 505 kobject_init(&q->kobj, &blk_queue_ktype);
506 506
507 mutex_init(&q->sysfs_lock); 507 mutex_init(&q->sysfs_lock);
508 spin_lock_init(&q->__queue_lock); 508 spin_lock_init(&q->__queue_lock);
509 509
510 return q; 510 return q;
511 } 511 }
512 EXPORT_SYMBOL(blk_alloc_queue_node); 512 EXPORT_SYMBOL(blk_alloc_queue_node);
513 513
514 /** 514 /**
515 * blk_init_queue - prepare a request queue for use with a block device 515 * blk_init_queue - prepare a request queue for use with a block device
516 * @rfn: The function to be called to process requests that have been 516 * @rfn: The function to be called to process requests that have been
517 * placed on the queue. 517 * placed on the queue.
518 * @lock: Request queue spin lock 518 * @lock: Request queue spin lock
519 * 519 *
520 * Description: 520 * Description:
521 * If a block device wishes to use the standard request handling procedures, 521 * If a block device wishes to use the standard request handling procedures,
522 * which sorts requests and coalesces adjacent requests, then it must 522 * which sorts requests and coalesces adjacent requests, then it must
523 * call blk_init_queue(). The function @rfn will be called when there 523 * call blk_init_queue(). The function @rfn will be called when there
524 * are requests on the queue that need to be processed. If the device 524 * are requests on the queue that need to be processed. If the device
525 * supports plugging, then @rfn may not be called immediately when requests 525 * supports plugging, then @rfn may not be called immediately when requests
526 * are available on the queue, but may be called at some time later instead. 526 * are available on the queue, but may be called at some time later instead.
527 * Plugged queues are generally unplugged when a buffer belonging to one 527 * Plugged queues are generally unplugged when a buffer belonging to one
528 * of the requests on the queue is needed, or due to memory pressure. 528 * of the requests on the queue is needed, or due to memory pressure.
529 * 529 *
530 * @rfn is not required, or even expected, to remove all requests off the 530 * @rfn is not required, or even expected, to remove all requests off the
531 * queue, but only as many as it can handle at a time. If it does leave 531 * queue, but only as many as it can handle at a time. If it does leave
532 * requests on the queue, it is responsible for arranging that the requests 532 * requests on the queue, it is responsible for arranging that the requests
533 * get dealt with eventually. 533 * get dealt with eventually.
534 * 534 *
535 * The queue spin lock must be held while manipulating the requests on the 535 * The queue spin lock must be held while manipulating the requests on the
536 * request queue; this lock will be taken also from interrupt context, so irq 536 * request queue; this lock will be taken also from interrupt context, so irq
537 * disabling is needed for it. 537 * disabling is needed for it.
538 * 538 *
539 * Function returns a pointer to the initialized request queue, or %NULL if 539 * Function returns a pointer to the initialized request queue, or %NULL if
540 * it didn't succeed. 540 * it didn't succeed.
541 * 541 *
542 * Note: 542 * Note:
543 * blk_init_queue() must be paired with a blk_cleanup_queue() call 543 * blk_init_queue() must be paired with a blk_cleanup_queue() call
544 * when the block device is deactivated (such as at module unload). 544 * when the block device is deactivated (such as at module unload).
545 **/ 545 **/
546 546
547 struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) 547 struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
548 { 548 {
549 return blk_init_queue_node(rfn, lock, -1); 549 return blk_init_queue_node(rfn, lock, -1);
550 } 550 }
551 EXPORT_SYMBOL(blk_init_queue); 551 EXPORT_SYMBOL(blk_init_queue);
552 552
553 struct request_queue * 553 struct request_queue *
554 blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) 554 blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
555 { 555 {
556 struct request_queue *q = blk_alloc_queue_node(GFP_KERNEL, node_id); 556 struct request_queue *q = blk_alloc_queue_node(GFP_KERNEL, node_id);
557 557
558 if (!q) 558 if (!q)
559 return NULL; 559 return NULL;
560 560
561 q->node = node_id; 561 q->node = node_id;
562 if (blk_init_free_list(q)) { 562 if (blk_init_free_list(q)) {
563 kmem_cache_free(blk_requestq_cachep, q); 563 kmem_cache_free(blk_requestq_cachep, q);
564 return NULL; 564 return NULL;
565 } 565 }
566 566
567 /* 567 /*
568 * if caller didn't supply a lock, they get per-queue locking with 568 * if caller didn't supply a lock, they get per-queue locking with
569 * our embedded lock 569 * our embedded lock
570 */ 570 */
571 if (!lock) 571 if (!lock)
572 lock = &q->__queue_lock; 572 lock = &q->__queue_lock;
573 573
574 q->request_fn = rfn; 574 q->request_fn = rfn;
575 q->prep_rq_fn = NULL; 575 q->prep_rq_fn = NULL;
576 q->unplug_fn = generic_unplug_device; 576 q->unplug_fn = generic_unplug_device;
577 q->queue_flags = (1 << QUEUE_FLAG_CLUSTER); 577 q->queue_flags = (1 << QUEUE_FLAG_CLUSTER);
578 q->queue_lock = lock; 578 q->queue_lock = lock;
579 579
580 blk_queue_segment_boundary(q, 0xffffffff); 580 blk_queue_segment_boundary(q, 0xffffffff);
581 581
582 blk_queue_make_request(q, __make_request); 582 blk_queue_make_request(q, __make_request);
583 blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE); 583 blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);
584 584
585 blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS); 585 blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
586 blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS); 586 blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
587 587
588 q->sg_reserved_size = INT_MAX; 588 q->sg_reserved_size = INT_MAX;
589 589
590 blk_set_cmd_filter_defaults(&q->cmd_filter); 590 blk_set_cmd_filter_defaults(&q->cmd_filter);
591 591
592 /* 592 /*
593 * all done 593 * all done
594 */ 594 */
595 if (!elevator_init(q, NULL)) { 595 if (!elevator_init(q, NULL)) {
596 blk_queue_congestion_threshold(q); 596 blk_queue_congestion_threshold(q);
597 return q; 597 return q;
598 } 598 }
599 599
600 blk_put_queue(q); 600 blk_put_queue(q);
601 return NULL; 601 return NULL;
602 } 602 }
603 EXPORT_SYMBOL(blk_init_queue_node); 603 EXPORT_SYMBOL(blk_init_queue_node);
604 604
605 int blk_get_queue(struct request_queue *q) 605 int blk_get_queue(struct request_queue *q)
606 { 606 {
607 if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { 607 if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
608 kobject_get(&q->kobj); 608 kobject_get(&q->kobj);
609 return 0; 609 return 0;
610 } 610 }
611 611
612 return 1; 612 return 1;
613 } 613 }
614 614
615 static inline void blk_free_request(struct request_queue *q, struct request *rq) 615 static inline void blk_free_request(struct request_queue *q, struct request *rq)
616 { 616 {
617 if (rq->cmd_flags & REQ_ELVPRIV) 617 if (rq->cmd_flags & REQ_ELVPRIV)
618 elv_put_request(q, rq); 618 elv_put_request(q, rq);
619 mempool_free(rq, q->rq.rq_pool); 619 mempool_free(rq, q->rq.rq_pool);
620 } 620 }
621 621
622 static struct request * 622 static struct request *
623 blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask) 623 blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask)
624 { 624 {
625 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); 625 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
626 626
627 if (!rq) 627 if (!rq)
628 return NULL; 628 return NULL;
629 629
630 blk_rq_init(q, rq); 630 blk_rq_init(q, rq);
631 631
632 rq->cmd_flags = rw | REQ_ALLOCED; 632 rq->cmd_flags = rw | REQ_ALLOCED;
633 633
634 if (priv) { 634 if (priv) {
635 if (unlikely(elv_set_request(q, rq, gfp_mask))) { 635 if (unlikely(elv_set_request(q, rq, gfp_mask))) {
636 mempool_free(rq, q->rq.rq_pool); 636 mempool_free(rq, q->rq.rq_pool);
637 return NULL; 637 return NULL;
638 } 638 }
639 rq->cmd_flags |= REQ_ELVPRIV; 639 rq->cmd_flags |= REQ_ELVPRIV;
640 } 640 }
641 641
642 return rq; 642 return rq;
643 } 643 }
644 644
645 /* 645 /*
646 * ioc_batching returns true if the ioc is a valid batching request and 646 * ioc_batching returns true if the ioc is a valid batching request and
647 * should be given priority access to a request. 647 * should be given priority access to a request.
648 */ 648 */
649 static inline int ioc_batching(struct request_queue *q, struct io_context *ioc) 649 static inline int ioc_batching(struct request_queue *q, struct io_context *ioc)
650 { 650 {
651 if (!ioc) 651 if (!ioc)
652 return 0; 652 return 0;
653 653
654 /* 654 /*
655 * Make sure the process is able to allocate at least 1 request 655 * Make sure the process is able to allocate at least 1 request
656 * even if the batch times out, otherwise we could theoretically 656 * even if the batch times out, otherwise we could theoretically
657 * lose wakeups. 657 * lose wakeups.
658 */ 658 */
659 return ioc->nr_batch_requests == q->nr_batching || 659 return ioc->nr_batch_requests == q->nr_batching ||
660 (ioc->nr_batch_requests > 0 660 (ioc->nr_batch_requests > 0
661 && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME)); 661 && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
662 } 662 }
663 663
664 /* 664 /*
665 * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This 665 * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This
666 * will cause the process to be a "batcher" on all queues in the system. This 666 * will cause the process to be a "batcher" on all queues in the system. This
667 * is the behaviour we want though - once it gets a wakeup it should be given 667 * is the behaviour we want though - once it gets a wakeup it should be given
668 * a nice run. 668 * a nice run.
669 */ 669 */
670 static void ioc_set_batching(struct request_queue *q, struct io_context *ioc) 670 static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
671 { 671 {
672 if (!ioc || ioc_batching(q, ioc)) 672 if (!ioc || ioc_batching(q, ioc))
673 return; 673 return;
674 674
675 ioc->nr_batch_requests = q->nr_batching; 675 ioc->nr_batch_requests = q->nr_batching;
676 ioc->last_waited = jiffies; 676 ioc->last_waited = jiffies;
677 } 677 }
678 678
679 static void __freed_request(struct request_queue *q, int rw) 679 static void __freed_request(struct request_queue *q, int rw)
680 { 680 {
681 struct request_list *rl = &q->rq; 681 struct request_list *rl = &q->rq;
682 682
683 if (rl->count[rw] < queue_congestion_off_threshold(q)) 683 if (rl->count[rw] < queue_congestion_off_threshold(q))
684 blk_clear_queue_congested(q, rw); 684 blk_clear_queue_congested(q, rw);
685 685
686 if (rl->count[rw] + 1 <= q->nr_requests) { 686 if (rl->count[rw] + 1 <= q->nr_requests) {
687 if (waitqueue_active(&rl->wait[rw])) 687 if (waitqueue_active(&rl->wait[rw]))
688 wake_up(&rl->wait[rw]); 688 wake_up(&rl->wait[rw]);
689 689
690 blk_clear_queue_full(q, rw); 690 blk_clear_queue_full(q, rw);
691 } 691 }
692 } 692 }
693 693
694 /* 694 /*
695 * A request has just been released. Account for it, update the full and 695 * A request has just been released. Account for it, update the full and
696 * congestion status, wake up any waiters. Called under q->queue_lock. 696 * congestion status, wake up any waiters. Called under q->queue_lock.
697 */ 697 */
698 static void freed_request(struct request_queue *q, int rw, int priv) 698 static void freed_request(struct request_queue *q, int rw, int priv)
699 { 699 {
700 struct request_list *rl = &q->rq; 700 struct request_list *rl = &q->rq;
701 701
702 rl->count[rw]--; 702 rl->count[rw]--;
703 if (priv) 703 if (priv)
704 rl->elvpriv--; 704 rl->elvpriv--;
705 705
706 __freed_request(q, rw); 706 __freed_request(q, rw);
707 707
708 if (unlikely(rl->starved[rw ^ 1])) 708 if (unlikely(rl->starved[rw ^ 1]))
709 __freed_request(q, rw ^ 1); 709 __freed_request(q, rw ^ 1);
710 } 710 }
711 711
712 #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) 712 #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist)
713 /* 713 /*
714 * Get a free request, queue_lock must be held. 714 * Get a free request, queue_lock must be held.
715 * Returns NULL on failure, with queue_lock held. 715 * Returns NULL on failure, with queue_lock held.
716 * Returns !NULL on success, with queue_lock *not held*. 716 * Returns !NULL on success, with queue_lock *not held*.
717 */ 717 */
718 static struct request *get_request(struct request_queue *q, int rw_flags, 718 static struct request *get_request(struct request_queue *q, int rw_flags,
719 struct bio *bio, gfp_t gfp_mask) 719 struct bio *bio, gfp_t gfp_mask)
720 { 720 {
721 struct request *rq = NULL; 721 struct request *rq = NULL;
722 struct request_list *rl = &q->rq; 722 struct request_list *rl = &q->rq;
723 struct io_context *ioc = NULL; 723 struct io_context *ioc = NULL;
724 const int rw = rw_flags & 0x01; 724 const int rw = rw_flags & 0x01;
725 int may_queue, priv; 725 int may_queue, priv;
726 726
727 may_queue = elv_may_queue(q, rw_flags); 727 may_queue = elv_may_queue(q, rw_flags);
728 if (may_queue == ELV_MQUEUE_NO) 728 if (may_queue == ELV_MQUEUE_NO)
729 goto rq_starved; 729 goto rq_starved;
730 730
731 if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) { 731 if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) {
732 if (rl->count[rw]+1 >= q->nr_requests) { 732 if (rl->count[rw]+1 >= q->nr_requests) {
733 ioc = current_io_context(GFP_ATOMIC, q->node); 733 ioc = current_io_context(GFP_ATOMIC, q->node);
734 /* 734 /*
735 * The queue will fill after this allocation, so set 735 * The queue will fill after this allocation, so set
736 * it as full, and mark this process as "batching". 736 * it as full, and mark this process as "batching".
737 * This process will be allowed to complete a batch of 737 * This process will be allowed to complete a batch of
738 * requests, others will be blocked. 738 * requests, others will be blocked.
739 */ 739 */
740 if (!blk_queue_full(q, rw)) { 740 if (!blk_queue_full(q, rw)) {
741 ioc_set_batching(q, ioc); 741 ioc_set_batching(q, ioc);
742 blk_set_queue_full(q, rw); 742 blk_set_queue_full(q, rw);
743 } else { 743 } else {
744 if (may_queue != ELV_MQUEUE_MUST 744 if (may_queue != ELV_MQUEUE_MUST
745 && !ioc_batching(q, ioc)) { 745 && !ioc_batching(q, ioc)) {
746 /* 746 /*
747 * The queue is full and the allocating 747 * The queue is full and the allocating
748 * process is not a "batcher", and not 748 * process is not a "batcher", and not
749 * exempted by the IO scheduler 749 * exempted by the IO scheduler
750 */ 750 */
751 goto out; 751 goto out;
752 } 752 }
753 } 753 }
754 } 754 }
755 blk_set_queue_congested(q, rw); 755 blk_set_queue_congested(q, rw);
756 } 756 }
757 757
758 /* 758 /*
759 * Only allow batching queuers to allocate up to 50% over the defined 759 * Only allow batching queuers to allocate up to 50% over the defined
760 * limit of requests, otherwise we could have thousands of requests 760 * limit of requests, otherwise we could have thousands of requests
761 * allocated with any setting of ->nr_requests 761 * allocated with any setting of ->nr_requests
762 */ 762 */
763 if (rl->count[rw] >= (3 * q->nr_requests / 2)) 763 if (rl->count[rw] >= (3 * q->nr_requests / 2))
764 goto out; 764 goto out;
765 765
766 rl->count[rw]++; 766 rl->count[rw]++;
767 rl->starved[rw] = 0; 767 rl->starved[rw] = 0;
768 768
769 priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); 769 priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
770 if (priv) 770 if (priv)
771 rl->elvpriv++; 771 rl->elvpriv++;
772 772
773 spin_unlock_irq(q->queue_lock); 773 spin_unlock_irq(q->queue_lock);
774 774
775 rq = blk_alloc_request(q, rw_flags, priv, gfp_mask); 775 rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);
776 if (unlikely(!rq)) { 776 if (unlikely(!rq)) {
777 /* 777 /*
778 * Allocation failed presumably due to memory. Undo anything 778 * Allocation failed presumably due to memory. Undo anything
779 * we might have messed up. 779 * we might have messed up.
780 * 780 *
781 * Allocating task should really be put onto the front of the 781 * Allocating task should really be put onto the front of the
782 * wait queue, but this is pretty rare. 782 * wait queue, but this is pretty rare.
783 */ 783 */
784 spin_lock_irq(q->queue_lock); 784 spin_lock_irq(q->queue_lock);
785 freed_request(q, rw, priv); 785 freed_request(q, rw, priv);
786 786
787 /* 787 /*
788 * in the very unlikely event that allocation failed and no 788 * in the very unlikely event that allocation failed and no
789 * requests for this direction was pending, mark us starved 789 * requests for this direction was pending, mark us starved
790 * so that freeing of a request in the other direction will 790 * so that freeing of a request in the other direction will
791 * notice us. another possible fix would be to split the 791 * notice us. another possible fix would be to split the
792 * rq mempool into READ and WRITE 792 * rq mempool into READ and WRITE
793 */ 793 */
794 rq_starved: 794 rq_starved:
795 if (unlikely(rl->count[rw] == 0)) 795 if (unlikely(rl->count[rw] == 0))
796 rl->starved[rw] = 1; 796 rl->starved[rw] = 1;
797 797
798 goto out; 798 goto out;
799 } 799 }
800 800
801 /* 801 /*
802 * ioc may be NULL here, and ioc_batching will be false. That's 802 * ioc may be NULL here, and ioc_batching will be false. That's
803 * OK, if the queue is under the request limit then requests need 803 * OK, if the queue is under the request limit then requests need
804 * not count toward the nr_batch_requests limit. There will always 804 * not count toward the nr_batch_requests limit. There will always
805 * be some limit enforced by BLK_BATCH_TIME. 805 * be some limit enforced by BLK_BATCH_TIME.
806 */ 806 */
807 if (ioc_batching(q, ioc)) 807 if (ioc_batching(q, ioc))
808 ioc->nr_batch_requests--; 808 ioc->nr_batch_requests--;
809 809
810 blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ); 810 blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ);
811 out: 811 out:
812 return rq; 812 return rq;
813 } 813 }
814 814
815 /* 815 /*
816 * No available requests for this queue, unplug the device and wait for some 816 * No available requests for this queue, unplug the device and wait for some
817 * requests to become available. 817 * requests to become available.
818 * 818 *
819 * Called with q->queue_lock held, and returns with it unlocked. 819 * Called with q->queue_lock held, and returns with it unlocked.
820 */ 820 */
821 static struct request *get_request_wait(struct request_queue *q, int rw_flags, 821 static struct request *get_request_wait(struct request_queue *q, int rw_flags,
822 struct bio *bio) 822 struct bio *bio)
823 { 823 {
824 const int rw = rw_flags & 0x01; 824 const int rw = rw_flags & 0x01;
825 struct request *rq; 825 struct request *rq;
826 826
827 rq = get_request(q, rw_flags, bio, GFP_NOIO); 827 rq = get_request(q, rw_flags, bio, GFP_NOIO);
828 while (!rq) { 828 while (!rq) {
829 DEFINE_WAIT(wait); 829 DEFINE_WAIT(wait);
830 struct io_context *ioc; 830 struct io_context *ioc;
831 struct request_list *rl = &q->rq; 831 struct request_list *rl = &q->rq;
832 832
833 prepare_to_wait_exclusive(&rl->wait[rw], &wait, 833 prepare_to_wait_exclusive(&rl->wait[rw], &wait,
834 TASK_UNINTERRUPTIBLE); 834 TASK_UNINTERRUPTIBLE);
835 835
836 blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ); 836 blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ);
837 837
838 __generic_unplug_device(q); 838 __generic_unplug_device(q);
839 spin_unlock_irq(q->queue_lock); 839 spin_unlock_irq(q->queue_lock);
840 io_schedule(); 840 io_schedule();
841 841
842 /* 842 /*
843 * After sleeping, we become a "batching" process and 843 * After sleeping, we become a "batching" process and
844 * will be able to allocate at least one request, and 844 * will be able to allocate at least one request, and
845 * up to a big batch of them for a small period time. 845 * up to a big batch of them for a small period time.
846 * See ioc_batching, ioc_set_batching 846 * See ioc_batching, ioc_set_batching
847 */ 847 */
848 ioc = current_io_context(GFP_NOIO, q->node); 848 ioc = current_io_context(GFP_NOIO, q->node);
849 ioc_set_batching(q, ioc); 849 ioc_set_batching(q, ioc);
850 850
851 spin_lock_irq(q->queue_lock); 851 spin_lock_irq(q->queue_lock);
852 finish_wait(&rl->wait[rw], &wait); 852 finish_wait(&rl->wait[rw], &wait);
853 853
854 rq = get_request(q, rw_flags, bio, GFP_NOIO); 854 rq = get_request(q, rw_flags, bio, GFP_NOIO);
855 }; 855 };
856 856
857 return rq; 857 return rq;
858 } 858 }
859 859
860 struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) 860 struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
861 { 861 {
862 struct request *rq; 862 struct request *rq;
863 863
864 BUG_ON(rw != READ && rw != WRITE); 864 BUG_ON(rw != READ && rw != WRITE);
865 865
866 spin_lock_irq(q->queue_lock); 866 spin_lock_irq(q->queue_lock);
867 if (gfp_mask & __GFP_WAIT) { 867 if (gfp_mask & __GFP_WAIT) {
868 rq = get_request_wait(q, rw, NULL); 868 rq = get_request_wait(q, rw, NULL);
869 } else { 869 } else {
870 rq = get_request(q, rw, NULL, gfp_mask); 870 rq = get_request(q, rw, NULL, gfp_mask);
871 if (!rq) 871 if (!rq)
872 spin_unlock_irq(q->queue_lock); 872 spin_unlock_irq(q->queue_lock);
873 } 873 }
874 /* q->queue_lock is unlocked at this point */ 874 /* q->queue_lock is unlocked at this point */
875 875
876 return rq; 876 return rq;
877 } 877 }
878 EXPORT_SYMBOL(blk_get_request); 878 EXPORT_SYMBOL(blk_get_request);
879 879
880 /** 880 /**
881 * blk_start_queueing - initiate dispatch of requests to device 881 * blk_start_queueing - initiate dispatch of requests to device
882 * @q: request queue to kick into gear 882 * @q: request queue to kick into gear
883 * 883 *
884 * This is basically a helper to remove the need to know whether a queue 884 * This is basically a helper to remove the need to know whether a queue
885 * is plugged or not if someone just wants to initiate dispatch of requests 885 * is plugged or not if someone just wants to initiate dispatch of requests
886 * for this queue. 886 * for this queue.
887 * 887 *
888 * The queue lock must be held with interrupts disabled. 888 * The queue lock must be held with interrupts disabled.
889 */ 889 */
890 void blk_start_queueing(struct request_queue *q) 890 void blk_start_queueing(struct request_queue *q)
891 { 891 {
892 if (!blk_queue_plugged(q)) 892 if (!blk_queue_plugged(q))
893 q->request_fn(q); 893 q->request_fn(q);
894 else 894 else
895 __generic_unplug_device(q); 895 __generic_unplug_device(q);
896 } 896 }
897 EXPORT_SYMBOL(blk_start_queueing); 897 EXPORT_SYMBOL(blk_start_queueing);
898 898
899 /** 899 /**
900 * blk_requeue_request - put a request back on queue 900 * blk_requeue_request - put a request back on queue
901 * @q: request queue where request should be inserted 901 * @q: request queue where request should be inserted
902 * @rq: request to be inserted 902 * @rq: request to be inserted
903 * 903 *
904 * Description: 904 * Description:
905 * Drivers often keep queueing requests until the hardware cannot accept 905 * Drivers often keep queueing requests until the hardware cannot accept
906 * more, when that condition happens we need to put the request back 906 * more, when that condition happens we need to put the request back
907 * on the queue. Must be called with queue lock held. 907 * on the queue. Must be called with queue lock held.
908 */ 908 */
909 void blk_requeue_request(struct request_queue *q, struct request *rq) 909 void blk_requeue_request(struct request_queue *q, struct request *rq)
910 { 910 {
911 blk_delete_timer(rq); 911 blk_delete_timer(rq);
912 blk_clear_rq_complete(rq); 912 blk_clear_rq_complete(rq);
913 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); 913 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
914 914
915 if (blk_rq_tagged(rq)) 915 if (blk_rq_tagged(rq))
916 blk_queue_end_tag(q, rq); 916 blk_queue_end_tag(q, rq);
917 917
918 elv_requeue_request(q, rq); 918 elv_requeue_request(q, rq);
919 } 919 }
920 EXPORT_SYMBOL(blk_requeue_request); 920 EXPORT_SYMBOL(blk_requeue_request);
921 921
922 /** 922 /**
923 * blk_insert_request - insert a special request into a request queue 923 * blk_insert_request - insert a special request into a request queue
924 * @q: request queue where request should be inserted 924 * @q: request queue where request should be inserted
925 * @rq: request to be inserted 925 * @rq: request to be inserted
926 * @at_head: insert request at head or tail of queue 926 * @at_head: insert request at head or tail of queue
927 * @data: private data 927 * @data: private data
928 * 928 *
929 * Description: 929 * Description:
930 * Many block devices need to execute commands asynchronously, so they don't 930 * Many block devices need to execute commands asynchronously, so they don't
931 * block the whole kernel from preemption during request execution. This is 931 * block the whole kernel from preemption during request execution. This is
932 * accomplished normally by inserting aritficial requests tagged as 932 * accomplished normally by inserting aritficial requests tagged as
933 * REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them 933 * REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them
934 * be scheduled for actual execution by the request queue. 934 * be scheduled for actual execution by the request queue.
935 * 935 *
936 * We have the option of inserting the head or the tail of the queue. 936 * We have the option of inserting the head or the tail of the queue.
937 * Typically we use the tail for new ioctls and so forth. We use the head 937 * Typically we use the tail for new ioctls and so forth. We use the head
938 * of the queue for things like a QUEUE_FULL message from a device, or a 938 * of the queue for things like a QUEUE_FULL message from a device, or a
939 * host that is unable to accept a particular command. 939 * host that is unable to accept a particular command.
940 */ 940 */
941 void blk_insert_request(struct request_queue *q, struct request *rq, 941 void blk_insert_request(struct request_queue *q, struct request *rq,
942 int at_head, void *data) 942 int at_head, void *data)
943 { 943 {
944 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; 944 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
945 unsigned long flags; 945 unsigned long flags;
946 946
947 /* 947 /*
948 * tell I/O scheduler that this isn't a regular read/write (ie it 948 * tell I/O scheduler that this isn't a regular read/write (ie it
949 * must not attempt merges on this) and that it acts as a soft 949 * must not attempt merges on this) and that it acts as a soft
950 * barrier 950 * barrier
951 */ 951 */
952 rq->cmd_type = REQ_TYPE_SPECIAL; 952 rq->cmd_type = REQ_TYPE_SPECIAL;
953 rq->cmd_flags |= REQ_SOFTBARRIER; 953 rq->cmd_flags |= REQ_SOFTBARRIER;
954 954
955 rq->special = data; 955 rq->special = data;
956 956
957 spin_lock_irqsave(q->queue_lock, flags); 957 spin_lock_irqsave(q->queue_lock, flags);
958 958
959 /* 959 /*
960 * If command is tagged, release the tag 960 * If command is tagged, release the tag
961 */ 961 */
962 if (blk_rq_tagged(rq)) 962 if (blk_rq_tagged(rq))
963 blk_queue_end_tag(q, rq); 963 blk_queue_end_tag(q, rq);
964 964
965 drive_stat_acct(rq, 1); 965 drive_stat_acct(rq, 1);
966 __elv_add_request(q, rq, where, 0); 966 __elv_add_request(q, rq, where, 0);
967 blk_start_queueing(q); 967 blk_start_queueing(q);
968 spin_unlock_irqrestore(q->queue_lock, flags); 968 spin_unlock_irqrestore(q->queue_lock, flags);
969 } 969 }
970 EXPORT_SYMBOL(blk_insert_request); 970 EXPORT_SYMBOL(blk_insert_request);
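As a rough illustration of the kernel-doc above, a driver could push a private command in front of the queue as follows. struct my_cmd and my_send_special() are made-up names; note that blk_insert_request() itself marks the request REQ_TYPE_SPECIAL and stores @data in rq->special.

static int my_send_special(struct request_queue *q, struct my_cmd *cmd)
{
	struct request *rq;

	rq = blk_get_request(q, READ, GFP_KERNEL);	/* may sleep */
	if (!rq)
		return -ENOMEM;

	/* insert at the head, e.g. in response to a QUEUE_FULL message */
	blk_insert_request(q, rq, 1, cmd);
	return 0;
}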
971 971
972 /* 972 /*
973 * add-request adds a request to the linked list. 973 * add-request adds a request to the linked list.
974 * queue lock is held and interrupts disabled, as we muck with the 974 * queue lock is held and interrupts disabled, as we muck with the
975 * request queue list. 975 * request queue list.
976 */ 976 */
977 static inline void add_request(struct request_queue *q, struct request *req) 977 static inline void add_request(struct request_queue *q, struct request *req)
978 { 978 {
979 drive_stat_acct(req, 1); 979 drive_stat_acct(req, 1);
980 980
981 /* 981 /*
982 * elevator indicated where it wants this request to be 982 * elevator indicated where it wants this request to be
983 * inserted at elevator_merge time 983 * inserted at elevator_merge time
984 */ 984 */
985 __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); 985 __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
986 } 986 }
987 987
988 static void part_round_stats_single(int cpu, struct hd_struct *part, 988 static void part_round_stats_single(int cpu, struct hd_struct *part,
989 unsigned long now) 989 unsigned long now)
990 { 990 {
991 if (now == part->stamp) 991 if (now == part->stamp)
992 return; 992 return;
993 993
994 if (part->in_flight) { 994 if (part->in_flight) {
995 __part_stat_add(cpu, part, time_in_queue, 995 __part_stat_add(cpu, part, time_in_queue,
996 part->in_flight * (now - part->stamp)); 996 part->in_flight * (now - part->stamp));
997 __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); 997 __part_stat_add(cpu, part, io_ticks, (now - part->stamp));
998 } 998 }
999 part->stamp = now; 999 part->stamp = now;
1000 } 1000 }
1001 1001
1002 /** 1002 /**
1003 * part_round_stats() - Round off the performance stats on a struct 1003 * part_round_stats() - Round off the performance stats on a struct
1004 * disk_stats. 1004 * disk_stats.
1005 * 1005 *
1006 * The average IO queue length and utilisation statistics are maintained 1006 * The average IO queue length and utilisation statistics are maintained
1007 * by observing the current state of the queue length and the amount of 1007 * by observing the current state of the queue length and the amount of
1008 * time it has been in this state for. 1008 * time it has been in this state for.
1009 * 1009 *
1010 * Normally, that accounting is done on IO completion, but that can result 1010 * Normally, that accounting is done on IO completion, but that can result
1011 * in more than a second's worth of IO being accounted for within any one 1011 * in more than a second's worth of IO being accounted for within any one
1012 * second, leading to >100% utilisation. To deal with that, we call this 1012 * second, leading to >100% utilisation. To deal with that, we call this
1013 * function to do a round-off before returning the results when reading 1013 * function to do a round-off before returning the results when reading
1014 * /proc/diskstats. This accounts immediately for all queue usage up to 1014 * /proc/diskstats. This accounts immediately for all queue usage up to
1015 * the current jiffies and restarts the counters again. 1015 * the current jiffies and restarts the counters again.
1016 */ 1016 */
1017 void part_round_stats(int cpu, struct hd_struct *part) 1017 void part_round_stats(int cpu, struct hd_struct *part)
1018 { 1018 {
1019 unsigned long now = jiffies; 1019 unsigned long now = jiffies;
1020 1020
1021 if (part->partno) 1021 if (part->partno)
1022 part_round_stats_single(cpu, &part_to_disk(part)->part0, now); 1022 part_round_stats_single(cpu, &part_to_disk(part)->part0, now);
1023 part_round_stats_single(cpu, part, now); 1023 part_round_stats_single(cpu, part, now);
1024 } 1024 }
1025 EXPORT_SYMBOL_GPL(part_round_stats); 1025 EXPORT_SYMBOL_GPL(part_round_stats);
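For reference, a caller sampling the statistics would round them off first, in the same way the /proc/diskstats path does; my_sample_part() is purely illustrative.

static void my_sample_part(struct hd_struct *part)
{
	int cpu = part_stat_lock();

	part_round_stats(cpu, part);
	/* ... read io_ticks, time_in_queue, etc. here ... */
	part_stat_unlock();
}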
1026 1026
1027 /* 1027 /*
1028 * queue lock must be held 1028 * queue lock must be held
1029 */ 1029 */
1030 void __blk_put_request(struct request_queue *q, struct request *req) 1030 void __blk_put_request(struct request_queue *q, struct request *req)
1031 { 1031 {
1032 if (unlikely(!q)) 1032 if (unlikely(!q))
1033 return; 1033 return;
1034 if (unlikely(--req->ref_count)) 1034 if (unlikely(--req->ref_count))
1035 return; 1035 return;
1036 1036
1037 elv_completed_request(q, req); 1037 elv_completed_request(q, req);
1038 1038
1039 /* 1039 /*
1040 * Request may not have originated from ll_rw_blk. if not, 1040 * Request may not have originated from ll_rw_blk. if not,
1041 * it didn't come out of our reserved rq pools 1041 * it didn't come out of our reserved rq pools
1042 */ 1042 */
1043 if (req->cmd_flags & REQ_ALLOCED) { 1043 if (req->cmd_flags & REQ_ALLOCED) {
1044 int rw = rq_data_dir(req); 1044 int rw = rq_data_dir(req);
1045 int priv = req->cmd_flags & REQ_ELVPRIV; 1045 int priv = req->cmd_flags & REQ_ELVPRIV;
1046 1046
1047 BUG_ON(!list_empty(&req->queuelist)); 1047 BUG_ON(!list_empty(&req->queuelist));
1048 BUG_ON(!hlist_unhashed(&req->hash)); 1048 BUG_ON(!hlist_unhashed(&req->hash));
1049 1049
1050 blk_free_request(q, req); 1050 blk_free_request(q, req);
1051 freed_request(q, rw, priv); 1051 freed_request(q, rw, priv);
1052 } 1052 }
1053 } 1053 }
1054 EXPORT_SYMBOL_GPL(__blk_put_request); 1054 EXPORT_SYMBOL_GPL(__blk_put_request);
1055 1055
1056 void blk_put_request(struct request *req) 1056 void blk_put_request(struct request *req)
1057 { 1057 {
1058 unsigned long flags; 1058 unsigned long flags;
1059 struct request_queue *q = req->q; 1059 struct request_queue *q = req->q;
1060 1060
1061 spin_lock_irqsave(q->queue_lock, flags); 1061 spin_lock_irqsave(q->queue_lock, flags);
1062 __blk_put_request(q, req); 1062 __blk_put_request(q, req);
1063 spin_unlock_irqrestore(q->queue_lock, flags); 1063 spin_unlock_irqrestore(q->queue_lock, flags);
1064 } 1064 }
1065 EXPORT_SYMBOL(blk_put_request); 1065 EXPORT_SYMBOL(blk_put_request);
1066 1066
1067 void init_request_from_bio(struct request *req, struct bio *bio) 1067 void init_request_from_bio(struct request *req, struct bio *bio)
1068 { 1068 {
1069 req->cpu = bio->bi_comp_cpu; 1069 req->cpu = bio->bi_comp_cpu;
1070 req->cmd_type = REQ_TYPE_FS; 1070 req->cmd_type = REQ_TYPE_FS;
1071 1071
1072 /* 1072 /*
1073 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) 1073 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
1074 */ 1074 */
1075 if (bio_rw_ahead(bio) || bio_failfast(bio)) 1075 if (bio_rw_ahead(bio) || bio_failfast(bio))
1076 req->cmd_flags |= REQ_FAILFAST; 1076 req->cmd_flags |= REQ_FAILFAST;
1077 1077
1078 /* 1078 /*
1079 * REQ_BARRIER implies no merging, but let's make it explicit 1079 * REQ_BARRIER implies no merging, but let's make it explicit
1080 */ 1080 */
1081 if (unlikely(bio_discard(bio))) { 1081 if (unlikely(bio_discard(bio))) {
1082 req->cmd_flags |= REQ_DISCARD; 1082 req->cmd_flags |= REQ_DISCARD;
1083 if (bio_barrier(bio)) 1083 if (bio_barrier(bio))
1084 req->cmd_flags |= REQ_SOFTBARRIER; 1084 req->cmd_flags |= REQ_SOFTBARRIER;
1085 req->q->prepare_discard_fn(req->q, req); 1085 req->q->prepare_discard_fn(req->q, req);
1086 } else if (unlikely(bio_barrier(bio))) 1086 } else if (unlikely(bio_barrier(bio)))
1087 req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE); 1087 req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
1088 1088
1089 if (bio_sync(bio)) 1089 if (bio_sync(bio))
1090 req->cmd_flags |= REQ_RW_SYNC; 1090 req->cmd_flags |= REQ_RW_SYNC;
1091 if (bio_rw_meta(bio)) 1091 if (bio_rw_meta(bio))
1092 req->cmd_flags |= REQ_RW_META; 1092 req->cmd_flags |= REQ_RW_META;
1093 1093
1094 req->errors = 0; 1094 req->errors = 0;
1095 req->hard_sector = req->sector = bio->bi_sector; 1095 req->hard_sector = req->sector = bio->bi_sector;
1096 req->ioprio = bio_prio(bio); 1096 req->ioprio = bio_prio(bio);
1097 req->start_time = jiffies; 1097 req->start_time = jiffies;
1098 blk_rq_bio_prep(req->q, req, bio); 1098 blk_rq_bio_prep(req->q, req, bio);
1099 } 1099 }
1100 1100
1101 static int __make_request(struct request_queue *q, struct bio *bio) 1101 static int __make_request(struct request_queue *q, struct bio *bio)
1102 { 1102 {
1103 struct request *req; 1103 struct request *req;
1104 int el_ret, nr_sectors, barrier, discard, err; 1104 int el_ret, nr_sectors, barrier, discard, err;
1105 const unsigned short prio = bio_prio(bio); 1105 const unsigned short prio = bio_prio(bio);
1106 const int sync = bio_sync(bio); 1106 const int sync = bio_sync(bio);
1107 int rw_flags; 1107 int rw_flags;
1108 1108
1109 nr_sectors = bio_sectors(bio); 1109 nr_sectors = bio_sectors(bio);
1110 1110
1111 /* 1111 /*
1112 * low level driver can indicate that it wants pages above a 1112 * low level driver can indicate that it wants pages above a
1113 * certain limit bounced to low memory (ie for highmem, or even 1113 * certain limit bounced to low memory (ie for highmem, or even
1114 * ISA dma in theory) 1114 * ISA dma in theory)
1115 */ 1115 */
1116 blk_queue_bounce(q, &bio); 1116 blk_queue_bounce(q, &bio);
1117 1117
1118 barrier = bio_barrier(bio); 1118 barrier = bio_barrier(bio);
1119 if (unlikely(barrier) && bio_has_data(bio) && 1119 if (unlikely(barrier) && bio_has_data(bio) &&
1120 (q->next_ordered == QUEUE_ORDERED_NONE)) { 1120 (q->next_ordered == QUEUE_ORDERED_NONE)) {
1121 err = -EOPNOTSUPP; 1121 err = -EOPNOTSUPP;
1122 goto end_io; 1122 goto end_io;
1123 } 1123 }
1124 1124
1125 discard = bio_discard(bio); 1125 discard = bio_discard(bio);
1126 if (unlikely(discard) && !q->prepare_discard_fn) { 1126 if (unlikely(discard) && !q->prepare_discard_fn) {
1127 err = -EOPNOTSUPP; 1127 err = -EOPNOTSUPP;
1128 goto end_io; 1128 goto end_io;
1129 } 1129 }
1130 1130
1131 spin_lock_irq(q->queue_lock); 1131 spin_lock_irq(q->queue_lock);
1132 1132
1133 if (unlikely(barrier) || elv_queue_empty(q)) 1133 if (unlikely(barrier) || elv_queue_empty(q))
1134 goto get_rq; 1134 goto get_rq;
1135 1135
1136 el_ret = elv_merge(q, &req, bio); 1136 el_ret = elv_merge(q, &req, bio);
1137 switch (el_ret) { 1137 switch (el_ret) {
1138 case ELEVATOR_BACK_MERGE: 1138 case ELEVATOR_BACK_MERGE:
1139 BUG_ON(!rq_mergeable(req)); 1139 BUG_ON(!rq_mergeable(req));
1140 1140
1141 if (!ll_back_merge_fn(q, req, bio)) 1141 if (!ll_back_merge_fn(q, req, bio))
1142 break; 1142 break;
1143 1143
1144 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); 1144 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
1145 1145
1146 req->biotail->bi_next = bio; 1146 req->biotail->bi_next = bio;
1147 req->biotail = bio; 1147 req->biotail = bio;
1148 req->nr_sectors = req->hard_nr_sectors += nr_sectors; 1148 req->nr_sectors = req->hard_nr_sectors += nr_sectors;
1149 req->ioprio = ioprio_best(req->ioprio, prio); 1149 req->ioprio = ioprio_best(req->ioprio, prio);
1150 if (!blk_rq_cpu_valid(req)) 1150 if (!blk_rq_cpu_valid(req))
1151 req->cpu = bio->bi_comp_cpu; 1151 req->cpu = bio->bi_comp_cpu;
1152 drive_stat_acct(req, 0); 1152 drive_stat_acct(req, 0);
1153 if (!attempt_back_merge(q, req)) 1153 if (!attempt_back_merge(q, req))
1154 elv_merged_request(q, req, el_ret); 1154 elv_merged_request(q, req, el_ret);
1155 goto out; 1155 goto out;
1156 1156
1157 case ELEVATOR_FRONT_MERGE: 1157 case ELEVATOR_FRONT_MERGE:
1158 BUG_ON(!rq_mergeable(req)); 1158 BUG_ON(!rq_mergeable(req));
1159 1159
1160 if (!ll_front_merge_fn(q, req, bio)) 1160 if (!ll_front_merge_fn(q, req, bio))
1161 break; 1161 break;
1162 1162
1163 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); 1163 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
1164 1164
1165 bio->bi_next = req->bio; 1165 bio->bi_next = req->bio;
1166 req->bio = bio; 1166 req->bio = bio;
1167 1167
1168 /* 1168 /*
1169 * may not be valid. if the low level driver said 1169 * may not be valid. if the low level driver said
1170 * it didn't need a bounce buffer then it better 1170 * it didn't need a bounce buffer then it better
1171 * not touch req->buffer either... 1171 * not touch req->buffer either...
1172 */ 1172 */
1173 req->buffer = bio_data(bio); 1173 req->buffer = bio_data(bio);
1174 req->current_nr_sectors = bio_cur_sectors(bio); 1174 req->current_nr_sectors = bio_cur_sectors(bio);
1175 req->hard_cur_sectors = req->current_nr_sectors; 1175 req->hard_cur_sectors = req->current_nr_sectors;
1176 req->sector = req->hard_sector = bio->bi_sector; 1176 req->sector = req->hard_sector = bio->bi_sector;
1177 req->nr_sectors = req->hard_nr_sectors += nr_sectors; 1177 req->nr_sectors = req->hard_nr_sectors += nr_sectors;
1178 req->ioprio = ioprio_best(req->ioprio, prio); 1178 req->ioprio = ioprio_best(req->ioprio, prio);
1179 if (!blk_rq_cpu_valid(req)) 1179 if (!blk_rq_cpu_valid(req))
1180 req->cpu = bio->bi_comp_cpu; 1180 req->cpu = bio->bi_comp_cpu;
1181 drive_stat_acct(req, 0); 1181 drive_stat_acct(req, 0);
1182 if (!attempt_front_merge(q, req)) 1182 if (!attempt_front_merge(q, req))
1183 elv_merged_request(q, req, el_ret); 1183 elv_merged_request(q, req, el_ret);
1184 goto out; 1184 goto out;
1185 1185
1186 /* ELV_NO_MERGE: elevator says don't/can't merge. */ 1186 /* ELV_NO_MERGE: elevator says don't/can't merge. */
1187 default: 1187 default:
1188 ; 1188 ;
1189 } 1189 }
1190 1190
1191 get_rq: 1191 get_rq:
1192 /* 1192 /*
1193 * This sync check and mask will be re-done in init_request_from_bio(), 1193 * This sync check and mask will be re-done in init_request_from_bio(),
1194 * but we need to set it earlier to expose the sync flag to the 1194 * but we need to set it earlier to expose the sync flag to the
1195 * rq allocator and io schedulers. 1195 * rq allocator and io schedulers.
1196 */ 1196 */
1197 rw_flags = bio_data_dir(bio); 1197 rw_flags = bio_data_dir(bio);
1198 if (sync) 1198 if (sync)
1199 rw_flags |= REQ_RW_SYNC; 1199 rw_flags |= REQ_RW_SYNC;
1200 1200
1201 /* 1201 /*
1202 * Grab a free request. This might sleep but cannot fail. 1202 * Grab a free request. This might sleep but cannot fail.
1203 * Returns with the queue unlocked. 1203 * Returns with the queue unlocked.
1204 */ 1204 */
1205 req = get_request_wait(q, rw_flags, bio); 1205 req = get_request_wait(q, rw_flags, bio);
1206 1206
1207 /* 1207 /*
1208 * After dropping the lock and possibly sleeping here, our request 1208 * After dropping the lock and possibly sleeping here, our request
1209 * may now be mergeable after it had proven unmergeable (above). 1209 * may now be mergeable after it had proven unmergeable (above).
1210 * We don't worry about that case for efficiency. It won't happen 1210 * We don't worry about that case for efficiency. It won't happen
1211 * often, and the elevators are able to handle it. 1211 * often, and the elevators are able to handle it.
1212 */ 1212 */
1213 init_request_from_bio(req, bio); 1213 init_request_from_bio(req, bio);
1214 1214
1215 spin_lock_irq(q->queue_lock); 1215 spin_lock_irq(q->queue_lock);
1216 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || 1216 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
1217 bio_flagged(bio, BIO_CPU_AFFINE)) 1217 bio_flagged(bio, BIO_CPU_AFFINE))
1218 req->cpu = blk_cpu_to_group(smp_processor_id()); 1218 req->cpu = blk_cpu_to_group(smp_processor_id());
1219 if (elv_queue_empty(q)) 1219 if (elv_queue_empty(q))
1220 blk_plug_device(q); 1220 blk_plug_device(q);
1221 add_request(q, req); 1221 add_request(q, req);
1222 out: 1222 out:
1223 if (sync) 1223 if (sync)
1224 __generic_unplug_device(q); 1224 __generic_unplug_device(q);
1225 spin_unlock_irq(q->queue_lock); 1225 spin_unlock_irq(q->queue_lock);
1226 return 0; 1226 return 0;
1227 1227
1228 end_io: 1228 end_io:
1229 bio_endio(bio, err); 1229 bio_endio(bio, err);
1230 return 0; 1230 return 0;
1231 } 1231 }
1232 1232
1233 /* 1233 /*
1234 * If bio->bi_dev is a partition, remap the location 1234 * If bio->bi_dev is a partition, remap the location
1235 */ 1235 */
1236 static inline void blk_partition_remap(struct bio *bio) 1236 static inline void blk_partition_remap(struct bio *bio)
1237 { 1237 {
1238 struct block_device *bdev = bio->bi_bdev; 1238 struct block_device *bdev = bio->bi_bdev;
1239 1239
1240 if (bio_sectors(bio) && bdev != bdev->bd_contains) { 1240 if (bio_sectors(bio) && bdev != bdev->bd_contains) {
1241 struct hd_struct *p = bdev->bd_part; 1241 struct hd_struct *p = bdev->bd_part;
1242 1242
1243 bio->bi_sector += p->start_sect; 1243 bio->bi_sector += p->start_sect;
1244 bio->bi_bdev = bdev->bd_contains; 1244 bio->bi_bdev = bdev->bd_contains;
1245 1245
1246 blk_add_trace_remap(bdev_get_queue(bio->bi_bdev), bio, 1246 blk_add_trace_remap(bdev_get_queue(bio->bi_bdev), bio,
1247 bdev->bd_dev, bio->bi_sector, 1247 bdev->bd_dev, bio->bi_sector,
1248 bio->bi_sector - p->start_sect); 1248 bio->bi_sector - p->start_sect);
1249 } 1249 }
1250 } 1250 }
1251 1251
1252 static void handle_bad_sector(struct bio *bio) 1252 static void handle_bad_sector(struct bio *bio)
1253 { 1253 {
1254 char b[BDEVNAME_SIZE]; 1254 char b[BDEVNAME_SIZE];
1255 1255
1256 printk(KERN_INFO "attempt to access beyond end of device\n"); 1256 printk(KERN_INFO "attempt to access beyond end of device\n");
1257 printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", 1257 printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",
1258 bdevname(bio->bi_bdev, b), 1258 bdevname(bio->bi_bdev, b),
1259 bio->bi_rw, 1259 bio->bi_rw,
1260 (unsigned long long)bio->bi_sector + bio_sectors(bio), 1260 (unsigned long long)bio->bi_sector + bio_sectors(bio),
1261 (long long)(bio->bi_bdev->bd_inode->i_size >> 9)); 1261 (long long)(bio->bi_bdev->bd_inode->i_size >> 9));
1262 1262
1263 set_bit(BIO_EOF, &bio->bi_flags); 1263 set_bit(BIO_EOF, &bio->bi_flags);
1264 } 1264 }
1265 1265
1266 #ifdef CONFIG_FAIL_MAKE_REQUEST 1266 #ifdef CONFIG_FAIL_MAKE_REQUEST
1267 1267
1268 static DECLARE_FAULT_ATTR(fail_make_request); 1268 static DECLARE_FAULT_ATTR(fail_make_request);
1269 1269
1270 static int __init setup_fail_make_request(char *str) 1270 static int __init setup_fail_make_request(char *str)
1271 { 1271 {
1272 return setup_fault_attr(&fail_make_request, str); 1272 return setup_fault_attr(&fail_make_request, str);
1273 } 1273 }
1274 __setup("fail_make_request=", setup_fail_make_request); 1274 __setup("fail_make_request=", setup_fail_make_request);
1275 1275
1276 static int should_fail_request(struct bio *bio) 1276 static int should_fail_request(struct bio *bio)
1277 { 1277 {
1278 struct hd_struct *part = bio->bi_bdev->bd_part; 1278 struct hd_struct *part = bio->bi_bdev->bd_part;
1279 1279
1280 if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail) 1280 if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail)
1281 return should_fail(&fail_make_request, bio->bi_size); 1281 return should_fail(&fail_make_request, bio->bi_size);
1282 1282
1283 return 0; 1283 return 0;
1284 } 1284 }
1285 1285
1286 static int __init fail_make_request_debugfs(void) 1286 static int __init fail_make_request_debugfs(void)
1287 { 1287 {
1288 return init_fault_attr_dentries(&fail_make_request, 1288 return init_fault_attr_dentries(&fail_make_request,
1289 "fail_make_request"); 1289 "fail_make_request");
1290 } 1290 }
1291 1291
1292 late_initcall(fail_make_request_debugfs); 1292 late_initcall(fail_make_request_debugfs);
1293 1293
1294 #else /* CONFIG_FAIL_MAKE_REQUEST */ 1294 #else /* CONFIG_FAIL_MAKE_REQUEST */
1295 1295
1296 static inline int should_fail_request(struct bio *bio) 1296 static inline int should_fail_request(struct bio *bio)
1297 { 1297 {
1298 return 0; 1298 return 0;
1299 } 1299 }
1300 1300
1301 #endif /* CONFIG_FAIL_MAKE_REQUEST */ 1301 #endif /* CONFIG_FAIL_MAKE_REQUEST */
1302 1302
1303 /* 1303 /*
1304 * Check whether this bio extends beyond the end of the device. 1304 * Check whether this bio extends beyond the end of the device.
1305 */ 1305 */
1306 static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors) 1306 static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
1307 { 1307 {
1308 sector_t maxsector; 1308 sector_t maxsector;
1309 1309
1310 if (!nr_sectors) 1310 if (!nr_sectors)
1311 return 0; 1311 return 0;
1312 1312
1313 /* Test device or partition size, when known. */ 1313 /* Test device or partition size, when known. */
1314 maxsector = bio->bi_bdev->bd_inode->i_size >> 9; 1314 maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
1315 if (maxsector) { 1315 if (maxsector) {
1316 sector_t sector = bio->bi_sector; 1316 sector_t sector = bio->bi_sector;
1317 1317
1318 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { 1318 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
1319 /* 1319 /*
1320 * This may well happen - the kernel calls bread() 1320 * This may well happen - the kernel calls bread()
1321 * without checking the size of the device, e.g., when 1321 * without checking the size of the device, e.g., when
1322 * mounting a device. 1322 * mounting a device.
1323 */ 1323 */
1324 handle_bad_sector(bio); 1324 handle_bad_sector(bio);
1325 return 1; 1325 return 1;
1326 } 1326 }
1327 } 1327 }
1328 1328
1329 return 0; 1329 return 0;
1330 } 1330 }
1331 1331
1332 /** 1332 /**
1333 * generic_make_request - hand a buffer to its device driver for I/O 1333 * generic_make_request - hand a buffer to its device driver for I/O
1334 * @bio: The bio describing the location in memory and on the device. 1334 * @bio: The bio describing the location in memory and on the device.
1335 * 1335 *
1336 * generic_make_request() is used to make I/O requests of block 1336 * generic_make_request() is used to make I/O requests of block
1337 * devices. It is passed a &struct bio, which describes the I/O that needs 1337 * devices. It is passed a &struct bio, which describes the I/O that needs
1338 * to be done. 1338 * to be done.
1339 * 1339 *
1340 * generic_make_request() does not return any status. The 1340 * generic_make_request() does not return any status. The
1341 * success/failure status of the request, along with notification of 1341 * success/failure status of the request, along with notification of
1342 * completion, is delivered asynchronously through the bio->bi_end_io 1342 * completion, is delivered asynchronously through the bio->bi_end_io
1343 * function described (one day) elsewhere. 1343 * function described (one day) elsewhere.
1344 * 1344 *
1345 * The caller of generic_make_request must make sure that bi_io_vec 1345 * The caller of generic_make_request must make sure that bi_io_vec
1346 * are set to describe the memory buffer, and that bi_dev and bi_sector are 1346 * are set to describe the memory buffer, and that bi_dev and bi_sector are
1347 * set to describe the device address, and the 1347 * set to describe the device address, and the
1348 * bi_end_io and optionally bi_private are set to describe how 1348 * bi_end_io and optionally bi_private are set to describe how
1349 * completion notification should be signaled. 1349 * completion notification should be signaled.
1350 * 1350 *
1351 * generic_make_request and the drivers it calls may use bi_next if this 1351 * generic_make_request and the drivers it calls may use bi_next if this
1352 * bio happens to be merged with someone else, and may change bi_dev and 1352 * bio happens to be merged with someone else, and may change bi_dev and
1353 * bi_sector for remaps as it sees fit. So the values of these fields 1353 * bi_sector for remaps as it sees fit. So the values of these fields
1354 * should NOT be depended on after the call to generic_make_request. 1354 * should NOT be depended on after the call to generic_make_request.
1355 */ 1355 */
1356 static inline void __generic_make_request(struct bio *bio) 1356 static inline void __generic_make_request(struct bio *bio)
1357 { 1357 {
1358 struct request_queue *q; 1358 struct request_queue *q;
1359 sector_t old_sector; 1359 sector_t old_sector;
1360 int ret, nr_sectors = bio_sectors(bio); 1360 int ret, nr_sectors = bio_sectors(bio);
1361 dev_t old_dev; 1361 dev_t old_dev;
1362 int err = -EIO; 1362 int err = -EIO;
1363 1363
1364 might_sleep(); 1364 might_sleep();
1365 1365
1366 if (bio_check_eod(bio, nr_sectors)) 1366 if (bio_check_eod(bio, nr_sectors))
1367 goto end_io; 1367 goto end_io;
1368 1368
1369 /* 1369 /*
1370 * Resolve the mapping until finished. (drivers are 1370 * Resolve the mapping until finished. (drivers are
1371 * still free to implement/resolve their own stacking 1371 * still free to implement/resolve their own stacking
1372 * by explicitly returning 0) 1372 * by explicitly returning 0)
1373 * 1373 *
1374 * NOTE: we don't repeat the blk_size check for each new device. 1374 * NOTE: we don't repeat the blk_size check for each new device.
1375 * Stacking drivers are expected to know what they are doing. 1375 * Stacking drivers are expected to know what they are doing.
1376 */ 1376 */
1377 old_sector = -1; 1377 old_sector = -1;
1378 old_dev = 0; 1378 old_dev = 0;
1379 do { 1379 do {
1380 char b[BDEVNAME_SIZE]; 1380 char b[BDEVNAME_SIZE];
1381 1381
1382 q = bdev_get_queue(bio->bi_bdev); 1382 q = bdev_get_queue(bio->bi_bdev);
1383 if (!q) { 1383 if (!q) {
1384 printk(KERN_ERR 1384 printk(KERN_ERR
1385 "generic_make_request: Trying to access " 1385 "generic_make_request: Trying to access "
1386 "nonexistent block-device %s (%Lu)\n", 1386 "nonexistent block-device %s (%Lu)\n",
1387 bdevname(bio->bi_bdev, b), 1387 bdevname(bio->bi_bdev, b),
1388 (long long) bio->bi_sector); 1388 (long long) bio->bi_sector);
1389 end_io: 1389 end_io:
1390 bio_endio(bio, err); 1390 bio_endio(bio, err);
1391 break; 1391 break;
1392 } 1392 }
1393 1393
1394 if (unlikely(nr_sectors > q->max_hw_sectors)) { 1394 if (unlikely(nr_sectors > q->max_hw_sectors)) {
1395 printk(KERN_ERR "bio too big device %s (%u > %u)\n", 1395 printk(KERN_ERR "bio too big device %s (%u > %u)\n",
1396 bdevname(bio->bi_bdev, b), 1396 bdevname(bio->bi_bdev, b),
1397 bio_sectors(bio), 1397 bio_sectors(bio),
1398 q->max_hw_sectors); 1398 q->max_hw_sectors);
1399 goto end_io; 1399 goto end_io;
1400 } 1400 }
1401 1401
1402 if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) 1402 if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
1403 goto end_io; 1403 goto end_io;
1404 1404
1405 if (should_fail_request(bio)) 1405 if (should_fail_request(bio))
1406 goto end_io; 1406 goto end_io;
1407 1407
1408 /* 1408 /*
1409 * If this device has partitions, remap block n 1409 * If this device has partitions, remap block n
1410 * of partition p to block n+start(p) of the disk. 1410 * of partition p to block n+start(p) of the disk.
1411 */ 1411 */
1412 blk_partition_remap(bio); 1412 blk_partition_remap(bio);
1413 1413
1414 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) 1414 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio))
1415 goto end_io; 1415 goto end_io;
1416 1416
1417 if (old_sector != -1) 1417 if (old_sector != -1)
1418 blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, 1418 blk_add_trace_remap(q, bio, old_dev, bio->bi_sector,
1419 old_sector); 1419 old_sector);
1420 1420
1421 blk_add_trace_bio(q, bio, BLK_TA_QUEUE); 1421 blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
1422 1422
1423 old_sector = bio->bi_sector; 1423 old_sector = bio->bi_sector;
1424 old_dev = bio->bi_bdev->bd_dev; 1424 old_dev = bio->bi_bdev->bd_dev;
1425 1425
1426 if (bio_check_eod(bio, nr_sectors)) 1426 if (bio_check_eod(bio, nr_sectors))
1427 goto end_io; 1427 goto end_io;
1428 if ((bio_empty_barrier(bio) && !q->prepare_flush_fn) || 1428 if ((bio_empty_barrier(bio) && !q->prepare_flush_fn) ||
1429 (bio_discard(bio) && !q->prepare_discard_fn)) { 1429 (bio_discard(bio) && !q->prepare_discard_fn)) {
1430 err = -EOPNOTSUPP; 1430 err = -EOPNOTSUPP;
1431 goto end_io; 1431 goto end_io;
1432 } 1432 }
1433 1433
1434 ret = q->make_request_fn(q, bio); 1434 ret = q->make_request_fn(q, bio);
1435 } while (ret); 1435 } while (ret);
1436 } 1436 }
1437 1437
1438 /* 1438 /*
1439 * We only want one ->make_request_fn to be active at a time, 1439 * We only want one ->make_request_fn to be active at a time,
1440 * else stack usage with stacked devices could be a problem. 1440 * else stack usage with stacked devices could be a problem.
1441 * So use current->bio_{list,tail} to keep a list of requests 1441 * So use current->bio_{list,tail} to keep a list of requests
1442 * submitted by a make_request_fn function. 1442 * submitted by a make_request_fn function.
1443 * current->bio_tail is also used as a flag to say if 1443 * current->bio_tail is also used as a flag to say if
1444 * generic_make_request is currently active in this task or not. 1444 * generic_make_request is currently active in this task or not.
1445 * If it is NULL, then no make_request is active. If it is non-NULL, 1445 * If it is NULL, then no make_request is active. If it is non-NULL,
1446 * then a make_request is active, and new requests should be added 1446 * then a make_request is active, and new requests should be added
1447 * at the tail 1447 * at the tail
1448 */ 1448 */
1449 void generic_make_request(struct bio *bio) 1449 void generic_make_request(struct bio *bio)
1450 { 1450 {
1451 if (current->bio_tail) { 1451 if (current->bio_tail) {
1452 /* make_request is active */ 1452 /* make_request is active */
1453 *(current->bio_tail) = bio; 1453 *(current->bio_tail) = bio;
1454 bio->bi_next = NULL; 1454 bio->bi_next = NULL;
1455 current->bio_tail = &bio->bi_next; 1455 current->bio_tail = &bio->bi_next;
1456 return; 1456 return;
1457 } 1457 }
1458 /* following loop may be a bit non-obvious, and so deserves some 1458 /* following loop may be a bit non-obvious, and so deserves some
1459 * explanation. 1459 * explanation.
1460 * Before entering the loop, bio->bi_next is NULL (as all callers 1460 * Before entering the loop, bio->bi_next is NULL (as all callers
1461 * ensure that) so we have a list with a single bio. 1461 * ensure that) so we have a list with a single bio.
1462 * We pretend that we have just taken it off a longer list, so 1462 * We pretend that we have just taken it off a longer list, so
1463 * we assign bio_list to the next (which is NULL) and bio_tail 1463 * we assign bio_list to the next (which is NULL) and bio_tail
1464 * to &bio_list, thus initialising the bio_list of new bios to be 1464 * to &bio_list, thus initialising the bio_list of new bios to be
1465 * added. __generic_make_request may indeed add some more bios 1465 * added. __generic_make_request may indeed add some more bios
1466 * through a recursive call to generic_make_request. If it 1466 * through a recursive call to generic_make_request. If it
1467 * did, we find a non-NULL value in bio_list and re-enter the loop 1467 * did, we find a non-NULL value in bio_list and re-enter the loop
1468 * from the top. In this case we really did just take the bio 1468 * from the top. In this case we really did just take the bio
1469 * of the top of the list (no pretending) and so fixup bio_list and 1469 * of the top of the list (no pretending) and so fixup bio_list and
1470 * bio_tail or bi_next, and call into __generic_make_request again. 1470 * bio_tail or bi_next, and call into __generic_make_request again.
1471 * 1471 *
1472 * The loop was structured like this to make only one call to 1472 * The loop was structured like this to make only one call to
1473 * __generic_make_request (which is important as it is large and 1473 * __generic_make_request (which is important as it is large and
1474 * inlined) and to keep the structure simple. 1474 * inlined) and to keep the structure simple.
1475 */ 1475 */
1476 BUG_ON(bio->bi_next); 1476 BUG_ON(bio->bi_next);
1477 do { 1477 do {
1478 current->bio_list = bio->bi_next; 1478 current->bio_list = bio->bi_next;
1479 if (bio->bi_next == NULL) 1479 if (bio->bi_next == NULL)
1480 current->bio_tail = &current->bio_list; 1480 current->bio_tail = &current->bio_list;
1481 else 1481 else
1482 bio->bi_next = NULL; 1482 bio->bi_next = NULL;
1483 __generic_make_request(bio); 1483 __generic_make_request(bio);
1484 bio = current->bio_list; 1484 bio = current->bio_list;
1485 } while (bio); 1485 } while (bio);
1486 current->bio_tail = NULL; /* deactivate */ 1486 current->bio_tail = NULL; /* deactivate */
1487 } 1487 }
1488 EXPORT_SYMBOL(generic_make_request); 1488 EXPORT_SYMBOL(generic_make_request);
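A sketch of how a stacking driver's make_request_fn might remap and resubmit a bio, which is exactly the recursion that the current->bio_{list,tail} machinery above flattens into iteration. struct my_target and its fields are hypothetical.

static int my_make_request(struct request_queue *q, struct bio *bio)
{
	struct my_target *t = q->queuedata;	/* assumed per-device data */

	bio->bi_bdev = t->lower_bdev;
	bio->bi_sector += t->start_sect;

	/*
	 * The recursive submission is queued on current->bio_list and
	 * processed iteratively by the loop in generic_make_request().
	 */
	generic_make_request(bio);
	return 0;
}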
1489 1489
1490 /** 1490 /**
1491 * submit_bio - submit a bio to the block device layer for I/O 1491 * submit_bio - submit a bio to the block device layer for I/O
1492 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) 1492 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
1493 * @bio: The &struct bio which describes the I/O 1493 * @bio: The &struct bio which describes the I/O
1494 * 1494 *
1495 * submit_bio() is very similar in purpose to generic_make_request(), and 1495 * submit_bio() is very similar in purpose to generic_make_request(), and
1496 * uses that function to do most of the work. Both are fairly rough 1496 * uses that function to do most of the work. Both are fairly rough
1497 * interfaces; @bio must be set up and ready for I/O. 1497 * interfaces; @bio must be set up and ready for I/O.
1498 * 1498 *
1499 */ 1499 */
1500 void submit_bio(int rw, struct bio *bio) 1500 void submit_bio(int rw, struct bio *bio)
1501 { 1501 {
1502 int count = bio_sectors(bio); 1502 int count = bio_sectors(bio);
1503 1503
1504 bio->bi_rw |= rw; 1504 bio->bi_rw |= rw;
1505 1505
1506 /* 1506 /*
1507 * If it's a regular read/write or a barrier with data attached, 1507 * If it's a regular read/write or a barrier with data attached,
1508 * go through the normal accounting stuff before submission. 1508 * go through the normal accounting stuff before submission.
1509 */ 1509 */
1510 if (bio_has_data(bio)) { 1510 if (bio_has_data(bio)) {
1511 if (rw & WRITE) { 1511 if (rw & WRITE) {
1512 count_vm_events(PGPGOUT, count); 1512 count_vm_events(PGPGOUT, count);
1513 } else { 1513 } else {
1514 task_io_account_read(bio->bi_size); 1514 task_io_account_read(bio->bi_size);
1515 count_vm_events(PGPGIN, count); 1515 count_vm_events(PGPGIN, count);
1516 } 1516 }
1517 1517
1518 if (unlikely(block_dump)) { 1518 if (unlikely(block_dump)) {
1519 char b[BDEVNAME_SIZE]; 1519 char b[BDEVNAME_SIZE];
1520 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n", 1520 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
1521 current->comm, task_pid_nr(current), 1521 current->comm, task_pid_nr(current),
1522 (rw & WRITE) ? "WRITE" : "READ", 1522 (rw & WRITE) ? "WRITE" : "READ",
1523 (unsigned long long)bio->bi_sector, 1523 (unsigned long long)bio->bi_sector,
1524 bdevname(bio->bi_bdev, b)); 1524 bdevname(bio->bi_bdev, b));
1525 } 1525 }
1526 } 1526 }
1527 1527
1528 generic_make_request(bio); 1528 generic_make_request(bio);
1529 } 1529 }
1530 EXPORT_SYMBOL(submit_bio); 1530 EXPORT_SYMBOL(submit_bio);
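For example, reading one page synchronously could look roughly like this; the completion-based wait and my_end_io() are illustrative, and the usual <linux/bio.h>/<linux/completion.h> declarations are assumed.

static void my_end_io(struct bio *bio, int error)
{
	complete(bio->bi_private);
	bio_put(bio);
}

static void my_read_page(struct block_device *bdev, sector_t sector,
			 struct page *page)
{
	DECLARE_COMPLETION_ONSTACK(done);
	struct bio *bio = bio_alloc(GFP_NOIO, 1);

	bio->bi_bdev = bdev;
	bio->bi_sector = sector;
	bio->bi_end_io = my_end_io;
	bio->bi_private = &done;
	bio_add_page(bio, page, PAGE_SIZE, 0);

	submit_bio(READ, bio);
	wait_for_completion(&done);
}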
1531 1531
1532 /** 1532 /**
1533 * __end_that_request_first - end I/O on a request 1533 * __end_that_request_first - end I/O on a request
1534 * @req: the request being processed 1534 * @req: the request being processed
1535 * @error: %0 for success, < %0 for error 1535 * @error: %0 for success, < %0 for error
1536 * @nr_bytes: number of bytes to complete 1536 * @nr_bytes: number of bytes to complete
1537 * 1537 *
1538 * Description: 1538 * Description:
1539 * Ends I/O on a number of bytes attached to @req, and sets it up 1539 * Ends I/O on a number of bytes attached to @req, and sets it up
1540 * for the next range of segments (if any) in the cluster. 1540 * for the next range of segments (if any) in the cluster.
1541 * 1541 *
1542 * Return: 1542 * Return:
1543 * %0 - we are done with this request, call end_that_request_last() 1543 * %0 - we are done with this request, call end_that_request_last()
1544 * %1 - still buffers pending for this request 1544 * %1 - still buffers pending for this request
1545 **/ 1545 **/
1546 static int __end_that_request_first(struct request *req, int error, 1546 static int __end_that_request_first(struct request *req, int error,
1547 int nr_bytes) 1547 int nr_bytes)
1548 { 1548 {
1549 int total_bytes, bio_nbytes, next_idx = 0; 1549 int total_bytes, bio_nbytes, next_idx = 0;
1550 struct bio *bio; 1550 struct bio *bio;
1551 1551
1552 blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE); 1552 blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE);
1553 1553
1554 /* 1554 /*
1555 * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual 1555 * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual
1556 * sense key with us all the way through 1556 * sense key with us all the way through
1557 */ 1557 */
1558 if (!blk_pc_request(req)) 1558 if (!blk_pc_request(req))
1559 req->errors = 0; 1559 req->errors = 0;
1560 1560
1561 if (error && (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))) { 1561 if (error && (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))) {
1562 printk(KERN_ERR "end_request: I/O error, dev %s, sector %llu\n", 1562 printk(KERN_ERR "end_request: I/O error, dev %s, sector %llu\n",
1563 req->rq_disk ? req->rq_disk->disk_name : "?", 1563 req->rq_disk ? req->rq_disk->disk_name : "?",
1564 (unsigned long long)req->sector); 1564 (unsigned long long)req->sector);
1565 } 1565 }
1566 1566
1567 if (blk_fs_request(req) && req->rq_disk) { 1567 if (blk_fs_request(req) && req->rq_disk) {
1568 const int rw = rq_data_dir(req); 1568 const int rw = rq_data_dir(req);
1569 struct hd_struct *part; 1569 struct hd_struct *part;
1570 int cpu; 1570 int cpu;
1571 1571
1572 cpu = part_stat_lock(); 1572 cpu = part_stat_lock();
1573 part = disk_map_sector_rcu(req->rq_disk, req->sector); 1573 part = disk_map_sector_rcu(req->rq_disk, req->sector);
1574 part_stat_add(cpu, part, sectors[rw], nr_bytes >> 9); 1574 part_stat_add(cpu, part, sectors[rw], nr_bytes >> 9);
1575 part_stat_unlock(); 1575 part_stat_unlock();
1576 } 1576 }
1577 1577
1578 total_bytes = bio_nbytes = 0; 1578 total_bytes = bio_nbytes = 0;
1579 while ((bio = req->bio) != NULL) { 1579 while ((bio = req->bio) != NULL) {
1580 int nbytes; 1580 int nbytes;
1581 1581
1582 /* 1582 /*
1583 * For an empty barrier request, the low level driver must 1583 * For an empty barrier request, the low level driver must
1584 * store a potential error location in ->sector. We pass 1584 * store a potential error location in ->sector. We pass
1585 * that back up in ->bi_sector. 1585 * that back up in ->bi_sector.
1586 */ 1586 */
1587 if (blk_empty_barrier(req)) 1587 if (blk_empty_barrier(req))
1588 bio->bi_sector = req->sector; 1588 bio->bi_sector = req->sector;
1589 1589
1590 if (nr_bytes >= bio->bi_size) { 1590 if (nr_bytes >= bio->bi_size) {
1591 req->bio = bio->bi_next; 1591 req->bio = bio->bi_next;
1592 nbytes = bio->bi_size; 1592 nbytes = bio->bi_size;
1593 req_bio_endio(req, bio, nbytes, error); 1593 req_bio_endio(req, bio, nbytes, error);
1594 next_idx = 0; 1594 next_idx = 0;
1595 bio_nbytes = 0; 1595 bio_nbytes = 0;
1596 } else { 1596 } else {
1597 int idx = bio->bi_idx + next_idx; 1597 int idx = bio->bi_idx + next_idx;
1598 1598
1599 if (unlikely(bio->bi_idx >= bio->bi_vcnt)) { 1599 if (unlikely(bio->bi_idx >= bio->bi_vcnt)) {
1600 blk_dump_rq_flags(req, "__end_that"); 1600 blk_dump_rq_flags(req, "__end_that");
1601 printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n", 1601 printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n",
1602 __func__, bio->bi_idx, bio->bi_vcnt); 1602 __func__, bio->bi_idx, bio->bi_vcnt);
1603 break; 1603 break;
1604 } 1604 }
1605 1605
1606 nbytes = bio_iovec_idx(bio, idx)->bv_len; 1606 nbytes = bio_iovec_idx(bio, idx)->bv_len;
1607 BIO_BUG_ON(nbytes > bio->bi_size); 1607 BIO_BUG_ON(nbytes > bio->bi_size);
1608 1608
1609 /* 1609 /*
1610 * not a complete bvec done 1610 * not a complete bvec done
1611 */ 1611 */
1612 if (unlikely(nbytes > nr_bytes)) { 1612 if (unlikely(nbytes > nr_bytes)) {
1613 bio_nbytes += nr_bytes; 1613 bio_nbytes += nr_bytes;
1614 total_bytes += nr_bytes; 1614 total_bytes += nr_bytes;
1615 break; 1615 break;
1616 } 1616 }
1617 1617
1618 /* 1618 /*
1619 * advance to the next vector 1619 * advance to the next vector
1620 */ 1620 */
1621 next_idx++; 1621 next_idx++;
1622 bio_nbytes += nbytes; 1622 bio_nbytes += nbytes;
1623 } 1623 }
1624 1624
1625 total_bytes += nbytes; 1625 total_bytes += nbytes;
1626 nr_bytes -= nbytes; 1626 nr_bytes -= nbytes;
1627 1627
1628 bio = req->bio; 1628 bio = req->bio;
1629 if (bio) { 1629 if (bio) {
1630 /* 1630 /*
1631 * end more in this run, or just return 'not-done' 1631 * end more in this run, or just return 'not-done'
1632 */ 1632 */
1633 if (unlikely(nr_bytes <= 0)) 1633 if (unlikely(nr_bytes <= 0))
1634 break; 1634 break;
1635 } 1635 }
1636 } 1636 }
1637 1637
1638 /* 1638 /*
1639 * completely done 1639 * completely done
1640 */ 1640 */
1641 if (!req->bio) 1641 if (!req->bio)
1642 return 0; 1642 return 0;
1643 1643
1644 /* 1644 /*
1645 * if the request wasn't completed, update state 1645 * if the request wasn't completed, update state
1646 */ 1646 */
1647 if (bio_nbytes) { 1647 if (bio_nbytes) {
1648 req_bio_endio(req, bio, bio_nbytes, error); 1648 req_bio_endio(req, bio, bio_nbytes, error);
1649 bio->bi_idx += next_idx; 1649 bio->bi_idx += next_idx;
1650 bio_iovec(bio)->bv_offset += nr_bytes; 1650 bio_iovec(bio)->bv_offset += nr_bytes;
1651 bio_iovec(bio)->bv_len -= nr_bytes; 1651 bio_iovec(bio)->bv_len -= nr_bytes;
1652 } 1652 }
1653 1653
1654 blk_recalc_rq_sectors(req, total_bytes >> 9); 1654 blk_recalc_rq_sectors(req, total_bytes >> 9);
1655 blk_recalc_rq_segments(req); 1655 blk_recalc_rq_segments(req);
1656 return 1; 1656 return 1;
1657 } 1657 }
1658 1658
1659 /* 1659 /*
1660 * queue lock must be held 1660 * queue lock must be held
1661 */ 1661 */
1662 static void end_that_request_last(struct request *req, int error) 1662 static void end_that_request_last(struct request *req, int error)
1663 { 1663 {
1664 struct gendisk *disk = req->rq_disk; 1664 struct gendisk *disk = req->rq_disk;
1665 1665
1666 blk_delete_timer(req); 1666 blk_delete_timer(req);
1667 1667
1668 if (blk_rq_tagged(req)) 1668 if (blk_rq_tagged(req))
1669 blk_queue_end_tag(req->q, req); 1669 blk_queue_end_tag(req->q, req);
1670 1670
1671 if (blk_queued_rq(req)) 1671 if (blk_queued_rq(req))
1672 blkdev_dequeue_request(req); 1672 blkdev_dequeue_request(req);
1673 1673
1674 if (unlikely(laptop_mode) && blk_fs_request(req)) 1674 if (unlikely(laptop_mode) && blk_fs_request(req))
1675 laptop_io_completion(); 1675 laptop_io_completion();
1676 1676
1677 /* 1677 /*
1678 * Account IO completion. bar_rq isn't accounted as a normal 1678 * Account IO completion. bar_rq isn't accounted as a normal
1679 * IO on queueing nor completion. Accounting the containing 1679 * IO on queueing nor completion. Accounting the containing
1680 * request is enough. 1680 * request is enough.
1681 */ 1681 */
1682 if (disk && blk_fs_request(req) && req != &req->q->bar_rq) { 1682 if (disk && blk_fs_request(req) && req != &req->q->bar_rq) {
1683 unsigned long duration = jiffies - req->start_time; 1683 unsigned long duration = jiffies - req->start_time;
1684 const int rw = rq_data_dir(req); 1684 const int rw = rq_data_dir(req);
1685 struct hd_struct *part; 1685 struct hd_struct *part;
1686 int cpu; 1686 int cpu;
1687 1687
1688 cpu = part_stat_lock(); 1688 cpu = part_stat_lock();
1689 part = disk_map_sector_rcu(disk, req->sector); 1689 part = disk_map_sector_rcu(disk, req->sector);
1690 1690
1691 part_stat_inc(cpu, part, ios[rw]); 1691 part_stat_inc(cpu, part, ios[rw]);
1692 part_stat_add(cpu, part, ticks[rw], duration); 1692 part_stat_add(cpu, part, ticks[rw], duration);
1693 part_round_stats(cpu, part); 1693 part_round_stats(cpu, part);
1694 part_dec_in_flight(part); 1694 part_dec_in_flight(part);
1695 1695
1696 part_stat_unlock(); 1696 part_stat_unlock();
1697 } 1697 }
1698 1698
1699 if (req->end_io) 1699 if (req->end_io)
1700 req->end_io(req, error); 1700 req->end_io(req, error);
1701 else { 1701 else {
1702 if (blk_bidi_rq(req)) 1702 if (blk_bidi_rq(req))
1703 __blk_put_request(req->next_rq->q, req->next_rq); 1703 __blk_put_request(req->next_rq->q, req->next_rq);
1704 1704
1705 __blk_put_request(req->q, req); 1705 __blk_put_request(req->q, req);
1706 } 1706 }
1707 } 1707 }
1708 1708
1709 static inline void __end_request(struct request *rq, int uptodate, 1709 static inline void __end_request(struct request *rq, int uptodate,
1710 unsigned int nr_bytes) 1710 unsigned int nr_bytes)
1711 { 1711 {
1712 int error = 0; 1712 int error = 0;
1713 1713
1714 if (uptodate <= 0) 1714 if (uptodate <= 0)
1715 error = uptodate ? uptodate : -EIO; 1715 error = uptodate ? uptodate : -EIO;
1716 1716
1717 __blk_end_request(rq, error, nr_bytes); 1717 __blk_end_request(rq, error, nr_bytes);
1718 } 1718 }
1719 1719
1720 /** 1720 /**
1721 * blk_rq_bytes - Returns bytes left to complete in the entire request 1721 * blk_rq_bytes - Returns bytes left to complete in the entire request
1722 * @rq: the request being processed 1722 * @rq: the request being processed
1723 **/ 1723 **/
1724 unsigned int blk_rq_bytes(struct request *rq) 1724 unsigned int blk_rq_bytes(struct request *rq)
1725 { 1725 {
1726 if (blk_fs_request(rq)) 1726 if (blk_fs_request(rq))
1727 return rq->hard_nr_sectors << 9; 1727 return rq->hard_nr_sectors << 9;
1728 1728
1729 return rq->data_len; 1729 return rq->data_len;
1730 } 1730 }
1731 EXPORT_SYMBOL_GPL(blk_rq_bytes); 1731 EXPORT_SYMBOL_GPL(blk_rq_bytes);
1732 1732
1733 /** 1733 /**
1734 * blk_rq_cur_bytes - Returns bytes left to complete in the current segment 1734 * blk_rq_cur_bytes - Returns bytes left to complete in the current segment
1735 * @rq: the request being processed 1735 * @rq: the request being processed
1736 **/ 1736 **/
1737 unsigned int blk_rq_cur_bytes(struct request *rq) 1737 unsigned int blk_rq_cur_bytes(struct request *rq)
1738 { 1738 {
1739 if (blk_fs_request(rq)) 1739 if (blk_fs_request(rq))
1740 return rq->current_nr_sectors << 9; 1740 return rq->current_nr_sectors << 9;
1741 1741
1742 if (rq->bio) 1742 if (rq->bio)
1743 return rq->bio->bi_size; 1743 return rq->bio->bi_size;
1744 1744
1745 return rq->data_len; 1745 return rq->data_len;
1746 } 1746 }
1747 EXPORT_SYMBOL_GPL(blk_rq_cur_bytes); 1747 EXPORT_SYMBOL_GPL(blk_rq_cur_bytes);
1748 1748
1749 /** 1749 /**
1750 * end_queued_request - end all I/O on a queued request 1750 * end_queued_request - end all I/O on a queued request
1751 * @rq: the request being processed 1751 * @rq: the request being processed
1752 * @uptodate: error value or %0/%1 uptodate flag 1752 * @uptodate: error value or %0/%1 uptodate flag
1753 * 1753 *
1754 * Description: 1754 * Description:
1755 * Ends all I/O on a request, and removes it from the block layer queues. 1755 * Ends all I/O on a request, and removes it from the block layer queues.
1756 * Not suitable for normal I/O completion, unless the driver still has 1756 * Not suitable for normal I/O completion, unless the driver still has
1757 * the request attached to the block layer. 1757 * the request attached to the block layer.
1758 * 1758 *
1759 **/ 1759 **/
1760 void end_queued_request(struct request *rq, int uptodate) 1760 void end_queued_request(struct request *rq, int uptodate)
1761 { 1761 {
1762 __end_request(rq, uptodate, blk_rq_bytes(rq)); 1762 __end_request(rq, uptodate, blk_rq_bytes(rq));
1763 } 1763 }
1764 EXPORT_SYMBOL(end_queued_request); 1764 EXPORT_SYMBOL(end_queued_request);
1765 1765
1766 /** 1766 /**
1767 * end_dequeued_request - end all I/O on a dequeued request 1767 * end_dequeued_request - end all I/O on a dequeued request
1768 * @rq: the request being processed 1768 * @rq: the request being processed
1769 * @uptodate: error value or %0/%1 uptodate flag 1769 * @uptodate: error value or %0/%1 uptodate flag
1770 * 1770 *
1771 * Description: 1771 * Description:
1772 * Ends all I/O on a request. The request must already have been 1772 * Ends all I/O on a request. The request must already have been
1773 * dequeued using blkdev_dequeue_request(), as is normally the case 1773 * dequeued using blkdev_dequeue_request(), as is normally the case
1774 * for most drivers. 1774 * for most drivers.
1775 * 1775 *
1776 **/ 1776 **/
1777 void end_dequeued_request(struct request *rq, int uptodate) 1777 void end_dequeued_request(struct request *rq, int uptodate)
1778 { 1778 {
1779 __end_request(rq, uptodate, blk_rq_bytes(rq)); 1779 __end_request(rq, uptodate, blk_rq_bytes(rq));
1780 } 1780 }
1781 EXPORT_SYMBOL(end_dequeued_request); 1781 EXPORT_SYMBOL(end_dequeued_request);
1782 1782
1783 1783
1784 /** 1784 /**
1785 * end_request - end I/O on the current segment of the request 1785 * end_request - end I/O on the current segment of the request
1786 * @req: the request being processed 1786 * @req: the request being processed
1787 * @uptodate: error value or %0/%1 uptodate flag 1787 * @uptodate: error value or %0/%1 uptodate flag
1788 * 1788 *
1789 * Description: 1789 * Description:
1790 * Ends I/O on the current segment of a request. If that is the only 1790 * Ends I/O on the current segment of a request. If that is the only
1791 * remaining segment, the request is also completed and freed. 1791 * remaining segment, the request is also completed and freed.
1792 * 1792 *
1793 * This is a remnant of how older block drivers handled I/O completions. 1793 * This is a remnant of how older block drivers handled I/O completions.
1794 * Modern drivers typically end I/O on the full request in one go, unless 1794 * Modern drivers typically end I/O on the full request in one go, unless
1795 * they have a residual value to account for. For that case this function 1795 * they have a residual value to account for. For that case this function
1796 * isn't really useful, unless the residual just happens to be the 1796 * isn't really useful, unless the residual just happens to be the
1797 * full current segment. In other words, don't use this function in new 1797 * full current segment. In other words, don't use this function in new
1798 * code. Use blk_end_request() or __blk_end_request() to end partial parts 1798 * code. Use blk_end_request() or __blk_end_request() to end partial parts
1799 * of a request, or end_dequeued_request() and end_queued_request() to 1799 * of a request, or end_dequeued_request() and end_queued_request() to
1800 * completely end IO on a dequeued/queued request. 1800 * completely end IO on a dequeued/queued request.
1801 * 1801 *
1802 **/ 1802 **/
1803 void end_request(struct request *req, int uptodate) 1803 void end_request(struct request *req, int uptodate)
1804 { 1804 {
1805 __end_request(req, uptodate, req->hard_cur_sectors << 9); 1805 __end_request(req, uptodate, req->hard_cur_sectors << 9);
1806 } 1806 }
1807 EXPORT_SYMBOL(end_request); 1807 EXPORT_SYMBOL(end_request);
1808 1808
1809 static int end_that_request_data(struct request *rq, int error,
1810 unsigned int nr_bytes, unsigned int bidi_bytes)
1811 {
1812 if (rq->bio) {
1813 if (__end_that_request_first(rq, error, nr_bytes))
1814 return 1;
1815
1816 /* Bidi request must be completed as a whole */
1817 if (blk_bidi_rq(rq) &&
1818 __end_that_request_first(rq->next_rq, error, bidi_bytes))
1819 return 1;
1820 }
1821
1822 return 0;
1823 }
1824
1809 /** 1825 /**
1810 * blk_end_io - Generic end_io function to complete a request. 1826 * blk_end_io - Generic end_io function to complete a request.
1811 * @rq: the request being processed 1827 * @rq: the request being processed
1812 * @error: %0 for success, < %0 for error 1828 * @error: %0 for success, < %0 for error
1813 * @nr_bytes: number of bytes to complete @rq 1829 * @nr_bytes: number of bytes to complete @rq
1814 * @bidi_bytes: number of bytes to complete @rq->next_rq 1830 * @bidi_bytes: number of bytes to complete @rq->next_rq
1815 * @drv_callback: function called between completion of bios in the request 1831 * @drv_callback: function called between completion of bios in the request
1816 * and completion of the request. 1832 * and completion of the request.
1817 * If the callback returns non %0, this helper returns without 1833 * If the callback returns non %0, this helper returns without
1818 * completion of the request. 1834 * completion of the request.
1819 * 1835 *
1820 * Description: 1836 * Description:
1821 * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. 1837 * Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
1822 * If @rq has leftover, sets it up for the next range of segments. 1838 * If @rq has leftover, sets it up for the next range of segments.
1823 * 1839 *
1824 * Return: 1840 * Return:
1825 * %0 - we are done with this request 1841 * %0 - we are done with this request
1826 * %1 - this request is not freed yet, it still has pending buffers. 1842 * %1 - this request is not freed yet, it still has pending buffers.
1827 **/ 1843 **/
1828 static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes, 1844 static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
1829 unsigned int bidi_bytes, 1845 unsigned int bidi_bytes,
1830 int (drv_callback)(struct request *)) 1846 int (drv_callback)(struct request *))
1831 { 1847 {
1832 struct request_queue *q = rq->q; 1848 struct request_queue *q = rq->q;
1833 unsigned long flags = 0UL; 1849 unsigned long flags = 0UL;
1834 1850
1835 if (rq->bio) { 1851 if (end_that_request_data(rq, error, nr_bytes, bidi_bytes))
1836 if (__end_that_request_first(rq, error, nr_bytes)) 1852 return 1;
1837 return 1;
1838 1853
1839 /* Bidi request must be completed as a whole */
1840 if (blk_bidi_rq(rq) &&
1841 __end_that_request_first(rq->next_rq, error, bidi_bytes))
1842 return 1;
1843 }
1844
1845 /* Special feature for tricky drivers */ 1854 /* Special feature for tricky drivers */
1846 if (drv_callback && drv_callback(rq)) 1855 if (drv_callback && drv_callback(rq))
1847 return 1; 1856 return 1;
1848 1857
1849 add_disk_randomness(rq->rq_disk); 1858 add_disk_randomness(rq->rq_disk);
1850 1859
1851 spin_lock_irqsave(q->queue_lock, flags); 1860 spin_lock_irqsave(q->queue_lock, flags);
1852 end_that_request_last(rq, error); 1861 end_that_request_last(rq, error);
1853 spin_unlock_irqrestore(q->queue_lock, flags); 1862 spin_unlock_irqrestore(q->queue_lock, flags);
1854 1863
1855 return 0; 1864 return 0;
1856 } 1865 }
1857 1866
1858 /** 1867 /**
1859 * blk_end_request - Helper function for drivers to complete the request. 1868 * blk_end_request - Helper function for drivers to complete the request.
1860 * @rq: the request being processed 1869 * @rq: the request being processed
1861 * @error: %0 for success, < %0 for error 1870 * @error: %0 for success, < %0 for error
1862 * @nr_bytes: number of bytes to complete 1871 * @nr_bytes: number of bytes to complete
1863 * 1872 *
1864 * Description: 1873 * Description:
1865 * Ends I/O on a number of bytes attached to @rq. 1874 * Ends I/O on a number of bytes attached to @rq.
1866 * If @rq has leftover, sets it up for the next range of segments. 1875 * If @rq has leftover, sets it up for the next range of segments.
1867 * 1876 *
1868 * Return: 1877 * Return:
1869 * %0 - we are done with this request 1878 * %0 - we are done with this request
1870 * %1 - still buffers pending for this request 1879 * %1 - still buffers pending for this request
1871 **/ 1880 **/
1872 int blk_end_request(struct request *rq, int error, unsigned int nr_bytes) 1881 int blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
1873 { 1882 {
1874 return blk_end_io(rq, error, nr_bytes, 0, NULL); 1883 return blk_end_io(rq, error, nr_bytes, 0, NULL);
1875 } 1884 }
1876 EXPORT_SYMBOL_GPL(blk_end_request); 1885 EXPORT_SYMBOL_GPL(blk_end_request);
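A typical (hypothetical) completion path, ending however many bytes the hardware reported; my_done_bytes() is an assumed driver helper.

static void my_complete_rq(struct request *rq, int error)
{
	unsigned int bytes = my_done_bytes(rq);		/* assumed helper */

	if (blk_end_request(rq, error, bytes))
		return;	/* partial completion: rq now covers the remainder */

	/* the request has been fully completed and released here */
}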
1877 1886
1878 /** 1887 /**
1879 * __blk_end_request - Helper function for drivers to complete the request. 1888 * __blk_end_request - Helper function for drivers to complete the request.
1880 * @rq: the request being processed 1889 * @rq: the request being processed
1881 * @error: %0 for success, < %0 for error 1890 * @error: %0 for success, < %0 for error
1882 * @nr_bytes: number of bytes to complete 1891 * @nr_bytes: number of bytes to complete
1883 * 1892 *
1884 * Description: 1893 * Description:
1885 * Must be called with queue lock held unlike blk_end_request(). 1894 * Must be called with queue lock held unlike blk_end_request().
1886 * 1895 *
1887 * Return: 1896 * Return:
1888 * %0 - we are done with this request 1897 * %0 - we are done with this request
1889 * %1 - still buffers pending for this request 1898 * %1 - still buffers pending for this request
1890 **/ 1899 **/
1891 int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) 1900 int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
1892 { 1901 {
1893 if (rq->bio && __end_that_request_first(rq, error, nr_bytes)) 1902 if (rq->bio && __end_that_request_first(rq, error, nr_bytes))
1894 return 1; 1903 return 1;
1895 1904
1896 add_disk_randomness(rq->rq_disk); 1905 add_disk_randomness(rq->rq_disk);
1897 1906
1898 end_that_request_last(rq, error); 1907 end_that_request_last(rq, error);
1899 1908
1900 return 0; 1909 return 0;
1901 } 1910 }
1902 EXPORT_SYMBOL_GPL(__blk_end_request); 1911 EXPORT_SYMBOL_GPL(__blk_end_request);
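
By contrast, __blk_end_request() may only be called with the queue lock already held. A hypothetical sketch of such a completion path (names invented):

static void mydrv_end_rq_locked(struct request *rq, int error)
{
        /*
         * The caller already holds rq->q->queue_lock, so the unlocked variant
         * is required; blk_end_request() would try to take the same lock again.
         */
        __blk_end_request(rq, error, rq->data_len);
}
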
1903 1912
1904 /** 1913 /**
1905 * blk_end_bidi_request - Helper function for drivers to complete bidi request. 1914 * blk_end_bidi_request - Helper function for drivers to complete bidi request.
1906 * @rq: the bidi request being processed 1915 * @rq: the bidi request being processed
1907 * @error: %0 for success, < %0 for error 1916 * @error: %0 for success, < %0 for error
1908 * @nr_bytes: number of bytes to complete @rq 1917 * @nr_bytes: number of bytes to complete @rq
1909 * @bidi_bytes: number of bytes to complete @rq->next_rq 1918 * @bidi_bytes: number of bytes to complete @rq->next_rq
1910 * 1919 *
1911 * Description: 1920 * Description:
1912 * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. 1921 * Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
1913 * 1922 *
1914 * Return: 1923 * Return:
1915 * %0 - we are done with this request 1924 * %0 - we are done with this request
1916 * %1 - still buffers pending for this request 1925 * %1 - still buffers pending for this request
1917 **/ 1926 **/
1918 int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, 1927 int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes,
1919 unsigned int bidi_bytes) 1928 unsigned int bidi_bytes)
1920 { 1929 {
1921 return blk_end_io(rq, error, nr_bytes, bidi_bytes, NULL); 1930 return blk_end_io(rq, error, nr_bytes, bidi_bytes, NULL);
1922 } 1931 }
1923 EXPORT_SYMBOL_GPL(blk_end_bidi_request); 1932 EXPORT_SYMBOL_GPL(blk_end_bidi_request);
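
A hypothetical bidi-capable driver (names invented) completes both directions at once, since a bidi request must be completed as a whole:

static void mydrv_bidi_done(struct request *rq, int error)
{
        unsigned int out_bytes = rq->data_len;            /* data carried by @rq */
        unsigned int in_bytes  = rq->next_rq->data_len;   /* data carried by @rq->next_rq */

        /* both halves are finished together; no partial bidi completion here */
        blk_end_bidi_request(rq, error, out_bytes, in_bytes);
}
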
1933
1934 /**
1935 * blk_update_request - Special helper function for request stacking drivers
1936 * @rq: the request being processed
1937 * @error: %0 for success, < %0 for error
1938 * @nr_bytes: number of bytes to complete @rq
1939 *
1940 * Description:
1941 * Ends I/O on a number of bytes attached to @rq, but doesn't complete
1942 * the request structure even if @rq doesn't have leftover.
1943 * If @rq has leftover, sets it up for the next range of segments.
1944 *
1945 * This special helper function is only for request stacking drivers
1946 * (e.g. request-based dm) so that they can handle partial completion.
1947 * Actual device drivers should use blk_end_request instead.
1948 */
1949 void blk_update_request(struct request *rq, int error, unsigned int nr_bytes)
1950 {
1951 if (!end_that_request_data(rq, error, nr_bytes, 0)) {
1952 /*
1953 * These members are not updated in end_that_request_data()
1954 * when all bios are completed.
1955 * Update them so that the request stacking driver can find
1956 * how many bytes remain in the request later.
1957 */
1958 rq->nr_sectors = rq->hard_nr_sectors = 0;
1959 rq->current_nr_sectors = rq->hard_cur_sectors = 0;
1960 }
1961 }
1962 EXPORT_SYMBOL_GPL(blk_update_request);
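
A sketch of how a request stacking driver might call the new helper from the completion of a cloned bio; the bookkeeping structure and function names below are hypothetical, not the actual request-based dm code:

struct clone_info {                     /* hypothetical per-clone bookkeeping */
        struct request *orig_rq;        /* original request being stacked */
        unsigned int nr_bytes;          /* clone size recorded at clone time */
};

static void stacking_clone_end_io(struct bio *clone, int error)
{
        struct clone_info *info = clone->bi_private;

        /*
         * Update the original request in the lower driver's completion
         * context: the covered bios of the original request are ended now,
         * but the struct request itself is not completed, so no queue lock
         * is needed here and no requeue/redispatch is necessary.
         */
        blk_update_request(info->orig_rq, error, info->nr_bytes);

        /* completion of info->orig_rq itself is deferred, e.g. to softirq */
        bio_put(clone);
}
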
1924 1963
1925 /** 1964 /**
1926 * blk_end_request_callback - Special helper function for tricky drivers 1965 * blk_end_request_callback - Special helper function for tricky drivers
1927 * @rq: the request being processed 1966 * @rq: the request being processed
1928 * @error: %0 for success, < %0 for error 1967 * @error: %0 for success, < %0 for error
1929 * @nr_bytes: number of bytes to complete 1968 * @nr_bytes: number of bytes to complete
1930 * @drv_callback: function called between completion of bios in the request 1969 * @drv_callback: function called between completion of bios in the request
1931 * and completion of the request. 1970 * and completion of the request.
1932 * If the callback returns non %0, this helper returns without 1971 * If the callback returns non %0, this helper returns without
1933 * completion of the request. 1972 * completion of the request.
1934 * 1973 *
1935 * Description: 1974 * Description:
1936 * Ends I/O on a number of bytes attached to @rq. 1975 * Ends I/O on a number of bytes attached to @rq.
1937 * If @rq has leftover, sets it up for the next range of segments. 1976 * If @rq has leftover, sets it up for the next range of segments.
1938 * 1977 *
1939 * This special helper function is used only for existing tricky drivers. 1978 * This special helper function is used only for existing tricky drivers.
1940 * (e.g. cdrom_newpc_intr() of ide-cd) 1979 * (e.g. cdrom_newpc_intr() of ide-cd)
1941 * This interface will be removed when such drivers are rewritten. 1980 * This interface will be removed when such drivers are rewritten.
1942 * Don't use this interface in other places anymore. 1981 * Don't use this interface in other places anymore.
1943 * 1982 *
1944 * Return: 1983 * Return:
1945 * %0 - we are done with this request 1984 * %0 - we are done with this request
1946 * %1 - this request is not freed yet. 1985 * %1 - this request is not freed yet.
1947 * this request still has pending buffers or 1986 * this request still has pending buffers or
1948 * the driver doesn't want to finish this request yet. 1987 * the driver doesn't want to finish this request yet.
1949 **/ 1988 **/
1950 int blk_end_request_callback(struct request *rq, int error, 1989 int blk_end_request_callback(struct request *rq, int error,
1951 unsigned int nr_bytes, 1990 unsigned int nr_bytes,
1952 int (drv_callback)(struct request *)) 1991 int (drv_callback)(struct request *))
1953 { 1992 {
1954 return blk_end_io(rq, error, nr_bytes, 0, drv_callback); 1993 return blk_end_io(rq, error, nr_bytes, 0, drv_callback);
1955 } 1994 }
1956 EXPORT_SYMBOL_GPL(blk_end_request_callback); 1995 EXPORT_SYMBOL_GPL(blk_end_request_callback);
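
A sketch of the "tricky driver" pattern this callback exists for: the callback returns non-zero to keep the request alive so a later interrupt can transfer more data. The names and the keep-alive condition are invented for illustration:

static int mydrv_keep_rq(struct request *rq)
{
        /* non-zero: don't finish the request yet; another interrupt follows */
        return rq->special != NULL;     /* invented condition */
}

static void mydrv_intr_done(struct request *rq, unsigned int bytes, int error)
{
        if (blk_end_request_callback(rq, error, bytes, mydrv_keep_rq))
                return; /* buffers still pending, or the callback held the request */
        /* request fully completed */
}
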
1957 1996
1958 void blk_rq_bio_prep(struct request_queue *q, struct request *rq, 1997 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
1959 struct bio *bio) 1998 struct bio *bio)
1960 { 1999 {
1961 /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw, and 2000 /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw, and
1962 we want BIO_RW_AHEAD (bit 1) to imply REQ_FAILFAST (bit 1). */ 2001 we want BIO_RW_AHEAD (bit 1) to imply REQ_FAILFAST (bit 1). */
1963 rq->cmd_flags |= (bio->bi_rw & 3); 2002 rq->cmd_flags |= (bio->bi_rw & 3);
1964 2003
1965 if (bio_has_data(bio)) { 2004 if (bio_has_data(bio)) {
1966 rq->nr_phys_segments = bio_phys_segments(q, bio); 2005 rq->nr_phys_segments = bio_phys_segments(q, bio);
1967 rq->buffer = bio_data(bio); 2006 rq->buffer = bio_data(bio);
1968 } 2007 }
1969 rq->current_nr_sectors = bio_cur_sectors(bio); 2008 rq->current_nr_sectors = bio_cur_sectors(bio);
1970 rq->hard_cur_sectors = rq->current_nr_sectors; 2009 rq->hard_cur_sectors = rq->current_nr_sectors;
1971 rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio); 2010 rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
1972 rq->data_len = bio->bi_size; 2011 rq->data_len = bio->bi_size;
1973 2012
1974 rq->bio = rq->biotail = bio; 2013 rq->bio = rq->biotail = bio;
1975 2014
1976 if (bio->bi_bdev) 2015 if (bio->bi_bdev)
1977 rq->rq_disk = bio->bi_bdev->bd_disk; 2016 rq->rq_disk = bio->bi_bdev->bd_disk;
1978 } 2017 }
1979 2018
1980 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work) 2019 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
1981 { 2020 {
1982 return queue_work(kblockd_workqueue, work); 2021 return queue_work(kblockd_workqueue, work);
1983 } 2022 }
1984 EXPORT_SYMBOL(kblockd_schedule_work); 2023 EXPORT_SYMBOL(kblockd_schedule_work);
1985 2024
1986 void kblockd_flush_work(struct work_struct *work) 2025 void kblockd_flush_work(struct work_struct *work)
1987 { 2026 {
1988 cancel_work_sync(work); 2027 cancel_work_sync(work);
1989 } 2028 }
1990 EXPORT_SYMBOL(kblockd_flush_work); 2029 EXPORT_SYMBOL(kblockd_flush_work);
1991 2030
1992 int __init blk_dev_init(void) 2031 int __init blk_dev_init(void)
1993 { 2032 {
1994 kblockd_workqueue = create_workqueue("kblockd"); 2033 kblockd_workqueue = create_workqueue("kblockd");
1995 if (!kblockd_workqueue) 2034 if (!kblockd_workqueue)
1996 panic("Failed to create kblockd\n"); 2035 panic("Failed to create kblockd\n");
1997 2036
1998 request_cachep = kmem_cache_create("blkdev_requests", 2037 request_cachep = kmem_cache_create("blkdev_requests",
1999 sizeof(struct request), 0, SLAB_PANIC, NULL); 2038 sizeof(struct request), 0, SLAB_PANIC, NULL);
2000 2039
include/linux/blkdev.h
1 #ifndef _LINUX_BLKDEV_H 1 #ifndef _LINUX_BLKDEV_H
2 #define _LINUX_BLKDEV_H 2 #define _LINUX_BLKDEV_H
3 3
4 #ifdef CONFIG_BLOCK 4 #ifdef CONFIG_BLOCK
5 5
6 #include <linux/sched.h> 6 #include <linux/sched.h>
7 #include <linux/major.h> 7 #include <linux/major.h>
8 #include <linux/genhd.h> 8 #include <linux/genhd.h>
9 #include <linux/list.h> 9 #include <linux/list.h>
10 #include <linux/timer.h> 10 #include <linux/timer.h>
11 #include <linux/workqueue.h> 11 #include <linux/workqueue.h>
12 #include <linux/pagemap.h> 12 #include <linux/pagemap.h>
13 #include <linux/backing-dev.h> 13 #include <linux/backing-dev.h>
14 #include <linux/wait.h> 14 #include <linux/wait.h>
15 #include <linux/mempool.h> 15 #include <linux/mempool.h>
16 #include <linux/bio.h> 16 #include <linux/bio.h>
17 #include <linux/module.h> 17 #include <linux/module.h>
18 #include <linux/stringify.h> 18 #include <linux/stringify.h>
19 #include <linux/gfp.h> 19 #include <linux/gfp.h>
20 #include <linux/bsg.h> 20 #include <linux/bsg.h>
21 #include <linux/smp.h> 21 #include <linux/smp.h>
22 22
23 #include <asm/scatterlist.h> 23 #include <asm/scatterlist.h>
24 24
25 struct scsi_ioctl_command; 25 struct scsi_ioctl_command;
26 26
27 struct request_queue; 27 struct request_queue;
28 struct elevator_queue; 28 struct elevator_queue;
29 typedef struct elevator_queue elevator_t; 29 typedef struct elevator_queue elevator_t;
30 struct request_pm_state; 30 struct request_pm_state;
31 struct blk_trace; 31 struct blk_trace;
32 struct request; 32 struct request;
33 struct sg_io_hdr; 33 struct sg_io_hdr;
34 34
35 #define BLKDEV_MIN_RQ 4 35 #define BLKDEV_MIN_RQ 4
36 #define BLKDEV_MAX_RQ 128 /* Default maximum */ 36 #define BLKDEV_MAX_RQ 128 /* Default maximum */
37 37
38 struct request; 38 struct request;
39 typedef void (rq_end_io_fn)(struct request *, int); 39 typedef void (rq_end_io_fn)(struct request *, int);
40 40
41 struct request_list { 41 struct request_list {
42 int count[2]; 42 int count[2];
43 int starved[2]; 43 int starved[2];
44 int elvpriv; 44 int elvpriv;
45 mempool_t *rq_pool; 45 mempool_t *rq_pool;
46 wait_queue_head_t wait[2]; 46 wait_queue_head_t wait[2];
47 }; 47 };
48 48
49 /* 49 /*
50 * request command types 50 * request command types
51 */ 51 */
52 enum rq_cmd_type_bits { 52 enum rq_cmd_type_bits {
53 REQ_TYPE_FS = 1, /* fs request */ 53 REQ_TYPE_FS = 1, /* fs request */
54 REQ_TYPE_BLOCK_PC, /* scsi command */ 54 REQ_TYPE_BLOCK_PC, /* scsi command */
55 REQ_TYPE_SENSE, /* sense request */ 55 REQ_TYPE_SENSE, /* sense request */
56 REQ_TYPE_PM_SUSPEND, /* suspend request */ 56 REQ_TYPE_PM_SUSPEND, /* suspend request */
57 REQ_TYPE_PM_RESUME, /* resume request */ 57 REQ_TYPE_PM_RESUME, /* resume request */
58 REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ 58 REQ_TYPE_PM_SHUTDOWN, /* shutdown request */
59 REQ_TYPE_SPECIAL, /* driver defined type */ 59 REQ_TYPE_SPECIAL, /* driver defined type */
60 REQ_TYPE_LINUX_BLOCK, /* generic block layer message */ 60 REQ_TYPE_LINUX_BLOCK, /* generic block layer message */
61 /* 61 /*
62 * for ATA/ATAPI devices. this really doesn't belong here, ide should 62 * for ATA/ATAPI devices. this really doesn't belong here, ide should
63 * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver 63 * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver
64 * private REQ_LB opcodes to differentiate what type of request this is 64 * private REQ_LB opcodes to differentiate what type of request this is
65 */ 65 */
66 REQ_TYPE_ATA_TASKFILE, 66 REQ_TYPE_ATA_TASKFILE,
67 REQ_TYPE_ATA_PC, 67 REQ_TYPE_ATA_PC,
68 }; 68 };
69 69
70 /* 70 /*
71 * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being 71 * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being
72 * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a 72 * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a
73 * SCSI cdb). 73 * SCSI cdb).
74 * 74 *
75 * 0x00 -> 0x3f are driver private, to be used for whatever purpose they need, 75 * 0x00 -> 0x3f are driver private, to be used for whatever purpose they need,
76 * typically to differentiate REQ_TYPE_SPECIAL requests. 76 * typically to differentiate REQ_TYPE_SPECIAL requests.
77 * 77 *
78 */ 78 */
79 enum { 79 enum {
80 REQ_LB_OP_EJECT = 0x40, /* eject request */ 80 REQ_LB_OP_EJECT = 0x40, /* eject request */
81 REQ_LB_OP_FLUSH = 0x41, /* flush request */ 81 REQ_LB_OP_FLUSH = 0x41, /* flush request */
82 REQ_LB_OP_DISCARD = 0x42, /* discard sectors */ 82 REQ_LB_OP_DISCARD = 0x42, /* discard sectors */
83 }; 83 };
84 84
85 /* 85 /*
86 * request type modified bits. first two bits match BIO_RW* bits, important 86 * request type modified bits. first two bits match BIO_RW* bits, important
87 */ 87 */
88 enum rq_flag_bits { 88 enum rq_flag_bits {
89 __REQ_RW, /* not set, read. set, write */ 89 __REQ_RW, /* not set, read. set, write */
90 __REQ_FAILFAST, /* no low level driver retries */ 90 __REQ_FAILFAST, /* no low level driver retries */
91 __REQ_DISCARD, /* request to discard sectors */ 91 __REQ_DISCARD, /* request to discard sectors */
92 __REQ_SORTED, /* elevator knows about this request */ 92 __REQ_SORTED, /* elevator knows about this request */
93 __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ 93 __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */
94 __REQ_HARDBARRIER, /* may not be passed by drive either */ 94 __REQ_HARDBARRIER, /* may not be passed by drive either */
95 __REQ_FUA, /* forced unit access */ 95 __REQ_FUA, /* forced unit access */
96 __REQ_NOMERGE, /* don't touch this for merging */ 96 __REQ_NOMERGE, /* don't touch this for merging */
97 __REQ_STARTED, /* drive already may have started this one */ 97 __REQ_STARTED, /* drive already may have started this one */
98 __REQ_DONTPREP, /* don't call prep for this one */ 98 __REQ_DONTPREP, /* don't call prep for this one */
99 __REQ_QUEUED, /* uses queueing */ 99 __REQ_QUEUED, /* uses queueing */
100 __REQ_ELVPRIV, /* elevator private data attached */ 100 __REQ_ELVPRIV, /* elevator private data attached */
101 __REQ_FAILED, /* set if the request failed */ 101 __REQ_FAILED, /* set if the request failed */
102 __REQ_QUIET, /* don't worry about errors */ 102 __REQ_QUIET, /* don't worry about errors */
103 __REQ_PREEMPT, /* set for "ide_preempt" requests */ 103 __REQ_PREEMPT, /* set for "ide_preempt" requests */
104 __REQ_ORDERED_COLOR, /* is before or after barrier */ 104 __REQ_ORDERED_COLOR, /* is before or after barrier */
105 __REQ_RW_SYNC, /* request is sync (O_DIRECT) */ 105 __REQ_RW_SYNC, /* request is sync (O_DIRECT) */
106 __REQ_ALLOCED, /* request came from our alloc pool */ 106 __REQ_ALLOCED, /* request came from our alloc pool */
107 __REQ_RW_META, /* metadata io request */ 107 __REQ_RW_META, /* metadata io request */
108 __REQ_COPY_USER, /* contains copies of user pages */ 108 __REQ_COPY_USER, /* contains copies of user pages */
109 __REQ_INTEGRITY, /* integrity metadata has been remapped */ 109 __REQ_INTEGRITY, /* integrity metadata has been remapped */
110 __REQ_NR_BITS, /* stops here */ 110 __REQ_NR_BITS, /* stops here */
111 }; 111 };
112 112
113 #define REQ_RW (1 << __REQ_RW) 113 #define REQ_RW (1 << __REQ_RW)
114 #define REQ_DISCARD (1 << __REQ_DISCARD) 114 #define REQ_DISCARD (1 << __REQ_DISCARD)
115 #define REQ_FAILFAST (1 << __REQ_FAILFAST) 115 #define REQ_FAILFAST (1 << __REQ_FAILFAST)
116 #define REQ_SORTED (1 << __REQ_SORTED) 116 #define REQ_SORTED (1 << __REQ_SORTED)
117 #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) 117 #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER)
118 #define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER) 118 #define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER)
119 #define REQ_FUA (1 << __REQ_FUA) 119 #define REQ_FUA (1 << __REQ_FUA)
120 #define REQ_NOMERGE (1 << __REQ_NOMERGE) 120 #define REQ_NOMERGE (1 << __REQ_NOMERGE)
121 #define REQ_STARTED (1 << __REQ_STARTED) 121 #define REQ_STARTED (1 << __REQ_STARTED)
122 #define REQ_DONTPREP (1 << __REQ_DONTPREP) 122 #define REQ_DONTPREP (1 << __REQ_DONTPREP)
123 #define REQ_QUEUED (1 << __REQ_QUEUED) 123 #define REQ_QUEUED (1 << __REQ_QUEUED)
124 #define REQ_ELVPRIV (1 << __REQ_ELVPRIV) 124 #define REQ_ELVPRIV (1 << __REQ_ELVPRIV)
125 #define REQ_FAILED (1 << __REQ_FAILED) 125 #define REQ_FAILED (1 << __REQ_FAILED)
126 #define REQ_QUIET (1 << __REQ_QUIET) 126 #define REQ_QUIET (1 << __REQ_QUIET)
127 #define REQ_PREEMPT (1 << __REQ_PREEMPT) 127 #define REQ_PREEMPT (1 << __REQ_PREEMPT)
128 #define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) 128 #define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR)
129 #define REQ_RW_SYNC (1 << __REQ_RW_SYNC) 129 #define REQ_RW_SYNC (1 << __REQ_RW_SYNC)
130 #define REQ_ALLOCED (1 << __REQ_ALLOCED) 130 #define REQ_ALLOCED (1 << __REQ_ALLOCED)
131 #define REQ_RW_META (1 << __REQ_RW_META) 131 #define REQ_RW_META (1 << __REQ_RW_META)
132 #define REQ_COPY_USER (1 << __REQ_COPY_USER) 132 #define REQ_COPY_USER (1 << __REQ_COPY_USER)
133 #define REQ_INTEGRITY (1 << __REQ_INTEGRITY) 133 #define REQ_INTEGRITY (1 << __REQ_INTEGRITY)
134 134
135 #define BLK_MAX_CDB 16 135 #define BLK_MAX_CDB 16
136 136
137 /* 137 /*
138 * try to put the fields that are referenced together in the same cacheline. 138 * try to put the fields that are referenced together in the same cacheline.
139 * if you modify this structure, be sure to check block/blk-core.c:rq_init() 139 * if you modify this structure, be sure to check block/blk-core.c:rq_init()
140 * as well! 140 * as well!
141 */ 141 */
142 struct request { 142 struct request {
143 struct list_head queuelist; 143 struct list_head queuelist;
144 struct call_single_data csd; 144 struct call_single_data csd;
145 int cpu; 145 int cpu;
146 146
147 struct request_queue *q; 147 struct request_queue *q;
148 148
149 unsigned int cmd_flags; 149 unsigned int cmd_flags;
150 enum rq_cmd_type_bits cmd_type; 150 enum rq_cmd_type_bits cmd_type;
151 unsigned long atomic_flags; 151 unsigned long atomic_flags;
152 152
153 /* Maintain bio traversal state for part by part I/O submission. 153 /* Maintain bio traversal state for part by part I/O submission.
154 * hard_* are block layer internals, no driver should touch them! 154 * hard_* are block layer internals, no driver should touch them!
155 */ 155 */
156 156
157 sector_t sector; /* next sector to submit */ 157 sector_t sector; /* next sector to submit */
158 sector_t hard_sector; /* next sector to complete */ 158 sector_t hard_sector; /* next sector to complete */
159 unsigned long nr_sectors; /* no. of sectors left to submit */ 159 unsigned long nr_sectors; /* no. of sectors left to submit */
160 unsigned long hard_nr_sectors; /* no. of sectors left to complete */ 160 unsigned long hard_nr_sectors; /* no. of sectors left to complete */
161 /* no. of sectors left to submit in the current segment */ 161 /* no. of sectors left to submit in the current segment */
162 unsigned int current_nr_sectors; 162 unsigned int current_nr_sectors;
163 163
164 /* no. of sectors left to complete in the current segment */ 164 /* no. of sectors left to complete in the current segment */
165 unsigned int hard_cur_sectors; 165 unsigned int hard_cur_sectors;
166 166
167 struct bio *bio; 167 struct bio *bio;
168 struct bio *biotail; 168 struct bio *biotail;
169 169
170 struct hlist_node hash; /* merge hash */ 170 struct hlist_node hash; /* merge hash */
171 /* 171 /*
172 * The rb_node is only used inside the io scheduler, requests 172 * The rb_node is only used inside the io scheduler, requests
173 * are pruned when moved to the dispatch queue. So let the 173 * are pruned when moved to the dispatch queue. So let the
174 * completion_data share space with the rb_node. 174 * completion_data share space with the rb_node.
175 */ 175 */
176 union { 176 union {
177 struct rb_node rb_node; /* sort/lookup */ 177 struct rb_node rb_node; /* sort/lookup */
178 void *completion_data; 178 void *completion_data;
179 }; 179 };
180 180
181 /* 181 /*
182 * two pointers are available for the IO schedulers, if they need 182 * two pointers are available for the IO schedulers, if they need
183 * more they have to dynamically allocate it. 183 * more they have to dynamically allocate it.
184 */ 184 */
185 void *elevator_private; 185 void *elevator_private;
186 void *elevator_private2; 186 void *elevator_private2;
187 187
188 struct gendisk *rq_disk; 188 struct gendisk *rq_disk;
189 unsigned long start_time; 189 unsigned long start_time;
190 190
191 /* Number of scatter-gather DMA addr+len pairs after 191 /* Number of scatter-gather DMA addr+len pairs after
192 * physical address coalescing is performed. 192 * physical address coalescing is performed.
193 */ 193 */
194 unsigned short nr_phys_segments; 194 unsigned short nr_phys_segments;
195 195
196 unsigned short ioprio; 196 unsigned short ioprio;
197 197
198 void *special; 198 void *special;
199 char *buffer; 199 char *buffer;
200 200
201 int tag; 201 int tag;
202 int errors; 202 int errors;
203 203
204 int ref_count; 204 int ref_count;
205 205
206 /* 206 /*
207 * when request is used as a packet command carrier 207 * when request is used as a packet command carrier
208 */ 208 */
209 unsigned short cmd_len; 209 unsigned short cmd_len;
210 unsigned char __cmd[BLK_MAX_CDB]; 210 unsigned char __cmd[BLK_MAX_CDB];
211 unsigned char *cmd; 211 unsigned char *cmd;
212 212
213 unsigned int data_len; 213 unsigned int data_len;
214 unsigned int extra_len; /* length of alignment and padding */ 214 unsigned int extra_len; /* length of alignment and padding */
215 unsigned int sense_len; 215 unsigned int sense_len;
216 void *data; 216 void *data;
217 void *sense; 217 void *sense;
218 218
219 unsigned long deadline; 219 unsigned long deadline;
220 struct list_head timeout_list; 220 struct list_head timeout_list;
221 unsigned int timeout; 221 unsigned int timeout;
222 int retries; 222 int retries;
223 223
224 /* 224 /*
225 * completion callback. 225 * completion callback.
226 */ 226 */
227 rq_end_io_fn *end_io; 227 rq_end_io_fn *end_io;
228 void *end_io_data; 228 void *end_io_data;
229 229
230 /* for bidi */ 230 /* for bidi */
231 struct request *next_rq; 231 struct request *next_rq;
232 }; 232 };
233 233
234 static inline unsigned short req_get_ioprio(struct request *req) 234 static inline unsigned short req_get_ioprio(struct request *req)
235 { 235 {
236 return req->ioprio; 236 return req->ioprio;
237 } 237 }
238 238
239 /* 239 /*
240 * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME 240 * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME
241 * requests. Some step values could eventually be made generic. 241 * requests. Some step values could eventually be made generic.
242 */ 242 */
243 struct request_pm_state 243 struct request_pm_state
244 { 244 {
245 /* PM state machine step value, currently driver specific */ 245 /* PM state machine step value, currently driver specific */
246 int pm_step; 246 int pm_step;
247 /* requested PM state value (S1, S2, S3, S4, ...) */ 247 /* requested PM state value (S1, S2, S3, S4, ...) */
248 u32 pm_state; 248 u32 pm_state;
249 void* data; /* for driver use */ 249 void* data; /* for driver use */
250 }; 250 };
251 251
252 #include <linux/elevator.h> 252 #include <linux/elevator.h>
253 253
254 typedef void (request_fn_proc) (struct request_queue *q); 254 typedef void (request_fn_proc) (struct request_queue *q);
255 typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); 255 typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
256 typedef int (prep_rq_fn) (struct request_queue *, struct request *); 256 typedef int (prep_rq_fn) (struct request_queue *, struct request *);
257 typedef void (unplug_fn) (struct request_queue *); 257 typedef void (unplug_fn) (struct request_queue *);
258 typedef int (prepare_discard_fn) (struct request_queue *, struct request *); 258 typedef int (prepare_discard_fn) (struct request_queue *, struct request *);
259 259
260 struct bio_vec; 260 struct bio_vec;
261 struct bvec_merge_data { 261 struct bvec_merge_data {
262 struct block_device *bi_bdev; 262 struct block_device *bi_bdev;
263 sector_t bi_sector; 263 sector_t bi_sector;
264 unsigned bi_size; 264 unsigned bi_size;
265 unsigned long bi_rw; 265 unsigned long bi_rw;
266 }; 266 };
267 typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *, 267 typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *,
268 struct bio_vec *); 268 struct bio_vec *);
269 typedef void (prepare_flush_fn) (struct request_queue *, struct request *); 269 typedef void (prepare_flush_fn) (struct request_queue *, struct request *);
270 typedef void (softirq_done_fn)(struct request *); 270 typedef void (softirq_done_fn)(struct request *);
271 typedef int (dma_drain_needed_fn)(struct request *); 271 typedef int (dma_drain_needed_fn)(struct request *);
272 272
273 enum blk_eh_timer_return { 273 enum blk_eh_timer_return {
274 BLK_EH_NOT_HANDLED, 274 BLK_EH_NOT_HANDLED,
275 BLK_EH_HANDLED, 275 BLK_EH_HANDLED,
276 BLK_EH_RESET_TIMER, 276 BLK_EH_RESET_TIMER,
277 }; 277 };
278 278
279 typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *); 279 typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *);
280 280
281 enum blk_queue_state { 281 enum blk_queue_state {
282 Queue_down, 282 Queue_down,
283 Queue_up, 283 Queue_up,
284 }; 284 };
285 285
286 struct blk_queue_tag { 286 struct blk_queue_tag {
287 struct request **tag_index; /* map of busy tags */ 287 struct request **tag_index; /* map of busy tags */
288 unsigned long *tag_map; /* bit map of free/busy tags */ 288 unsigned long *tag_map; /* bit map of free/busy tags */
289 int busy; /* current depth */ 289 int busy; /* current depth */
290 int max_depth; /* what we will send to device */ 290 int max_depth; /* what we will send to device */
291 int real_max_depth; /* what the array can hold */ 291 int real_max_depth; /* what the array can hold */
292 atomic_t refcnt; /* map can be shared */ 292 atomic_t refcnt; /* map can be shared */
293 }; 293 };
294 294
295 #define BLK_SCSI_MAX_CMDS (256) 295 #define BLK_SCSI_MAX_CMDS (256)
296 #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) 296 #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
297 297
298 struct blk_cmd_filter { 298 struct blk_cmd_filter {
299 unsigned long read_ok[BLK_SCSI_CMD_PER_LONG]; 299 unsigned long read_ok[BLK_SCSI_CMD_PER_LONG];
300 unsigned long write_ok[BLK_SCSI_CMD_PER_LONG]; 300 unsigned long write_ok[BLK_SCSI_CMD_PER_LONG];
301 struct kobject kobj; 301 struct kobject kobj;
302 }; 302 };
303 303
304 struct request_queue 304 struct request_queue
305 { 305 {
306 /* 306 /*
307 * Together with queue_head for cacheline sharing 307 * Together with queue_head for cacheline sharing
308 */ 308 */
309 struct list_head queue_head; 309 struct list_head queue_head;
310 struct request *last_merge; 310 struct request *last_merge;
311 elevator_t *elevator; 311 elevator_t *elevator;
312 312
313 /* 313 /*
314 * the queue request freelist, one for reads and one for writes 314 * the queue request freelist, one for reads and one for writes
315 */ 315 */
316 struct request_list rq; 316 struct request_list rq;
317 317
318 request_fn_proc *request_fn; 318 request_fn_proc *request_fn;
319 make_request_fn *make_request_fn; 319 make_request_fn *make_request_fn;
320 prep_rq_fn *prep_rq_fn; 320 prep_rq_fn *prep_rq_fn;
321 unplug_fn *unplug_fn; 321 unplug_fn *unplug_fn;
322 prepare_discard_fn *prepare_discard_fn; 322 prepare_discard_fn *prepare_discard_fn;
323 merge_bvec_fn *merge_bvec_fn; 323 merge_bvec_fn *merge_bvec_fn;
324 prepare_flush_fn *prepare_flush_fn; 324 prepare_flush_fn *prepare_flush_fn;
325 softirq_done_fn *softirq_done_fn; 325 softirq_done_fn *softirq_done_fn;
326 rq_timed_out_fn *rq_timed_out_fn; 326 rq_timed_out_fn *rq_timed_out_fn;
327 dma_drain_needed_fn *dma_drain_needed; 327 dma_drain_needed_fn *dma_drain_needed;
328 328
329 /* 329 /*
330 * Dispatch queue sorting 330 * Dispatch queue sorting
331 */ 331 */
332 sector_t end_sector; 332 sector_t end_sector;
333 struct request *boundary_rq; 333 struct request *boundary_rq;
334 334
335 /* 335 /*
336 * Auto-unplugging state 336 * Auto-unplugging state
337 */ 337 */
338 struct timer_list unplug_timer; 338 struct timer_list unplug_timer;
339 int unplug_thresh; /* After this many requests */ 339 int unplug_thresh; /* After this many requests */
340 unsigned long unplug_delay; /* After this many jiffies */ 340 unsigned long unplug_delay; /* After this many jiffies */
341 struct work_struct unplug_work; 341 struct work_struct unplug_work;
342 342
343 struct backing_dev_info backing_dev_info; 343 struct backing_dev_info backing_dev_info;
344 344
345 /* 345 /*
346 * The queue owner gets to use this for whatever they like. 346 * The queue owner gets to use this for whatever they like.
347 * ll_rw_blk doesn't touch it. 347 * ll_rw_blk doesn't touch it.
348 */ 348 */
349 void *queuedata; 349 void *queuedata;
350 350
351 /* 351 /*
352 * queue needs bounce pages for pages above this limit 352 * queue needs bounce pages for pages above this limit
353 */ 353 */
354 unsigned long bounce_pfn; 354 unsigned long bounce_pfn;
355 gfp_t bounce_gfp; 355 gfp_t bounce_gfp;
356 356
357 /* 357 /*
358 * various queue flags, see QUEUE_* below 358 * various queue flags, see QUEUE_* below
359 */ 359 */
360 unsigned long queue_flags; 360 unsigned long queue_flags;
361 361
362 /* 362 /*
363 * protects queue structures from reentrancy. ->__queue_lock should 363 * protects queue structures from reentrancy. ->__queue_lock should
364 * _never_ be used directly, it is queue private. always use 364 * _never_ be used directly, it is queue private. always use
365 * ->queue_lock. 365 * ->queue_lock.
366 */ 366 */
367 spinlock_t __queue_lock; 367 spinlock_t __queue_lock;
368 spinlock_t *queue_lock; 368 spinlock_t *queue_lock;
369 369
370 /* 370 /*
371 * queue kobject 371 * queue kobject
372 */ 372 */
373 struct kobject kobj; 373 struct kobject kobj;
374 374
375 /* 375 /*
376 * queue settings 376 * queue settings
377 */ 377 */
378 unsigned long nr_requests; /* Max # of requests */ 378 unsigned long nr_requests; /* Max # of requests */
379 unsigned int nr_congestion_on; 379 unsigned int nr_congestion_on;
380 unsigned int nr_congestion_off; 380 unsigned int nr_congestion_off;
381 unsigned int nr_batching; 381 unsigned int nr_batching;
382 382
383 unsigned int max_sectors; 383 unsigned int max_sectors;
384 unsigned int max_hw_sectors; 384 unsigned int max_hw_sectors;
385 unsigned short max_phys_segments; 385 unsigned short max_phys_segments;
386 unsigned short max_hw_segments; 386 unsigned short max_hw_segments;
387 unsigned short hardsect_size; 387 unsigned short hardsect_size;
388 unsigned int max_segment_size; 388 unsigned int max_segment_size;
389 389
390 unsigned long seg_boundary_mask; 390 unsigned long seg_boundary_mask;
391 void *dma_drain_buffer; 391 void *dma_drain_buffer;
392 unsigned int dma_drain_size; 392 unsigned int dma_drain_size;
393 unsigned int dma_pad_mask; 393 unsigned int dma_pad_mask;
394 unsigned int dma_alignment; 394 unsigned int dma_alignment;
395 395
396 struct blk_queue_tag *queue_tags; 396 struct blk_queue_tag *queue_tags;
397 struct list_head tag_busy_list; 397 struct list_head tag_busy_list;
398 398
399 unsigned int nr_sorted; 399 unsigned int nr_sorted;
400 unsigned int in_flight; 400 unsigned int in_flight;
401 401
402 unsigned int rq_timeout; 402 unsigned int rq_timeout;
403 struct timer_list timeout; 403 struct timer_list timeout;
404 struct list_head timeout_list; 404 struct list_head timeout_list;
405 405
406 /* 406 /*
407 * sg stuff 407 * sg stuff
408 */ 408 */
409 unsigned int sg_timeout; 409 unsigned int sg_timeout;
410 unsigned int sg_reserved_size; 410 unsigned int sg_reserved_size;
411 int node; 411 int node;
412 #ifdef CONFIG_BLK_DEV_IO_TRACE 412 #ifdef CONFIG_BLK_DEV_IO_TRACE
413 struct blk_trace *blk_trace; 413 struct blk_trace *blk_trace;
414 #endif 414 #endif
415 /* 415 /*
416 * reserved for flush operations 416 * reserved for flush operations
417 */ 417 */
418 unsigned int ordered, next_ordered, ordseq; 418 unsigned int ordered, next_ordered, ordseq;
419 int orderr, ordcolor; 419 int orderr, ordcolor;
420 struct request pre_flush_rq, bar_rq, post_flush_rq; 420 struct request pre_flush_rq, bar_rq, post_flush_rq;
421 struct request *orig_bar_rq; 421 struct request *orig_bar_rq;
422 422
423 struct mutex sysfs_lock; 423 struct mutex sysfs_lock;
424 424
425 #if defined(CONFIG_BLK_DEV_BSG) 425 #if defined(CONFIG_BLK_DEV_BSG)
426 struct bsg_class_device bsg_dev; 426 struct bsg_class_device bsg_dev;
427 #endif 427 #endif
428 struct blk_cmd_filter cmd_filter; 428 struct blk_cmd_filter cmd_filter;
429 }; 429 };
430 430
431 #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ 431 #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */
432 #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ 432 #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */
433 #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ 433 #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */
434 #define QUEUE_FLAG_READFULL 3 /* read queue has been filled */ 434 #define QUEUE_FLAG_READFULL 3 /* read queue has been filled */
435 #define QUEUE_FLAG_WRITEFULL 4 /* write queue has been filled */ 435 #define QUEUE_FLAG_WRITEFULL 4 /* write queue has been filled */
436 #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ 436 #define QUEUE_FLAG_DEAD 5 /* queue being torn down */
437 #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ 437 #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */
438 #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ 438 #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */
439 #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ 439 #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */
440 #define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */ 440 #define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */
441 #define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */ 441 #define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */
442 #define QUEUE_FLAG_SAME_COMP 11 /* force complete on same CPU */ 442 #define QUEUE_FLAG_SAME_COMP 11 /* force complete on same CPU */
443 #define QUEUE_FLAG_FAIL_IO 12 /* fake timeout */ 443 #define QUEUE_FLAG_FAIL_IO 12 /* fake timeout */
444 444
445 static inline int queue_is_locked(struct request_queue *q) 445 static inline int queue_is_locked(struct request_queue *q)
446 { 446 {
447 #ifdef CONFIG_SMP 447 #ifdef CONFIG_SMP
448 spinlock_t *lock = q->queue_lock; 448 spinlock_t *lock = q->queue_lock;
449 return lock && spin_is_locked(lock); 449 return lock && spin_is_locked(lock);
450 #else 450 #else
451 return 1; 451 return 1;
452 #endif 452 #endif
453 } 453 }
454 454
455 static inline void queue_flag_set_unlocked(unsigned int flag, 455 static inline void queue_flag_set_unlocked(unsigned int flag,
456 struct request_queue *q) 456 struct request_queue *q)
457 { 457 {
458 __set_bit(flag, &q->queue_flags); 458 __set_bit(flag, &q->queue_flags);
459 } 459 }
460 460
461 static inline int queue_flag_test_and_clear(unsigned int flag, 461 static inline int queue_flag_test_and_clear(unsigned int flag,
462 struct request_queue *q) 462 struct request_queue *q)
463 { 463 {
464 WARN_ON_ONCE(!queue_is_locked(q)); 464 WARN_ON_ONCE(!queue_is_locked(q));
465 465
466 if (test_bit(flag, &q->queue_flags)) { 466 if (test_bit(flag, &q->queue_flags)) {
467 __clear_bit(flag, &q->queue_flags); 467 __clear_bit(flag, &q->queue_flags);
468 return 1; 468 return 1;
469 } 469 }
470 470
471 return 0; 471 return 0;
472 } 472 }
473 473
474 static inline int queue_flag_test_and_set(unsigned int flag, 474 static inline int queue_flag_test_and_set(unsigned int flag,
475 struct request_queue *q) 475 struct request_queue *q)
476 { 476 {
477 WARN_ON_ONCE(!queue_is_locked(q)); 477 WARN_ON_ONCE(!queue_is_locked(q));
478 478
479 if (!test_bit(flag, &q->queue_flags)) { 479 if (!test_bit(flag, &q->queue_flags)) {
480 __set_bit(flag, &q->queue_flags); 480 __set_bit(flag, &q->queue_flags);
481 return 0; 481 return 0;
482 } 482 }
483 483
484 return 1; 484 return 1;
485 } 485 }
486 486
487 static inline void queue_flag_set(unsigned int flag, struct request_queue *q) 487 static inline void queue_flag_set(unsigned int flag, struct request_queue *q)
488 { 488 {
489 WARN_ON_ONCE(!queue_is_locked(q)); 489 WARN_ON_ONCE(!queue_is_locked(q));
490 __set_bit(flag, &q->queue_flags); 490 __set_bit(flag, &q->queue_flags);
491 } 491 }
492 492
493 static inline void queue_flag_clear_unlocked(unsigned int flag, 493 static inline void queue_flag_clear_unlocked(unsigned int flag,
494 struct request_queue *q) 494 struct request_queue *q)
495 { 495 {
496 __clear_bit(flag, &q->queue_flags); 496 __clear_bit(flag, &q->queue_flags);
497 } 497 }
498 498
499 static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) 499 static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
500 { 500 {
501 WARN_ON_ONCE(!queue_is_locked(q)); 501 WARN_ON_ONCE(!queue_is_locked(q));
502 __clear_bit(flag, &q->queue_flags); 502 __clear_bit(flag, &q->queue_flags);
503 } 503 }
504 504
505 enum { 505 enum {
506 /* 506 /*
507 * Hardbarrier is supported with one of the following methods. 507 * Hardbarrier is supported with one of the following methods.
508 * 508 *
509 * NONE : hardbarrier unsupported 509 * NONE : hardbarrier unsupported
510 * DRAIN : ordering by draining is enough 510 * DRAIN : ordering by draining is enough
511 * DRAIN_FLUSH : ordering by draining w/ pre and post flushes 511 * DRAIN_FLUSH : ordering by draining w/ pre and post flushes
512 * DRAIN_FUA : ordering by draining w/ pre flush and FUA write 512 * DRAIN_FUA : ordering by draining w/ pre flush and FUA write
513 * TAG : ordering by tag is enough 513 * TAG : ordering by tag is enough
514 * TAG_FLUSH : ordering by tag w/ pre and post flushes 514 * TAG_FLUSH : ordering by tag w/ pre and post flushes
515 * TAG_FUA : ordering by tag w/ pre flush and FUA write 515 * TAG_FUA : ordering by tag w/ pre flush and FUA write
516 */ 516 */
517 QUEUE_ORDERED_NONE = 0x00, 517 QUEUE_ORDERED_NONE = 0x00,
518 QUEUE_ORDERED_DRAIN = 0x01, 518 QUEUE_ORDERED_DRAIN = 0x01,
519 QUEUE_ORDERED_TAG = 0x02, 519 QUEUE_ORDERED_TAG = 0x02,
520 520
521 QUEUE_ORDERED_PREFLUSH = 0x10, 521 QUEUE_ORDERED_PREFLUSH = 0x10,
522 QUEUE_ORDERED_POSTFLUSH = 0x20, 522 QUEUE_ORDERED_POSTFLUSH = 0x20,
523 QUEUE_ORDERED_FUA = 0x40, 523 QUEUE_ORDERED_FUA = 0x40,
524 524
525 QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | 525 QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN |
526 QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH, 526 QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH,
527 QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | 527 QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN |
528 QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA, 528 QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA,
529 QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG | 529 QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG |
530 QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH, 530 QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH,
531 QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG | 531 QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG |
532 QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA, 532 QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA,
533 533
534 /* 534 /*
535 * Ordered operation sequence 535 * Ordered operation sequence
536 */ 536 */
537 QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */ 537 QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */
538 QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */ 538 QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */
539 QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */ 539 QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */
540 QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */ 540 QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */
541 QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */ 541 QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */
542 QUEUE_ORDSEQ_DONE = 0x20, 542 QUEUE_ORDSEQ_DONE = 0x20,
543 }; 543 };
544 544
545 #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) 545 #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
546 #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) 546 #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
547 #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) 547 #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
548 #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) 548 #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
549 #define blk_queue_flushing(q) ((q)->ordseq) 549 #define blk_queue_flushing(q) ((q)->ordseq)
550 550
551 #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS) 551 #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS)
552 #define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC) 552 #define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC)
553 #define blk_special_request(rq) ((rq)->cmd_type == REQ_TYPE_SPECIAL) 553 #define blk_special_request(rq) ((rq)->cmd_type == REQ_TYPE_SPECIAL)
554 #define blk_sense_request(rq) ((rq)->cmd_type == REQ_TYPE_SENSE) 554 #define blk_sense_request(rq) ((rq)->cmd_type == REQ_TYPE_SENSE)
555 555
556 #define blk_noretry_request(rq) ((rq)->cmd_flags & REQ_FAILFAST) 556 #define blk_noretry_request(rq) ((rq)->cmd_flags & REQ_FAILFAST)
557 #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) 557 #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED)
558 558
559 #define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq))) 559 #define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq)))
560 560
561 #define blk_pm_suspend_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND) 561 #define blk_pm_suspend_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND)
562 #define blk_pm_resume_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_RESUME) 562 #define blk_pm_resume_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_RESUME)
563 #define blk_pm_request(rq) \ 563 #define blk_pm_request(rq) \
564 (blk_pm_suspend_request(rq) || blk_pm_resume_request(rq)) 564 (blk_pm_suspend_request(rq) || blk_pm_resume_request(rq))
565 565
566 #define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) 566 #define blk_rq_cpu_valid(rq) ((rq)->cpu != -1)
567 #define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED) 567 #define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED)
568 #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) 568 #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER)
569 #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) 569 #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA)
570 #define blk_discard_rq(rq) ((rq)->cmd_flags & REQ_DISCARD) 570 #define blk_discard_rq(rq) ((rq)->cmd_flags & REQ_DISCARD)
571 #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) 571 #define blk_bidi_rq(rq) ((rq)->next_rq != NULL)
572 #define blk_empty_barrier(rq) (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors) 572 #define blk_empty_barrier(rq) (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors)
573 /* rq->queuelist of dequeued request must be list_empty() */ 573 /* rq->queuelist of dequeued request must be list_empty() */
574 #define blk_queued_rq(rq) (!list_empty(&(rq)->queuelist)) 574 #define blk_queued_rq(rq) (!list_empty(&(rq)->queuelist))
575 575
576 #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) 576 #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist)
577 577
578 #define rq_data_dir(rq) ((rq)->cmd_flags & 1) 578 #define rq_data_dir(rq) ((rq)->cmd_flags & 1)
579 579
580 /* 580 /*
581 * We regard a request as sync, if it's a READ or a SYNC write. 581 * We regard a request as sync, if it's a READ or a SYNC write.
582 */ 582 */
583 #define rq_is_sync(rq) (rq_data_dir((rq)) == READ || (rq)->cmd_flags & REQ_RW_SYNC) 583 #define rq_is_sync(rq) (rq_data_dir((rq)) == READ || (rq)->cmd_flags & REQ_RW_SYNC)
584 #define rq_is_meta(rq) ((rq)->cmd_flags & REQ_RW_META) 584 #define rq_is_meta(rq) ((rq)->cmd_flags & REQ_RW_META)
585 585
586 static inline int blk_queue_full(struct request_queue *q, int rw) 586 static inline int blk_queue_full(struct request_queue *q, int rw)
587 { 587 {
588 if (rw == READ) 588 if (rw == READ)
589 return test_bit(QUEUE_FLAG_READFULL, &q->queue_flags); 589 return test_bit(QUEUE_FLAG_READFULL, &q->queue_flags);
590 return test_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); 590 return test_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags);
591 } 591 }
592 592
593 static inline void blk_set_queue_full(struct request_queue *q, int rw) 593 static inline void blk_set_queue_full(struct request_queue *q, int rw)
594 { 594 {
595 if (rw == READ) 595 if (rw == READ)
596 queue_flag_set(QUEUE_FLAG_READFULL, q); 596 queue_flag_set(QUEUE_FLAG_READFULL, q);
597 else 597 else
598 queue_flag_set(QUEUE_FLAG_WRITEFULL, q); 598 queue_flag_set(QUEUE_FLAG_WRITEFULL, q);
599 } 599 }
600 600
601 static inline void blk_clear_queue_full(struct request_queue *q, int rw) 601 static inline void blk_clear_queue_full(struct request_queue *q, int rw)
602 { 602 {
603 if (rw == READ) 603 if (rw == READ)
604 queue_flag_clear(QUEUE_FLAG_READFULL, q); 604 queue_flag_clear(QUEUE_FLAG_READFULL, q);
605 else 605 else
606 queue_flag_clear(QUEUE_FLAG_WRITEFULL, q); 606 queue_flag_clear(QUEUE_FLAG_WRITEFULL, q);
607 } 607 }
608 608
609 609
610 /* 610 /*
611 * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may 611 * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may
612 * it already be started by driver. 612 * it already be started by driver.
613 */ 613 */
614 #define RQ_NOMERGE_FLAGS \ 614 #define RQ_NOMERGE_FLAGS \
615 (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) 615 (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER)
616 #define rq_mergeable(rq) \ 616 #define rq_mergeable(rq) \
617 (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ 617 (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \
618 (blk_discard_rq(rq) || blk_fs_request((rq)))) 618 (blk_discard_rq(rq) || blk_fs_request((rq))))
619 619
620 /* 620 /*
621 * q->prep_rq_fn return values 621 * q->prep_rq_fn return values
622 */ 622 */
623 #define BLKPREP_OK 0 /* serve it */ 623 #define BLKPREP_OK 0 /* serve it */
624 #define BLKPREP_KILL 1 /* fatal error, kill */ 624 #define BLKPREP_KILL 1 /* fatal error, kill */
625 #define BLKPREP_DEFER 2 /* leave on queue */ 625 #define BLKPREP_DEFER 2 /* leave on queue */
626 626
627 extern unsigned long blk_max_low_pfn, blk_max_pfn; 627 extern unsigned long blk_max_low_pfn, blk_max_pfn;
628 628
629 /* 629 /*
630 * standard bounce addresses: 630 * standard bounce addresses:
631 * 631 *
632 * BLK_BOUNCE_HIGH : bounce all highmem pages 632 * BLK_BOUNCE_HIGH : bounce all highmem pages
633 * BLK_BOUNCE_ANY : don't bounce anything 633 * BLK_BOUNCE_ANY : don't bounce anything
634 * BLK_BOUNCE_ISA : bounce pages above ISA DMA boundary 634 * BLK_BOUNCE_ISA : bounce pages above ISA DMA boundary
635 */ 635 */
636 636
637 #if BITS_PER_LONG == 32 637 #if BITS_PER_LONG == 32
638 #define BLK_BOUNCE_HIGH ((u64)blk_max_low_pfn << PAGE_SHIFT) 638 #define BLK_BOUNCE_HIGH ((u64)blk_max_low_pfn << PAGE_SHIFT)
639 #else 639 #else
640 #define BLK_BOUNCE_HIGH -1ULL 640 #define BLK_BOUNCE_HIGH -1ULL
641 #endif 641 #endif
642 #define BLK_BOUNCE_ANY (-1ULL) 642 #define BLK_BOUNCE_ANY (-1ULL)
643 #define BLK_BOUNCE_ISA (ISA_DMA_THRESHOLD) 643 #define BLK_BOUNCE_ISA (ISA_DMA_THRESHOLD)
644 644
645 /* 645 /*
646 * default timeout for SG_IO if none specified 646 * default timeout for SG_IO if none specified
647 */ 647 */
648 #define BLK_DEFAULT_SG_TIMEOUT (60 * HZ) 648 #define BLK_DEFAULT_SG_TIMEOUT (60 * HZ)
649 649
650 #ifdef CONFIG_BOUNCE 650 #ifdef CONFIG_BOUNCE
651 extern int init_emergency_isa_pool(void); 651 extern int init_emergency_isa_pool(void);
652 extern void blk_queue_bounce(struct request_queue *q, struct bio **bio); 652 extern void blk_queue_bounce(struct request_queue *q, struct bio **bio);
653 #else 653 #else
654 static inline int init_emergency_isa_pool(void) 654 static inline int init_emergency_isa_pool(void)
655 { 655 {
656 return 0; 656 return 0;
657 } 657 }
658 static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio) 658 static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
659 { 659 {
660 } 660 }
661 #endif /* CONFIG_BOUNCE */ 661 #endif /* CONFIG_BOUNCE */
662 662
663 struct rq_map_data { 663 struct rq_map_data {
664 struct page **pages; 664 struct page **pages;
665 int page_order; 665 int page_order;
666 int nr_entries; 666 int nr_entries;
667 }; 667 };
668 668
669 struct req_iterator { 669 struct req_iterator {
670 int i; 670 int i;
671 struct bio *bio; 671 struct bio *bio;
672 }; 672 };
673 673
674 /* This should not be used directly - use rq_for_each_segment */ 674 /* This should not be used directly - use rq_for_each_segment */
675 #define __rq_for_each_bio(_bio, rq) \ 675 #define __rq_for_each_bio(_bio, rq) \
676 if ((rq->bio)) \ 676 if ((rq->bio)) \
677 for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) 677 for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next)
678 678
679 #define rq_for_each_segment(bvl, _rq, _iter) \ 679 #define rq_for_each_segment(bvl, _rq, _iter) \
680 __rq_for_each_bio(_iter.bio, _rq) \ 680 __rq_for_each_bio(_iter.bio, _rq) \
681 bio_for_each_segment(bvl, _iter.bio, _iter.i) 681 bio_for_each_segment(bvl, _iter.bio, _iter.i)
682 682
683 #define rq_iter_last(rq, _iter) \ 683 #define rq_iter_last(rq, _iter) \
684 (_iter.bio->bi_next == NULL && _iter.i == _iter.bio->bi_vcnt-1) 684 (_iter.bio->bi_next == NULL && _iter.i == _iter.bio->bi_vcnt-1)
685 685
686 extern int blk_register_queue(struct gendisk *disk); 686 extern int blk_register_queue(struct gendisk *disk);
687 extern void blk_unregister_queue(struct gendisk *disk); 687 extern void blk_unregister_queue(struct gendisk *disk);
688 extern void register_disk(struct gendisk *dev); 688 extern void register_disk(struct gendisk *dev);
689 extern void generic_make_request(struct bio *bio); 689 extern void generic_make_request(struct bio *bio);
690 extern void blk_rq_init(struct request_queue *q, struct request *rq); 690 extern void blk_rq_init(struct request_queue *q, struct request *rq);
691 extern void blk_put_request(struct request *); 691 extern void blk_put_request(struct request *);
692 extern void __blk_put_request(struct request_queue *, struct request *); 692 extern void __blk_put_request(struct request_queue *, struct request *);
693 extern struct request *blk_get_request(struct request_queue *, int, gfp_t); 693 extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
694 extern void blk_insert_request(struct request_queue *, struct request *, int, void *); 694 extern void blk_insert_request(struct request_queue *, struct request *, int, void *);
695 extern void blk_requeue_request(struct request_queue *, struct request *); 695 extern void blk_requeue_request(struct request_queue *, struct request *);
696 extern void blk_plug_device(struct request_queue *); 696 extern void blk_plug_device(struct request_queue *);
697 extern void blk_plug_device_unlocked(struct request_queue *); 697 extern void blk_plug_device_unlocked(struct request_queue *);
698 extern int blk_remove_plug(struct request_queue *); 698 extern int blk_remove_plug(struct request_queue *);
699 extern void blk_recount_segments(struct request_queue *, struct bio *); 699 extern void blk_recount_segments(struct request_queue *, struct bio *);
700 extern int scsi_cmd_ioctl(struct file *, struct request_queue *, 700 extern int scsi_cmd_ioctl(struct file *, struct request_queue *,
701 struct gendisk *, unsigned int, void __user *); 701 struct gendisk *, unsigned int, void __user *);
702 extern int sg_scsi_ioctl(struct file *, struct request_queue *, 702 extern int sg_scsi_ioctl(struct file *, struct request_queue *,
703 struct gendisk *, struct scsi_ioctl_command __user *); 703 struct gendisk *, struct scsi_ioctl_command __user *);
704 704
705 /* 705 /*
706 * Temporary export, until SCSI gets fixed up. 706 * Temporary export, until SCSI gets fixed up.
707 */ 707 */
708 extern int blk_rq_append_bio(struct request_queue *q, struct request *rq, 708 extern int blk_rq_append_bio(struct request_queue *q, struct request *rq,
709 struct bio *bio); 709 struct bio *bio);
710 710
711 /* 711 /*
712 * A queue has just exited congestion. Note this in the global counter of 712 * A queue has just exited congestion. Note this in the global counter of
713 * congested queues, and wake up anyone who was waiting for requests to be 713 * congested queues, and wake up anyone who was waiting for requests to be
714 * put back. 714 * put back.
715 */ 715 */
716 static inline void blk_clear_queue_congested(struct request_queue *q, int rw) 716 static inline void blk_clear_queue_congested(struct request_queue *q, int rw)
717 { 717 {
718 clear_bdi_congested(&q->backing_dev_info, rw); 718 clear_bdi_congested(&q->backing_dev_info, rw);
719 } 719 }
720 720
721 /* 721 /*
722 * A queue has just entered congestion. Flag that in the queue's VM-visible 722 * A queue has just entered congestion. Flag that in the queue's VM-visible
723 * state flags and increment the global counter of congested queues. 723 * state flags and increment the global counter of congested queues.
724 */ 724 */
725 static inline void blk_set_queue_congested(struct request_queue *q, int rw) 725 static inline void blk_set_queue_congested(struct request_queue *q, int rw)
726 { 726 {
727 set_bdi_congested(&q->backing_dev_info, rw); 727 set_bdi_congested(&q->backing_dev_info, rw);
728 } 728 }
729 729
730 extern void blk_start_queue(struct request_queue *q); 730 extern void blk_start_queue(struct request_queue *q);
731 extern void blk_stop_queue(struct request_queue *q); 731 extern void blk_stop_queue(struct request_queue *q);
732 extern void blk_sync_queue(struct request_queue *q); 732 extern void blk_sync_queue(struct request_queue *q);
733 extern void __blk_stop_queue(struct request_queue *q); 733 extern void __blk_stop_queue(struct request_queue *q);
734 extern void __blk_run_queue(struct request_queue *); 734 extern void __blk_run_queue(struct request_queue *);
735 extern void blk_run_queue(struct request_queue *); 735 extern void blk_run_queue(struct request_queue *);
736 extern void blk_start_queueing(struct request_queue *); 736 extern void blk_start_queueing(struct request_queue *);
737 extern int blk_rq_map_user(struct request_queue *, struct request *, 737 extern int blk_rq_map_user(struct request_queue *, struct request *,
738 struct rq_map_data *, void __user *, unsigned long, 738 struct rq_map_data *, void __user *, unsigned long,
739 gfp_t); 739 gfp_t);
740 extern int blk_rq_unmap_user(struct bio *); 740 extern int blk_rq_unmap_user(struct bio *);
741 extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); 741 extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t);
742 extern int blk_rq_map_user_iov(struct request_queue *, struct request *, 742 extern int blk_rq_map_user_iov(struct request_queue *, struct request *,
743 struct rq_map_data *, struct sg_iovec *, int, 743 struct rq_map_data *, struct sg_iovec *, int,
744 unsigned int, gfp_t); 744 unsigned int, gfp_t);
745 extern int blk_execute_rq(struct request_queue *, struct gendisk *, 745 extern int blk_execute_rq(struct request_queue *, struct gendisk *,
746 struct request *, int); 746 struct request *, int);
747 extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, 747 extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
748 struct request *, int, rq_end_io_fn *); 748 struct request *, int, rq_end_io_fn *);
749 extern void blk_unplug(struct request_queue *q); 749 extern void blk_unplug(struct request_queue *q);
750 750
751 static inline struct request_queue *bdev_get_queue(struct block_device *bdev) 751 static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
752 { 752 {
753 return bdev->bd_disk->queue; 753 return bdev->bd_disk->queue;
754 } 754 }
755 755
756 static inline void blk_run_backing_dev(struct backing_dev_info *bdi, 756 static inline void blk_run_backing_dev(struct backing_dev_info *bdi,
757 struct page *page) 757 struct page *page)
758 { 758 {
759 if (bdi && bdi->unplug_io_fn) 759 if (bdi && bdi->unplug_io_fn)
760 bdi->unplug_io_fn(bdi, page); 760 bdi->unplug_io_fn(bdi, page);
761 } 761 }
762 762
763 static inline void blk_run_address_space(struct address_space *mapping) 763 static inline void blk_run_address_space(struct address_space *mapping)
764 { 764 {
765 if (mapping) 765 if (mapping)
766 blk_run_backing_dev(mapping->backing_dev_info, NULL); 766 blk_run_backing_dev(mapping->backing_dev_info, NULL);
767 } 767 }
768 768
769 /* 769 /*
770 * blk_end_request() and friends. 770 * blk_end_request() and friends.
771 * __blk_end_request() and end_request() must be called with 771 * __blk_end_request() and end_request() must be called with
772 * the request queue spinlock acquired. 772 * the request queue spinlock acquired.
773 * 773 *
774 * Several drivers define their own end_request and call 774 * Several drivers define their own end_request and call
775 * blk_end_request() for parts of the original function. 775 * blk_end_request() for parts of the original function.
776 * This prevents code duplication in drivers. 776 * This prevents code duplication in drivers.
777 */ 777 */
778 extern int blk_end_request(struct request *rq, int error, 778 extern int blk_end_request(struct request *rq, int error,
779 unsigned int nr_bytes); 779 unsigned int nr_bytes);
780 extern int __blk_end_request(struct request *rq, int error, 780 extern int __blk_end_request(struct request *rq, int error,
781 unsigned int nr_bytes); 781 unsigned int nr_bytes);
782 extern int blk_end_bidi_request(struct request *rq, int error, 782 extern int blk_end_bidi_request(struct request *rq, int error,
783 unsigned int nr_bytes, unsigned int bidi_bytes); 783 unsigned int nr_bytes, unsigned int bidi_bytes);
784 extern void end_request(struct request *, int); 784 extern void end_request(struct request *, int);
785 extern void end_queued_request(struct request *, int); 785 extern void end_queued_request(struct request *, int);
786 extern void end_dequeued_request(struct request *, int); 786 extern void end_dequeued_request(struct request *, int);
787 extern int blk_end_request_callback(struct request *rq, int error, 787 extern int blk_end_request_callback(struct request *rq, int error,
788 unsigned int nr_bytes, 788 unsigned int nr_bytes,
789 int (drv_callback)(struct request *)); 789 int (drv_callback)(struct request *));
790 extern void blk_complete_request(struct request *); 790 extern void blk_complete_request(struct request *);
791 extern void __blk_complete_request(struct request *); 791 extern void __blk_complete_request(struct request *);
792 extern void blk_abort_request(struct request *); 792 extern void blk_abort_request(struct request *);
793 extern void blk_abort_queue(struct request_queue *); 793 extern void blk_abort_queue(struct request_queue *);
794 extern void blk_update_request(struct request *rq, int error,
795 unsigned int nr_bytes);
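
blk_update_request() is the new export in this patch: it completes only the data part of a request and leaves the struct request itself alive. A hedged sketch of the intended caller, a request stacking driver such as request-based dm updating the original request from the clone's bio completion; the bookkeeping structure and its field names are illustrative assumptions.

#include <linux/bio.h>
#include <linux/blkdev.h>

/* Illustrative per-clone bookkeeping; how it gets hooked up via
 * clone->bi_private is assumed. */
struct example_clone_info {
	struct bio	*orig_bio;	/* bio of the original request   */
	struct request	*orig_rq;	/* original (not cloned) request */
};

static void example_clone_bi_end_io(struct bio *clone, int error)
{
	struct example_clone_info *info = clone->bi_private;
	unsigned int nr_bytes = info->orig_bio->bi_size;

	bio_put(clone);

	/*
	 * Finish only the data part of the original request here, in the
	 * lower driver's completion context; the struct request itself is
	 * completed later (e.g. via blk_complete_request() from the
	 * clone's rq->end_io) to avoid the queue-lock deadlock described
	 * in the commit message.
	 */
	blk_update_request(info->orig_rq, error, nr_bytes);
}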
794 796
795 /* 797 /*
796 * blk_end_request() takes bytes instead of sectors as a complete size. 798 * blk_end_request() takes bytes instead of sectors as a complete size.
797 * blk_rq_bytes() returns bytes left to complete in the entire request. 799 * blk_rq_bytes() returns bytes left to complete in the entire request.
798 * blk_rq_cur_bytes() returns bytes left to complete in the current segment. 800 * blk_rq_cur_bytes() returns bytes left to complete in the current segment.
799 */ 801 */
800 extern unsigned int blk_rq_bytes(struct request *rq); 802 extern unsigned int blk_rq_bytes(struct request *rq);
801 extern unsigned int blk_rq_cur_bytes(struct request *rq); 803 extern unsigned int blk_rq_cur_bytes(struct request *rq);
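
As a hedged example of the byte-based interface, a simple driver might finish one segment per completion event as below, with the queue lock held (hence the __ variant); the surrounding driver context is assumed.

#include <linux/blkdev.h>

/* Illustrative only: called with q->queue_lock held. */
static void example_complete_cur_segment(struct request *rq, int error)
{
	/*
	 * blk_rq_cur_bytes() is the remaining size of the current segment;
	 * __blk_end_request() returns nonzero while the request still has
	 * bytes left to transfer.
	 */
	if (__blk_end_request(rq, error, blk_rq_cur_bytes(rq)))
		return;		/* more segments to go */

	/* the request and all of its bios are fully completed here */
}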
802 804
803 static inline void blkdev_dequeue_request(struct request *req) 805 static inline void blkdev_dequeue_request(struct request *req)
804 { 806 {
805 elv_dequeue_request(req->q, req); 807 elv_dequeue_request(req->q, req);
806 } 808 }
807 809
808 /* 810 /*
809 * Access functions for manipulating queue properties 811 * Access functions for manipulating queue properties
810 */ 812 */
811 extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, 813 extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn,
812 spinlock_t *lock, int node_id); 814 spinlock_t *lock, int node_id);
813 extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); 815 extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *);
814 extern void blk_cleanup_queue(struct request_queue *); 816 extern void blk_cleanup_queue(struct request_queue *);
815 extern void blk_queue_make_request(struct request_queue *, make_request_fn *); 817 extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
816 extern void blk_queue_bounce_limit(struct request_queue *, u64); 818 extern void blk_queue_bounce_limit(struct request_queue *, u64);
817 extern void blk_queue_max_sectors(struct request_queue *, unsigned int); 819 extern void blk_queue_max_sectors(struct request_queue *, unsigned int);
818 extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short); 820 extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short);
819 extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short); 821 extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short);
820 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); 822 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
821 extern void blk_queue_hardsect_size(struct request_queue *, unsigned short); 823 extern void blk_queue_hardsect_size(struct request_queue *, unsigned short);
822 extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); 824 extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b);
823 extern void blk_queue_dma_pad(struct request_queue *, unsigned int); 825 extern void blk_queue_dma_pad(struct request_queue *, unsigned int);
824 extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); 826 extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int);
825 extern int blk_queue_dma_drain(struct request_queue *q, 827 extern int blk_queue_dma_drain(struct request_queue *q,
826 dma_drain_needed_fn *dma_drain_needed, 828 dma_drain_needed_fn *dma_drain_needed,
827 void *buf, unsigned int size); 829 void *buf, unsigned int size);
828 extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); 830 extern void blk_queue_segment_boundary(struct request_queue *, unsigned long);
829 extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); 831 extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn);
830 extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); 832 extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *);
831 extern void blk_queue_dma_alignment(struct request_queue *, int); 833 extern void blk_queue_dma_alignment(struct request_queue *, int);
832 extern void blk_queue_update_dma_alignment(struct request_queue *, int); 834 extern void blk_queue_update_dma_alignment(struct request_queue *, int);
833 extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); 835 extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
834 extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *); 836 extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *);
835 extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); 837 extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
836 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); 838 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
837 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); 839 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
838 extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *); 840 extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *);
839 extern int blk_do_ordered(struct request_queue *, struct request **); 841 extern int blk_do_ordered(struct request_queue *, struct request **);
840 extern unsigned blk_ordered_cur_seq(struct request_queue *); 842 extern unsigned blk_ordered_cur_seq(struct request_queue *);
841 extern unsigned blk_ordered_req_seq(struct request *); 843 extern unsigned blk_ordered_req_seq(struct request *);
842 extern void blk_ordered_complete_seq(struct request_queue *, unsigned, int); 844 extern void blk_ordered_complete_seq(struct request_queue *, unsigned, int);
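
A hedged sketch of how a block driver typically wires these property setters together at probe time; the chosen limits, the lock, and the empty request_fn are illustrative assumptions using this header's own default macros.

#include <linux/blkdev.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);

static void example_request_fn(struct request_queue *q)
{
	/* drain q and issue requests to hardware here (assumed) */
}

static struct request_queue *example_init_queue(void)
{
	struct request_queue *q;

	q = blk_init_queue(example_request_fn, &example_lock);
	if (!q)
		return NULL;

	/* limits below are illustrative, taken from this header's defaults */
	blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
	blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
	blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
	blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);
	blk_queue_hardsect_size(q, 512);
	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);	/* no bouncing needed */

	return q;
}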
843 845
844 extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); 846 extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
845 extern void blk_dump_rq_flags(struct request *, char *); 847 extern void blk_dump_rq_flags(struct request *, char *);
846 extern void generic_unplug_device(struct request_queue *); 848 extern void generic_unplug_device(struct request_queue *);
847 extern void __generic_unplug_device(struct request_queue *); 849 extern void __generic_unplug_device(struct request_queue *);
848 extern long nr_blockdev_pages(void); 850 extern long nr_blockdev_pages(void);
849 851
850 int blk_get_queue(struct request_queue *); 852 int blk_get_queue(struct request_queue *);
851 struct request_queue *blk_alloc_queue(gfp_t); 853 struct request_queue *blk_alloc_queue(gfp_t);
852 struct request_queue *blk_alloc_queue_node(gfp_t, int); 854 struct request_queue *blk_alloc_queue_node(gfp_t, int);
853 extern void blk_put_queue(struct request_queue *); 855 extern void blk_put_queue(struct request_queue *);
854 856
855 /* 857 /*
856 * tag stuff 858 * tag stuff
857 */ 859 */
858 #define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED) 860 #define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED)
859 extern int blk_queue_start_tag(struct request_queue *, struct request *); 861 extern int blk_queue_start_tag(struct request_queue *, struct request *);
860 extern struct request *blk_queue_find_tag(struct request_queue *, int); 862 extern struct request *blk_queue_find_tag(struct request_queue *, int);
861 extern void blk_queue_end_tag(struct request_queue *, struct request *); 863 extern void blk_queue_end_tag(struct request_queue *, struct request *);
862 extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *); 864 extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *);
863 extern void blk_queue_free_tags(struct request_queue *); 865 extern void blk_queue_free_tags(struct request_queue *);
864 extern int blk_queue_resize_tags(struct request_queue *, int); 866 extern int blk_queue_resize_tags(struct request_queue *, int);
865 extern void blk_queue_invalidate_tags(struct request_queue *); 867 extern void blk_queue_invalidate_tags(struct request_queue *);
866 extern struct blk_queue_tag *blk_init_tags(int); 868 extern struct blk_queue_tag *blk_init_tags(int);
867 extern void blk_free_tags(struct blk_queue_tag *); 869 extern void blk_free_tags(struct blk_queue_tag *);
868 870
869 static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, 871 static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
870 int tag) 872 int tag)
871 { 873 {
872 if (unlikely(bqt == NULL || tag >= bqt->real_max_depth)) 874 if (unlikely(bqt == NULL || tag >= bqt->real_max_depth))
873 return NULL; 875 return NULL;
874 return bqt->tag_index[tag]; 876 return bqt->tag_index[tag];
875 } 877 }
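
A hedged sketch of the tagged-queueing flow built from the declarations above (after an earlier blk_queue_init_tags() call); the hardware dispatch helper is a stub assumption and error handling is trimmed.

#include <linux/blkdev.h>

static void example_hw_dispatch(struct request *rq)
{
	/* issue rq to the controller here (assumed) */
}

/* request_fn: called with the queue lock held */
static void example_tagged_request_fn(struct request_queue *q)
{
	struct request *rq;

	while ((rq = elv_next_request(q)) != NULL) {
		if (blk_queue_start_tag(q, rq))
			break;	/* no free tag; retry when one is released */
		/* blk_queue_start_tag() dequeued rq and assigned rq->tag */
		example_hw_dispatch(rq);
	}
}

/* completion path, also with the queue lock held */
static void example_tagged_complete(struct request_queue *q,
				    struct request *rq, int error)
{
	blk_queue_end_tag(q, rq);
	__blk_end_request(rq, error, blk_rq_bytes(rq));
}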
876 878
877 extern int blkdev_issue_flush(struct block_device *, sector_t *); 879 extern int blkdev_issue_flush(struct block_device *, sector_t *);
878 extern int blkdev_issue_discard(struct block_device *, 880 extern int blkdev_issue_discard(struct block_device *,
879 sector_t sector, sector_t nr_sects, gfp_t); 881 sector_t sector, sector_t nr_sects, gfp_t);
880 882
881 static inline int sb_issue_discard(struct super_block *sb, 883 static inline int sb_issue_discard(struct super_block *sb,
882 sector_t block, sector_t nr_blocks) 884 sector_t block, sector_t nr_blocks)
883 { 885 {
884 block <<= (sb->s_blocksize_bits - 9); 886 block <<= (sb->s_blocksize_bits - 9);
885 nr_blocks <<= (sb->s_blocksize_bits - 9); 887 nr_blocks <<= (sb->s_blocksize_bits - 9);
886 return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL); 888 return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL);
887 } 889 }
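
To make the shift above concrete: with 4 KiB filesystem blocks (s_blocksize_bits == 12), each block maps to 1 << (12 - 9) == 8 sectors of 512 bytes, so fs block 100 becomes sector 800 on sb->s_bdev. A minimal hedged usage sketch, with the caller's context assumed:

#include <linux/fs.h>
#include <linux/blkdev.h>

/* Illustrative: tell the device a freed extent is no longer needed. */
static int example_discard_extent(struct super_block *sb,
				  sector_t first_block, sector_t nr_blocks)
{
	/* sb_issue_discard() converts fs blocks to 512-byte sectors */
	return sb_issue_discard(sb, first_block, nr_blocks);
}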
888 890
889 /* 891 /*
890 * command filter functions 892 * command filter functions
891 */ 893 */
892 extern int blk_verify_command(struct blk_cmd_filter *filter, 894 extern int blk_verify_command(struct blk_cmd_filter *filter,
893 unsigned char *cmd, int has_write_perm); 895 unsigned char *cmd, int has_write_perm);
894 extern void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter); 896 extern void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter);
895 897
896 #define MAX_PHYS_SEGMENTS 128 898 #define MAX_PHYS_SEGMENTS 128
897 #define MAX_HW_SEGMENTS 128 899 #define MAX_HW_SEGMENTS 128
898 #define SAFE_MAX_SECTORS 255 900 #define SAFE_MAX_SECTORS 255
899 #define BLK_DEF_MAX_SECTORS 1024 901 #define BLK_DEF_MAX_SECTORS 1024
900 902
901 #define MAX_SEGMENT_SIZE 65536 903 #define MAX_SEGMENT_SIZE 65536
902 904
903 #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) 905 #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist)
904 906
905 static inline int queue_hardsect_size(struct request_queue *q) 907 static inline int queue_hardsect_size(struct request_queue *q)
906 { 908 {
907 int retval = 512; 909 int retval = 512;
908 910
909 if (q && q->hardsect_size) 911 if (q && q->hardsect_size)
910 retval = q->hardsect_size; 912 retval = q->hardsect_size;
911 913
912 return retval; 914 return retval;
913 } 915 }
914 916
915 static inline int bdev_hardsect_size(struct block_device *bdev) 917 static inline int bdev_hardsect_size(struct block_device *bdev)
916 { 918 {
917 return queue_hardsect_size(bdev_get_queue(bdev)); 919 return queue_hardsect_size(bdev_get_queue(bdev));
918 } 920 }
919 921
920 static inline int queue_dma_alignment(struct request_queue *q) 922 static inline int queue_dma_alignment(struct request_queue *q)
921 { 923 {
922 return q ? q->dma_alignment : 511; 924 return q ? q->dma_alignment : 511;
923 } 925 }
924 926
925 static inline int blk_rq_aligned(struct request_queue *q, void *addr, 927 static inline int blk_rq_aligned(struct request_queue *q, void *addr,
926 unsigned int len) 928 unsigned int len)
927 { 929 {
928 unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask; 930 unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask;
929 return !((unsigned long)addr & alignment) && !(len & alignment); 931 return !((unsigned long)addr & alignment) && !(len & alignment);
930 } 932 }
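
A worked example of the check above: with the default queue_dma_alignment() of 511 and a zero dma_pad_mask, the combined mask is 511, so both the buffer address and its length must be multiples of 512 for the request to be mapped without copying. Under those assumed defaults the test reduces to:

/* Equivalent of blk_rq_aligned() with the assumed default masks. */
static inline int example_is_512b_aligned(void *addr, unsigned int len)
{
	return !((unsigned long)addr & 511) && !(len & 511);
}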
931 933
932 /* assumes size > 256 */ 934 /* assumes size > 256 */
933 static inline unsigned int blksize_bits(unsigned int size) 935 static inline unsigned int blksize_bits(unsigned int size)
934 { 936 {
935 unsigned int bits = 8; 937 unsigned int bits = 8;
936 do { 938 do {
937 bits++; 939 bits++;
938 size >>= 1; 940 size >>= 1;
939 } while (size > 256); 941 } while (size > 256);
940 return bits; 942 return bits;
941 } 943 }
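
Tracing the loop above gives the expected log2 behaviour; a hedged self-check (not part of the header) makes the values explicit:

#include <linux/blkdev.h>
#include <linux/bug.h>

static void __maybe_unused example_blksize_bits_check(void)
{
	BUG_ON(blksize_bits(512)  != 9);	/* 512-byte blocks -> 2^9  */
	BUG_ON(blksize_bits(1024) != 10);	/* 1 KiB blocks    -> 2^10 */
	BUG_ON(blksize_bits(4096) != 12);	/* 4 KiB blocks    -> 2^12 */
}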
942 944
943 static inline unsigned int block_size(struct block_device *bdev) 945 static inline unsigned int block_size(struct block_device *bdev)
944 { 946 {
945 return bdev->bd_block_size; 947 return bdev->bd_block_size;
946 } 948 }
947 949
948 typedef struct {struct page *v;} Sector; 950 typedef struct {struct page *v;} Sector;
949 951
950 unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *); 952 unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *);
951 953
952 static inline void put_dev_sector(Sector p) 954 static inline void put_dev_sector(Sector p)
953 { 955 {
954 page_cache_release(p.v); 956 page_cache_release(p.v);
955 } 957 }
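
A hedged sketch of the usual pairing of read_dev_sector() and put_dev_sector(), as the partition parsers use it; the returned pointer is only valid until the Sector's page reference is dropped, and any real error handling is assumed away.

#include <linux/blkdev.h>
#include <linux/errno.h>

static int example_peek_sector0(struct block_device *bdev)
{
	Sector sect;
	unsigned char *data;

	data = read_dev_sector(bdev, 0, &sect);
	if (!data)
		return -EIO;

	/* ... inspect the 512 bytes at data[0..511] here ... */

	put_dev_sector(sect);	/* releases the page reference */
	return 0;
}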
956 958
957 struct work_struct; 959 struct work_struct;
958 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); 960 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
959 void kblockd_flush_work(struct work_struct *work); 961 void kblockd_flush_work(struct work_struct *work);
960 962
961 #define MODULE_ALIAS_BLOCKDEV(major,minor) \ 963 #define MODULE_ALIAS_BLOCKDEV(major,minor) \
962 MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) 964 MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
963 #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ 965 #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \
964 MODULE_ALIAS("block-major-" __stringify(major) "-*") 966 MODULE_ALIAS("block-major-" __stringify(major) "-*")
965 967
966 #if defined(CONFIG_BLK_DEV_INTEGRITY) 968 #if defined(CONFIG_BLK_DEV_INTEGRITY)
967 969
968 #define INTEGRITY_FLAG_READ 2 /* verify data integrity on read */ 970 #define INTEGRITY_FLAG_READ 2 /* verify data integrity on read */
969 #define INTEGRITY_FLAG_WRITE 4 /* generate data integrity on write */ 971 #define INTEGRITY_FLAG_WRITE 4 /* generate data integrity on write */
970 972
971 struct blk_integrity_exchg { 973 struct blk_integrity_exchg {
972 void *prot_buf; 974 void *prot_buf;
973 void *data_buf; 975 void *data_buf;
974 sector_t sector; 976 sector_t sector;
975 unsigned int data_size; 977 unsigned int data_size;
976 unsigned short sector_size; 978 unsigned short sector_size;
977 const char *disk_name; 979 const char *disk_name;
978 }; 980 };
979 981
980 typedef void (integrity_gen_fn) (struct blk_integrity_exchg *); 982 typedef void (integrity_gen_fn) (struct blk_integrity_exchg *);
981 typedef int (integrity_vrfy_fn) (struct blk_integrity_exchg *); 983 typedef int (integrity_vrfy_fn) (struct blk_integrity_exchg *);
982 typedef void (integrity_set_tag_fn) (void *, void *, unsigned int); 984 typedef void (integrity_set_tag_fn) (void *, void *, unsigned int);
983 typedef void (integrity_get_tag_fn) (void *, void *, unsigned int); 985 typedef void (integrity_get_tag_fn) (void *, void *, unsigned int);
984 986
985 struct blk_integrity { 987 struct blk_integrity {
986 integrity_gen_fn *generate_fn; 988 integrity_gen_fn *generate_fn;
987 integrity_vrfy_fn *verify_fn; 989 integrity_vrfy_fn *verify_fn;
988 integrity_set_tag_fn *set_tag_fn; 990 integrity_set_tag_fn *set_tag_fn;
989 integrity_get_tag_fn *get_tag_fn; 991 integrity_get_tag_fn *get_tag_fn;
990 992
991 unsigned short flags; 993 unsigned short flags;
992 unsigned short tuple_size; 994 unsigned short tuple_size;
993 unsigned short sector_size; 995 unsigned short sector_size;
994 unsigned short tag_size; 996 unsigned short tag_size;
995 997
996 const char *name; 998 const char *name;
997 999
998 struct kobject kobj; 1000 struct kobject kobj;
999 }; 1001 };
1000 1002
1001 extern int blk_integrity_register(struct gendisk *, struct blk_integrity *); 1003 extern int blk_integrity_register(struct gendisk *, struct blk_integrity *);
1002 extern void blk_integrity_unregister(struct gendisk *); 1004 extern void blk_integrity_unregister(struct gendisk *);
1003 extern int blk_integrity_compare(struct block_device *, struct block_device *); 1005 extern int blk_integrity_compare(struct block_device *, struct block_device *);
1004 extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); 1006 extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *);
1005 extern int blk_rq_count_integrity_sg(struct request *); 1007 extern int blk_rq_count_integrity_sg(struct request *);
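
A hedged sketch of registering an integrity profile with the struct and register call above (CONFIG_BLK_DEV_INTEGRITY enabled); the checksum callbacks, profile name, and tuple size are illustrative assumptions, not a real DIF/DIX format.

#include <linux/blkdev.h>
#include <linux/genhd.h>

static void example_generate_fn(struct blk_integrity_exchg *bix)
{
	/* fill bix->prot_buf with protection data for bix->data_buf */
}

static int example_verify_fn(struct blk_integrity_exchg *bix)
{
	/* compare bix->prot_buf against bix->data_buf; 0 means OK */
	return 0;
}

static struct blk_integrity example_integrity = {
	.name		= "example-csum",	/* assumed profile name       */
	.generate_fn	= example_generate_fn,
	.verify_fn	= example_verify_fn,
	.tuple_size	= 8,			/* assumed bytes per sector   */
};

static int example_register_integrity(struct gendisk *disk)
{
	return blk_integrity_register(disk, &example_integrity);
}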
1006 1008
1007 static inline int blk_integrity_rq(struct request *rq) 1009 static inline int blk_integrity_rq(struct request *rq)
1008 { 1010 {
1009 if (rq->bio == NULL) 1011 if (rq->bio == NULL)
1010 return 0; 1012 return 0;
1011 1013
1012 return bio_integrity(rq->bio); 1014 return bio_integrity(rq->bio);
1013 } 1015 }
1014 1016
1015 #else /* CONFIG_BLK_DEV_INTEGRITY */ 1017 #else /* CONFIG_BLK_DEV_INTEGRITY */
1016 1018
1017 #define blk_integrity_rq(rq) (0) 1019 #define blk_integrity_rq(rq) (0)
1018 #define blk_rq_count_integrity_sg(a) (0) 1020 #define blk_rq_count_integrity_sg(a) (0)
1019 #define blk_rq_map_integrity_sg(a, b) (0) 1021 #define blk_rq_map_integrity_sg(a, b) (0)
1020 #define blk_integrity_compare(a, b) (0) 1022 #define blk_integrity_compare(a, b) (0)
1021 #define blk_integrity_register(a, b) (0) 1023 #define blk_integrity_register(a, b) (0)
1022 #define blk_integrity_unregister(a) do { } while (0); 1024 #define blk_integrity_unregister(a) do { } while (0);
1023 1025
1024 #endif /* CONFIG_BLK_DEV_INTEGRITY */ 1026 #endif /* CONFIG_BLK_DEV_INTEGRITY */
1025 1027
1026 #else /* CONFIG_BLOCK */ 1028 #else /* CONFIG_BLOCK */
1027 /* 1029 /*
1028 * stubs for when the block layer is configured out 1030 * stubs for when the block layer is configured out
1029 */ 1031 */
1030 #define buffer_heads_over_limit 0 1032 #define buffer_heads_over_limit 0
1031 1033
1032 static inline long nr_blockdev_pages(void) 1034 static inline long nr_blockdev_pages(void)
1033 { 1035 {
1034 return 0; 1036 return 0;
1035 } 1037 }
1036 1038
1037 #endif /* CONFIG_BLOCK */ 1039 #endif /* CONFIG_BLOCK */
1038 1040
1039 #endif 1041 #endif
1040 1042