Commit a0cd128542cd9c67f27458a08e989db486a293ce

Authored by Jens Axboe
1 parent 992c5ddaf1

block: add end_queued_request() and end_dequeued_request() helpers

We can use this helper in the elevator core for BLKPREP_KILL, and it'll
also be useful for the empty barrier patch.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

Showing 3 changed files with 77 additions and 14 deletions
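The helpers named in the title are added in ll_rw_blk.c, whose hunk falls outside the excerpt below. As a rough sketch of what they boil down to, here is the open-coded completion sequence this commit removes from elv_next_request(), wrapped up under the two new names; the bodies are reconstructed from that removed code, not copied from the actual ll_rw_blk.c change, and end_dequeued_request() is assumed to simply skip the dequeue step.

/*
 * Sketch only: reconstructed from the sequence removed from
 * elv_next_request() further down; not the actual ll_rw_blk.c hunk.
 */
void end_queued_request(struct request *rq, int uptodate)
{
	unsigned int nr_bytes = rq->hard_nr_sectors << 9;

	if (!nr_bytes)
		nr_bytes = rq->data_len;

	/* the request is still queued: take it off, then complete all of it */
	blkdev_dequeue_request(rq);
	end_that_request_chunk(rq, uptodate, nr_bytes);
	end_that_request_last(rq, uptodate);
}

void end_dequeued_request(struct request *rq, int uptodate)
{
	unsigned int nr_bytes = rq->hard_nr_sectors << 9;

	if (!nr_bytes)
		nr_bytes = rq->data_len;

	/* caller already dequeued the request; only the completion half remains */
	end_that_request_chunk(rq, uptodate, nr_bytes);
	end_that_request_last(rq, uptodate);
}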

1 /* 1 /*
2 * Block device elevator/IO-scheduler. 2 * Block device elevator/IO-scheduler.
3 * 3 *
4 * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE 4 * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
5 * 5 *
6 * 30042000 Jens Axboe <axboe@kernel.dk> : 6 * 30042000 Jens Axboe <axboe@kernel.dk> :
7 * 7 *
8 * Split the elevator a bit so that it is possible to choose a different 8 * Split the elevator a bit so that it is possible to choose a different
9 * one or even write a new "plug in". There are three pieces: 9 * one or even write a new "plug in". There are three pieces:
10 * - elevator_fn, inserts a new request in the queue list 10 * - elevator_fn, inserts a new request in the queue list
11 * - elevator_merge_fn, decides whether a new buffer can be merged with 11 * - elevator_merge_fn, decides whether a new buffer can be merged with
12 * an existing request 12 * an existing request
13 * - elevator_dequeue_fn, called when a request is taken off the active list 13 * - elevator_dequeue_fn, called when a request is taken off the active list
14 * 14 *
15 * 20082000 Dave Jones <davej@suse.de> : 15 * 20082000 Dave Jones <davej@suse.de> :
16 * Removed tests for max-bomb-segments, which was breaking elvtune 16 * Removed tests for max-bomb-segments, which was breaking elvtune
17 * when run without -bN 17 * when run without -bN
18 * 18 *
19 * Jens: 19 * Jens:
20 * - Rework again to work with bio instead of buffer_heads 20 * - Rework again to work with bio instead of buffer_heads
21 * - loose bi_dev comparisons, partition handling is right now 21 * - loose bi_dev comparisons, partition handling is right now
22 * - completely modularize elevator setup and teardown 22 * - completely modularize elevator setup and teardown
23 * 23 *
24 */ 24 */
25 #include <linux/kernel.h> 25 #include <linux/kernel.h>
26 #include <linux/fs.h> 26 #include <linux/fs.h>
27 #include <linux/blkdev.h> 27 #include <linux/blkdev.h>
28 #include <linux/elevator.h> 28 #include <linux/elevator.h>
29 #include <linux/bio.h> 29 #include <linux/bio.h>
30 #include <linux/module.h> 30 #include <linux/module.h>
31 #include <linux/slab.h> 31 #include <linux/slab.h>
32 #include <linux/init.h> 32 #include <linux/init.h>
33 #include <linux/compiler.h> 33 #include <linux/compiler.h>
34 #include <linux/delay.h> 34 #include <linux/delay.h>
35 #include <linux/blktrace_api.h> 35 #include <linux/blktrace_api.h>
36 #include <linux/hash.h> 36 #include <linux/hash.h>
37 37
38 #include <asm/uaccess.h> 38 #include <asm/uaccess.h>
39 39
40 static DEFINE_SPINLOCK(elv_list_lock); 40 static DEFINE_SPINLOCK(elv_list_lock);
41 static LIST_HEAD(elv_list); 41 static LIST_HEAD(elv_list);
42 42
43 /* 43 /*
44 * Merge hash stuff. 44 * Merge hash stuff.
45 */ 45 */
46 static const int elv_hash_shift = 6; 46 static const int elv_hash_shift = 6;
47 #define ELV_HASH_BLOCK(sec) ((sec) >> 3) 47 #define ELV_HASH_BLOCK(sec) ((sec) >> 3)
48 #define ELV_HASH_FN(sec) (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift)) 48 #define ELV_HASH_FN(sec) (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift))
49 #define ELV_HASH_ENTRIES (1 << elv_hash_shift) 49 #define ELV_HASH_ENTRIES (1 << elv_hash_shift)
50 #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) 50 #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
51 #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) 51 #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash))
52 52
53 /* 53 /*
54 * Query io scheduler to see if the current process issuing bio may be 54 * Query io scheduler to see if the current process issuing bio may be
55 * merged with rq. 55 * merged with rq.
56 */ 56 */
57 static int elv_iosched_allow_merge(struct request *rq, struct bio *bio) 57 static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
58 { 58 {
59 struct request_queue *q = rq->q; 59 struct request_queue *q = rq->q;
60 elevator_t *e = q->elevator; 60 elevator_t *e = q->elevator;
61 61
62 if (e->ops->elevator_allow_merge_fn) 62 if (e->ops->elevator_allow_merge_fn)
63 return e->ops->elevator_allow_merge_fn(q, rq, bio); 63 return e->ops->elevator_allow_merge_fn(q, rq, bio);
64 64
65 return 1; 65 return 1;
66 } 66 }
67 67
68 /* 68 /*
69 * can we safely merge with this request? 69 * can we safely merge with this request?
70 */ 70 */
71 inline int elv_rq_merge_ok(struct request *rq, struct bio *bio) 71 inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
72 { 72 {
73 if (!rq_mergeable(rq)) 73 if (!rq_mergeable(rq))
74 return 0; 74 return 0;
75 75
76 /* 76 /*
77 * different data direction or already started, don't merge 77 * different data direction or already started, don't merge
78 */ 78 */
79 if (bio_data_dir(bio) != rq_data_dir(rq)) 79 if (bio_data_dir(bio) != rq_data_dir(rq))
80 return 0; 80 return 0;
81 81
82 /* 82 /*
83 * must be same device and not a special request 83 * must be same device and not a special request
84 */ 84 */
85 if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special) 85 if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special)
86 return 0; 86 return 0;
87 87
88 if (!elv_iosched_allow_merge(rq, bio)) 88 if (!elv_iosched_allow_merge(rq, bio))
89 return 0; 89 return 0;
90 90
91 return 1; 91 return 1;
92 } 92 }
93 EXPORT_SYMBOL(elv_rq_merge_ok); 93 EXPORT_SYMBOL(elv_rq_merge_ok);
94 94
95 static inline int elv_try_merge(struct request *__rq, struct bio *bio) 95 static inline int elv_try_merge(struct request *__rq, struct bio *bio)
96 { 96 {
97 int ret = ELEVATOR_NO_MERGE; 97 int ret = ELEVATOR_NO_MERGE;
98 98
99 /* 99 /*
100 * we can merge and sequence is ok, check if it's possible 100 * we can merge and sequence is ok, check if it's possible
101 */ 101 */
102 if (elv_rq_merge_ok(__rq, bio)) { 102 if (elv_rq_merge_ok(__rq, bio)) {
103 if (__rq->sector + __rq->nr_sectors == bio->bi_sector) 103 if (__rq->sector + __rq->nr_sectors == bio->bi_sector)
104 ret = ELEVATOR_BACK_MERGE; 104 ret = ELEVATOR_BACK_MERGE;
105 else if (__rq->sector - bio_sectors(bio) == bio->bi_sector) 105 else if (__rq->sector - bio_sectors(bio) == bio->bi_sector)
106 ret = ELEVATOR_FRONT_MERGE; 106 ret = ELEVATOR_FRONT_MERGE;
107 } 107 }
108 108
109 return ret; 109 return ret;
110 } 110 }
111 111
112 static struct elevator_type *elevator_find(const char *name) 112 static struct elevator_type *elevator_find(const char *name)
113 { 113 {
114 struct elevator_type *e; 114 struct elevator_type *e;
115 115
116 list_for_each_entry(e, &elv_list, list) { 116 list_for_each_entry(e, &elv_list, list) {
117 if (!strcmp(e->elevator_name, name)) 117 if (!strcmp(e->elevator_name, name))
118 return e; 118 return e;
119 } 119 }
120 120
121 return NULL; 121 return NULL;
122 } 122 }
123 123
124 static void elevator_put(struct elevator_type *e) 124 static void elevator_put(struct elevator_type *e)
125 { 125 {
126 module_put(e->elevator_owner); 126 module_put(e->elevator_owner);
127 } 127 }
128 128
129 static struct elevator_type *elevator_get(const char *name) 129 static struct elevator_type *elevator_get(const char *name)
130 { 130 {
131 struct elevator_type *e; 131 struct elevator_type *e;
132 132
133 spin_lock(&elv_list_lock); 133 spin_lock(&elv_list_lock);
134 134
135 e = elevator_find(name); 135 e = elevator_find(name);
136 if (e && !try_module_get(e->elevator_owner)) 136 if (e && !try_module_get(e->elevator_owner))
137 e = NULL; 137 e = NULL;
138 138
139 spin_unlock(&elv_list_lock); 139 spin_unlock(&elv_list_lock);
140 140
141 return e; 141 return e;
142 } 142 }
143 143
144 static void *elevator_init_queue(struct request_queue *q, 144 static void *elevator_init_queue(struct request_queue *q,
145 struct elevator_queue *eq) 145 struct elevator_queue *eq)
146 { 146 {
147 return eq->ops->elevator_init_fn(q); 147 return eq->ops->elevator_init_fn(q);
148 } 148 }
149 149
150 static void elevator_attach(struct request_queue *q, struct elevator_queue *eq, 150 static void elevator_attach(struct request_queue *q, struct elevator_queue *eq,
151 void *data) 151 void *data)
152 { 152 {
153 q->elevator = eq; 153 q->elevator = eq;
154 eq->elevator_data = data; 154 eq->elevator_data = data;
155 } 155 }
156 156
157 static char chosen_elevator[16]; 157 static char chosen_elevator[16];
158 158
159 static int __init elevator_setup(char *str) 159 static int __init elevator_setup(char *str)
160 { 160 {
161 /* 161 /*
162 * Be backwards-compatible with previous kernels, so users 162 * Be backwards-compatible with previous kernels, so users
163 * won't get the wrong elevator. 163 * won't get the wrong elevator.
164 */ 164 */
165 if (!strcmp(str, "as")) 165 if (!strcmp(str, "as"))
166 strcpy(chosen_elevator, "anticipatory"); 166 strcpy(chosen_elevator, "anticipatory");
167 else 167 else
168 strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1); 168 strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
169 return 1; 169 return 1;
170 } 170 }
171 171
172 __setup("elevator=", elevator_setup); 172 __setup("elevator=", elevator_setup);
173 173
174 static struct kobj_type elv_ktype; 174 static struct kobj_type elv_ktype;
175 175
176 static elevator_t *elevator_alloc(struct request_queue *q, 176 static elevator_t *elevator_alloc(struct request_queue *q,
177 struct elevator_type *e) 177 struct elevator_type *e)
178 { 178 {
179 elevator_t *eq; 179 elevator_t *eq;
180 int i; 180 int i;
181 181
182 eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL | __GFP_ZERO, q->node); 182 eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL | __GFP_ZERO, q->node);
183 if (unlikely(!eq)) 183 if (unlikely(!eq))
184 goto err; 184 goto err;
185 185
186 eq->ops = &e->ops; 186 eq->ops = &e->ops;
187 eq->elevator_type = e; 187 eq->elevator_type = e;
188 kobject_init(&eq->kobj); 188 kobject_init(&eq->kobj);
189 kobject_set_name(&eq->kobj, "%s", "iosched"); 189 kobject_set_name(&eq->kobj, "%s", "iosched");
190 eq->kobj.ktype = &elv_ktype; 190 eq->kobj.ktype = &elv_ktype;
191 mutex_init(&eq->sysfs_lock); 191 mutex_init(&eq->sysfs_lock);
192 192
193 eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES, 193 eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES,
194 GFP_KERNEL, q->node); 194 GFP_KERNEL, q->node);
195 if (!eq->hash) 195 if (!eq->hash)
196 goto err; 196 goto err;
197 197
198 for (i = 0; i < ELV_HASH_ENTRIES; i++) 198 for (i = 0; i < ELV_HASH_ENTRIES; i++)
199 INIT_HLIST_HEAD(&eq->hash[i]); 199 INIT_HLIST_HEAD(&eq->hash[i]);
200 200
201 return eq; 201 return eq;
202 err: 202 err:
203 kfree(eq); 203 kfree(eq);
204 elevator_put(e); 204 elevator_put(e);
205 return NULL; 205 return NULL;
206 } 206 }
207 207
208 static void elevator_release(struct kobject *kobj) 208 static void elevator_release(struct kobject *kobj)
209 { 209 {
210 elevator_t *e = container_of(kobj, elevator_t, kobj); 210 elevator_t *e = container_of(kobj, elevator_t, kobj);
211 211
212 elevator_put(e->elevator_type); 212 elevator_put(e->elevator_type);
213 kfree(e->hash); 213 kfree(e->hash);
214 kfree(e); 214 kfree(e);
215 } 215 }
216 216
217 int elevator_init(struct request_queue *q, char *name) 217 int elevator_init(struct request_queue *q, char *name)
218 { 218 {
219 struct elevator_type *e = NULL; 219 struct elevator_type *e = NULL;
220 struct elevator_queue *eq; 220 struct elevator_queue *eq;
221 int ret = 0; 221 int ret = 0;
222 void *data; 222 void *data;
223 223
224 INIT_LIST_HEAD(&q->queue_head); 224 INIT_LIST_HEAD(&q->queue_head);
225 q->last_merge = NULL; 225 q->last_merge = NULL;
226 q->end_sector = 0; 226 q->end_sector = 0;
227 q->boundary_rq = NULL; 227 q->boundary_rq = NULL;
228 228
229 if (name && !(e = elevator_get(name))) 229 if (name && !(e = elevator_get(name)))
230 return -EINVAL; 230 return -EINVAL;
231 231
232 if (!e && *chosen_elevator && !(e = elevator_get(chosen_elevator))) 232 if (!e && *chosen_elevator && !(e = elevator_get(chosen_elevator)))
233 printk("I/O scheduler %s not found\n", chosen_elevator); 233 printk("I/O scheduler %s not found\n", chosen_elevator);
234 234
235 if (!e && !(e = elevator_get(CONFIG_DEFAULT_IOSCHED))) { 235 if (!e && !(e = elevator_get(CONFIG_DEFAULT_IOSCHED))) {
236 printk("Default I/O scheduler not found, using no-op\n"); 236 printk("Default I/O scheduler not found, using no-op\n");
237 e = elevator_get("noop"); 237 e = elevator_get("noop");
238 } 238 }
239 239
240 eq = elevator_alloc(q, e); 240 eq = elevator_alloc(q, e);
241 if (!eq) 241 if (!eq)
242 return -ENOMEM; 242 return -ENOMEM;
243 243
244 data = elevator_init_queue(q, eq); 244 data = elevator_init_queue(q, eq);
245 if (!data) { 245 if (!data) {
246 kobject_put(&eq->kobj); 246 kobject_put(&eq->kobj);
247 return -ENOMEM; 247 return -ENOMEM;
248 } 248 }
249 249
250 elevator_attach(q, eq, data); 250 elevator_attach(q, eq, data);
251 return ret; 251 return ret;
252 } 252 }
253 253
254 EXPORT_SYMBOL(elevator_init); 254 EXPORT_SYMBOL(elevator_init);
255 255
256 void elevator_exit(elevator_t *e) 256 void elevator_exit(elevator_t *e)
257 { 257 {
258 mutex_lock(&e->sysfs_lock); 258 mutex_lock(&e->sysfs_lock);
259 if (e->ops->elevator_exit_fn) 259 if (e->ops->elevator_exit_fn)
260 e->ops->elevator_exit_fn(e); 260 e->ops->elevator_exit_fn(e);
261 e->ops = NULL; 261 e->ops = NULL;
262 mutex_unlock(&e->sysfs_lock); 262 mutex_unlock(&e->sysfs_lock);
263 263
264 kobject_put(&e->kobj); 264 kobject_put(&e->kobj);
265 } 265 }
266 266
267 EXPORT_SYMBOL(elevator_exit); 267 EXPORT_SYMBOL(elevator_exit);
268 268
269 static void elv_activate_rq(struct request_queue *q, struct request *rq) 269 static void elv_activate_rq(struct request_queue *q, struct request *rq)
270 { 270 {
271 elevator_t *e = q->elevator; 271 elevator_t *e = q->elevator;
272 272
273 if (e->ops->elevator_activate_req_fn) 273 if (e->ops->elevator_activate_req_fn)
274 e->ops->elevator_activate_req_fn(q, rq); 274 e->ops->elevator_activate_req_fn(q, rq);
275 } 275 }
276 276
277 static void elv_deactivate_rq(struct request_queue *q, struct request *rq) 277 static void elv_deactivate_rq(struct request_queue *q, struct request *rq)
278 { 278 {
279 elevator_t *e = q->elevator; 279 elevator_t *e = q->elevator;
280 280
281 if (e->ops->elevator_deactivate_req_fn) 281 if (e->ops->elevator_deactivate_req_fn)
282 e->ops->elevator_deactivate_req_fn(q, rq); 282 e->ops->elevator_deactivate_req_fn(q, rq);
283 } 283 }
284 284
285 static inline void __elv_rqhash_del(struct request *rq) 285 static inline void __elv_rqhash_del(struct request *rq)
286 { 286 {
287 hlist_del_init(&rq->hash); 287 hlist_del_init(&rq->hash);
288 } 288 }
289 289
290 static void elv_rqhash_del(struct request_queue *q, struct request *rq) 290 static void elv_rqhash_del(struct request_queue *q, struct request *rq)
291 { 291 {
292 if (ELV_ON_HASH(rq)) 292 if (ELV_ON_HASH(rq))
293 __elv_rqhash_del(rq); 293 __elv_rqhash_del(rq);
294 } 294 }
295 295
296 static void elv_rqhash_add(struct request_queue *q, struct request *rq) 296 static void elv_rqhash_add(struct request_queue *q, struct request *rq)
297 { 297 {
298 elevator_t *e = q->elevator; 298 elevator_t *e = q->elevator;
299 299
300 BUG_ON(ELV_ON_HASH(rq)); 300 BUG_ON(ELV_ON_HASH(rq));
301 hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]); 301 hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]);
302 } 302 }
303 303
304 static void elv_rqhash_reposition(struct request_queue *q, struct request *rq) 304 static void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
305 { 305 {
306 __elv_rqhash_del(rq); 306 __elv_rqhash_del(rq);
307 elv_rqhash_add(q, rq); 307 elv_rqhash_add(q, rq);
308 } 308 }
309 309
310 static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset) 310 static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
311 { 311 {
312 elevator_t *e = q->elevator; 312 elevator_t *e = q->elevator;
313 struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)]; 313 struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)];
314 struct hlist_node *entry, *next; 314 struct hlist_node *entry, *next;
315 struct request *rq; 315 struct request *rq;
316 316
317 hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) { 317 hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) {
318 BUG_ON(!ELV_ON_HASH(rq)); 318 BUG_ON(!ELV_ON_HASH(rq));
319 319
320 if (unlikely(!rq_mergeable(rq))) { 320 if (unlikely(!rq_mergeable(rq))) {
321 __elv_rqhash_del(rq); 321 __elv_rqhash_del(rq);
322 continue; 322 continue;
323 } 323 }
324 324
325 if (rq_hash_key(rq) == offset) 325 if (rq_hash_key(rq) == offset)
326 return rq; 326 return rq;
327 } 327 }
328 328
329 return NULL; 329 return NULL;
330 } 330 }
331 331
332 /* 332 /*
333 * RB-tree support functions for inserting/lookup/removal of requests 333 * RB-tree support functions for inserting/lookup/removal of requests
334 * in a sorted RB tree. 334 * in a sorted RB tree.
335 */ 335 */
336 struct request *elv_rb_add(struct rb_root *root, struct request *rq) 336 struct request *elv_rb_add(struct rb_root *root, struct request *rq)
337 { 337 {
338 struct rb_node **p = &root->rb_node; 338 struct rb_node **p = &root->rb_node;
339 struct rb_node *parent = NULL; 339 struct rb_node *parent = NULL;
340 struct request *__rq; 340 struct request *__rq;
341 341
342 while (*p) { 342 while (*p) {
343 parent = *p; 343 parent = *p;
344 __rq = rb_entry(parent, struct request, rb_node); 344 __rq = rb_entry(parent, struct request, rb_node);
345 345
346 if (rq->sector < __rq->sector) 346 if (rq->sector < __rq->sector)
347 p = &(*p)->rb_left; 347 p = &(*p)->rb_left;
348 else if (rq->sector > __rq->sector) 348 else if (rq->sector > __rq->sector)
349 p = &(*p)->rb_right; 349 p = &(*p)->rb_right;
350 else 350 else
351 return __rq; 351 return __rq;
352 } 352 }
353 353
354 rb_link_node(&rq->rb_node, parent, p); 354 rb_link_node(&rq->rb_node, parent, p);
355 rb_insert_color(&rq->rb_node, root); 355 rb_insert_color(&rq->rb_node, root);
356 return NULL; 356 return NULL;
357 } 357 }
358 358
359 EXPORT_SYMBOL(elv_rb_add); 359 EXPORT_SYMBOL(elv_rb_add);
360 360
361 void elv_rb_del(struct rb_root *root, struct request *rq) 361 void elv_rb_del(struct rb_root *root, struct request *rq)
362 { 362 {
363 BUG_ON(RB_EMPTY_NODE(&rq->rb_node)); 363 BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
364 rb_erase(&rq->rb_node, root); 364 rb_erase(&rq->rb_node, root);
365 RB_CLEAR_NODE(&rq->rb_node); 365 RB_CLEAR_NODE(&rq->rb_node);
366 } 366 }
367 367
368 EXPORT_SYMBOL(elv_rb_del); 368 EXPORT_SYMBOL(elv_rb_del);
369 369
370 struct request *elv_rb_find(struct rb_root *root, sector_t sector) 370 struct request *elv_rb_find(struct rb_root *root, sector_t sector)
371 { 371 {
372 struct rb_node *n = root->rb_node; 372 struct rb_node *n = root->rb_node;
373 struct request *rq; 373 struct request *rq;
374 374
375 while (n) { 375 while (n) {
376 rq = rb_entry(n, struct request, rb_node); 376 rq = rb_entry(n, struct request, rb_node);
377 377
378 if (sector < rq->sector) 378 if (sector < rq->sector)
379 n = n->rb_left; 379 n = n->rb_left;
380 else if (sector > rq->sector) 380 else if (sector > rq->sector)
381 n = n->rb_right; 381 n = n->rb_right;
382 else 382 else
383 return rq; 383 return rq;
384 } 384 }
385 385
386 return NULL; 386 return NULL;
387 } 387 }
388 388
389 EXPORT_SYMBOL(elv_rb_find); 389 EXPORT_SYMBOL(elv_rb_find);
390 390
391 /* 391 /*
392 * Insert rq into dispatch queue of q. Queue lock must be held on 392 * Insert rq into dispatch queue of q. Queue lock must be held on
393 * entry. rq is sort insted into the dispatch queue. To be used by 393 * entry. rq is sort insted into the dispatch queue. To be used by
394 * specific elevators. 394 * specific elevators.
395 */ 395 */
396 void elv_dispatch_sort(struct request_queue *q, struct request *rq) 396 void elv_dispatch_sort(struct request_queue *q, struct request *rq)
397 { 397 {
398 sector_t boundary; 398 sector_t boundary;
399 struct list_head *entry; 399 struct list_head *entry;
400 400
401 if (q->last_merge == rq) 401 if (q->last_merge == rq)
402 q->last_merge = NULL; 402 q->last_merge = NULL;
403 403
404 elv_rqhash_del(q, rq); 404 elv_rqhash_del(q, rq);
405 405
406 q->nr_sorted--; 406 q->nr_sorted--;
407 407
408 boundary = q->end_sector; 408 boundary = q->end_sector;
409 409
410 list_for_each_prev(entry, &q->queue_head) { 410 list_for_each_prev(entry, &q->queue_head) {
411 struct request *pos = list_entry_rq(entry); 411 struct request *pos = list_entry_rq(entry);
412 412
413 if (rq_data_dir(rq) != rq_data_dir(pos)) 413 if (rq_data_dir(rq) != rq_data_dir(pos))
414 break; 414 break;
415 if (pos->cmd_flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED)) 415 if (pos->cmd_flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
416 break; 416 break;
417 if (rq->sector >= boundary) { 417 if (rq->sector >= boundary) {
418 if (pos->sector < boundary) 418 if (pos->sector < boundary)
419 continue; 419 continue;
420 } else { 420 } else {
421 if (pos->sector >= boundary) 421 if (pos->sector >= boundary)
422 break; 422 break;
423 } 423 }
424 if (rq->sector >= pos->sector) 424 if (rq->sector >= pos->sector)
425 break; 425 break;
426 } 426 }
427 427
428 list_add(&rq->queuelist, entry); 428 list_add(&rq->queuelist, entry);
429 } 429 }
430 430
431 EXPORT_SYMBOL(elv_dispatch_sort); 431 EXPORT_SYMBOL(elv_dispatch_sort);
432 432
433 /* 433 /*
434 * Insert rq into dispatch queue of q. Queue lock must be held on 434 * Insert rq into dispatch queue of q. Queue lock must be held on
435 * entry. rq is added to the back of the dispatch queue. To be used by 435 * entry. rq is added to the back of the dispatch queue. To be used by
436 * specific elevators. 436 * specific elevators.
437 */ 437 */
438 void elv_dispatch_add_tail(struct request_queue *q, struct request *rq) 438 void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
439 { 439 {
440 if (q->last_merge == rq) 440 if (q->last_merge == rq)
441 q->last_merge = NULL; 441 q->last_merge = NULL;
442 442
443 elv_rqhash_del(q, rq); 443 elv_rqhash_del(q, rq);
444 444
445 q->nr_sorted--; 445 q->nr_sorted--;
446 446
447 q->end_sector = rq_end_sector(rq); 447 q->end_sector = rq_end_sector(rq);
448 q->boundary_rq = rq; 448 q->boundary_rq = rq;
449 list_add_tail(&rq->queuelist, &q->queue_head); 449 list_add_tail(&rq->queuelist, &q->queue_head);
450 } 450 }
451 451
452 EXPORT_SYMBOL(elv_dispatch_add_tail); 452 EXPORT_SYMBOL(elv_dispatch_add_tail);
453 453
454 int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) 454 int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
455 { 455 {
456 elevator_t *e = q->elevator; 456 elevator_t *e = q->elevator;
457 struct request *__rq; 457 struct request *__rq;
458 int ret; 458 int ret;
459 459
460 /* 460 /*
461 * First try one-hit cache. 461 * First try one-hit cache.
462 */ 462 */
463 if (q->last_merge) { 463 if (q->last_merge) {
464 ret = elv_try_merge(q->last_merge, bio); 464 ret = elv_try_merge(q->last_merge, bio);
465 if (ret != ELEVATOR_NO_MERGE) { 465 if (ret != ELEVATOR_NO_MERGE) {
466 *req = q->last_merge; 466 *req = q->last_merge;
467 return ret; 467 return ret;
468 } 468 }
469 } 469 }
470 470
471 /* 471 /*
472 * See if our hash lookup can find a potential backmerge. 472 * See if our hash lookup can find a potential backmerge.
473 */ 473 */
474 __rq = elv_rqhash_find(q, bio->bi_sector); 474 __rq = elv_rqhash_find(q, bio->bi_sector);
475 if (__rq && elv_rq_merge_ok(__rq, bio)) { 475 if (__rq && elv_rq_merge_ok(__rq, bio)) {
476 *req = __rq; 476 *req = __rq;
477 return ELEVATOR_BACK_MERGE; 477 return ELEVATOR_BACK_MERGE;
478 } 478 }
479 479
480 if (e->ops->elevator_merge_fn) 480 if (e->ops->elevator_merge_fn)
481 return e->ops->elevator_merge_fn(q, req, bio); 481 return e->ops->elevator_merge_fn(q, req, bio);
482 482
483 return ELEVATOR_NO_MERGE; 483 return ELEVATOR_NO_MERGE;
484 } 484 }
485 485
486 void elv_merged_request(struct request_queue *q, struct request *rq, int type) 486 void elv_merged_request(struct request_queue *q, struct request *rq, int type)
487 { 487 {
488 elevator_t *e = q->elevator; 488 elevator_t *e = q->elevator;
489 489
490 if (e->ops->elevator_merged_fn) 490 if (e->ops->elevator_merged_fn)
491 e->ops->elevator_merged_fn(q, rq, type); 491 e->ops->elevator_merged_fn(q, rq, type);
492 492
493 if (type == ELEVATOR_BACK_MERGE) 493 if (type == ELEVATOR_BACK_MERGE)
494 elv_rqhash_reposition(q, rq); 494 elv_rqhash_reposition(q, rq);
495 495
496 q->last_merge = rq; 496 q->last_merge = rq;
497 } 497 }
498 498
499 void elv_merge_requests(struct request_queue *q, struct request *rq, 499 void elv_merge_requests(struct request_queue *q, struct request *rq,
500 struct request *next) 500 struct request *next)
501 { 501 {
502 elevator_t *e = q->elevator; 502 elevator_t *e = q->elevator;
503 503
504 if (e->ops->elevator_merge_req_fn) 504 if (e->ops->elevator_merge_req_fn)
505 e->ops->elevator_merge_req_fn(q, rq, next); 505 e->ops->elevator_merge_req_fn(q, rq, next);
506 506
507 elv_rqhash_reposition(q, rq); 507 elv_rqhash_reposition(q, rq);
508 elv_rqhash_del(q, next); 508 elv_rqhash_del(q, next);
509 509
510 q->nr_sorted--; 510 q->nr_sorted--;
511 q->last_merge = rq; 511 q->last_merge = rq;
512 } 512 }
513 513
514 void elv_requeue_request(struct request_queue *q, struct request *rq) 514 void elv_requeue_request(struct request_queue *q, struct request *rq)
515 { 515 {
516 /* 516 /*
517 * it already went through dequeue, we need to decrement the 517 * it already went through dequeue, we need to decrement the
518 * in_flight count again 518 * in_flight count again
519 */ 519 */
520 if (blk_account_rq(rq)) { 520 if (blk_account_rq(rq)) {
521 q->in_flight--; 521 q->in_flight--;
522 if (blk_sorted_rq(rq)) 522 if (blk_sorted_rq(rq))
523 elv_deactivate_rq(q, rq); 523 elv_deactivate_rq(q, rq);
524 } 524 }
525 525
526 rq->cmd_flags &= ~REQ_STARTED; 526 rq->cmd_flags &= ~REQ_STARTED;
527 527
528 elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE); 528 elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
529 } 529 }
530 530
531 static void elv_drain_elevator(struct request_queue *q) 531 static void elv_drain_elevator(struct request_queue *q)
532 { 532 {
533 static int printed; 533 static int printed;
534 while (q->elevator->ops->elevator_dispatch_fn(q, 1)) 534 while (q->elevator->ops->elevator_dispatch_fn(q, 1))
535 ; 535 ;
536 if (q->nr_sorted == 0) 536 if (q->nr_sorted == 0)
537 return; 537 return;
538 if (printed++ < 10) { 538 if (printed++ < 10) {
539 printk(KERN_ERR "%s: forced dispatching is broken " 539 printk(KERN_ERR "%s: forced dispatching is broken "
540 "(nr_sorted=%u), please report this\n", 540 "(nr_sorted=%u), please report this\n",
541 q->elevator->elevator_type->elevator_name, q->nr_sorted); 541 q->elevator->elevator_type->elevator_name, q->nr_sorted);
542 } 542 }
543 } 543 }
544 544
545 void elv_insert(struct request_queue *q, struct request *rq, int where) 545 void elv_insert(struct request_queue *q, struct request *rq, int where)
546 { 546 {
547 struct list_head *pos; 547 struct list_head *pos;
548 unsigned ordseq; 548 unsigned ordseq;
549 int unplug_it = 1; 549 int unplug_it = 1;
550 550
551 blk_add_trace_rq(q, rq, BLK_TA_INSERT); 551 blk_add_trace_rq(q, rq, BLK_TA_INSERT);
552 552
553 rq->q = q; 553 rq->q = q;
554 554
555 switch (where) { 555 switch (where) {
556 case ELEVATOR_INSERT_FRONT: 556 case ELEVATOR_INSERT_FRONT:
557 rq->cmd_flags |= REQ_SOFTBARRIER; 557 rq->cmd_flags |= REQ_SOFTBARRIER;
558 558
559 list_add(&rq->queuelist, &q->queue_head); 559 list_add(&rq->queuelist, &q->queue_head);
560 break; 560 break;
561 561
562 case ELEVATOR_INSERT_BACK: 562 case ELEVATOR_INSERT_BACK:
563 rq->cmd_flags |= REQ_SOFTBARRIER; 563 rq->cmd_flags |= REQ_SOFTBARRIER;
564 elv_drain_elevator(q); 564 elv_drain_elevator(q);
565 list_add_tail(&rq->queuelist, &q->queue_head); 565 list_add_tail(&rq->queuelist, &q->queue_head);
566 /* 566 /*
567 * We kick the queue here for the following reasons. 567 * We kick the queue here for the following reasons.
568 * - The elevator might have returned NULL previously 568 * - The elevator might have returned NULL previously
569 * to delay requests and returned them now. As the 569 * to delay requests and returned them now. As the
570 * queue wasn't empty before this request, ll_rw_blk 570 * queue wasn't empty before this request, ll_rw_blk
571 * won't run the queue on return, resulting in hang. 571 * won't run the queue on return, resulting in hang.
572 * - Usually, back inserted requests won't be merged 572 * - Usually, back inserted requests won't be merged
573 * with anything. There's no point in delaying queue 573 * with anything. There's no point in delaying queue
574 * processing. 574 * processing.
575 */ 575 */
576 blk_remove_plug(q); 576 blk_remove_plug(q);
577 q->request_fn(q); 577 q->request_fn(q);
578 break; 578 break;
579 579
580 case ELEVATOR_INSERT_SORT: 580 case ELEVATOR_INSERT_SORT:
581 BUG_ON(!blk_fs_request(rq)); 581 BUG_ON(!blk_fs_request(rq));
582 rq->cmd_flags |= REQ_SORTED; 582 rq->cmd_flags |= REQ_SORTED;
583 q->nr_sorted++; 583 q->nr_sorted++;
584 if (rq_mergeable(rq)) { 584 if (rq_mergeable(rq)) {
585 elv_rqhash_add(q, rq); 585 elv_rqhash_add(q, rq);
586 if (!q->last_merge) 586 if (!q->last_merge)
587 q->last_merge = rq; 587 q->last_merge = rq;
588 } 588 }
589 589
590 /* 590 /*
591 * Some ioscheds (cfq) run q->request_fn directly, so 591 * Some ioscheds (cfq) run q->request_fn directly, so
592 * rq cannot be accessed after calling 592 * rq cannot be accessed after calling
593 * elevator_add_req_fn. 593 * elevator_add_req_fn.
594 */ 594 */
595 q->elevator->ops->elevator_add_req_fn(q, rq); 595 q->elevator->ops->elevator_add_req_fn(q, rq);
596 break; 596 break;
597 597
598 case ELEVATOR_INSERT_REQUEUE: 598 case ELEVATOR_INSERT_REQUEUE:
599 /* 599 /*
600 * If ordered flush isn't in progress, we do front 600 * If ordered flush isn't in progress, we do front
601 * insertion; otherwise, requests should be requeued 601 * insertion; otherwise, requests should be requeued
602 * in ordseq order. 602 * in ordseq order.
603 */ 603 */
604 rq->cmd_flags |= REQ_SOFTBARRIER; 604 rq->cmd_flags |= REQ_SOFTBARRIER;
605 605
606 /* 606 /*
607 * Most requeues happen because of a busy condition, 607 * Most requeues happen because of a busy condition,
608 * don't force unplug of the queue for that case. 608 * don't force unplug of the queue for that case.
609 */ 609 */
610 unplug_it = 0; 610 unplug_it = 0;
611 611
612 if (q->ordseq == 0) { 612 if (q->ordseq == 0) {
613 list_add(&rq->queuelist, &q->queue_head); 613 list_add(&rq->queuelist, &q->queue_head);
614 break; 614 break;
615 } 615 }
616 616
617 ordseq = blk_ordered_req_seq(rq); 617 ordseq = blk_ordered_req_seq(rq);
618 618
619 list_for_each(pos, &q->queue_head) { 619 list_for_each(pos, &q->queue_head) {
620 struct request *pos_rq = list_entry_rq(pos); 620 struct request *pos_rq = list_entry_rq(pos);
621 if (ordseq <= blk_ordered_req_seq(pos_rq)) 621 if (ordseq <= blk_ordered_req_seq(pos_rq))
622 break; 622 break;
623 } 623 }
624 624
625 list_add_tail(&rq->queuelist, pos); 625 list_add_tail(&rq->queuelist, pos);
626 break; 626 break;
627 627
628 default: 628 default:
629 printk(KERN_ERR "%s: bad insertion point %d\n", 629 printk(KERN_ERR "%s: bad insertion point %d\n",
630 __FUNCTION__, where); 630 __FUNCTION__, where);
631 BUG(); 631 BUG();
632 } 632 }
633 633
634 if (unplug_it && blk_queue_plugged(q)) { 634 if (unplug_it && blk_queue_plugged(q)) {
635 int nrq = q->rq.count[READ] + q->rq.count[WRITE] 635 int nrq = q->rq.count[READ] + q->rq.count[WRITE]
636 - q->in_flight; 636 - q->in_flight;
637 637
638 if (nrq >= q->unplug_thresh) 638 if (nrq >= q->unplug_thresh)
639 __generic_unplug_device(q); 639 __generic_unplug_device(q);
640 } 640 }
641 } 641 }
642 642
643 void __elv_add_request(struct request_queue *q, struct request *rq, int where, 643 void __elv_add_request(struct request_queue *q, struct request *rq, int where,
644 int plug) 644 int plug)
645 { 645 {
646 if (q->ordcolor) 646 if (q->ordcolor)
647 rq->cmd_flags |= REQ_ORDERED_COLOR; 647 rq->cmd_flags |= REQ_ORDERED_COLOR;
648 648
649 if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { 649 if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
650 /* 650 /*
651 * toggle ordered color 651 * toggle ordered color
652 */ 652 */
653 if (blk_barrier_rq(rq)) 653 if (blk_barrier_rq(rq))
654 q->ordcolor ^= 1; 654 q->ordcolor ^= 1;
655 655
656 /* 656 /*
657 * barriers implicitly indicate back insertion 657 * barriers implicitly indicate back insertion
658 */ 658 */
659 if (where == ELEVATOR_INSERT_SORT) 659 if (where == ELEVATOR_INSERT_SORT)
660 where = ELEVATOR_INSERT_BACK; 660 where = ELEVATOR_INSERT_BACK;
661 661
662 /* 662 /*
663 * this request is scheduling boundary, update 663 * this request is scheduling boundary, update
664 * end_sector 664 * end_sector
665 */ 665 */
666 if (blk_fs_request(rq)) { 666 if (blk_fs_request(rq)) {
667 q->end_sector = rq_end_sector(rq); 667 q->end_sector = rq_end_sector(rq);
668 q->boundary_rq = rq; 668 q->boundary_rq = rq;
669 } 669 }
670 } else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT) 670 } else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
671 where = ELEVATOR_INSERT_BACK; 671 where = ELEVATOR_INSERT_BACK;
672 672
673 if (plug) 673 if (plug)
674 blk_plug_device(q); 674 blk_plug_device(q);
675 675
676 elv_insert(q, rq, where); 676 elv_insert(q, rq, where);
677 } 677 }
678 678
679 EXPORT_SYMBOL(__elv_add_request); 679 EXPORT_SYMBOL(__elv_add_request);
680 680
681 void elv_add_request(struct request_queue *q, struct request *rq, int where, 681 void elv_add_request(struct request_queue *q, struct request *rq, int where,
682 int plug) 682 int plug)
683 { 683 {
684 unsigned long flags; 684 unsigned long flags;
685 685
686 spin_lock_irqsave(q->queue_lock, flags); 686 spin_lock_irqsave(q->queue_lock, flags);
687 __elv_add_request(q, rq, where, plug); 687 __elv_add_request(q, rq, where, plug);
688 spin_unlock_irqrestore(q->queue_lock, flags); 688 spin_unlock_irqrestore(q->queue_lock, flags);
689 } 689 }
690 690
691 EXPORT_SYMBOL(elv_add_request); 691 EXPORT_SYMBOL(elv_add_request);
692 692
693 static inline struct request *__elv_next_request(struct request_queue *q) 693 static inline struct request *__elv_next_request(struct request_queue *q)
694 { 694 {
695 struct request *rq; 695 struct request *rq;
696 696
697 while (1) { 697 while (1) {
698 while (!list_empty(&q->queue_head)) { 698 while (!list_empty(&q->queue_head)) {
699 rq = list_entry_rq(q->queue_head.next); 699 rq = list_entry_rq(q->queue_head.next);
700 if (blk_do_ordered(q, &rq)) 700 if (blk_do_ordered(q, &rq))
701 return rq; 701 return rq;
702 } 702 }
703 703
704 if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) 704 if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
705 return NULL; 705 return NULL;
706 } 706 }
707 } 707 }
708 708
709 struct request *elv_next_request(struct request_queue *q) 709 struct request *elv_next_request(struct request_queue *q)
710 { 710 {
711 struct request *rq; 711 struct request *rq;
712 int ret; 712 int ret;
713 713
714 while ((rq = __elv_next_request(q)) != NULL) { 714 while ((rq = __elv_next_request(q)) != NULL) {
715 if (!(rq->cmd_flags & REQ_STARTED)) { 715 if (!(rq->cmd_flags & REQ_STARTED)) {
716 /* 716 /*
717 * This is the first time the device driver 717 * This is the first time the device driver
718 * sees this request (possibly after 718 * sees this request (possibly after
719 * requeueing). Notify IO scheduler. 719 * requeueing). Notify IO scheduler.
720 */ 720 */
721 if (blk_sorted_rq(rq)) 721 if (blk_sorted_rq(rq))
722 elv_activate_rq(q, rq); 722 elv_activate_rq(q, rq);
723 723
724 /* 724 /*
725 * just mark as started even if we don't start 725 * just mark as started even if we don't start
726 * it, a request that has been delayed should 726 * it, a request that has been delayed should
727 * not be passed by new incoming requests 727 * not be passed by new incoming requests
728 */ 728 */
729 rq->cmd_flags |= REQ_STARTED; 729 rq->cmd_flags |= REQ_STARTED;
730 blk_add_trace_rq(q, rq, BLK_TA_ISSUE); 730 blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
731 } 731 }
732 732
733 if (!q->boundary_rq || q->boundary_rq == rq) { 733 if (!q->boundary_rq || q->boundary_rq == rq) {
734 q->end_sector = rq_end_sector(rq); 734 q->end_sector = rq_end_sector(rq);
735 q->boundary_rq = NULL; 735 q->boundary_rq = NULL;
736 } 736 }
737 737
738 if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn) 738 if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn)
739 break; 739 break;
740 740
741 ret = q->prep_rq_fn(q, rq); 741 ret = q->prep_rq_fn(q, rq);
742 if (ret == BLKPREP_OK) { 742 if (ret == BLKPREP_OK) {
743 break; 743 break;
744 } else if (ret == BLKPREP_DEFER) { 744 } else if (ret == BLKPREP_DEFER) {
745 /* 745 /*
746 * the request may have been (partially) prepped. 746 * the request may have been (partially) prepped.
747 * we need to keep this request in the front to 747 * we need to keep this request in the front to
748 * avoid resource deadlock. REQ_STARTED will 748 * avoid resource deadlock. REQ_STARTED will
749 * prevent other fs requests from passing this one. 749 * prevent other fs requests from passing this one.
750 */ 750 */
751 rq = NULL; 751 rq = NULL;
752 break; 752 break;
753 } else if (ret == BLKPREP_KILL) { 753 } else if (ret == BLKPREP_KILL) {
754 int nr_bytes = rq->hard_nr_sectors << 9;
755
756 if (!nr_bytes)
757 nr_bytes = rq->data_len;
758
759 blkdev_dequeue_request(rq);
760 rq->cmd_flags |= REQ_QUIET; 754 rq->cmd_flags |= REQ_QUIET;
761 end_that_request_chunk(rq, 0, nr_bytes); 755 end_queued_request(rq, 0);
762 end_that_request_last(rq, 0);
763 } else { 756 } else {
764 printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__, 757 printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,
765 ret); 758 ret);
766 break; 759 break;
767 } 760 }
768 } 761 }
769 762
770 return rq; 763 return rq;
771 } 764 }
772 765
773 EXPORT_SYMBOL(elv_next_request); 766 EXPORT_SYMBOL(elv_next_request);
774 767
775 void elv_dequeue_request(struct request_queue *q, struct request *rq) 768 void elv_dequeue_request(struct request_queue *q, struct request *rq)
776 { 769 {
777 BUG_ON(list_empty(&rq->queuelist)); 770 BUG_ON(list_empty(&rq->queuelist));
778 BUG_ON(ELV_ON_HASH(rq)); 771 BUG_ON(ELV_ON_HASH(rq));
779 772
780 list_del_init(&rq->queuelist); 773 list_del_init(&rq->queuelist);
781 774
782 /* 775 /*
783 * the time frame between a request being removed from the lists 776 * the time frame between a request being removed from the lists
784 * and to it is freed is accounted as io that is in progress at 777 * and to it is freed is accounted as io that is in progress at
785 * the driver side. 778 * the driver side.
786 */ 779 */
787 if (blk_account_rq(rq)) 780 if (blk_account_rq(rq))
788 q->in_flight++; 781 q->in_flight++;
789 } 782 }
790 783
791 EXPORT_SYMBOL(elv_dequeue_request); 784 EXPORT_SYMBOL(elv_dequeue_request);
792 785
793 int elv_queue_empty(struct request_queue *q) 786 int elv_queue_empty(struct request_queue *q)
794 { 787 {
795 elevator_t *e = q->elevator; 788 elevator_t *e = q->elevator;
796 789
797 if (!list_empty(&q->queue_head)) 790 if (!list_empty(&q->queue_head))
798 return 0; 791 return 0;
799 792
800 if (e->ops->elevator_queue_empty_fn) 793 if (e->ops->elevator_queue_empty_fn)
801 return e->ops->elevator_queue_empty_fn(q); 794 return e->ops->elevator_queue_empty_fn(q);
802 795
803 return 1; 796 return 1;
804 } 797 }
805 798
806 EXPORT_SYMBOL(elv_queue_empty); 799 EXPORT_SYMBOL(elv_queue_empty);
807 800
808 struct request *elv_latter_request(struct request_queue *q, struct request *rq) 801 struct request *elv_latter_request(struct request_queue *q, struct request *rq)
809 { 802 {
810 elevator_t *e = q->elevator; 803 elevator_t *e = q->elevator;
811 804
812 if (e->ops->elevator_latter_req_fn) 805 if (e->ops->elevator_latter_req_fn)
813 return e->ops->elevator_latter_req_fn(q, rq); 806 return e->ops->elevator_latter_req_fn(q, rq);
814 return NULL; 807 return NULL;
815 } 808 }
816 809
817 struct request *elv_former_request(struct request_queue *q, struct request *rq) 810 struct request *elv_former_request(struct request_queue *q, struct request *rq)
818 { 811 {
819 elevator_t *e = q->elevator; 812 elevator_t *e = q->elevator;
820 813
821 if (e->ops->elevator_former_req_fn) 814 if (e->ops->elevator_former_req_fn)
822 return e->ops->elevator_former_req_fn(q, rq); 815 return e->ops->elevator_former_req_fn(q, rq);
823 return NULL; 816 return NULL;
824 } 817 }
825 818
826 int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) 819 int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
827 { 820 {
828 elevator_t *e = q->elevator; 821 elevator_t *e = q->elevator;
829 822
830 if (e->ops->elevator_set_req_fn) 823 if (e->ops->elevator_set_req_fn)
831 return e->ops->elevator_set_req_fn(q, rq, gfp_mask); 824 return e->ops->elevator_set_req_fn(q, rq, gfp_mask);
832 825
833 rq->elevator_private = NULL; 826 rq->elevator_private = NULL;
834 return 0; 827 return 0;
835 } 828 }
836 829
837 void elv_put_request(struct request_queue *q, struct request *rq) 830 void elv_put_request(struct request_queue *q, struct request *rq)
838 { 831 {
839 elevator_t *e = q->elevator; 832 elevator_t *e = q->elevator;
840 833
841 if (e->ops->elevator_put_req_fn) 834 if (e->ops->elevator_put_req_fn)
842 e->ops->elevator_put_req_fn(rq); 835 e->ops->elevator_put_req_fn(rq);
843 } 836 }
844 837
845 int elv_may_queue(struct request_queue *q, int rw) 838 int elv_may_queue(struct request_queue *q, int rw)
846 { 839 {
847 elevator_t *e = q->elevator; 840 elevator_t *e = q->elevator;
848 841
849 if (e->ops->elevator_may_queue_fn) 842 if (e->ops->elevator_may_queue_fn)
850 return e->ops->elevator_may_queue_fn(q, rw); 843 return e->ops->elevator_may_queue_fn(q, rw);
851 844
852 return ELV_MQUEUE_MAY; 845 return ELV_MQUEUE_MAY;
853 } 846 }
854 847
855 void elv_completed_request(struct request_queue *q, struct request *rq) 848 void elv_completed_request(struct request_queue *q, struct request *rq)
856 { 849 {
857 elevator_t *e = q->elevator; 850 elevator_t *e = q->elevator;
858 851
859 /* 852 /*
860 * request is released from the driver, io must be done 853 * request is released from the driver, io must be done
861 */ 854 */
862 if (blk_account_rq(rq)) { 855 if (blk_account_rq(rq)) {
863 q->in_flight--; 856 q->in_flight--;
864 if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn) 857 if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
865 e->ops->elevator_completed_req_fn(q, rq); 858 e->ops->elevator_completed_req_fn(q, rq);
866 } 859 }
867 860
868 /* 861 /*
869 * Check if the queue is waiting for fs requests to be 862 * Check if the queue is waiting for fs requests to be
870 * drained for flush sequence. 863 * drained for flush sequence.
871 */ 864 */
872 if (unlikely(q->ordseq)) { 865 if (unlikely(q->ordseq)) {
873 struct request *first_rq = list_entry_rq(q->queue_head.next); 866 struct request *first_rq = list_entry_rq(q->queue_head.next);
874 if (q->in_flight == 0 && 867 if (q->in_flight == 0 &&
875 blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN && 868 blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
876 blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) { 869 blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
877 blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0); 870 blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
878 q->request_fn(q); 871 q->request_fn(q);
879 } 872 }
880 } 873 }
881 } 874 }
882 875
883 #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr) 876 #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)
884 877
885 static ssize_t 878 static ssize_t
886 elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page) 879 elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
887 { 880 {
888 elevator_t *e = container_of(kobj, elevator_t, kobj); 881 elevator_t *e = container_of(kobj, elevator_t, kobj);
889 struct elv_fs_entry *entry = to_elv(attr); 882 struct elv_fs_entry *entry = to_elv(attr);
890 ssize_t error; 883 ssize_t error;
891 884
892 if (!entry->show) 885 if (!entry->show)
893 return -EIO; 886 return -EIO;
894 887
895 mutex_lock(&e->sysfs_lock); 888 mutex_lock(&e->sysfs_lock);
896 error = e->ops ? entry->show(e, page) : -ENOENT; 889 error = e->ops ? entry->show(e, page) : -ENOENT;
897 mutex_unlock(&e->sysfs_lock); 890 mutex_unlock(&e->sysfs_lock);
898 return error; 891 return error;
899 } 892 }
900 893
901 static ssize_t 894 static ssize_t
902 elv_attr_store(struct kobject *kobj, struct attribute *attr, 895 elv_attr_store(struct kobject *kobj, struct attribute *attr,
903 const char *page, size_t length) 896 const char *page, size_t length)
904 { 897 {
905 elevator_t *e = container_of(kobj, elevator_t, kobj); 898 elevator_t *e = container_of(kobj, elevator_t, kobj);
906 struct elv_fs_entry *entry = to_elv(attr); 899 struct elv_fs_entry *entry = to_elv(attr);
907 ssize_t error; 900 ssize_t error;
908 901
909 if (!entry->store) 902 if (!entry->store)
910 return -EIO; 903 return -EIO;
911 904
912 mutex_lock(&e->sysfs_lock); 905 mutex_lock(&e->sysfs_lock);
913 error = e->ops ? entry->store(e, page, length) : -ENOENT; 906 error = e->ops ? entry->store(e, page, length) : -ENOENT;
914 mutex_unlock(&e->sysfs_lock); 907 mutex_unlock(&e->sysfs_lock);
915 return error; 908 return error;
916 } 909 }
917 910
918 static struct sysfs_ops elv_sysfs_ops = { 911 static struct sysfs_ops elv_sysfs_ops = {
919 .show = elv_attr_show, 912 .show = elv_attr_show,
920 .store = elv_attr_store, 913 .store = elv_attr_store,
921 }; 914 };
922 915
923 static struct kobj_type elv_ktype = { 916 static struct kobj_type elv_ktype = {
924 .sysfs_ops = &elv_sysfs_ops, 917 .sysfs_ops = &elv_sysfs_ops,
925 .release = elevator_release, 918 .release = elevator_release,
926 }; 919 };
927 920
928 int elv_register_queue(struct request_queue *q) 921 int elv_register_queue(struct request_queue *q)
929 { 922 {
930 elevator_t *e = q->elevator; 923 elevator_t *e = q->elevator;
931 int error; 924 int error;
932 925
933 e->kobj.parent = &q->kobj; 926 e->kobj.parent = &q->kobj;
934 927
935 error = kobject_add(&e->kobj); 928 error = kobject_add(&e->kobj);
936 if (!error) { 929 if (!error) {
937 struct elv_fs_entry *attr = e->elevator_type->elevator_attrs; 930 struct elv_fs_entry *attr = e->elevator_type->elevator_attrs;
938 if (attr) { 931 if (attr) {
939 while (attr->attr.name) { 932 while (attr->attr.name) {
940 if (sysfs_create_file(&e->kobj, &attr->attr)) 933 if (sysfs_create_file(&e->kobj, &attr->attr))
941 break; 934 break;
942 attr++; 935 attr++;
943 } 936 }
944 } 937 }
945 kobject_uevent(&e->kobj, KOBJ_ADD); 938 kobject_uevent(&e->kobj, KOBJ_ADD);
946 } 939 }
947 return error; 940 return error;
948 } 941 }
949 942
950 static void __elv_unregister_queue(elevator_t *e) 943 static void __elv_unregister_queue(elevator_t *e)
951 { 944 {
952 kobject_uevent(&e->kobj, KOBJ_REMOVE); 945 kobject_uevent(&e->kobj, KOBJ_REMOVE);
953 kobject_del(&e->kobj); 946 kobject_del(&e->kobj);
954 } 947 }
955 948
956 void elv_unregister_queue(struct request_queue *q) 949 void elv_unregister_queue(struct request_queue *q)
957 { 950 {
958 if (q) 951 if (q)
959 __elv_unregister_queue(q->elevator); 952 __elv_unregister_queue(q->elevator);
960 } 953 }
961 954
962 int elv_register(struct elevator_type *e) 955 int elv_register(struct elevator_type *e)
963 { 956 {
964 char *def = ""; 957 char *def = "";
965 958
966 spin_lock(&elv_list_lock); 959 spin_lock(&elv_list_lock);
967 BUG_ON(elevator_find(e->elevator_name)); 960 BUG_ON(elevator_find(e->elevator_name));
968 list_add_tail(&e->list, &elv_list); 961 list_add_tail(&e->list, &elv_list);
969 spin_unlock(&elv_list_lock); 962 spin_unlock(&elv_list_lock);
970 963
971 if (!strcmp(e->elevator_name, chosen_elevator) || 964 if (!strcmp(e->elevator_name, chosen_elevator) ||
972 (!*chosen_elevator && 965 (!*chosen_elevator &&
973 !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED))) 966 !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
974 def = " (default)"; 967 def = " (default)";
975 968
976 printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name, def); 969 printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name, def);
977 return 0; 970 return 0;
978 } 971 }
979 EXPORT_SYMBOL_GPL(elv_register); 972 EXPORT_SYMBOL_GPL(elv_register);
980 973
981 void elv_unregister(struct elevator_type *e) 974 void elv_unregister(struct elevator_type *e)
982 { 975 {
983 struct task_struct *g, *p; 976 struct task_struct *g, *p;
984 977
985 /* 978 /*
986 * Iterate every thread in the process to remove the io contexts. 979 * Iterate every thread in the process to remove the io contexts.
987 */ 980 */
988 if (e->ops.trim) { 981 if (e->ops.trim) {
989 read_lock(&tasklist_lock); 982 read_lock(&tasklist_lock);
990 do_each_thread(g, p) { 983 do_each_thread(g, p) {
991 task_lock(p); 984 task_lock(p);
992 if (p->io_context) 985 if (p->io_context)
993 e->ops.trim(p->io_context); 986 e->ops.trim(p->io_context);
994 task_unlock(p); 987 task_unlock(p);
995 } while_each_thread(g, p); 988 } while_each_thread(g, p);
996 read_unlock(&tasklist_lock); 989 read_unlock(&tasklist_lock);
997 } 990 }
998 991
999 spin_lock(&elv_list_lock); 992 spin_lock(&elv_list_lock);
1000 list_del_init(&e->list); 993 list_del_init(&e->list);
1001 spin_unlock(&elv_list_lock); 994 spin_unlock(&elv_list_lock);
1002 } 995 }
1003 EXPORT_SYMBOL_GPL(elv_unregister); 996 EXPORT_SYMBOL_GPL(elv_unregister);
1004 997
1005 /* 998 /*
1006 * switch to new_e io scheduler. be careful not to introduce deadlocks - 999 * switch to new_e io scheduler. be careful not to introduce deadlocks -
1007 * we don't free the old io scheduler, before we have allocated what we 1000 * we don't free the old io scheduler, before we have allocated what we
1008 * need for the new one. this way we have a chance of going back to the old 1001 * need for the new one. this way we have a chance of going back to the old
1009 * one, if the new one fails init for some reason. 1002 * one, if the new one fails init for some reason.
1010 */ 1003 */
1011 static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) 1004 static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
1012 { 1005 {
1013 elevator_t *old_elevator, *e; 1006 elevator_t *old_elevator, *e;
1014 void *data; 1007 void *data;
1015 1008
1016 /* 1009 /*
1017 * Allocate new elevator 1010 * Allocate new elevator
1018 */ 1011 */
1019 e = elevator_alloc(q, new_e); 1012 e = elevator_alloc(q, new_e);
1020 if (!e) 1013 if (!e)
1021 return 0; 1014 return 0;
1022 1015
1023 data = elevator_init_queue(q, e); 1016 data = elevator_init_queue(q, e);
1024 if (!data) { 1017 if (!data) {
1025 kobject_put(&e->kobj); 1018 kobject_put(&e->kobj);
1026 return 0; 1019 return 0;
1027 } 1020 }
1028 1021
1029 /* 1022 /*
1030 * Turn on BYPASS and drain all requests w/ elevator private data 1023 * Turn on BYPASS and drain all requests w/ elevator private data
1031 */ 1024 */
1032 spin_lock_irq(q->queue_lock); 1025 spin_lock_irq(q->queue_lock);
1033 1026
1034 set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); 1027 set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
1035 1028
1036 elv_drain_elevator(q); 1029 elv_drain_elevator(q);
1037 1030
1038 while (q->rq.elvpriv) { 1031 while (q->rq.elvpriv) {
1039 blk_remove_plug(q); 1032 blk_remove_plug(q);
1040 q->request_fn(q); 1033 q->request_fn(q);
1041 spin_unlock_irq(q->queue_lock); 1034 spin_unlock_irq(q->queue_lock);
1042 msleep(10); 1035 msleep(10);
1043 spin_lock_irq(q->queue_lock); 1036 spin_lock_irq(q->queue_lock);
1044 elv_drain_elevator(q); 1037 elv_drain_elevator(q);
1045 } 1038 }
1046 1039
1047 /* 1040 /*
1048 * Remember old elevator. 1041 * Remember old elevator.
1049 */ 1042 */
1050 old_elevator = q->elevator; 1043 old_elevator = q->elevator;
1051 1044
1052 /* 1045 /*
1053 * attach and start new elevator 1046 * attach and start new elevator
1054 */ 1047 */
1055 elevator_attach(q, e, data); 1048 elevator_attach(q, e, data);
1056 1049
1057 spin_unlock_irq(q->queue_lock); 1050 spin_unlock_irq(q->queue_lock);
1058 1051
1059 __elv_unregister_queue(old_elevator); 1052 __elv_unregister_queue(old_elevator);
1060 1053
1061 if (elv_register_queue(q)) 1054 if (elv_register_queue(q))
1062 goto fail_register; 1055 goto fail_register;
1063 1056
1064 /* 1057 /*
1065 * finally exit old elevator and turn off BYPASS. 1058 * finally exit old elevator and turn off BYPASS.
1066 */ 1059 */
1067 elevator_exit(old_elevator); 1060 elevator_exit(old_elevator);
1068 clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); 1061 clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
1069 return 1; 1062 return 1;
1070 1063
1071 fail_register: 1064 fail_register:
1072 /* 1065 /*
1073 * switch failed, exit the new io scheduler and reattach the old 1066 * switch failed, exit the new io scheduler and reattach the old
1074 * one again (along with re-adding the sysfs dir) 1067 * one again (along with re-adding the sysfs dir)
1075 */ 1068 */
1076 elevator_exit(e); 1069 elevator_exit(e);
1077 q->elevator = old_elevator; 1070 q->elevator = old_elevator;
1078 elv_register_queue(q); 1071 elv_register_queue(q);
1079 clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); 1072 clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
1080 return 0; 1073 return 0;
1081 } 1074 }
1082 1075
1083 ssize_t elv_iosched_store(struct request_queue *q, const char *name, 1076 ssize_t elv_iosched_store(struct request_queue *q, const char *name,
1084 size_t count) 1077 size_t count)
1085 { 1078 {
1086 char elevator_name[ELV_NAME_MAX]; 1079 char elevator_name[ELV_NAME_MAX];
1087 size_t len; 1080 size_t len;
1088 struct elevator_type *e; 1081 struct elevator_type *e;
1089 1082
1090 elevator_name[sizeof(elevator_name) - 1] = '\0'; 1083 elevator_name[sizeof(elevator_name) - 1] = '\0';
1091 strncpy(elevator_name, name, sizeof(elevator_name) - 1); 1084 strncpy(elevator_name, name, sizeof(elevator_name) - 1);
1092 len = strlen(elevator_name); 1085 len = strlen(elevator_name);
1093 1086
1094 if (len && elevator_name[len - 1] == '\n') 1087 if (len && elevator_name[len - 1] == '\n')
1095 elevator_name[len - 1] = '\0'; 1088 elevator_name[len - 1] = '\0';
1096 1089
1097 e = elevator_get(elevator_name); 1090 e = elevator_get(elevator_name);
1098 if (!e) { 1091 if (!e) {
1099 printk(KERN_ERR "elevator: type %s not found\n", elevator_name); 1092 printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
1100 return -EINVAL; 1093 return -EINVAL;
1101 } 1094 }
1102 1095
1103 if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) { 1096 if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) {
1104 elevator_put(e); 1097 elevator_put(e);
1105 return count; 1098 return count;
1106 } 1099 }
1107 1100
1108 if (!elevator_switch(q, e)) 1101 if (!elevator_switch(q, e))
1109 printk(KERN_ERR "elevator: switch to %s failed\n",elevator_name); 1102 printk(KERN_ERR "elevator: switch to %s failed\n",elevator_name);
1110 return count; 1103 return count;
1111 } 1104 }
1112 1105
1113 ssize_t elv_iosched_show(struct request_queue *q, char *name) 1106 ssize_t elv_iosched_show(struct request_queue *q, char *name)
1114 { 1107 {
1115 elevator_t *e = q->elevator; 1108 elevator_t *e = q->elevator;
1116 struct elevator_type *elv = e->elevator_type; 1109 struct elevator_type *elv = e->elevator_type;
1117 struct elevator_type *__e; 1110 struct elevator_type *__e;
1118 int len = 0; 1111 int len = 0;
1119 1112
1120 spin_lock(&elv_list_lock); 1113 spin_lock(&elv_list_lock);
1121 list_for_each_entry(__e, &elv_list, list) { 1114 list_for_each_entry(__e, &elv_list, list) {
1122 if (!strcmp(elv->elevator_name, __e->elevator_name)) 1115 if (!strcmp(elv->elevator_name, __e->elevator_name))
1123 len += sprintf(name+len, "[%s] ", elv->elevator_name); 1116 len += sprintf(name+len, "[%s] ", elv->elevator_name);
1124 else 1117 else
1125 len += sprintf(name+len, "%s ", __e->elevator_name); 1118 len += sprintf(name+len, "%s ", __e->elevator_name);
1126 } 1119 }
1127 spin_unlock(&elv_list_lock); 1120 spin_unlock(&elv_list_lock);
1128 1121
1129 len += sprintf(len+name, "\n"); 1122 len += sprintf(len+name, "\n");
1130 return len; 1123 return len;
1131 } 1124 }
1132 1125
1133 struct request *elv_rb_former_request(struct request_queue *q, 1126 struct request *elv_rb_former_request(struct request_queue *q,
1134 struct request *rq) 1127 struct request *rq)
1135 { 1128 {
1136 struct rb_node *rbprev = rb_prev(&rq->rb_node); 1129 struct rb_node *rbprev = rb_prev(&rq->rb_node);
1137 1130
1138 if (rbprev) 1131 if (rbprev)
1139 return rb_entry_rq(rbprev); 1132 return rb_entry_rq(rbprev);
1140 1133
1141 return NULL; 1134 return NULL;
1142 } 1135 }
1143 1136
1144 EXPORT_SYMBOL(elv_rb_former_request); 1137 EXPORT_SYMBOL(elv_rb_former_request);
1145 1138
1146 struct request *elv_rb_latter_request(struct request_queue *q, 1139 struct request *elv_rb_latter_request(struct request_queue *q,
1147 struct request *rq) 1140 struct request *rq)
1148 { 1141 {
1149 struct rb_node *rbnext = rb_next(&rq->rb_node); 1142 struct rb_node *rbnext = rb_next(&rq->rb_node);
1150 1143
1151 if (rbnext) 1144 if (rbnext)
1152 return rb_entry_rq(rbnext); 1145 return rb_entry_rq(rbnext);
1153 1146
1154 return NULL; 1147 return NULL;
1155 } 1148 }
1156 1149
1157 EXPORT_SYMBOL(elv_rb_latter_request); 1150 EXPORT_SYMBOL(elv_rb_latter_request);
1158 1151
1 /* 1 /*
2 * Copyright (C) 1991, 1992 Linus Torvalds 2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 1994, Karl Keyte: Added support for disk statistics 3 * Copyright (C) 1994, Karl Keyte: Added support for disk statistics
4 * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE 4 * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
5 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de> 5 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
6 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July 2000 6 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July 2000
7 * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001 7 * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001
8 */ 8 */
9 9
10 /* 10 /*
11 * This handles all read/write requests to block devices 11 * This handles all read/write requests to block devices
12 */ 12 */
13 #include <linux/kernel.h> 13 #include <linux/kernel.h>
14 #include <linux/module.h> 14 #include <linux/module.h>
15 #include <linux/backing-dev.h> 15 #include <linux/backing-dev.h>
16 #include <linux/bio.h> 16 #include <linux/bio.h>
17 #include <linux/blkdev.h> 17 #include <linux/blkdev.h>
18 #include <linux/highmem.h> 18 #include <linux/highmem.h>
19 #include <linux/mm.h> 19 #include <linux/mm.h>
20 #include <linux/kernel_stat.h> 20 #include <linux/kernel_stat.h>
21 #include <linux/string.h> 21 #include <linux/string.h>
22 #include <linux/init.h> 22 #include <linux/init.h>
23 #include <linux/bootmem.h> /* for max_pfn/max_low_pfn */ 23 #include <linux/bootmem.h> /* for max_pfn/max_low_pfn */
24 #include <linux/completion.h> 24 #include <linux/completion.h>
25 #include <linux/slab.h> 25 #include <linux/slab.h>
26 #include <linux/swap.h> 26 #include <linux/swap.h>
27 #include <linux/writeback.h> 27 #include <linux/writeback.h>
28 #include <linux/task_io_accounting_ops.h> 28 #include <linux/task_io_accounting_ops.h>
29 #include <linux/interrupt.h> 29 #include <linux/interrupt.h>
30 #include <linux/cpu.h> 30 #include <linux/cpu.h>
31 #include <linux/blktrace_api.h> 31 #include <linux/blktrace_api.h>
32 #include <linux/fault-inject.h> 32 #include <linux/fault-inject.h>
33 33
34 /* 34 /*
35 * for max sense size 35 * for max sense size
36 */ 36 */
37 #include <scsi/scsi_cmnd.h> 37 #include <scsi/scsi_cmnd.h>
38 38
39 static void blk_unplug_work(struct work_struct *work); 39 static void blk_unplug_work(struct work_struct *work);
40 static void blk_unplug_timeout(unsigned long data); 40 static void blk_unplug_timeout(unsigned long data);
41 static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io); 41 static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io);
42 static void init_request_from_bio(struct request *req, struct bio *bio); 42 static void init_request_from_bio(struct request *req, struct bio *bio);
43 static int __make_request(struct request_queue *q, struct bio *bio); 43 static int __make_request(struct request_queue *q, struct bio *bio);
44 static struct io_context *current_io_context(gfp_t gfp_flags, int node); 44 static struct io_context *current_io_context(gfp_t gfp_flags, int node);
45 static void blk_recalc_rq_segments(struct request *rq); 45 static void blk_recalc_rq_segments(struct request *rq);
46 static void blk_rq_bio_prep(struct request_queue *q, struct request *rq, 46 static void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
47 struct bio *bio); 47 struct bio *bio);
48 48
49 /* 49 /*
50 * For the allocated request tables 50 * For the allocated request tables
51 */ 51 */
52 static struct kmem_cache *request_cachep; 52 static struct kmem_cache *request_cachep;
53 53
54 /* 54 /*
55 * For queue allocation 55 * For queue allocation
56 */ 56 */
57 static struct kmem_cache *requestq_cachep; 57 static struct kmem_cache *requestq_cachep;
58 58
59 /* 59 /*
60 * For io context allocations 60 * For io context allocations
61 */ 61 */
62 static struct kmem_cache *iocontext_cachep; 62 static struct kmem_cache *iocontext_cachep;
63 63
64 /* 64 /*
65 * Controlling structure to kblockd 65 * Controlling structure to kblockd
66 */ 66 */
67 static struct workqueue_struct *kblockd_workqueue; 67 static struct workqueue_struct *kblockd_workqueue;
68 68
69 unsigned long blk_max_low_pfn, blk_max_pfn; 69 unsigned long blk_max_low_pfn, blk_max_pfn;
70 70
71 EXPORT_SYMBOL(blk_max_low_pfn); 71 EXPORT_SYMBOL(blk_max_low_pfn);
72 EXPORT_SYMBOL(blk_max_pfn); 72 EXPORT_SYMBOL(blk_max_pfn);
73 73
74 static DEFINE_PER_CPU(struct list_head, blk_cpu_done); 74 static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
75 75
76 /* Amount of time in which a process may batch requests */ 76 /* Amount of time in which a process may batch requests */
77 #define BLK_BATCH_TIME (HZ/50UL) 77 #define BLK_BATCH_TIME (HZ/50UL)
78 78
79 /* Number of requests a "batching" process may submit */ 79 /* Number of requests a "batching" process may submit */
80 #define BLK_BATCH_REQ 32 80 #define BLK_BATCH_REQ 32
81 81
82 /* 82 /*
83 * Return the threshold (number of used requests) at which the queue is 83 * Return the threshold (number of used requests) at which the queue is
84 * considered to be congested. It includes a little hysteresis to keep the 84 * considered to be congested. It includes a little hysteresis to keep the
85 * context switch rate down. 85 * context switch rate down.
86 */ 86 */
87 static inline int queue_congestion_on_threshold(struct request_queue *q) 87 static inline int queue_congestion_on_threshold(struct request_queue *q)
88 { 88 {
89 return q->nr_congestion_on; 89 return q->nr_congestion_on;
90 } 90 }
91 91
92 /* 92 /*
93 * The threshold at which a queue is considered to be uncongested 93 * The threshold at which a queue is considered to be uncongested
94 */ 94 */
95 static inline int queue_congestion_off_threshold(struct request_queue *q) 95 static inline int queue_congestion_off_threshold(struct request_queue *q)
96 { 96 {
97 return q->nr_congestion_off; 97 return q->nr_congestion_off;
98 } 98 }
99 99
100 static void blk_queue_congestion_threshold(struct request_queue *q) 100 static void blk_queue_congestion_threshold(struct request_queue *q)
101 { 101 {
102 int nr; 102 int nr;
103 103
104 nr = q->nr_requests - (q->nr_requests / 8) + 1; 104 nr = q->nr_requests - (q->nr_requests / 8) + 1;
105 if (nr > q->nr_requests) 105 if (nr > q->nr_requests)
106 nr = q->nr_requests; 106 nr = q->nr_requests;
107 q->nr_congestion_on = nr; 107 q->nr_congestion_on = nr;
108 108
109 nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1; 109 nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;
110 if (nr < 1) 110 if (nr < 1)
111 nr = 1; 111 nr = 1;
112 q->nr_congestion_off = nr; 112 q->nr_congestion_off = nr;
113 } 113 }
114 114
115 /** 115 /**
116 * blk_get_backing_dev_info - get the address of a queue's backing_dev_info 116 * blk_get_backing_dev_info - get the address of a queue's backing_dev_info
117 * @bdev: device 117 * @bdev: device
118 * 118 *
119 * Locates the passed device's request queue and returns the address of its 119 * Locates the passed device's request queue and returns the address of its
120 * backing_dev_info 120 * backing_dev_info
121 * 121 *
122 * Will return NULL if the request queue cannot be located. 122 * Will return NULL if the request queue cannot be located.
123 */ 123 */
124 struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) 124 struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
125 { 125 {
126 struct backing_dev_info *ret = NULL; 126 struct backing_dev_info *ret = NULL;
127 struct request_queue *q = bdev_get_queue(bdev); 127 struct request_queue *q = bdev_get_queue(bdev);
128 128
129 if (q) 129 if (q)
130 ret = &q->backing_dev_info; 130 ret = &q->backing_dev_info;
131 return ret; 131 return ret;
132 } 132 }
133 EXPORT_SYMBOL(blk_get_backing_dev_info); 133 EXPORT_SYMBOL(blk_get_backing_dev_info);
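
/*
 * Minimal usage sketch (hypothetical caller, names are illustrative):
 * look up the backing_dev_info of an already-opened block device and
 * test its write-congestion state before pushing more writeback at it.
 */
#include <linux/backing-dev.h>
#include <linux/blkdev.h>

static int example_bdev_write_congested(struct block_device *bdev)
{
	struct backing_dev_info *bdi = blk_get_backing_dev_info(bdev);

	if (!bdi)
		return 0;	/* no queue found - treat as not congested */

	return bdi_write_congested(bdi);
}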
134 134
135 /** 135 /**
136 * blk_queue_prep_rq - set a prepare_request function for queue 136 * blk_queue_prep_rq - set a prepare_request function for queue
137 * @q: queue 137 * @q: queue
138 * @pfn: prepare_request function 138 * @pfn: prepare_request function
139 * 139 *
140 * It's possible for a queue to register a prepare_request callback which 140 * It's possible for a queue to register a prepare_request callback which
141 * is invoked before the request is handed to the request_fn. The goal of 141 * is invoked before the request is handed to the request_fn. The goal of
142 * the function is to prepare a request for I/O, it can be used to build a 142 * the function is to prepare a request for I/O, it can be used to build a
143 * cdb from the request data for instance. 143 * cdb from the request data for instance.
144 * 144 *
145 */ 145 */
146 void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn) 146 void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn)
147 { 147 {
148 q->prep_rq_fn = pfn; 148 q->prep_rq_fn = pfn;
149 } 149 }
150 150
151 EXPORT_SYMBOL(blk_queue_prep_rq); 151 EXPORT_SYMBOL(blk_queue_prep_rq);
152 152
153 /** 153 /**
154 * blk_queue_merge_bvec - set a merge_bvec function for queue 154 * blk_queue_merge_bvec - set a merge_bvec function for queue
155 * @q: queue 155 * @q: queue
156 * @mbfn: merge_bvec_fn 156 * @mbfn: merge_bvec_fn
157 * 157 *
158 * Usually queues have static limitations on the max sectors or segments that 158 * Usually queues have static limitations on the max sectors or segments that
159 * we can put in a request. Stacking drivers may have some settings that 159 * we can put in a request. Stacking drivers may have some settings that
160 * are dynamic, and thus we have to query the queue whether it is ok to 160 * are dynamic, and thus we have to query the queue whether it is ok to
161 * add a new bio_vec to a bio at a given offset or not. If the block device 161 * add a new bio_vec to a bio at a given offset or not. If the block device
162 * has such limitations, it needs to register a merge_bvec_fn to control 162 * has such limitations, it needs to register a merge_bvec_fn to control
163 * the size of bio's sent to it. Note that a block device *must* allow a 163 * the size of bio's sent to it. Note that a block device *must* allow a
164 * single page to be added to an empty bio. The block device driver may want 164 * single page to be added to an empty bio. The block device driver may want
165 * to use the bio_split() function to deal with these bio's. By default 165 * to use the bio_split() function to deal with these bio's. By default
166 * no merge_bvec_fn is defined for a queue, and only the fixed limits are 166 * no merge_bvec_fn is defined for a queue, and only the fixed limits are
167 * honored. 167 * honored.
168 */ 168 */
169 void blk_queue_merge_bvec(struct request_queue *q, merge_bvec_fn *mbfn) 169 void blk_queue_merge_bvec(struct request_queue *q, merge_bvec_fn *mbfn)
170 { 170 {
171 q->merge_bvec_fn = mbfn; 171 q->merge_bvec_fn = mbfn;
172 } 172 }
173 173
174 EXPORT_SYMBOL(blk_queue_merge_bvec); 174 EXPORT_SYMBOL(blk_queue_merge_bvec);
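
/*
 * Illustrative merge_bvec_fn sketch (not from this file); it assumes
 * the prototype used by stacking drivers of this vintage,
 * int (*)(struct request_queue *, struct bio *, struct bio_vec *),
 * and caps every bio at a 64k boundary while still accepting at least
 * one page into an empty bio, as required above. It would be
 * registered with blk_queue_merge_bvec(q, example_merge_bvec).
 */
#include <linux/bio.h>
#include <linux/blkdev.h>

#define EXAMPLE_BOUNDARY_SECTORS 128	/* 64k in 512-byte sectors */

static int example_merge_bvec(struct request_queue *q, struct bio *bio,
			      struct bio_vec *bvec)
{
	unsigned int boundary = EXAMPLE_BOUNDARY_SECTORS -
		((unsigned int)bio->bi_sector & (EXAMPLE_BOUNDARY_SECTORS - 1));
	int max = (boundary << 9) - bio->bi_size;

	if (max < 0)
		max = 0;
	if (bio->bi_size == 0 && max < bvec->bv_len)
		return bvec->bv_len;	/* empty bio: always take one page */
	return max;
}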
175 175
176 void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn) 176 void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn)
177 { 177 {
178 q->softirq_done_fn = fn; 178 q->softirq_done_fn = fn;
179 } 179 }
180 180
181 EXPORT_SYMBOL(blk_queue_softirq_done); 181 EXPORT_SYMBOL(blk_queue_softirq_done);
182 182
183 /** 183 /**
184 * blk_queue_make_request - define an alternate make_request function for a device 184 * blk_queue_make_request - define an alternate make_request function for a device
185 * @q: the request queue for the device to be affected 185 * @q: the request queue for the device to be affected
186 * @mfn: the alternate make_request function 186 * @mfn: the alternate make_request function
187 * 187 *
188 * Description: 188 * Description:
189 * The normal way for &struct bios to be passed to a device 189 * The normal way for &struct bios to be passed to a device
190 * driver is for them to be collected into requests on a request 190 * driver is for them to be collected into requests on a request
191 * queue, and then to allow the device driver to select requests 191 * queue, and then to allow the device driver to select requests
192 * off that queue when it is ready. This works well for many block 192 * off that queue when it is ready. This works well for many block
193 * devices. However some block devices (typically virtual devices 193 * devices. However some block devices (typically virtual devices
194 * such as md or lvm) do not benefit from the processing on the 194 * such as md or lvm) do not benefit from the processing on the
195 * request queue, and are served best by having the requests passed 195 * request queue, and are served best by having the requests passed
196 * directly to them. This can be achieved by providing a function 196 * directly to them. This can be achieved by providing a function
197 * to blk_queue_make_request(). 197 * to blk_queue_make_request().
198 * 198 *
199 * Caveat: 199 * Caveat:
200 * The driver that does this *must* be able to deal appropriately 200 * The driver that does this *must* be able to deal appropriately
201 * with buffers in "highmemory". This can be accomplished by either calling 201 * with buffers in "highmemory". This can be accomplished by either calling
202 * __bio_kmap_atomic() to get a temporary kernel mapping, or by calling 202 * __bio_kmap_atomic() to get a temporary kernel mapping, or by calling
203 * blk_queue_bounce() to create a buffer in normal memory. 203 * blk_queue_bounce() to create a buffer in normal memory.
204 **/ 204 **/
205 void blk_queue_make_request(struct request_queue * q, make_request_fn * mfn) 205 void blk_queue_make_request(struct request_queue * q, make_request_fn * mfn)
206 { 206 {
207 /* 207 /*
208 * set defaults 208 * set defaults
209 */ 209 */
210 q->nr_requests = BLKDEV_MAX_RQ; 210 q->nr_requests = BLKDEV_MAX_RQ;
211 blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS); 211 blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
212 blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS); 212 blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
213 q->make_request_fn = mfn; 213 q->make_request_fn = mfn;
214 q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; 214 q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
215 q->backing_dev_info.state = 0; 215 q->backing_dev_info.state = 0;
216 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; 216 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
217 blk_queue_max_sectors(q, SAFE_MAX_SECTORS); 217 blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
218 blk_queue_hardsect_size(q, 512); 218 blk_queue_hardsect_size(q, 512);
219 blk_queue_dma_alignment(q, 511); 219 blk_queue_dma_alignment(q, 511);
220 blk_queue_congestion_threshold(q); 220 blk_queue_congestion_threshold(q);
221 q->nr_batching = BLK_BATCH_REQ; 221 q->nr_batching = BLK_BATCH_REQ;
222 222
223 q->unplug_thresh = 4; /* hmm */ 223 q->unplug_thresh = 4; /* hmm */
224 q->unplug_delay = (3 * HZ) / 1000; /* 3 milliseconds */ 224 q->unplug_delay = (3 * HZ) / 1000; /* 3 milliseconds */
225 if (q->unplug_delay == 0) 225 if (q->unplug_delay == 0)
226 q->unplug_delay = 1; 226 q->unplug_delay = 1;
227 227
228 INIT_WORK(&q->unplug_work, blk_unplug_work); 228 INIT_WORK(&q->unplug_work, blk_unplug_work);
229 229
230 q->unplug_timer.function = blk_unplug_timeout; 230 q->unplug_timer.function = blk_unplug_timeout;
231 q->unplug_timer.data = (unsigned long)q; 231 q->unplug_timer.data = (unsigned long)q;
232 232
233 /* 233 /*
234 * by default assume old behaviour and bounce for any highmem page 234 * by default assume old behaviour and bounce for any highmem page
235 */ 235 */
236 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); 236 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
237 } 237 }
238 238
239 EXPORT_SYMBOL(blk_queue_make_request); 239 EXPORT_SYMBOL(blk_queue_make_request);
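
/*
 * Sketch of a bio-based (no request queue processing) driver setup in
 * the style of md/loop; the "example_" names are hypothetical. The
 * make_request function only demonstrates the calling convention: it
 * completes every bio successfully and returns 0 to signal that the
 * bio has been consumed.
 */
#include <linux/bio.h>
#include <linux/blkdev.h>

static int example_make_request(struct request_queue *q, struct bio *bio)
{
	/* a real driver would map and issue the I/O here */
	bio_endio(bio, 0);
	return 0;
}

static struct request_queue *example_alloc_queue(void)
{
	struct request_queue *q = blk_alloc_queue(GFP_KERNEL);

	if (q)
		blk_queue_make_request(q, example_make_request);
	return q;
}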
240 240
241 static void rq_init(struct request_queue *q, struct request *rq) 241 static void rq_init(struct request_queue *q, struct request *rq)
242 { 242 {
243 INIT_LIST_HEAD(&rq->queuelist); 243 INIT_LIST_HEAD(&rq->queuelist);
244 INIT_LIST_HEAD(&rq->donelist); 244 INIT_LIST_HEAD(&rq->donelist);
245 245
246 rq->errors = 0; 246 rq->errors = 0;
247 rq->bio = rq->biotail = NULL; 247 rq->bio = rq->biotail = NULL;
248 INIT_HLIST_NODE(&rq->hash); 248 INIT_HLIST_NODE(&rq->hash);
249 RB_CLEAR_NODE(&rq->rb_node); 249 RB_CLEAR_NODE(&rq->rb_node);
250 rq->ioprio = 0; 250 rq->ioprio = 0;
251 rq->buffer = NULL; 251 rq->buffer = NULL;
252 rq->ref_count = 1; 252 rq->ref_count = 1;
253 rq->q = q; 253 rq->q = q;
254 rq->special = NULL; 254 rq->special = NULL;
255 rq->data_len = 0; 255 rq->data_len = 0;
256 rq->data = NULL; 256 rq->data = NULL;
257 rq->nr_phys_segments = 0; 257 rq->nr_phys_segments = 0;
258 rq->sense = NULL; 258 rq->sense = NULL;
259 rq->end_io = NULL; 259 rq->end_io = NULL;
260 rq->end_io_data = NULL; 260 rq->end_io_data = NULL;
261 rq->completion_data = NULL; 261 rq->completion_data = NULL;
262 rq->next_rq = NULL; 262 rq->next_rq = NULL;
263 } 263 }
264 264
265 /** 265 /**
266 * blk_queue_ordered - does this queue support ordered writes 266 * blk_queue_ordered - does this queue support ordered writes
267 * @q: the request queue 267 * @q: the request queue
268 * @ordered: one of QUEUE_ORDERED_* 268 * @ordered: one of QUEUE_ORDERED_*
269 * @prepare_flush_fn: rq setup helper for cache flush ordered writes 269 * @prepare_flush_fn: rq setup helper for cache flush ordered writes
270 * 270 *
271 * Description: 271 * Description:
272 * For journalled file systems, doing ordered writes on a commit 272 * For journalled file systems, doing ordered writes on a commit
273 * block instead of explicitly doing wait_on_buffer (which is bad 273 * block instead of explicitly doing wait_on_buffer (which is bad
274 * for performance) can be a big win. Block drivers supporting this 274 * for performance) can be a big win. Block drivers supporting this
275 * feature should call this function and indicate so. 275 * feature should call this function and indicate so.
276 * 276 *
277 **/ 277 **/
278 int blk_queue_ordered(struct request_queue *q, unsigned ordered, 278 int blk_queue_ordered(struct request_queue *q, unsigned ordered,
279 prepare_flush_fn *prepare_flush_fn) 279 prepare_flush_fn *prepare_flush_fn)
280 { 280 {
281 if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) && 281 if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) &&
282 prepare_flush_fn == NULL) { 282 prepare_flush_fn == NULL) {
283 printk(KERN_ERR "blk_queue_ordered: prepare_flush_fn required\n"); 283 printk(KERN_ERR "blk_queue_ordered: prepare_flush_fn required\n");
284 return -EINVAL; 284 return -EINVAL;
285 } 285 }
286 286
287 if (ordered != QUEUE_ORDERED_NONE && 287 if (ordered != QUEUE_ORDERED_NONE &&
288 ordered != QUEUE_ORDERED_DRAIN && 288 ordered != QUEUE_ORDERED_DRAIN &&
289 ordered != QUEUE_ORDERED_DRAIN_FLUSH && 289 ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
290 ordered != QUEUE_ORDERED_DRAIN_FUA && 290 ordered != QUEUE_ORDERED_DRAIN_FUA &&
291 ordered != QUEUE_ORDERED_TAG && 291 ordered != QUEUE_ORDERED_TAG &&
292 ordered != QUEUE_ORDERED_TAG_FLUSH && 292 ordered != QUEUE_ORDERED_TAG_FLUSH &&
293 ordered != QUEUE_ORDERED_TAG_FUA) { 293 ordered != QUEUE_ORDERED_TAG_FUA) {
294 printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered); 294 printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered);
295 return -EINVAL; 295 return -EINVAL;
296 } 296 }
297 297
298 q->ordered = ordered; 298 q->ordered = ordered;
299 q->next_ordered = ordered; 299 q->next_ordered = ordered;
300 q->prepare_flush_fn = prepare_flush_fn; 300 q->prepare_flush_fn = prepare_flush_fn;
301 301
302 return 0; 302 return 0;
303 } 303 }
304 304
305 EXPORT_SYMBOL(blk_queue_ordered); 305 EXPORT_SYMBOL(blk_queue_ordered);
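
/*
 * Usage sketch for a device with a volatile write cache that supports
 * draining plus an explicit flush (QUEUE_ORDERED_DRAIN_FLUSH). The
 * names, the BLOCK_PC-style flush command and the 0x35 opcode
 * (SYNCHRONIZE CACHE on SCSI-like hardware) are illustrative
 * assumptions, not taken from this file.
 */
#include <linux/blkdev.h>
#include <linux/string.h>

static void example_prepare_flush(struct request_queue *q, struct request *rq)
{
	memset(rq->cmd, 0, sizeof(rq->cmd));
	rq->cmd_type = REQ_TYPE_BLOCK_PC;
	rq->timeout = 60 * HZ;
	rq->cmd[0] = 0x35;		/* SYNCHRONIZE CACHE */
	rq->cmd_len = 10;
}

static int example_enable_barriers(struct request_queue *q)
{
	return blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH,
				 example_prepare_flush);
}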
306 306
307 /** 307 /**
308 * blk_queue_issue_flush_fn - set function for issuing a flush 308 * blk_queue_issue_flush_fn - set function for issuing a flush
309 * @q: the request queue 309 * @q: the request queue
310 * @iff: the function to be called issuing the flush 310 * @iff: the function to be called issuing the flush
311 * 311 *
312 * Description: 312 * Description:
313 * If a driver supports issuing a flush command, the support is notified 313 * If a driver supports issuing a flush command, the support is notified
314 * to the block layer by defining it through this call. 314 * to the block layer by defining it through this call.
315 * 315 *
316 **/ 316 **/
317 void blk_queue_issue_flush_fn(struct request_queue *q, issue_flush_fn *iff) 317 void blk_queue_issue_flush_fn(struct request_queue *q, issue_flush_fn *iff)
318 { 318 {
319 q->issue_flush_fn = iff; 319 q->issue_flush_fn = iff;
320 } 320 }
321 321
322 EXPORT_SYMBOL(blk_queue_issue_flush_fn); 322 EXPORT_SYMBOL(blk_queue_issue_flush_fn);
323 323
324 /* 324 /*
325 * Cache flushing for ordered writes handling 325 * Cache flushing for ordered writes handling
326 */ 326 */
327 inline unsigned blk_ordered_cur_seq(struct request_queue *q) 327 inline unsigned blk_ordered_cur_seq(struct request_queue *q)
328 { 328 {
329 if (!q->ordseq) 329 if (!q->ordseq)
330 return 0; 330 return 0;
331 return 1 << ffz(q->ordseq); 331 return 1 << ffz(q->ordseq);
332 } 332 }
333 333
334 unsigned blk_ordered_req_seq(struct request *rq) 334 unsigned blk_ordered_req_seq(struct request *rq)
335 { 335 {
336 struct request_queue *q = rq->q; 336 struct request_queue *q = rq->q;
337 337
338 BUG_ON(q->ordseq == 0); 338 BUG_ON(q->ordseq == 0);
339 339
340 if (rq == &q->pre_flush_rq) 340 if (rq == &q->pre_flush_rq)
341 return QUEUE_ORDSEQ_PREFLUSH; 341 return QUEUE_ORDSEQ_PREFLUSH;
342 if (rq == &q->bar_rq) 342 if (rq == &q->bar_rq)
343 return QUEUE_ORDSEQ_BAR; 343 return QUEUE_ORDSEQ_BAR;
344 if (rq == &q->post_flush_rq) 344 if (rq == &q->post_flush_rq)
345 return QUEUE_ORDSEQ_POSTFLUSH; 345 return QUEUE_ORDSEQ_POSTFLUSH;
346 346
347 /* 347 /*
348 * !fs requests don't need to follow barrier ordering. Always 348 * !fs requests don't need to follow barrier ordering. Always
349 * put them at the front. This fixes the following deadlock. 349 * put them at the front. This fixes the following deadlock.
350 * 350 *
351 * http://thread.gmane.org/gmane.linux.kernel/537473 351 * http://thread.gmane.org/gmane.linux.kernel/537473
352 */ 352 */
353 if (!blk_fs_request(rq)) 353 if (!blk_fs_request(rq))
354 return QUEUE_ORDSEQ_DRAIN; 354 return QUEUE_ORDSEQ_DRAIN;
355 355
356 if ((rq->cmd_flags & REQ_ORDERED_COLOR) == 356 if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
357 (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR)) 357 (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR))
358 return QUEUE_ORDSEQ_DRAIN; 358 return QUEUE_ORDSEQ_DRAIN;
359 else 359 else
360 return QUEUE_ORDSEQ_DONE; 360 return QUEUE_ORDSEQ_DONE;
361 } 361 }
362 362
363 void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) 363 void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
364 { 364 {
365 struct request *rq; 365 struct request *rq;
366 int uptodate; 366 int uptodate;
367 367
368 if (error && !q->orderr) 368 if (error && !q->orderr)
369 q->orderr = error; 369 q->orderr = error;
370 370
371 BUG_ON(q->ordseq & seq); 371 BUG_ON(q->ordseq & seq);
372 q->ordseq |= seq; 372 q->ordseq |= seq;
373 373
374 if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) 374 if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
375 return; 375 return;
376 376
377 /* 377 /*
378 * Okay, sequence complete. 378 * Okay, sequence complete.
379 */ 379 */
380 uptodate = 1; 380 uptodate = 1;
381 if (q->orderr) 381 if (q->orderr)
382 uptodate = q->orderr; 382 uptodate = q->orderr;
383 383
384 q->ordseq = 0; 384 q->ordseq = 0;
385 rq = q->orig_bar_rq; 385 rq = q->orig_bar_rq;
386 386
387 end_that_request_first(rq, uptodate, rq->hard_nr_sectors); 387 end_that_request_first(rq, uptodate, rq->hard_nr_sectors);
388 end_that_request_last(rq, uptodate); 388 end_that_request_last(rq, uptodate);
389 } 389 }
390 390
391 static void pre_flush_end_io(struct request *rq, int error) 391 static void pre_flush_end_io(struct request *rq, int error)
392 { 392 {
393 elv_completed_request(rq->q, rq); 393 elv_completed_request(rq->q, rq);
394 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error); 394 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
395 } 395 }
396 396
397 static void bar_end_io(struct request *rq, int error) 397 static void bar_end_io(struct request *rq, int error)
398 { 398 {
399 elv_completed_request(rq->q, rq); 399 elv_completed_request(rq->q, rq);
400 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error); 400 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
401 } 401 }
402 402
403 static void post_flush_end_io(struct request *rq, int error) 403 static void post_flush_end_io(struct request *rq, int error)
404 { 404 {
405 elv_completed_request(rq->q, rq); 405 elv_completed_request(rq->q, rq);
406 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); 406 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
407 } 407 }
408 408
409 static void queue_flush(struct request_queue *q, unsigned which) 409 static void queue_flush(struct request_queue *q, unsigned which)
410 { 410 {
411 struct request *rq; 411 struct request *rq;
412 rq_end_io_fn *end_io; 412 rq_end_io_fn *end_io;
413 413
414 if (which == QUEUE_ORDERED_PREFLUSH) { 414 if (which == QUEUE_ORDERED_PREFLUSH) {
415 rq = &q->pre_flush_rq; 415 rq = &q->pre_flush_rq;
416 end_io = pre_flush_end_io; 416 end_io = pre_flush_end_io;
417 } else { 417 } else {
418 rq = &q->post_flush_rq; 418 rq = &q->post_flush_rq;
419 end_io = post_flush_end_io; 419 end_io = post_flush_end_io;
420 } 420 }
421 421
422 rq->cmd_flags = REQ_HARDBARRIER; 422 rq->cmd_flags = REQ_HARDBARRIER;
423 rq_init(q, rq); 423 rq_init(q, rq);
424 rq->elevator_private = NULL; 424 rq->elevator_private = NULL;
425 rq->elevator_private2 = NULL; 425 rq->elevator_private2 = NULL;
426 rq->rq_disk = q->bar_rq.rq_disk; 426 rq->rq_disk = q->bar_rq.rq_disk;
427 rq->end_io = end_io; 427 rq->end_io = end_io;
428 q->prepare_flush_fn(q, rq); 428 q->prepare_flush_fn(q, rq);
429 429
430 elv_insert(q, rq, ELEVATOR_INSERT_FRONT); 430 elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
431 } 431 }
432 432
433 static inline struct request *start_ordered(struct request_queue *q, 433 static inline struct request *start_ordered(struct request_queue *q,
434 struct request *rq) 434 struct request *rq)
435 { 435 {
436 q->orderr = 0; 436 q->orderr = 0;
437 q->ordered = q->next_ordered; 437 q->ordered = q->next_ordered;
438 q->ordseq |= QUEUE_ORDSEQ_STARTED; 438 q->ordseq |= QUEUE_ORDSEQ_STARTED;
439 439
440 /* 440 /*
441 * Prep proxy barrier request. 441 * Prep proxy barrier request.
442 */ 442 */
443 blkdev_dequeue_request(rq); 443 blkdev_dequeue_request(rq);
444 q->orig_bar_rq = rq; 444 q->orig_bar_rq = rq;
445 rq = &q->bar_rq; 445 rq = &q->bar_rq;
446 rq->cmd_flags = 0; 446 rq->cmd_flags = 0;
447 rq_init(q, rq); 447 rq_init(q, rq);
448 if (bio_data_dir(q->orig_bar_rq->bio) == WRITE) 448 if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
449 rq->cmd_flags |= REQ_RW; 449 rq->cmd_flags |= REQ_RW;
450 if (q->ordered & QUEUE_ORDERED_FUA) 450 if (q->ordered & QUEUE_ORDERED_FUA)
451 rq->cmd_flags |= REQ_FUA; 451 rq->cmd_flags |= REQ_FUA;
452 rq->elevator_private = NULL; 452 rq->elevator_private = NULL;
453 rq->elevator_private2 = NULL; 453 rq->elevator_private2 = NULL;
454 init_request_from_bio(rq, q->orig_bar_rq->bio); 454 init_request_from_bio(rq, q->orig_bar_rq->bio);
455 rq->end_io = bar_end_io; 455 rq->end_io = bar_end_io;
456 456
457 /* 457 /*
458 * Queue ordered sequence. As we stack them at the head, we 458 * Queue ordered sequence. As we stack them at the head, we
459 * need to queue in reverse order. Note that we rely on that 459 * need to queue in reverse order. Note that we rely on that
460 * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs 460 * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
461 * request gets in between the ordered sequence. 461 * request gets in between the ordered sequence.
462 */ 462 */
463 if (q->ordered & QUEUE_ORDERED_POSTFLUSH) 463 if (q->ordered & QUEUE_ORDERED_POSTFLUSH)
464 queue_flush(q, QUEUE_ORDERED_POSTFLUSH); 464 queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
465 else 465 else
466 q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH; 466 q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
467 467
468 elv_insert(q, rq, ELEVATOR_INSERT_FRONT); 468 elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
469 469
470 if (q->ordered & QUEUE_ORDERED_PREFLUSH) { 470 if (q->ordered & QUEUE_ORDERED_PREFLUSH) {
471 queue_flush(q, QUEUE_ORDERED_PREFLUSH); 471 queue_flush(q, QUEUE_ORDERED_PREFLUSH);
472 rq = &q->pre_flush_rq; 472 rq = &q->pre_flush_rq;
473 } else 473 } else
474 q->ordseq |= QUEUE_ORDSEQ_PREFLUSH; 474 q->ordseq |= QUEUE_ORDSEQ_PREFLUSH;
475 475
476 if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0) 476 if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0)
477 q->ordseq |= QUEUE_ORDSEQ_DRAIN; 477 q->ordseq |= QUEUE_ORDSEQ_DRAIN;
478 else 478 else
479 rq = NULL; 479 rq = NULL;
480 480
481 return rq; 481 return rq;
482 } 482 }
483 483
484 int blk_do_ordered(struct request_queue *q, struct request **rqp) 484 int blk_do_ordered(struct request_queue *q, struct request **rqp)
485 { 485 {
486 struct request *rq = *rqp; 486 struct request *rq = *rqp;
487 int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq); 487 int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
488 488
489 if (!q->ordseq) { 489 if (!q->ordseq) {
490 if (!is_barrier) 490 if (!is_barrier)
491 return 1; 491 return 1;
492 492
493 if (q->next_ordered != QUEUE_ORDERED_NONE) { 493 if (q->next_ordered != QUEUE_ORDERED_NONE) {
494 *rqp = start_ordered(q, rq); 494 *rqp = start_ordered(q, rq);
495 return 1; 495 return 1;
496 } else { 496 } else {
497 /* 497 /*
498 * This can happen when the queue switches to 498 * This can happen when the queue switches to
499 * ORDERED_NONE while this request is on it. 499 * ORDERED_NONE while this request is on it.
500 */ 500 */
501 blkdev_dequeue_request(rq); 501 blkdev_dequeue_request(rq);
502 end_that_request_first(rq, -EOPNOTSUPP, 502 end_that_request_first(rq, -EOPNOTSUPP,
503 rq->hard_nr_sectors); 503 rq->hard_nr_sectors);
504 end_that_request_last(rq, -EOPNOTSUPP); 504 end_that_request_last(rq, -EOPNOTSUPP);
505 *rqp = NULL; 505 *rqp = NULL;
506 return 0; 506 return 0;
507 } 507 }
508 } 508 }
509 509
510 /* 510 /*
511 * Ordered sequence in progress 511 * Ordered sequence in progress
512 */ 512 */
513 513
514 /* Special requests are not subject to ordering rules. */ 514 /* Special requests are not subject to ordering rules. */
515 if (!blk_fs_request(rq) && 515 if (!blk_fs_request(rq) &&
516 rq != &q->pre_flush_rq && rq != &q->post_flush_rq) 516 rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
517 return 1; 517 return 1;
518 518
519 if (q->ordered & QUEUE_ORDERED_TAG) { 519 if (q->ordered & QUEUE_ORDERED_TAG) {
520 /* Ordered by tag. Blocking the next barrier is enough. */ 520 /* Ordered by tag. Blocking the next barrier is enough. */
521 if (is_barrier && rq != &q->bar_rq) 521 if (is_barrier && rq != &q->bar_rq)
522 *rqp = NULL; 522 *rqp = NULL;
523 } else { 523 } else {
524 /* Ordered by draining. Wait for turn. */ 524 /* Ordered by draining. Wait for turn. */
525 WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); 525 WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
526 if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) 526 if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
527 *rqp = NULL; 527 *rqp = NULL;
528 } 528 }
529 529
530 return 1; 530 return 1;
531 } 531 }
532 532
533 static void req_bio_endio(struct request *rq, struct bio *bio, 533 static void req_bio_endio(struct request *rq, struct bio *bio,
534 unsigned int nbytes, int error) 534 unsigned int nbytes, int error)
535 { 535 {
536 struct request_queue *q = rq->q; 536 struct request_queue *q = rq->q;
537 537
538 if (&q->bar_rq != rq) { 538 if (&q->bar_rq != rq) {
539 if (error) 539 if (error)
540 clear_bit(BIO_UPTODATE, &bio->bi_flags); 540 clear_bit(BIO_UPTODATE, &bio->bi_flags);
541 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) 541 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
542 error = -EIO; 542 error = -EIO;
543 543
544 if (unlikely(nbytes > bio->bi_size)) { 544 if (unlikely(nbytes > bio->bi_size)) {
545 printk("%s: want %u bytes done, only %u left\n", 545 printk("%s: want %u bytes done, only %u left\n",
546 __FUNCTION__, nbytes, bio->bi_size); 546 __FUNCTION__, nbytes, bio->bi_size);
547 nbytes = bio->bi_size; 547 nbytes = bio->bi_size;
548 } 548 }
549 549
550 bio->bi_size -= nbytes; 550 bio->bi_size -= nbytes;
551 bio->bi_sector += (nbytes >> 9); 551 bio->bi_sector += (nbytes >> 9);
552 if (bio->bi_size == 0) 552 if (bio->bi_size == 0)
553 bio_endio(bio, error); 553 bio_endio(bio, error);
554 } else { 554 } else {
555 555
556 /* 556 /*
557 * Okay, this is the barrier request in progress, just 557 * Okay, this is the barrier request in progress, just
558 * record the error; 558 * record the error;
559 */ 559 */
560 if (error && !q->orderr) 560 if (error && !q->orderr)
561 q->orderr = error; 561 q->orderr = error;
562 } 562 }
563 } 563 }
564 564
565 /** 565 /**
566 * blk_queue_bounce_limit - set bounce buffer limit for queue 566 * blk_queue_bounce_limit - set bounce buffer limit for queue
567 * @q: the request queue for the device 567 * @q: the request queue for the device
568 * @dma_addr: bus address limit 568 * @dma_addr: bus address limit
569 * 569 *
570 * Description: 570 * Description:
571 * Different hardware can have different requirements as to what pages 571 * Different hardware can have different requirements as to what pages
572 * it can do I/O directly to. A low level driver can call 572 * it can do I/O directly to. A low level driver can call
573 * blk_queue_bounce_limit to have lower memory pages allocated as bounce 573 * blk_queue_bounce_limit to have lower memory pages allocated as bounce
574 * buffers for doing I/O to pages residing above @dma_addr. 574 * buffers for doing I/O to pages residing above @dma_addr.
575 **/ 575 **/
576 void blk_queue_bounce_limit(struct request_queue *q, u64 dma_addr) 576 void blk_queue_bounce_limit(struct request_queue *q, u64 dma_addr)
577 { 577 {
578 unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT; 578 unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT;
579 int dma = 0; 579 int dma = 0;
580 580
581 q->bounce_gfp = GFP_NOIO; 581 q->bounce_gfp = GFP_NOIO;
582 #if BITS_PER_LONG == 64 582 #if BITS_PER_LONG == 64
583 /* Assume anything <= 4GB can be handled by IOMMU. 583 /* Assume anything <= 4GB can be handled by IOMMU.
584 Actually some IOMMUs can handle everything, but I don't 584 Actually some IOMMUs can handle everything, but I don't
585 know of a way to test this here. */ 585 know of a way to test this here. */
586 if (bounce_pfn < (min_t(u64,0xffffffff,BLK_BOUNCE_HIGH) >> PAGE_SHIFT)) 586 if (bounce_pfn < (min_t(u64,0xffffffff,BLK_BOUNCE_HIGH) >> PAGE_SHIFT))
587 dma = 1; 587 dma = 1;
588 q->bounce_pfn = max_low_pfn; 588 q->bounce_pfn = max_low_pfn;
589 #else 589 #else
590 if (bounce_pfn < blk_max_low_pfn) 590 if (bounce_pfn < blk_max_low_pfn)
591 dma = 1; 591 dma = 1;
592 q->bounce_pfn = bounce_pfn; 592 q->bounce_pfn = bounce_pfn;
593 #endif 593 #endif
594 if (dma) { 594 if (dma) {
595 init_emergency_isa_pool(); 595 init_emergency_isa_pool();
596 q->bounce_gfp = GFP_NOIO | GFP_DMA; 596 q->bounce_gfp = GFP_NOIO | GFP_DMA;
597 q->bounce_pfn = bounce_pfn; 597 q->bounce_pfn = bounce_pfn;
598 } 598 }
599 } 599 }
600 600
601 EXPORT_SYMBOL(blk_queue_bounce_limit); 601 EXPORT_SYMBOL(blk_queue_bounce_limit);
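
/*
 * Usage sketch: a hypothetical controller that can only DMA below 4GB
 * passes that limit as its bounce threshold; a device (or IOMMU) that
 * can reach all of memory would use BLK_BOUNCE_ANY instead.
 */
#include <linux/blkdev.h>

static void example_set_bounce(struct request_queue *q, int can_dma_highmem)
{
	if (can_dma_highmem)
		blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
	else
		blk_queue_bounce_limit(q, 0xffffffffULL);	/* 32-bit DMA only */
}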
602 602
603 /** 603 /**
604 * blk_queue_max_sectors - set max sectors for a request for this queue 604 * blk_queue_max_sectors - set max sectors for a request for this queue
605 * @q: the request queue for the device 605 * @q: the request queue for the device
606 * @max_sectors: max sectors in the usual 512b unit 606 * @max_sectors: max sectors in the usual 512b unit
607 * 607 *
608 * Description: 608 * Description:
609 * Enables a low level driver to set an upper limit on the size of 609 * Enables a low level driver to set an upper limit on the size of
610 * received requests. 610 * received requests.
611 **/ 611 **/
612 void blk_queue_max_sectors(struct request_queue *q, unsigned int max_sectors) 612 void blk_queue_max_sectors(struct request_queue *q, unsigned int max_sectors)
613 { 613 {
614 if ((max_sectors << 9) < PAGE_CACHE_SIZE) { 614 if ((max_sectors << 9) < PAGE_CACHE_SIZE) {
615 max_sectors = 1 << (PAGE_CACHE_SHIFT - 9); 615 max_sectors = 1 << (PAGE_CACHE_SHIFT - 9);
616 printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors); 616 printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors);
617 } 617 }
618 618
619 if (BLK_DEF_MAX_SECTORS > max_sectors) 619 if (BLK_DEF_MAX_SECTORS > max_sectors)
620 q->max_hw_sectors = q->max_sectors = max_sectors; 620 q->max_hw_sectors = q->max_sectors = max_sectors;
621 else { 621 else {
622 q->max_sectors = BLK_DEF_MAX_SECTORS; 622 q->max_sectors = BLK_DEF_MAX_SECTORS;
623 q->max_hw_sectors = max_sectors; 623 q->max_hw_sectors = max_sectors;
624 } 624 }
625 } 625 }
626 626
627 EXPORT_SYMBOL(blk_queue_max_sectors); 627 EXPORT_SYMBOL(blk_queue_max_sectors);
628 628
629 /** 629 /**
630 * blk_queue_max_phys_segments - set max phys segments for a request for this queue 630 * blk_queue_max_phys_segments - set max phys segments for a request for this queue
631 * @q: the request queue for the device 631 * @q: the request queue for the device
632 * @max_segments: max number of segments 632 * @max_segments: max number of segments
633 * 633 *
634 * Description: 634 * Description:
635 * Enables a low level driver to set an upper limit on the number of 635 * Enables a low level driver to set an upper limit on the number of
636 * physical data segments in a request. This would be the largest sized 636 * physical data segments in a request. This would be the largest sized
637 * scatter list the driver could handle. 637 * scatter list the driver could handle.
638 **/ 638 **/
639 void blk_queue_max_phys_segments(struct request_queue *q, 639 void blk_queue_max_phys_segments(struct request_queue *q,
640 unsigned short max_segments) 640 unsigned short max_segments)
641 { 641 {
642 if (!max_segments) { 642 if (!max_segments) {
643 max_segments = 1; 643 max_segments = 1;
644 printk("%s: set to minimum %d\n", __FUNCTION__, max_segments); 644 printk("%s: set to minimum %d\n", __FUNCTION__, max_segments);
645 } 645 }
646 646
647 q->max_phys_segments = max_segments; 647 q->max_phys_segments = max_segments;
648 } 648 }
649 649
650 EXPORT_SYMBOL(blk_queue_max_phys_segments); 650 EXPORT_SYMBOL(blk_queue_max_phys_segments);
651 651
652 /** 652 /**
653 * blk_queue_max_hw_segments - set max hw segments for a request for this queue 653 * blk_queue_max_hw_segments - set max hw segments for a request for this queue
654 * @q: the request queue for the device 654 * @q: the request queue for the device
655 * @max_segments: max number of segments 655 * @max_segments: max number of segments
656 * 656 *
657 * Description: 657 * Description:
658 * Enables a low level driver to set an upper limit on the number of 658 * Enables a low level driver to set an upper limit on the number of
659 * hw data segments in a request. This would be the largest number of 659 * hw data segments in a request. This would be the largest number of
660 * address/length pairs the host adapter can actually give at once 660 * address/length pairs the host adapter can actually give at once
661 * to the device. 661 * to the device.
662 **/ 662 **/
663 void blk_queue_max_hw_segments(struct request_queue *q, 663 void blk_queue_max_hw_segments(struct request_queue *q,
664 unsigned short max_segments) 664 unsigned short max_segments)
665 { 665 {
666 if (!max_segments) { 666 if (!max_segments) {
667 max_segments = 1; 667 max_segments = 1;
668 printk("%s: set to minimum %d\n", __FUNCTION__, max_segments); 668 printk("%s: set to minimum %d\n", __FUNCTION__, max_segments);
669 } 669 }
670 670
671 q->max_hw_segments = max_segments; 671 q->max_hw_segments = max_segments;
672 } 672 }
673 673
674 EXPORT_SYMBOL(blk_queue_max_hw_segments); 674 EXPORT_SYMBOL(blk_queue_max_hw_segments);
675 675
676 /** 676 /**
677 * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg 677 * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg
678 * @q: the request queue for the device 678 * @q: the request queue for the device
679 * @max_size: max size of segment in bytes 679 * @max_size: max size of segment in bytes
680 * 680 *
681 * Description: 681 * Description:
682 * Enables a low level driver to set an upper limit on the size of a 682 * Enables a low level driver to set an upper limit on the size of a
683 * coalesced segment 683 * coalesced segment
684 **/ 684 **/
685 void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size) 685 void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size)
686 { 686 {
687 if (max_size < PAGE_CACHE_SIZE) { 687 if (max_size < PAGE_CACHE_SIZE) {
688 max_size = PAGE_CACHE_SIZE; 688 max_size = PAGE_CACHE_SIZE;
689 printk("%s: set to minimum %d\n", __FUNCTION__, max_size); 689 printk("%s: set to minimum %d\n", __FUNCTION__, max_size);
690 } 690 }
691 691
692 q->max_segment_size = max_size; 692 q->max_segment_size = max_size;
693 } 693 }
694 694
695 EXPORT_SYMBOL(blk_queue_max_segment_size); 695 EXPORT_SYMBOL(blk_queue_max_segment_size);
696 696
697 /** 697 /**
698 * blk_queue_hardsect_size - set hardware sector size for the queue 698 * blk_queue_hardsect_size - set hardware sector size for the queue
699 * @q: the request queue for the device 699 * @q: the request queue for the device
700 * @size: the hardware sector size, in bytes 700 * @size: the hardware sector size, in bytes
701 * 701 *
702 * Description: 702 * Description:
703 * This should typically be set to the lowest possible sector size 703 * This should typically be set to the lowest possible sector size
704 * that the hardware can operate on (without resorting even to 704 * that the hardware can operate on (without resorting even to
705 * internal read-modify-write operations). Usually the default 705 * internal read-modify-write operations). Usually the default
706 * of 512 covers most hardware. 706 * of 512 covers most hardware.
707 **/ 707 **/
708 void blk_queue_hardsect_size(struct request_queue *q, unsigned short size) 708 void blk_queue_hardsect_size(struct request_queue *q, unsigned short size)
709 { 709 {
710 q->hardsect_size = size; 710 q->hardsect_size = size;
711 } 711 }
712 712
713 EXPORT_SYMBOL(blk_queue_hardsect_size); 713 EXPORT_SYMBOL(blk_queue_hardsect_size);
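
/*
 * Combined usage sketch for the limit setters above, as a hypothetical
 * driver probe path might call them; the numbers stand in for the
 * hardware's real capabilities and are not recommendations.
 */
#include <linux/blkdev.h>

static void example_set_limits(struct request_queue *q)
{
	blk_queue_max_sectors(q, 256);		/* 128k per request */
	blk_queue_max_phys_segments(q, 32);
	blk_queue_max_hw_segments(q, 32);	/* S/G table entries */
	blk_queue_max_segment_size(q, 65536);
	blk_queue_hardsect_size(q, 512);
}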
714 714
715 /* 715 /*
716 * Returns the minimum that is _not_ zero, unless both are zero. 716 * Returns the minimum that is _not_ zero, unless both are zero.
717 */ 717 */
718 #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) 718 #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
719 719
720 /** 720 /**
721 * blk_queue_stack_limits - inherit underlying queue limits for stacked drivers 721 * blk_queue_stack_limits - inherit underlying queue limits for stacked drivers
722 * @t: the stacking driver (top) 722 * @t: the stacking driver (top)
723 * @b: the underlying device (bottom) 723 * @b: the underlying device (bottom)
724 **/ 724 **/
725 void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) 725 void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
726 { 726 {
727 /* zero is "infinity" */ 727 /* zero is "infinity" */
728 t->max_sectors = min_not_zero(t->max_sectors,b->max_sectors); 728 t->max_sectors = min_not_zero(t->max_sectors,b->max_sectors);
729 t->max_hw_sectors = min_not_zero(t->max_hw_sectors,b->max_hw_sectors); 729 t->max_hw_sectors = min_not_zero(t->max_hw_sectors,b->max_hw_sectors);
730 730
731 t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments); 731 t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments);
732 t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments); 732 t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments);
733 t->max_segment_size = min(t->max_segment_size,b->max_segment_size); 733 t->max_segment_size = min(t->max_segment_size,b->max_segment_size);
734 t->hardsect_size = max(t->hardsect_size,b->hardsect_size); 734 t->hardsect_size = max(t->hardsect_size,b->hardsect_size);
735 if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) 735 if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags))
736 clear_bit(QUEUE_FLAG_CLUSTER, &t->queue_flags); 736 clear_bit(QUEUE_FLAG_CLUSTER, &t->queue_flags);
737 } 737 }
738 738
739 EXPORT_SYMBOL(blk_queue_stack_limits); 739 EXPORT_SYMBOL(blk_queue_stack_limits);
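
/*
 * Stacking-driver sketch (in the spirit of md/dm, names hypothetical):
 * when a component device is added, fold its queue limits into the
 * top-level queue so that requests built against the top queue stay
 * acceptable to the bottom one.
 */
#include <linux/blkdev.h>

static void example_add_component(struct request_queue *top,
				  struct block_device *component)
{
	struct request_queue *bottom = bdev_get_queue(component);

	if (bottom)
		blk_queue_stack_limits(top, bottom);
}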
740 740
741 /** 741 /**
742 * blk_queue_segment_boundary - set boundary rules for segment merging 742 * blk_queue_segment_boundary - set boundary rules for segment merging
743 * @q: the request queue for the device 743 * @q: the request queue for the device
744 * @mask: the memory boundary mask 744 * @mask: the memory boundary mask
745 **/ 745 **/
746 void blk_queue_segment_boundary(struct request_queue *q, unsigned long mask) 746 void blk_queue_segment_boundary(struct request_queue *q, unsigned long mask)
747 { 747 {
748 if (mask < PAGE_CACHE_SIZE - 1) { 748 if (mask < PAGE_CACHE_SIZE - 1) {
749 mask = PAGE_CACHE_SIZE - 1; 749 mask = PAGE_CACHE_SIZE - 1;
750 printk("%s: set to minimum %lx\n", __FUNCTION__, mask); 750 printk("%s: set to minimum %lx\n", __FUNCTION__, mask);
751 } 751 }
752 752
753 q->seg_boundary_mask = mask; 753 q->seg_boundary_mask = mask;
754 } 754 }
755 755
756 EXPORT_SYMBOL(blk_queue_segment_boundary); 756 EXPORT_SYMBOL(blk_queue_segment_boundary);
757 757
758 /** 758 /**
759 * blk_queue_dma_alignment - set dma length and memory alignment 759 * blk_queue_dma_alignment - set dma length and memory alignment
760 * @q: the request queue for the device 760 * @q: the request queue for the device
761 * @mask: alignment mask 761 * @mask: alignment mask
762 * 762 *
763 * description: 763 * description:
764 * set required memory and length alignment for direct dma transactions. 764 * set required memory and length alignment for direct dma transactions.
765 * this is used when building direct io requests for the queue. 765 * this is used when building direct io requests for the queue.
766 * 766 *
767 **/ 767 **/
768 void blk_queue_dma_alignment(struct request_queue *q, int mask) 768 void blk_queue_dma_alignment(struct request_queue *q, int mask)
769 { 769 {
770 q->dma_alignment = mask; 770 q->dma_alignment = mask;
771 } 771 }
772 772
773 EXPORT_SYMBOL(blk_queue_dma_alignment); 773 EXPORT_SYMBOL(blk_queue_dma_alignment);
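
/*
 * Usage sketch: a controller requiring 512-byte aligned buffers and
 * lengths for direct I/O passes the corresponding mask (length - 1).
 */
#include <linux/blkdev.h>

static void example_set_dma_alignment(struct request_queue *q)
{
	blk_queue_dma_alignment(q, 512 - 1);
}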
774 774
775 /** 775 /**
776 * blk_queue_find_tag - find a request by its tag and queue 776 * blk_queue_find_tag - find a request by its tag and queue
777 * @q: The request queue for the device 777 * @q: The request queue for the device
778 * @tag: The tag of the request 778 * @tag: The tag of the request
779 * 779 *
780 * Notes: 780 * Notes:
781 * Should be used when a device returns a tag and you want to match 781 * Should be used when a device returns a tag and you want to match
782 * it with a request. 782 * it with a request.
783 * 783 *
784 * no locks need be held. 784 * no locks need be held.
785 **/ 785 **/
786 struct request *blk_queue_find_tag(struct request_queue *q, int tag) 786 struct request *blk_queue_find_tag(struct request_queue *q, int tag)
787 { 787 {
788 return blk_map_queue_find_tag(q->queue_tags, tag); 788 return blk_map_queue_find_tag(q->queue_tags, tag);
789 } 789 }
790 790
791 EXPORT_SYMBOL(blk_queue_find_tag); 791 EXPORT_SYMBOL(blk_queue_find_tag);
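
/*
 * Completion-side sketch for tagged queueing (hypothetical driver,
 * locking and error handling omitted): the interrupt handler maps a
 * hardware-reported tag back to its request and hands it to the block
 * layer's softirq completion, which assumes a softirq_done_fn has been
 * set on the queue.
 */
#include <linux/blkdev.h>

static void example_irq_complete_tag(struct request_queue *q, int tag)
{
	struct request *rq = blk_queue_find_tag(q, tag);

	if (rq)
		blk_complete_request(rq);
}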
792 792
793 /** 793 /**
794 * __blk_free_tags - release a given set of tag maintenance info 794 * __blk_free_tags - release a given set of tag maintenance info
795 * @bqt: the tag map to free 795 * @bqt: the tag map to free
796 * 796 *
797 * Tries to free the specified @bqt@. Returns true if it was 797 * Tries to free the specified @bqt@. Returns true if it was
798 * actually freed and false if there are still references using it 798 * actually freed and false if there are still references using it
799 */ 799 */
800 static int __blk_free_tags(struct blk_queue_tag *bqt) 800 static int __blk_free_tags(struct blk_queue_tag *bqt)
801 { 801 {
802 int retval; 802 int retval;
803 803
804 retval = atomic_dec_and_test(&bqt->refcnt); 804 retval = atomic_dec_and_test(&bqt->refcnt);
805 if (retval) { 805 if (retval) {
806 BUG_ON(bqt->busy); 806 BUG_ON(bqt->busy);
807 BUG_ON(!list_empty(&bqt->busy_list)); 807 BUG_ON(!list_empty(&bqt->busy_list));
808 808
809 kfree(bqt->tag_index); 809 kfree(bqt->tag_index);
810 bqt->tag_index = NULL; 810 bqt->tag_index = NULL;
811 811
812 kfree(bqt->tag_map); 812 kfree(bqt->tag_map);
813 bqt->tag_map = NULL; 813 bqt->tag_map = NULL;
814 814
815 kfree(bqt); 815 kfree(bqt);
816 816
817 } 817 }
818 818
819 return retval; 819 return retval;
820 } 820 }
821 821
822 /** 822 /**
823 * __blk_queue_free_tags - release tag maintenance info 823 * __blk_queue_free_tags - release tag maintenance info
824 * @q: the request queue for the device 824 * @q: the request queue for the device
825 * 825 *
826 * Notes: 826 * Notes:
827 * blk_cleanup_queue() will take care of calling this function, if tagging 827 * blk_cleanup_queue() will take care of calling this function, if tagging
828 * has been used. So there's no need to call this directly. 828 * has been used. So there's no need to call this directly.
829 **/ 829 **/
830 static void __blk_queue_free_tags(struct request_queue *q) 830 static void __blk_queue_free_tags(struct request_queue *q)
831 { 831 {
832 struct blk_queue_tag *bqt = q->queue_tags; 832 struct blk_queue_tag *bqt = q->queue_tags;
833 833
834 if (!bqt) 834 if (!bqt)
835 return; 835 return;
836 836
837 __blk_free_tags(bqt); 837 __blk_free_tags(bqt);
838 838
839 q->queue_tags = NULL; 839 q->queue_tags = NULL;
840 q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED); 840 q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED);
841 } 841 }
842 842
843 843
844 /** 844 /**
845 * blk_free_tags - release a given set of tag maintenance info 845 * blk_free_tags - release a given set of tag maintenance info
846 * @bqt: the tag map to free 846 * @bqt: the tag map to free
847 * 847 *
848 * For an externally managed @bqt@, frees the map. Callers of this 848 * For an externally managed @bqt@, frees the map. Callers of this
849 * function must guarantee to have released all the queues that 849 * function must guarantee to have released all the queues that
850 * might have been using this tag map. 850 * might have been using this tag map.
851 */ 851 */
852 void blk_free_tags(struct blk_queue_tag *bqt) 852 void blk_free_tags(struct blk_queue_tag *bqt)
853 { 853 {
854 if (unlikely(!__blk_free_tags(bqt))) 854 if (unlikely(!__blk_free_tags(bqt)))
855 BUG(); 855 BUG();
856 } 856 }
857 EXPORT_SYMBOL(blk_free_tags); 857 EXPORT_SYMBOL(blk_free_tags);
858 858
859 /** 859 /**
860 * blk_queue_free_tags - release tag maintenance info 860 * blk_queue_free_tags - release tag maintenance info
861 * @q: the request queue for the device 861 * @q: the request queue for the device
862 * 862 *
863 * Notes: 863 * Notes:
864 * This is used to disable tagged queuing on a device, yet leave 864 * This is used to disable tagged queuing on a device, yet leave
865 * the queue in function. 865 * the queue in function.
866 **/ 866 **/
867 void blk_queue_free_tags(struct request_queue *q) 867 void blk_queue_free_tags(struct request_queue *q)
868 { 868 {
869 clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); 869 clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags);
870 } 870 }
871 871
872 EXPORT_SYMBOL(blk_queue_free_tags); 872 EXPORT_SYMBOL(blk_queue_free_tags);
873 873
874 static int 874 static int
875 init_tag_map(struct request_queue *q, struct blk_queue_tag *tags, int depth) 875 init_tag_map(struct request_queue *q, struct blk_queue_tag *tags, int depth)
876 { 876 {
877 struct request **tag_index; 877 struct request **tag_index;
878 unsigned long *tag_map; 878 unsigned long *tag_map;
879 int nr_ulongs; 879 int nr_ulongs;
880 880
881 if (q && depth > q->nr_requests * 2) { 881 if (q && depth > q->nr_requests * 2) {
882 depth = q->nr_requests * 2; 882 depth = q->nr_requests * 2;
883 printk(KERN_ERR "%s: adjusted depth to %d\n", 883 printk(KERN_ERR "%s: adjusted depth to %d\n",
884 __FUNCTION__, depth); 884 __FUNCTION__, depth);
885 } 885 }
886 886
887 tag_index = kzalloc(depth * sizeof(struct request *), GFP_ATOMIC); 887 tag_index = kzalloc(depth * sizeof(struct request *), GFP_ATOMIC);
888 if (!tag_index) 888 if (!tag_index)
889 goto fail; 889 goto fail;
890 890
891 nr_ulongs = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG; 891 nr_ulongs = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG;
892 tag_map = kzalloc(nr_ulongs * sizeof(unsigned long), GFP_ATOMIC); 892 tag_map = kzalloc(nr_ulongs * sizeof(unsigned long), GFP_ATOMIC);
893 if (!tag_map) 893 if (!tag_map)
894 goto fail; 894 goto fail;
895 895
896 tags->real_max_depth = depth; 896 tags->real_max_depth = depth;
897 tags->max_depth = depth; 897 tags->max_depth = depth;
898 tags->tag_index = tag_index; 898 tags->tag_index = tag_index;
899 tags->tag_map = tag_map; 899 tags->tag_map = tag_map;
900 900
901 return 0; 901 return 0;
902 fail: 902 fail:
903 kfree(tag_index); 903 kfree(tag_index);
904 return -ENOMEM; 904 return -ENOMEM;
905 } 905 }
906 906
907 static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q, 907 static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q,
908 int depth) 908 int depth)
909 { 909 {
910 struct blk_queue_tag *tags; 910 struct blk_queue_tag *tags;
911 911
912 tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC); 912 tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC);
913 if (!tags) 913 if (!tags)
914 goto fail; 914 goto fail;
915 915
916 if (init_tag_map(q, tags, depth)) 916 if (init_tag_map(q, tags, depth))
917 goto fail; 917 goto fail;
918 918
919 INIT_LIST_HEAD(&tags->busy_list); 919 INIT_LIST_HEAD(&tags->busy_list);
920 tags->busy = 0; 920 tags->busy = 0;
921 atomic_set(&tags->refcnt, 1); 921 atomic_set(&tags->refcnt, 1);
922 return tags; 922 return tags;
923 fail: 923 fail:
924 kfree(tags); 924 kfree(tags);
925 return NULL; 925 return NULL;
926 } 926 }
927 927
928 /** 928 /**
929 * blk_init_tags - initialize the tag info for an external tag map 929 * blk_init_tags - initialize the tag info for an external tag map
930 * @depth: the maximum queue depth supported 930 * @depth: the maximum queue depth supported
931 * @tags: the tag to use 931 * @tags: the tag to use
932 **/ 932 **/
933 struct blk_queue_tag *blk_init_tags(int depth) 933 struct blk_queue_tag *blk_init_tags(int depth)
934 { 934 {
935 return __blk_queue_init_tags(NULL, depth); 935 return __blk_queue_init_tags(NULL, depth);
936 } 936 }
937 EXPORT_SYMBOL(blk_init_tags); 937 EXPORT_SYMBOL(blk_init_tags);
938 938
939 /** 939 /**
940 * blk_queue_init_tags - initialize the queue tag info 940 * blk_queue_init_tags - initialize the queue tag info
941 * @q: the request queue for the device 941 * @q: the request queue for the device
942 * @depth: the maximum queue depth supported 942 * @depth: the maximum queue depth supported
943 * @tags: the tag to use 943 * @tags: the tag to use
944 **/ 944 **/
945 int blk_queue_init_tags(struct request_queue *q, int depth, 945 int blk_queue_init_tags(struct request_queue *q, int depth,
946 struct blk_queue_tag *tags) 946 struct blk_queue_tag *tags)
947 { 947 {
948 int rc; 948 int rc;
949 949
950 BUG_ON(tags && q->queue_tags && tags != q->queue_tags); 950 BUG_ON(tags && q->queue_tags && tags != q->queue_tags);
951 951
952 if (!tags && !q->queue_tags) { 952 if (!tags && !q->queue_tags) {
953 tags = __blk_queue_init_tags(q, depth); 953 tags = __blk_queue_init_tags(q, depth);
954 954
955 if (!tags) 955 if (!tags)
956 goto fail; 956 goto fail;
957 } else if (q->queue_tags) { 957 } else if (q->queue_tags) {
958 if ((rc = blk_queue_resize_tags(q, depth))) 958 if ((rc = blk_queue_resize_tags(q, depth)))
959 return rc; 959 return rc;
960 set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); 960 set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags);
961 return 0; 961 return 0;
962 } else 962 } else
963 atomic_inc(&tags->refcnt); 963 atomic_inc(&tags->refcnt);
964 964
965 /* 965 /*
966 * assign it, all done 966 * assign it, all done
967 */ 967 */
968 q->queue_tags = tags; 968 q->queue_tags = tags;
969 q->queue_flags |= (1 << QUEUE_FLAG_QUEUED); 969 q->queue_flags |= (1 << QUEUE_FLAG_QUEUED);
970 return 0; 970 return 0;
971 fail: 971 fail:
972 kfree(tags); 972 kfree(tags);
973 return -ENOMEM; 973 return -ENOMEM;
974 } 974 }
975 975
976 EXPORT_SYMBOL(blk_queue_init_tags); 976 EXPORT_SYMBOL(blk_queue_init_tags);
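A minimal setup sketch for the tag-init helpers above, assuming a hypothetical driver (MYDRV_QUEUE_DEPTH, struct mydrv_host and its fields are illustrative, not real kernel symbols): passing NULL asks the block layer to allocate a private tag map, while a map shared between several queues is created once with blk_init_tags() and then passed to blk_queue_init_tags() for each queue.

#include <linux/blkdev.h>

/* one private tag map for a single queue */
static int mydrv_setup_tags(struct request_queue *q)
{
        return blk_queue_init_tags(q, MYDRV_QUEUE_DEPTH, NULL);
}

/* one tag map shared by all ports of the same controller */
static int mydrv_setup_shared_tags(struct mydrv_host *host)
{
        int i;

        host->tags = blk_init_tags(MYDRV_QUEUE_DEPTH);
        if (!host->tags)
                return -ENOMEM;

        for (i = 0; i < host->nr_ports; i++)
                if (blk_queue_init_tags(host->port[i].queue,
                                        MYDRV_QUEUE_DEPTH, host->tags))
                        return -ENOMEM;

        return 0;
}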
977 977
978 /** 978 /**
979 * blk_queue_resize_tags - change the queueing depth 979 * blk_queue_resize_tags - change the queueing depth
980 * @q: the request queue for the device 980 * @q: the request queue for the device
981 * @new_depth: the new max command queueing depth 981 * @new_depth: the new max command queueing depth
982 * 982 *
983 * Notes: 983 * Notes:
984 * Must be called with the queue lock held. 984 * Must be called with the queue lock held.
985 **/ 985 **/
986 int blk_queue_resize_tags(struct request_queue *q, int new_depth) 986 int blk_queue_resize_tags(struct request_queue *q, int new_depth)
987 { 987 {
988 struct blk_queue_tag *bqt = q->queue_tags; 988 struct blk_queue_tag *bqt = q->queue_tags;
989 struct request **tag_index; 989 struct request **tag_index;
990 unsigned long *tag_map; 990 unsigned long *tag_map;
991 int max_depth, nr_ulongs; 991 int max_depth, nr_ulongs;
992 992
993 if (!bqt) 993 if (!bqt)
994 return -ENXIO; 994 return -ENXIO;
995 995
996 /* 996 /*
997 * if we already have a large enough real_max_depth, just 997 * if we already have a large enough real_max_depth, just
998 * adjust max_depth. *NOTE* as requests with tag values 998 * adjust max_depth. *NOTE* as requests with tag values
999 * between new_depth and real_max_depth can be in flight, the 999 * between new_depth and real_max_depth can be in flight, the
1000 * tag map cannot be shrunk blindly here. 1000 * tag map cannot be shrunk blindly here.
1001 */ 1001 */
1002 if (new_depth <= bqt->real_max_depth) { 1002 if (new_depth <= bqt->real_max_depth) {
1003 bqt->max_depth = new_depth; 1003 bqt->max_depth = new_depth;
1004 return 0; 1004 return 0;
1005 } 1005 }
1006 1006
1007 /* 1007 /*
1008 * Currently cannot replace a shared tag map with a new 1008 * Currently cannot replace a shared tag map with a new
1009 * one, so error out if this is the case 1009 * one, so error out if this is the case
1010 */ 1010 */
1011 if (atomic_read(&bqt->refcnt) != 1) 1011 if (atomic_read(&bqt->refcnt) != 1)
1012 return -EBUSY; 1012 return -EBUSY;
1013 1013
1014 /* 1014 /*
1015 * save the old state info, so we can copy it back 1015 * save the old state info, so we can copy it back
1016 */ 1016 */
1017 tag_index = bqt->tag_index; 1017 tag_index = bqt->tag_index;
1018 tag_map = bqt->tag_map; 1018 tag_map = bqt->tag_map;
1019 max_depth = bqt->real_max_depth; 1019 max_depth = bqt->real_max_depth;
1020 1020
1021 if (init_tag_map(q, bqt, new_depth)) 1021 if (init_tag_map(q, bqt, new_depth))
1022 return -ENOMEM; 1022 return -ENOMEM;
1023 1023
1024 memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *)); 1024 memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *));
1025 nr_ulongs = ALIGN(max_depth, BITS_PER_LONG) / BITS_PER_LONG; 1025 nr_ulongs = ALIGN(max_depth, BITS_PER_LONG) / BITS_PER_LONG;
1026 memcpy(bqt->tag_map, tag_map, nr_ulongs * sizeof(unsigned long)); 1026 memcpy(bqt->tag_map, tag_map, nr_ulongs * sizeof(unsigned long));
1027 1027
1028 kfree(tag_index); 1028 kfree(tag_index);
1029 kfree(tag_map); 1029 kfree(tag_map);
1030 return 0; 1030 return 0;
1031 } 1031 }
1032 1032
1033 EXPORT_SYMBOL(blk_queue_resize_tags); 1033 EXPORT_SYMBOL(blk_queue_resize_tags);
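A usage sketch for resizing, e.g. after the controller reports that it supports a larger depth; new_depth and the mydrv prefix are hypothetical, and the queue lock is held as the note above requires.

spin_lock_irqsave(q->queue_lock, flags);
if (blk_queue_resize_tags(q, new_depth))
        printk(KERN_WARNING "mydrv: unable to resize tag map to %d\n",
               new_depth);
spin_unlock_irqrestore(q->queue_lock, flags);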
1034 1034
1035 /** 1035 /**
1036 * blk_queue_end_tag - end tag operations for a request 1036 * blk_queue_end_tag - end tag operations for a request
1037 * @q: the request queue for the device 1037 * @q: the request queue for the device
1038 * @rq: the request that has completed 1038 * @rq: the request that has completed
1039 * 1039 *
1040 * Description: 1040 * Description:
1041 * Typically called when end_that_request_first() returns 0, meaning 1041 * Typically called when end_that_request_first() returns 0, meaning
1042 * all transfers have been done for a request. It's important to call 1042 * all transfers have been done for a request. It's important to call
1043 * this function before end_that_request_last(), as that will put the 1043 * this function before end_that_request_last(), as that will put the
1044 * request back on the free list thus corrupting the internal tag list. 1044 * request back on the free list thus corrupting the internal tag list.
1045 * 1045 *
1046 * Notes: 1046 * Notes:
1047 * queue lock must be held. 1047 * queue lock must be held.
1048 **/ 1048 **/
1049 void blk_queue_end_tag(struct request_queue *q, struct request *rq) 1049 void blk_queue_end_tag(struct request_queue *q, struct request *rq)
1050 { 1050 {
1051 struct blk_queue_tag *bqt = q->queue_tags; 1051 struct blk_queue_tag *bqt = q->queue_tags;
1052 int tag = rq->tag; 1052 int tag = rq->tag;
1053 1053
1054 BUG_ON(tag == -1); 1054 BUG_ON(tag == -1);
1055 1055
1056 if (unlikely(tag >= bqt->real_max_depth)) 1056 if (unlikely(tag >= bqt->real_max_depth))
1057 /* 1057 /*
1058 * This can happen after tag depth has been reduced. 1058 * This can happen after tag depth has been reduced.
1059 * FIXME: how about a warning or info message here? 1059 * FIXME: how about a warning or info message here?
1060 */ 1060 */
1061 return; 1061 return;
1062 1062
1063 list_del_init(&rq->queuelist); 1063 list_del_init(&rq->queuelist);
1064 rq->cmd_flags &= ~REQ_QUEUED; 1064 rq->cmd_flags &= ~REQ_QUEUED;
1065 rq->tag = -1; 1065 rq->tag = -1;
1066 1066
1067 if (unlikely(bqt->tag_index[tag] == NULL)) 1067 if (unlikely(bqt->tag_index[tag] == NULL))
1068 printk(KERN_ERR "%s: tag %d is missing\n", 1068 printk(KERN_ERR "%s: tag %d is missing\n",
1069 __FUNCTION__, tag); 1069 __FUNCTION__, tag);
1070 1070
1071 bqt->tag_index[tag] = NULL; 1071 bqt->tag_index[tag] = NULL;
1072 1072
1073 /* 1073 /*
1074 * We use test_and_clear_bit's memory ordering properties here. 1074 * We use test_and_clear_bit's memory ordering properties here.
1075 * The tag_map bit acts as a lock for tag_index[bit], so we need 1075 * The tag_map bit acts as a lock for tag_index[bit], so we need
1076 * a barrier before clearing the bit (precisely: release semantics). 1076 * a barrier before clearing the bit (precisely: release semantics).
1077 * Could use clear_bit_unlock when it is merged. 1077 * Could use clear_bit_unlock when it is merged.
1078 */ 1078 */
1079 if (unlikely(!test_and_clear_bit(tag, bqt->tag_map))) { 1079 if (unlikely(!test_and_clear_bit(tag, bqt->tag_map))) {
1080 printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n", 1080 printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n",
1081 __FUNCTION__, tag); 1081 __FUNCTION__, tag);
1082 return; 1082 return;
1083 } 1083 }
1084 1084
1085 bqt->busy--; 1085 bqt->busy--;
1086 } 1086 }
1087 1087
1088 EXPORT_SYMBOL(blk_queue_end_tag); 1088 EXPORT_SYMBOL(blk_queue_end_tag);
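A completion-path sketch that follows the ordering required above, releasing the tag before end_that_request_last() puts the request back on the free list; the mydrv name is hypothetical and the queue lock is assumed to be held.

static void mydrv_finish_request(struct request_queue *q,
                                 struct request *rq, int uptodate)
{
        /* complete all remaining sectors in one go */
        if (!end_that_request_first(rq, uptodate, rq->hard_nr_sectors)) {
                blk_queue_end_tag(q, rq);       /* release the tag first */
                end_that_request_last(rq, uptodate);
        }
}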
1089 1089
1090 /** 1090 /**
1091 * blk_queue_start_tag - find a free tag and assign it 1091 * blk_queue_start_tag - find a free tag and assign it
1092 * @q: the request queue for the device 1092 * @q: the request queue for the device
1093 * @rq: the block request that needs tagging 1093 * @rq: the block request that needs tagging
1094 * 1094 *
1095 * Description: 1095 * Description:
1096 * This can either be used as a stand-alone helper, or possibly be 1096 * This can either be used as a stand-alone helper, or possibly be
1097 * assigned as the queue &prep_rq_fn (in which case &struct request 1097 * assigned as the queue &prep_rq_fn (in which case &struct request
1098 * automagically gets a tag assigned). Note that this function 1098 * automagically gets a tag assigned). Note that this function
1099 * assumes that any type of request can be queued! If this is not 1099 * assumes that any type of request can be queued! If this is not
1100 * true for your device, you must check the request type before 1100 * true for your device, you must check the request type before
1101 * calling this function. The request will also be removed from 1101 * calling this function. The request will also be removed from
1102 * the request queue, so it's the driver's responsibility to re-add 1102 * the request queue, so it's the driver's responsibility to re-add
1103 * it if it should need to be restarted for some reason. 1103 * it if it should need to be restarted for some reason.
1104 * 1104 *
1105 * Notes: 1105 * Notes:
1106 * queue lock must be held. 1106 * queue lock must be held.
1107 **/ 1107 **/
1108 int blk_queue_start_tag(struct request_queue *q, struct request *rq) 1108 int blk_queue_start_tag(struct request_queue *q, struct request *rq)
1109 { 1109 {
1110 struct blk_queue_tag *bqt = q->queue_tags; 1110 struct blk_queue_tag *bqt = q->queue_tags;
1111 int tag; 1111 int tag;
1112 1112
1113 if (unlikely((rq->cmd_flags & REQ_QUEUED))) { 1113 if (unlikely((rq->cmd_flags & REQ_QUEUED))) {
1114 printk(KERN_ERR 1114 printk(KERN_ERR
1115 "%s: request %p for device [%s] already tagged %d", 1115 "%s: request %p for device [%s] already tagged %d",
1116 __FUNCTION__, rq, 1116 __FUNCTION__, rq,
1117 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag); 1117 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag);
1118 BUG(); 1118 BUG();
1119 } 1119 }
1120 1120
1121 /* 1121 /*
1122 * Protect against shared tag maps, as we may not have exclusive 1122 * Protect against shared tag maps, as we may not have exclusive
1123 * access to the tag map. 1123 * access to the tag map.
1124 */ 1124 */
1125 do { 1125 do {
1126 tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth); 1126 tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth);
1127 if (tag >= bqt->max_depth) 1127 if (tag >= bqt->max_depth)
1128 return 1; 1128 return 1;
1129 1129
1130 } while (test_and_set_bit(tag, bqt->tag_map)); 1130 } while (test_and_set_bit(tag, bqt->tag_map));
1131 /* 1131 /*
1132 * We rely on test_and_set_bit providing lock memory ordering semantics 1132 * We rely on test_and_set_bit providing lock memory ordering semantics
1133 * (could use test_and_set_bit_lock when it is merged). 1133 * (could use test_and_set_bit_lock when it is merged).
1134 */ 1134 */
1135 1135
1136 rq->cmd_flags |= REQ_QUEUED; 1136 rq->cmd_flags |= REQ_QUEUED;
1137 rq->tag = tag; 1137 rq->tag = tag;
1138 bqt->tag_index[tag] = rq; 1138 bqt->tag_index[tag] = rq;
1139 blkdev_dequeue_request(rq); 1139 blkdev_dequeue_request(rq);
1140 list_add(&rq->queuelist, &bqt->busy_list); 1140 list_add(&rq->queuelist, &bqt->busy_list);
1141 bqt->busy++; 1141 bqt->busy++;
1142 return 0; 1142 return 0;
1143 } 1143 }
1144 1144
1145 EXPORT_SYMBOL(blk_queue_start_tag); 1145 EXPORT_SYMBOL(blk_queue_start_tag);
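A request_fn sketch using the helper as a stand-alone call (mydrv_issue() is hypothetical): requests are peeked with elv_next_request(), and the loop stops when no tag is free; blk_queue_start_tag() has already dequeued the request on success, so the driver must not dequeue it again.

static void mydrv_request_fn(struct request_queue *q)
{
        struct request *rq;

        while ((rq = elv_next_request(q)) != NULL) {
                if (blk_queue_start_tag(q, rq))
                        break;          /* out of tags, wait for a completion */

                /* rq->tag is now valid and rq has been dequeued for us */
                mydrv_issue(rq);
        }
}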
1146 1146
1147 /** 1147 /**
1148 * blk_queue_invalidate_tags - invalidate all pending tags 1148 * blk_queue_invalidate_tags - invalidate all pending tags
1149 * @q: the request queue for the device 1149 * @q: the request queue for the device
1150 * 1150 *
1151 * Description: 1151 * Description:
1152 * Hardware conditions may dictate a need to stop all pending requests. 1152 * Hardware conditions may dictate a need to stop all pending requests.
1153 * In this case, we will safely clear the block side of the tag queue and 1153 * In this case, we will safely clear the block side of the tag queue and
1154 * re-add all requests to the request queue in the right order. 1154 * re-add all requests to the request queue in the right order.
1155 * 1155 *
1156 * Notes: 1156 * Notes:
1157 * queue lock must be held. 1157 * queue lock must be held.
1158 **/ 1158 **/
1159 void blk_queue_invalidate_tags(struct request_queue *q) 1159 void blk_queue_invalidate_tags(struct request_queue *q)
1160 { 1160 {
1161 struct blk_queue_tag *bqt = q->queue_tags; 1161 struct blk_queue_tag *bqt = q->queue_tags;
1162 struct list_head *tmp, *n; 1162 struct list_head *tmp, *n;
1163 struct request *rq; 1163 struct request *rq;
1164 1164
1165 list_for_each_safe(tmp, n, &bqt->busy_list) { 1165 list_for_each_safe(tmp, n, &bqt->busy_list) {
1166 rq = list_entry_rq(tmp); 1166 rq = list_entry_rq(tmp);
1167 1167
1168 if (rq->tag == -1) { 1168 if (rq->tag == -1) {
1169 printk(KERN_ERR 1169 printk(KERN_ERR
1170 "%s: bad tag found on list\n", __FUNCTION__); 1170 "%s: bad tag found on list\n", __FUNCTION__);
1171 list_del_init(&rq->queuelist); 1171 list_del_init(&rq->queuelist);
1172 rq->cmd_flags &= ~REQ_QUEUED; 1172 rq->cmd_flags &= ~REQ_QUEUED;
1173 } else 1173 } else
1174 blk_queue_end_tag(q, rq); 1174 blk_queue_end_tag(q, rq);
1175 1175
1176 rq->cmd_flags &= ~REQ_STARTED; 1176 rq->cmd_flags &= ~REQ_STARTED;
1177 __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0); 1177 __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
1178 } 1178 }
1179 } 1179 }
1180 1180
1181 EXPORT_SYMBOL(blk_queue_invalidate_tags); 1181 EXPORT_SYMBOL(blk_queue_invalidate_tags);
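A sketch of the typical caller, e.g. an error handler running after a controller or bus reset; the queue lock is held as required above.

spin_lock_irqsave(q->queue_lock, flags);
blk_queue_invalidate_tags(q);   /* requeues every tagged request */
spin_unlock_irqrestore(q->queue_lock, flags);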
1182 1182
1183 void blk_dump_rq_flags(struct request *rq, char *msg) 1183 void blk_dump_rq_flags(struct request *rq, char *msg)
1184 { 1184 {
1185 int bit; 1185 int bit;
1186 1186
1187 printk("%s: dev %s: type=%x, flags=%x\n", msg, 1187 printk("%s: dev %s: type=%x, flags=%x\n", msg,
1188 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, 1188 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
1189 rq->cmd_flags); 1189 rq->cmd_flags);
1190 1190
1191 printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector, 1191 printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector,
1192 rq->nr_sectors, 1192 rq->nr_sectors,
1193 rq->current_nr_sectors); 1193 rq->current_nr_sectors);
1194 printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len); 1194 printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len);
1195 1195
1196 if (blk_pc_request(rq)) { 1196 if (blk_pc_request(rq)) {
1197 printk("cdb: "); 1197 printk("cdb: ");
1198 for (bit = 0; bit < sizeof(rq->cmd); bit++) 1198 for (bit = 0; bit < sizeof(rq->cmd); bit++)
1199 printk("%02x ", rq->cmd[bit]); 1199 printk("%02x ", rq->cmd[bit]);
1200 printk("\n"); 1200 printk("\n");
1201 } 1201 }
1202 } 1202 }
1203 1203
1204 EXPORT_SYMBOL(blk_dump_rq_flags); 1204 EXPORT_SYMBOL(blk_dump_rq_flags);
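A one-line debugging sketch, typically placed on an error path; the message prefix is arbitrary.

blk_dump_rq_flags(rq, "mydrv: rejecting request");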
1205 1205
1206 void blk_recount_segments(struct request_queue *q, struct bio *bio) 1206 void blk_recount_segments(struct request_queue *q, struct bio *bio)
1207 { 1207 {
1208 struct request rq; 1208 struct request rq;
1209 struct bio *nxt = bio->bi_next; 1209 struct bio *nxt = bio->bi_next;
1210 rq.q = q; 1210 rq.q = q;
1211 rq.bio = rq.biotail = bio; 1211 rq.bio = rq.biotail = bio;
1212 bio->bi_next = NULL; 1212 bio->bi_next = NULL;
1213 blk_recalc_rq_segments(&rq); 1213 blk_recalc_rq_segments(&rq);
1214 bio->bi_next = nxt; 1214 bio->bi_next = nxt;
1215 bio->bi_phys_segments = rq.nr_phys_segments; 1215 bio->bi_phys_segments = rq.nr_phys_segments;
1216 bio->bi_hw_segments = rq.nr_hw_segments; 1216 bio->bi_hw_segments = rq.nr_hw_segments;
1217 bio->bi_flags |= (1 << BIO_SEG_VALID); 1217 bio->bi_flags |= (1 << BIO_SEG_VALID);
1218 } 1218 }
1219 EXPORT_SYMBOL(blk_recount_segments); 1219 EXPORT_SYMBOL(blk_recount_segments);
1220 1220
1221 static void blk_recalc_rq_segments(struct request *rq) 1221 static void blk_recalc_rq_segments(struct request *rq)
1222 { 1222 {
1223 int nr_phys_segs; 1223 int nr_phys_segs;
1224 int nr_hw_segs; 1224 int nr_hw_segs;
1225 unsigned int phys_size; 1225 unsigned int phys_size;
1226 unsigned int hw_size; 1226 unsigned int hw_size;
1227 struct bio_vec *bv, *bvprv = NULL; 1227 struct bio_vec *bv, *bvprv = NULL;
1228 int seg_size; 1228 int seg_size;
1229 int hw_seg_size; 1229 int hw_seg_size;
1230 int cluster; 1230 int cluster;
1231 struct req_iterator iter; 1231 struct req_iterator iter;
1232 int high, highprv = 1; 1232 int high, highprv = 1;
1233 struct request_queue *q = rq->q; 1233 struct request_queue *q = rq->q;
1234 1234
1235 if (!rq->bio) 1235 if (!rq->bio)
1236 return; 1236 return;
1237 1237
1238 cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); 1238 cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);
1239 hw_seg_size = seg_size = 0; 1239 hw_seg_size = seg_size = 0;
1240 phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0; 1240 phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;
1241 rq_for_each_segment(bv, rq, iter) { 1241 rq_for_each_segment(bv, rq, iter) {
1242 /* 1242 /*
1243 * the trick here is making sure that a high page is never 1243 * the trick here is making sure that a high page is never
1244 * considered part of another segment, since that might 1244 * considered part of another segment, since that might
1245 * change with the bounce page. 1245 * change with the bounce page.
1246 */ 1246 */
1247 high = page_to_pfn(bv->bv_page) > q->bounce_pfn; 1247 high = page_to_pfn(bv->bv_page) > q->bounce_pfn;
1248 if (high || highprv) 1248 if (high || highprv)
1249 goto new_hw_segment; 1249 goto new_hw_segment;
1250 if (cluster) { 1250 if (cluster) {
1251 if (seg_size + bv->bv_len > q->max_segment_size) 1251 if (seg_size + bv->bv_len > q->max_segment_size)
1252 goto new_segment; 1252 goto new_segment;
1253 if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv)) 1253 if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv))
1254 goto new_segment; 1254 goto new_segment;
1255 if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv)) 1255 if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
1256 goto new_segment; 1256 goto new_segment;
1257 if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) 1257 if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
1258 goto new_hw_segment; 1258 goto new_hw_segment;
1259 1259
1260 seg_size += bv->bv_len; 1260 seg_size += bv->bv_len;
1261 hw_seg_size += bv->bv_len; 1261 hw_seg_size += bv->bv_len;
1262 bvprv = bv; 1262 bvprv = bv;
1263 continue; 1263 continue;
1264 } 1264 }
1265 new_segment: 1265 new_segment:
1266 if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) && 1266 if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) &&
1267 !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) 1267 !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
1268 hw_seg_size += bv->bv_len; 1268 hw_seg_size += bv->bv_len;
1269 else { 1269 else {
1270 new_hw_segment: 1270 new_hw_segment:
1271 if (nr_hw_segs == 1 && 1271 if (nr_hw_segs == 1 &&
1272 hw_seg_size > rq->bio->bi_hw_front_size) 1272 hw_seg_size > rq->bio->bi_hw_front_size)
1273 rq->bio->bi_hw_front_size = hw_seg_size; 1273 rq->bio->bi_hw_front_size = hw_seg_size;
1274 hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len; 1274 hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len;
1275 nr_hw_segs++; 1275 nr_hw_segs++;
1276 } 1276 }
1277 1277
1278 nr_phys_segs++; 1278 nr_phys_segs++;
1279 bvprv = bv; 1279 bvprv = bv;
1280 seg_size = bv->bv_len; 1280 seg_size = bv->bv_len;
1281 highprv = high; 1281 highprv = high;
1282 } 1282 }
1283 1283
1284 if (nr_hw_segs == 1 && 1284 if (nr_hw_segs == 1 &&
1285 hw_seg_size > rq->bio->bi_hw_front_size) 1285 hw_seg_size > rq->bio->bi_hw_front_size)
1286 rq->bio->bi_hw_front_size = hw_seg_size; 1286 rq->bio->bi_hw_front_size = hw_seg_size;
1287 if (hw_seg_size > rq->biotail->bi_hw_back_size) 1287 if (hw_seg_size > rq->biotail->bi_hw_back_size)
1288 rq->biotail->bi_hw_back_size = hw_seg_size; 1288 rq->biotail->bi_hw_back_size = hw_seg_size;
1289 rq->nr_phys_segments = nr_phys_segs; 1289 rq->nr_phys_segments = nr_phys_segs;
1290 rq->nr_hw_segments = nr_hw_segs; 1290 rq->nr_hw_segments = nr_hw_segs;
1291 } 1291 }
1292 1292
1293 static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, 1293 static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
1294 struct bio *nxt) 1294 struct bio *nxt)
1295 { 1295 {
1296 if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER))) 1296 if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER)))
1297 return 0; 1297 return 0;
1298 1298
1299 if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt))) 1299 if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
1300 return 0; 1300 return 0;
1301 if (bio->bi_size + nxt->bi_size > q->max_segment_size) 1301 if (bio->bi_size + nxt->bi_size > q->max_segment_size)
1302 return 0; 1302 return 0;
1303 1303
1304 /* 1304 /*
1305 * bio and nxt are contiguous in memory, check if the queue allows 1305 * bio and nxt are contiguous in memory, check if the queue allows
1306 * these two to be merged into one 1306 * these two to be merged into one
1307 */ 1307 */
1308 if (BIO_SEG_BOUNDARY(q, bio, nxt)) 1308 if (BIO_SEG_BOUNDARY(q, bio, nxt))
1309 return 1; 1309 return 1;
1310 1310
1311 return 0; 1311 return 0;
1312 } 1312 }
1313 1313
1314 static int blk_hw_contig_segment(struct request_queue *q, struct bio *bio, 1314 static int blk_hw_contig_segment(struct request_queue *q, struct bio *bio,
1315 struct bio *nxt) 1315 struct bio *nxt)
1316 { 1316 {
1317 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) 1317 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
1318 blk_recount_segments(q, bio); 1318 blk_recount_segments(q, bio);
1319 if (unlikely(!bio_flagged(nxt, BIO_SEG_VALID))) 1319 if (unlikely(!bio_flagged(nxt, BIO_SEG_VALID)))
1320 blk_recount_segments(q, nxt); 1320 blk_recount_segments(q, nxt);
1321 if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) || 1321 if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) ||
1322 BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size)) 1322 BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size))
1323 return 0; 1323 return 0;
1324 if (bio->bi_hw_back_size + nxt->bi_hw_front_size > q->max_segment_size) 1324 if (bio->bi_hw_back_size + nxt->bi_hw_front_size > q->max_segment_size)
1325 return 0; 1325 return 0;
1326 1326
1327 return 1; 1327 return 1;
1328 } 1328 }
1329 1329
1330 /* 1330 /*
1331 * map a request to scatterlist, return number of sg entries setup. Caller 1331 * map a request to scatterlist, return number of sg entries setup. Caller
1332 * must make sure sg can hold rq->nr_phys_segments entries 1332 * must make sure sg can hold rq->nr_phys_segments entries
1333 */ 1333 */
1334 int blk_rq_map_sg(struct request_queue *q, struct request *rq, 1334 int blk_rq_map_sg(struct request_queue *q, struct request *rq,
1335 struct scatterlist *sg) 1335 struct scatterlist *sg)
1336 { 1336 {
1337 struct bio_vec *bvec, *bvprv; 1337 struct bio_vec *bvec, *bvprv;
1338 struct req_iterator iter; 1338 struct req_iterator iter;
1339 int nsegs, cluster; 1339 int nsegs, cluster;
1340 1340
1341 nsegs = 0; 1341 nsegs = 0;
1342 cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); 1342 cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);
1343 1343
1344 /* 1344 /*
1345 * for each bio in rq 1345 * for each bio in rq
1346 */ 1346 */
1347 bvprv = NULL; 1347 bvprv = NULL;
1348 rq_for_each_segment(bvec, rq, iter) { 1348 rq_for_each_segment(bvec, rq, iter) {
1349 int nbytes = bvec->bv_len; 1349 int nbytes = bvec->bv_len;
1350 1350
1351 if (bvprv && cluster) { 1351 if (bvprv && cluster) {
1352 if (sg[nsegs - 1].length + nbytes > q->max_segment_size) 1352 if (sg[nsegs - 1].length + nbytes > q->max_segment_size)
1353 goto new_segment; 1353 goto new_segment;
1354 1354
1355 if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) 1355 if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec))
1356 goto new_segment; 1356 goto new_segment;
1357 if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec)) 1357 if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec))
1358 goto new_segment; 1358 goto new_segment;
1359 1359
1360 sg[nsegs - 1].length += nbytes; 1360 sg[nsegs - 1].length += nbytes;
1361 } else { 1361 } else {
1362 new_segment: 1362 new_segment:
1363 memset(&sg[nsegs],0,sizeof(struct scatterlist)); 1363 memset(&sg[nsegs],0,sizeof(struct scatterlist));
1364 sg[nsegs].page = bvec->bv_page; 1364 sg[nsegs].page = bvec->bv_page;
1365 sg[nsegs].length = nbytes; 1365 sg[nsegs].length = nbytes;
1366 sg[nsegs].offset = bvec->bv_offset; 1366 sg[nsegs].offset = bvec->bv_offset;
1367 1367
1368 nsegs++; 1368 nsegs++;
1369 } 1369 }
1370 bvprv = bvec; 1370 bvprv = bvec;
1371 } /* segments in rq */ 1371 } /* segments in rq */
1372 1372
1373 return nsegs; 1373 return nsegs;
1374 } 1374 }
1375 1375
1376 EXPORT_SYMBOL(blk_rq_map_sg); 1376 EXPORT_SYMBOL(blk_rq_map_sg);
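A mapping sketch (MYDRV_MAX_SEGMENTS and hwdev are hypothetical): the scatterlist array is sized for the q->max_phys_segments limit the driver configured, so it can hold rq->nr_phys_segments entries, and the result is then handed to the DMA API.

struct scatterlist sg[MYDRV_MAX_SEGMENTS];
int nseg;

nseg = blk_rq_map_sg(q, rq, sg);
nseg = dma_map_sg(hwdev, sg, nseg, rq_data_dir(rq) == WRITE ?
                  DMA_TO_DEVICE : DMA_FROM_DEVICE);
/* program the nseg mapped entries into the controller here */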
1377 1377
1378 /* 1378 /*
1379 * the standard queue merge functions; they can be overridden with 1379 * the standard queue merge functions; they can be overridden with
1380 * device-specific ones if so desired 1380 * device-specific ones if so desired
1381 */ 1381 */
1382 1382
1383 static inline int ll_new_mergeable(struct request_queue *q, 1383 static inline int ll_new_mergeable(struct request_queue *q,
1384 struct request *req, 1384 struct request *req,
1385 struct bio *bio) 1385 struct bio *bio)
1386 { 1386 {
1387 int nr_phys_segs = bio_phys_segments(q, bio); 1387 int nr_phys_segs = bio_phys_segments(q, bio);
1388 1388
1389 if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { 1389 if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
1390 req->cmd_flags |= REQ_NOMERGE; 1390 req->cmd_flags |= REQ_NOMERGE;
1391 if (req == q->last_merge) 1391 if (req == q->last_merge)
1392 q->last_merge = NULL; 1392 q->last_merge = NULL;
1393 return 0; 1393 return 0;
1394 } 1394 }
1395 1395
1396 /* 1396 /*
1397 * A hw segment is just getting larger, bump just the phys 1397 * A hw segment is just getting larger, bump just the phys
1398 * counter. 1398 * counter.
1399 */ 1399 */
1400 req->nr_phys_segments += nr_phys_segs; 1400 req->nr_phys_segments += nr_phys_segs;
1401 return 1; 1401 return 1;
1402 } 1402 }
1403 1403
1404 static inline int ll_new_hw_segment(struct request_queue *q, 1404 static inline int ll_new_hw_segment(struct request_queue *q,
1405 struct request *req, 1405 struct request *req,
1406 struct bio *bio) 1406 struct bio *bio)
1407 { 1407 {
1408 int nr_hw_segs = bio_hw_segments(q, bio); 1408 int nr_hw_segs = bio_hw_segments(q, bio);
1409 int nr_phys_segs = bio_phys_segments(q, bio); 1409 int nr_phys_segs = bio_phys_segments(q, bio);
1410 1410
1411 if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments 1411 if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments
1412 || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { 1412 || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
1413 req->cmd_flags |= REQ_NOMERGE; 1413 req->cmd_flags |= REQ_NOMERGE;
1414 if (req == q->last_merge) 1414 if (req == q->last_merge)
1415 q->last_merge = NULL; 1415 q->last_merge = NULL;
1416 return 0; 1416 return 0;
1417 } 1417 }
1418 1418
1419 /* 1419 /*
1420 * This will form the start of a new hw segment. Bump both 1420 * This will form the start of a new hw segment. Bump both
1421 * counters. 1421 * counters.
1422 */ 1422 */
1423 req->nr_hw_segments += nr_hw_segs; 1423 req->nr_hw_segments += nr_hw_segs;
1424 req->nr_phys_segments += nr_phys_segs; 1424 req->nr_phys_segments += nr_phys_segs;
1425 return 1; 1425 return 1;
1426 } 1426 }
1427 1427
1428 static int ll_back_merge_fn(struct request_queue *q, struct request *req, 1428 static int ll_back_merge_fn(struct request_queue *q, struct request *req,
1429 struct bio *bio) 1429 struct bio *bio)
1430 { 1430 {
1431 unsigned short max_sectors; 1431 unsigned short max_sectors;
1432 int len; 1432 int len;
1433 1433
1434 if (unlikely(blk_pc_request(req))) 1434 if (unlikely(blk_pc_request(req)))
1435 max_sectors = q->max_hw_sectors; 1435 max_sectors = q->max_hw_sectors;
1436 else 1436 else
1437 max_sectors = q->max_sectors; 1437 max_sectors = q->max_sectors;
1438 1438
1439 if (req->nr_sectors + bio_sectors(bio) > max_sectors) { 1439 if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
1440 req->cmd_flags |= REQ_NOMERGE; 1440 req->cmd_flags |= REQ_NOMERGE;
1441 if (req == q->last_merge) 1441 if (req == q->last_merge)
1442 q->last_merge = NULL; 1442 q->last_merge = NULL;
1443 return 0; 1443 return 0;
1444 } 1444 }
1445 if (unlikely(!bio_flagged(req->biotail, BIO_SEG_VALID))) 1445 if (unlikely(!bio_flagged(req->biotail, BIO_SEG_VALID)))
1446 blk_recount_segments(q, req->biotail); 1446 blk_recount_segments(q, req->biotail);
1447 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) 1447 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
1448 blk_recount_segments(q, bio); 1448 blk_recount_segments(q, bio);
1449 len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size; 1449 len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size;
1450 if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)) && 1450 if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)) &&
1451 !BIOVEC_VIRT_OVERSIZE(len)) { 1451 !BIOVEC_VIRT_OVERSIZE(len)) {
1452 int mergeable = ll_new_mergeable(q, req, bio); 1452 int mergeable = ll_new_mergeable(q, req, bio);
1453 1453
1454 if (mergeable) { 1454 if (mergeable) {
1455 if (req->nr_hw_segments == 1) 1455 if (req->nr_hw_segments == 1)
1456 req->bio->bi_hw_front_size = len; 1456 req->bio->bi_hw_front_size = len;
1457 if (bio->bi_hw_segments == 1) 1457 if (bio->bi_hw_segments == 1)
1458 bio->bi_hw_back_size = len; 1458 bio->bi_hw_back_size = len;
1459 } 1459 }
1460 return mergeable; 1460 return mergeable;
1461 } 1461 }
1462 1462
1463 return ll_new_hw_segment(q, req, bio); 1463 return ll_new_hw_segment(q, req, bio);
1464 } 1464 }
1465 1465
1466 static int ll_front_merge_fn(struct request_queue *q, struct request *req, 1466 static int ll_front_merge_fn(struct request_queue *q, struct request *req,
1467 struct bio *bio) 1467 struct bio *bio)
1468 { 1468 {
1469 unsigned short max_sectors; 1469 unsigned short max_sectors;
1470 int len; 1470 int len;
1471 1471
1472 if (unlikely(blk_pc_request(req))) 1472 if (unlikely(blk_pc_request(req)))
1473 max_sectors = q->max_hw_sectors; 1473 max_sectors = q->max_hw_sectors;
1474 else 1474 else
1475 max_sectors = q->max_sectors; 1475 max_sectors = q->max_sectors;
1476 1476
1477 1477
1478 if (req->nr_sectors + bio_sectors(bio) > max_sectors) { 1478 if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
1479 req->cmd_flags |= REQ_NOMERGE; 1479 req->cmd_flags |= REQ_NOMERGE;
1480 if (req == q->last_merge) 1480 if (req == q->last_merge)
1481 q->last_merge = NULL; 1481 q->last_merge = NULL;
1482 return 0; 1482 return 0;
1483 } 1483 }
1484 len = bio->bi_hw_back_size + req->bio->bi_hw_front_size; 1484 len = bio->bi_hw_back_size + req->bio->bi_hw_front_size;
1485 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) 1485 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
1486 blk_recount_segments(q, bio); 1486 blk_recount_segments(q, bio);
1487 if (unlikely(!bio_flagged(req->bio, BIO_SEG_VALID))) 1487 if (unlikely(!bio_flagged(req->bio, BIO_SEG_VALID)))
1488 blk_recount_segments(q, req->bio); 1488 blk_recount_segments(q, req->bio);
1489 if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) && 1489 if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) &&
1490 !BIOVEC_VIRT_OVERSIZE(len)) { 1490 !BIOVEC_VIRT_OVERSIZE(len)) {
1491 int mergeable = ll_new_mergeable(q, req, bio); 1491 int mergeable = ll_new_mergeable(q, req, bio);
1492 1492
1493 if (mergeable) { 1493 if (mergeable) {
1494 if (bio->bi_hw_segments == 1) 1494 if (bio->bi_hw_segments == 1)
1495 bio->bi_hw_front_size = len; 1495 bio->bi_hw_front_size = len;
1496 if (req->nr_hw_segments == 1) 1496 if (req->nr_hw_segments == 1)
1497 req->biotail->bi_hw_back_size = len; 1497 req->biotail->bi_hw_back_size = len;
1498 } 1498 }
1499 return mergeable; 1499 return mergeable;
1500 } 1500 }
1501 1501
1502 return ll_new_hw_segment(q, req, bio); 1502 return ll_new_hw_segment(q, req, bio);
1503 } 1503 }
1504 1504
1505 static int ll_merge_requests_fn(struct request_queue *q, struct request *req, 1505 static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
1506 struct request *next) 1506 struct request *next)
1507 { 1507 {
1508 int total_phys_segments; 1508 int total_phys_segments;
1509 int total_hw_segments; 1509 int total_hw_segments;
1510 1510
1511 /* 1511 /*
1512 * First check if either of the requests is a re-queued 1512 * First check if either of the requests is a re-queued
1513 * request. Can't merge them if so. 1513 * request. Can't merge them if so.
1514 */ 1514 */
1515 if (req->special || next->special) 1515 if (req->special || next->special)
1516 return 0; 1516 return 0;
1517 1517
1518 /* 1518 /*
1519 * Will it become too large? 1519 * Will it become too large?
1520 */ 1520 */
1521 if ((req->nr_sectors + next->nr_sectors) > q->max_sectors) 1521 if ((req->nr_sectors + next->nr_sectors) > q->max_sectors)
1522 return 0; 1522 return 0;
1523 1523
1524 total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; 1524 total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
1525 if (blk_phys_contig_segment(q, req->biotail, next->bio)) 1525 if (blk_phys_contig_segment(q, req->biotail, next->bio))
1526 total_phys_segments--; 1526 total_phys_segments--;
1527 1527
1528 if (total_phys_segments > q->max_phys_segments) 1528 if (total_phys_segments > q->max_phys_segments)
1529 return 0; 1529 return 0;
1530 1530
1531 total_hw_segments = req->nr_hw_segments + next->nr_hw_segments; 1531 total_hw_segments = req->nr_hw_segments + next->nr_hw_segments;
1532 if (blk_hw_contig_segment(q, req->biotail, next->bio)) { 1532 if (blk_hw_contig_segment(q, req->biotail, next->bio)) {
1533 int len = req->biotail->bi_hw_back_size + next->bio->bi_hw_front_size; 1533 int len = req->biotail->bi_hw_back_size + next->bio->bi_hw_front_size;
1534 /* 1534 /*
1535 * propagate the combined length to the end of the requests 1535 * propagate the combined length to the end of the requests
1536 */ 1536 */
1537 if (req->nr_hw_segments == 1) 1537 if (req->nr_hw_segments == 1)
1538 req->bio->bi_hw_front_size = len; 1538 req->bio->bi_hw_front_size = len;
1539 if (next->nr_hw_segments == 1) 1539 if (next->nr_hw_segments == 1)
1540 next->biotail->bi_hw_back_size = len; 1540 next->biotail->bi_hw_back_size = len;
1541 total_hw_segments--; 1541 total_hw_segments--;
1542 } 1542 }
1543 1543
1544 if (total_hw_segments > q->max_hw_segments) 1544 if (total_hw_segments > q->max_hw_segments)
1545 return 0; 1545 return 0;
1546 1546
1547 /* Merge is OK... */ 1547 /* Merge is OK... */
1548 req->nr_phys_segments = total_phys_segments; 1548 req->nr_phys_segments = total_phys_segments;
1549 req->nr_hw_segments = total_hw_segments; 1549 req->nr_hw_segments = total_hw_segments;
1550 return 1; 1550 return 1;
1551 } 1551 }
1552 1552
1553 /* 1553 /*
1554 * "plug" the device if there are no outstanding requests: this will 1554 * "plug" the device if there are no outstanding requests: this will
1555 * force the transfer to start only after we have put all the requests 1555 * force the transfer to start only after we have put all the requests
1556 * on the list. 1556 * on the list.
1557 * 1557 *
1558 * This is called with interrupts off and no requests on the queue and 1558 * This is called with interrupts off and no requests on the queue and
1559 * with the queue lock held. 1559 * with the queue lock held.
1560 */ 1560 */
1561 void blk_plug_device(struct request_queue *q) 1561 void blk_plug_device(struct request_queue *q)
1562 { 1562 {
1563 WARN_ON(!irqs_disabled()); 1563 WARN_ON(!irqs_disabled());
1564 1564
1565 /* 1565 /*
1566 * don't plug a stopped queue, it must be paired with blk_start_queue() 1566 * don't plug a stopped queue, it must be paired with blk_start_queue()
1567 * which will restart the queueing 1567 * which will restart the queueing
1568 */ 1568 */
1569 if (blk_queue_stopped(q)) 1569 if (blk_queue_stopped(q))
1570 return; 1570 return;
1571 1571
1572 if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) { 1572 if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
1573 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); 1573 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
1574 blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG); 1574 blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
1575 } 1575 }
1576 } 1576 }
1577 1577
1578 EXPORT_SYMBOL(blk_plug_device); 1578 EXPORT_SYMBOL(blk_plug_device);
1579 1579
1580 /* 1580 /*
1581 * remove the queue from the plugged list, if present. called with 1581 * remove the queue from the plugged list, if present. called with
1582 * queue lock held and interrupts disabled. 1582 * queue lock held and interrupts disabled.
1583 */ 1583 */
1584 int blk_remove_plug(struct request_queue *q) 1584 int blk_remove_plug(struct request_queue *q)
1585 { 1585 {
1586 WARN_ON(!irqs_disabled()); 1586 WARN_ON(!irqs_disabled());
1587 1587
1588 if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) 1588 if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
1589 return 0; 1589 return 0;
1590 1590
1591 del_timer(&q->unplug_timer); 1591 del_timer(&q->unplug_timer);
1592 return 1; 1592 return 1;
1593 } 1593 }
1594 1594
1595 EXPORT_SYMBOL(blk_remove_plug); 1595 EXPORT_SYMBOL(blk_remove_plug);
1596 1596
1597 /* 1597 /*
1598 * remove the plug and let it rip.. 1598 * remove the plug and let it rip..
1599 */ 1599 */
1600 void __generic_unplug_device(struct request_queue *q) 1600 void __generic_unplug_device(struct request_queue *q)
1601 { 1601 {
1602 if (unlikely(blk_queue_stopped(q))) 1602 if (unlikely(blk_queue_stopped(q)))
1603 return; 1603 return;
1604 1604
1605 if (!blk_remove_plug(q)) 1605 if (!blk_remove_plug(q))
1606 return; 1606 return;
1607 1607
1608 q->request_fn(q); 1608 q->request_fn(q);
1609 } 1609 }
1610 EXPORT_SYMBOL(__generic_unplug_device); 1610 EXPORT_SYMBOL(__generic_unplug_device);
1611 1611
1612 /** 1612 /**
1613 * generic_unplug_device - fire a request queue 1613 * generic_unplug_device - fire a request queue
1614 * @q: The &struct request_queue in question 1614 * @q: The &struct request_queue in question
1615 * 1615 *
1616 * Description: 1616 * Description:
1617 * Linux uses plugging to build bigger request queues before letting 1617 * Linux uses plugging to build bigger request queues before letting
1618 * the device have at them. If a queue is plugged, the I/O scheduler 1618 * the device have at them. If a queue is plugged, the I/O scheduler
1619 * is still adding and merging requests on the queue. Once the queue 1619 * is still adding and merging requests on the queue. Once the queue
1620 * gets unplugged, the request_fn defined for the queue is invoked and 1620 * gets unplugged, the request_fn defined for the queue is invoked and
1621 * transfers started. 1621 * transfers started.
1622 **/ 1622 **/
1623 void generic_unplug_device(struct request_queue *q) 1623 void generic_unplug_device(struct request_queue *q)
1624 { 1624 {
1625 spin_lock_irq(q->queue_lock); 1625 spin_lock_irq(q->queue_lock);
1626 __generic_unplug_device(q); 1626 __generic_unplug_device(q);
1627 spin_unlock_irq(q->queue_lock); 1627 spin_unlock_irq(q->queue_lock);
1628 } 1628 }
1629 EXPORT_SYMBOL(generic_unplug_device); 1629 EXPORT_SYMBOL(generic_unplug_device);
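A sketch of a caller that has submitted I/O and wants it pushed out before sleeping, mirroring blk_backing_dev_unplug() above: the unplug hook is invoked without the queue lock held, since generic_unplug_device() takes the lock itself.

if (q->unplug_fn)
        q->unplug_fn(q);        /* normally generic_unplug_device() */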
1630 1630
1631 static void blk_backing_dev_unplug(struct backing_dev_info *bdi, 1631 static void blk_backing_dev_unplug(struct backing_dev_info *bdi,
1632 struct page *page) 1632 struct page *page)
1633 { 1633 {
1634 struct request_queue *q = bdi->unplug_io_data; 1634 struct request_queue *q = bdi->unplug_io_data;
1635 1635
1636 /* 1636 /*
1637 * devices don't necessarily have an ->unplug_fn defined 1637 * devices don't necessarily have an ->unplug_fn defined
1638 */ 1638 */
1639 if (q->unplug_fn) { 1639 if (q->unplug_fn) {
1640 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, 1640 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
1641 q->rq.count[READ] + q->rq.count[WRITE]); 1641 q->rq.count[READ] + q->rq.count[WRITE]);
1642 1642
1643 q->unplug_fn(q); 1643 q->unplug_fn(q);
1644 } 1644 }
1645 } 1645 }
1646 1646
1647 static void blk_unplug_work(struct work_struct *work) 1647 static void blk_unplug_work(struct work_struct *work)
1648 { 1648 {
1649 struct request_queue *q = 1649 struct request_queue *q =
1650 container_of(work, struct request_queue, unplug_work); 1650 container_of(work, struct request_queue, unplug_work);
1651 1651
1652 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, 1652 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
1653 q->rq.count[READ] + q->rq.count[WRITE]); 1653 q->rq.count[READ] + q->rq.count[WRITE]);
1654 1654
1655 q->unplug_fn(q); 1655 q->unplug_fn(q);
1656 } 1656 }
1657 1657
1658 static void blk_unplug_timeout(unsigned long data) 1658 static void blk_unplug_timeout(unsigned long data)
1659 { 1659 {
1660 struct request_queue *q = (struct request_queue *)data; 1660 struct request_queue *q = (struct request_queue *)data;
1661 1661
1662 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL, 1662 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,
1663 q->rq.count[READ] + q->rq.count[WRITE]); 1663 q->rq.count[READ] + q->rq.count[WRITE]);
1664 1664
1665 kblockd_schedule_work(&q->unplug_work); 1665 kblockd_schedule_work(&q->unplug_work);
1666 } 1666 }
1667 1667
1668 /** 1668 /**
1669 * blk_start_queue - restart a previously stopped queue 1669 * blk_start_queue - restart a previously stopped queue
1670 * @q: The &struct request_queue in question 1670 * @q: The &struct request_queue in question
1671 * 1671 *
1672 * Description: 1672 * Description:
1673 * blk_start_queue() will clear the stop flag on the queue, and call 1673 * blk_start_queue() will clear the stop flag on the queue, and call
1674 * the request_fn for the queue if it was in a stopped state when 1674 * the request_fn for the queue if it was in a stopped state when
1675 * entered. Also see blk_stop_queue(). Queue lock must be held. 1675 * entered. Also see blk_stop_queue(). Queue lock must be held.
1676 **/ 1676 **/
1677 void blk_start_queue(struct request_queue *q) 1677 void blk_start_queue(struct request_queue *q)
1678 { 1678 {
1679 WARN_ON(!irqs_disabled()); 1679 WARN_ON(!irqs_disabled());
1680 1680
1681 clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); 1681 clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags);
1682 1682
1683 /* 1683 /*
1684 * one level of recursion is ok and is much faster than kicking 1684 * one level of recursion is ok and is much faster than kicking
1685 * the unplug handling 1685 * the unplug handling
1686 */ 1686 */
1687 if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { 1687 if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
1688 q->request_fn(q); 1688 q->request_fn(q);
1689 clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags); 1689 clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags);
1690 } else { 1690 } else {
1691 blk_plug_device(q); 1691 blk_plug_device(q);
1692 kblockd_schedule_work(&q->unplug_work); 1692 kblockd_schedule_work(&q->unplug_work);
1693 } 1693 }
1694 } 1694 }
1695 1695
1696 EXPORT_SYMBOL(blk_start_queue); 1696 EXPORT_SYMBOL(blk_start_queue);
1697 1697
1698 /** 1698 /**
1699 * blk_stop_queue - stop a queue 1699 * blk_stop_queue - stop a queue
1700 * @q: The &struct request_queue in question 1700 * @q: The &struct request_queue in question
1701 * 1701 *
1702 * Description: 1702 * Description:
1703 * The Linux block layer assumes that a block driver will consume all 1703 * The Linux block layer assumes that a block driver will consume all
1704 * entries on the request queue when the request_fn strategy is called. 1704 * entries on the request queue when the request_fn strategy is called.
1705 * Often this will not happen, because of hardware limitations (queue 1705 * Often this will not happen, because of hardware limitations (queue
1706 * depth settings). If a device driver gets a 'queue full' response, 1706 * depth settings). If a device driver gets a 'queue full' response,
1707 * or if it simply chooses not to queue more I/O at one point, it can 1707 * or if it simply chooses not to queue more I/O at one point, it can
1708 * call this function to prevent the request_fn from being called until 1708 * call this function to prevent the request_fn from being called until
1709 * the driver has signalled it's ready to go again. This happens by calling 1709 * the driver has signalled it's ready to go again. This happens by calling
1710 * blk_start_queue() to restart queue operations. Queue lock must be held. 1710 * blk_start_queue() to restart queue operations. Queue lock must be held.
1711 **/ 1711 **/
1712 void blk_stop_queue(struct request_queue *q) 1712 void blk_stop_queue(struct request_queue *q)
1713 { 1713 {
1714 blk_remove_plug(q); 1714 blk_remove_plug(q);
1715 set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); 1715 set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags);
1716 } 1716 }
1717 EXPORT_SYMBOL(blk_stop_queue); 1717 EXPORT_SYMBOL(blk_stop_queue);
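A sketch of the stop/start pairing described above, with hypothetical mydrv helpers: the request_fn (queue lock already held) stops the queue on a 'queue full' condition, and the completion interrupt restarts it once there is room again.

/* inside the request_fn, queue lock held */
if (mydrv_hw_queue_full(dev)) {
        blk_stop_queue(q);
        return;
}

/* from the completion interrupt, when the hardware has room again */
spin_lock_irqsave(q->queue_lock, flags);
blk_start_queue(q);
spin_unlock_irqrestore(q->queue_lock, flags);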
1718 1718
1719 /** 1719 /**
1720 * blk_sync_queue - cancel any pending callbacks on a queue 1720 * blk_sync_queue - cancel any pending callbacks on a queue
1721 * @q: the queue 1721 * @q: the queue
1722 * 1722 *
1723 * Description: 1723 * Description:
1724 * The block layer may perform asynchronous callback activity 1724 * The block layer may perform asynchronous callback activity
1725 * on a queue, such as calling the unplug function after a timeout. 1725 * on a queue, such as calling the unplug function after a timeout.
1726 * A block device may call blk_sync_queue to ensure that any 1726 * A block device may call blk_sync_queue to ensure that any
1727 * such activity is cancelled, thus allowing it to release resources 1727 * such activity is cancelled, thus allowing it to release resources
1728 * that the callbacks might use. The caller must already have made sure 1728 * that the callbacks might use. The caller must already have made sure
1729 * that its ->make_request_fn will not re-add plugging prior to calling 1729 * that its ->make_request_fn will not re-add plugging prior to calling
1730 * this function. 1730 * this function.
1731 * 1731 *
1732 */ 1732 */
1733 void blk_sync_queue(struct request_queue *q) 1733 void blk_sync_queue(struct request_queue *q)
1734 { 1734 {
1735 del_timer_sync(&q->unplug_timer); 1735 del_timer_sync(&q->unplug_timer);
1736 } 1736 }
1737 EXPORT_SYMBOL(blk_sync_queue); 1737 EXPORT_SYMBOL(blk_sync_queue);
1738 1738
1739 /** 1739 /**
1740 * blk_run_queue - run a single device queue 1740 * blk_run_queue - run a single device queue
1741 * @q: The queue to run 1741 * @q: The queue to run
1742 */ 1742 */
1743 void blk_run_queue(struct request_queue *q) 1743 void blk_run_queue(struct request_queue *q)
1744 { 1744 {
1745 unsigned long flags; 1745 unsigned long flags;
1746 1746
1747 spin_lock_irqsave(q->queue_lock, flags); 1747 spin_lock_irqsave(q->queue_lock, flags);
1748 blk_remove_plug(q); 1748 blk_remove_plug(q);
1749 1749
1750 /* 1750 /*
1751 * Only recurse once to avoid overrunning the stack, let the unplug 1751 * Only recurse once to avoid overrunning the stack, let the unplug
1752 * handling reinvoke the handler shortly if we already got there. 1752 * handling reinvoke the handler shortly if we already got there.
1753 */ 1753 */
1754 if (!elv_queue_empty(q)) { 1754 if (!elv_queue_empty(q)) {
1755 if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { 1755 if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
1756 q->request_fn(q); 1756 q->request_fn(q);
1757 clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags); 1757 clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags);
1758 } else { 1758 } else {
1759 blk_plug_device(q); 1759 blk_plug_device(q);
1760 kblockd_schedule_work(&q->unplug_work); 1760 kblockd_schedule_work(&q->unplug_work);
1761 } 1761 }
1762 } 1762 }
1763 1763
1764 spin_unlock_irqrestore(q->queue_lock, flags); 1764 spin_unlock_irqrestore(q->queue_lock, flags);
1765 } 1765 }
1766 EXPORT_SYMBOL(blk_run_queue); 1766 EXPORT_SYMBOL(blk_run_queue);
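A sketch of a deferred kick, e.g. from a work item scheduled once a resource shortage has cleared; unlike blk_start_queue(), blk_run_queue() takes the queue lock itself, so the caller must not hold it. struct mydrv_dev and its fields are hypothetical.

static void mydrv_restart_work(struct work_struct *work)
{
        struct mydrv_dev *dev = container_of(work, struct mydrv_dev,
                                             restart_work);

        blk_run_queue(dev->queue);      /* unplug and run the request_fn */
}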
1767 1767
1768 /** 1768 /**
1769 * blk_cleanup_queue - release a &struct request_queue when it is no longer needed 1769 * blk_cleanup_queue - release a &struct request_queue when it is no longer needed
1770 * @kobj: the kobj belonging to the request queue to be released 1770 * @kobj: the kobj belonging to the request queue to be released
1771 * 1771 *
1772 * Description: 1772 * Description:
1773 * blk_cleanup_queue is the pair to blk_init_queue() or 1773 * blk_cleanup_queue is the pair to blk_init_queue() or
1774 * blk_queue_make_request(). It should be called when a request queue is 1774 * blk_queue_make_request(). It should be called when a request queue is
1775 * being released; typically when a block device is being de-registered. 1775 * being released; typically when a block device is being de-registered.
1776 * Currently, its primary task is to free all the &struct request 1776 * Currently, its primary task is to free all the &struct request
1777 * structures that were allocated to the queue and the queue itself. 1777 * structures that were allocated to the queue and the queue itself.
1778 * 1778 *
1779 * Caveat: 1779 * Caveat:
1780 * Hopefully the low level driver will have finished any 1780 * Hopefully the low level driver will have finished any
1781 * outstanding requests first... 1781 * outstanding requests first...
1782 **/ 1782 **/
1783 static void blk_release_queue(struct kobject *kobj) 1783 static void blk_release_queue(struct kobject *kobj)
1784 { 1784 {
1785 struct request_queue *q = 1785 struct request_queue *q =
1786 container_of(kobj, struct request_queue, kobj); 1786 container_of(kobj, struct request_queue, kobj);
1787 struct request_list *rl = &q->rq; 1787 struct request_list *rl = &q->rq;
1788 1788
1789 blk_sync_queue(q); 1789 blk_sync_queue(q);
1790 1790
1791 if (rl->rq_pool) 1791 if (rl->rq_pool)
1792 mempool_destroy(rl->rq_pool); 1792 mempool_destroy(rl->rq_pool);
1793 1793
1794 if (q->queue_tags) 1794 if (q->queue_tags)
1795 __blk_queue_free_tags(q); 1795 __blk_queue_free_tags(q);
1796 1796
1797 blk_trace_shutdown(q); 1797 blk_trace_shutdown(q);
1798 1798
1799 kmem_cache_free(requestq_cachep, q); 1799 kmem_cache_free(requestq_cachep, q);
1800 } 1800 }
1801 1801
1802 void blk_put_queue(struct request_queue *q) 1802 void blk_put_queue(struct request_queue *q)
1803 { 1803 {
1804 kobject_put(&q->kobj); 1804 kobject_put(&q->kobj);
1805 } 1805 }
1806 EXPORT_SYMBOL(blk_put_queue); 1806 EXPORT_SYMBOL(blk_put_queue);
1807 1807
1808 void blk_cleanup_queue(struct request_queue * q) 1808 void blk_cleanup_queue(struct request_queue * q)
1809 { 1809 {
1810 mutex_lock(&q->sysfs_lock); 1810 mutex_lock(&q->sysfs_lock);
1811 set_bit(QUEUE_FLAG_DEAD, &q->queue_flags); 1811 set_bit(QUEUE_FLAG_DEAD, &q->queue_flags);
1812 mutex_unlock(&q->sysfs_lock); 1812 mutex_unlock(&q->sysfs_lock);
1813 1813
1814 if (q->elevator) 1814 if (q->elevator)
1815 elevator_exit(q->elevator); 1815 elevator_exit(q->elevator);
1816 1816
1817 blk_put_queue(q); 1817 blk_put_queue(q);
1818 } 1818 }
1819 1819
1820 EXPORT_SYMBOL(blk_cleanup_queue); 1820 EXPORT_SYMBOL(blk_cleanup_queue);
1821 1821
1822 static int blk_init_free_list(struct request_queue *q) 1822 static int blk_init_free_list(struct request_queue *q)
1823 { 1823 {
1824 struct request_list *rl = &q->rq; 1824 struct request_list *rl = &q->rq;
1825 1825
1826 rl->count[READ] = rl->count[WRITE] = 0; 1826 rl->count[READ] = rl->count[WRITE] = 0;
1827 rl->starved[READ] = rl->starved[WRITE] = 0; 1827 rl->starved[READ] = rl->starved[WRITE] = 0;
1828 rl->elvpriv = 0; 1828 rl->elvpriv = 0;
1829 init_waitqueue_head(&rl->wait[READ]); 1829 init_waitqueue_head(&rl->wait[READ]);
1830 init_waitqueue_head(&rl->wait[WRITE]); 1830 init_waitqueue_head(&rl->wait[WRITE]);
1831 1831
1832 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, 1832 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
1833 mempool_free_slab, request_cachep, q->node); 1833 mempool_free_slab, request_cachep, q->node);
1834 1834
1835 if (!rl->rq_pool) 1835 if (!rl->rq_pool)
1836 return -ENOMEM; 1836 return -ENOMEM;
1837 1837
1838 return 0; 1838 return 0;
1839 } 1839 }
1840 1840
1841 struct request_queue *blk_alloc_queue(gfp_t gfp_mask) 1841 struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
1842 { 1842 {
1843 return blk_alloc_queue_node(gfp_mask, -1); 1843 return blk_alloc_queue_node(gfp_mask, -1);
1844 } 1844 }
1845 EXPORT_SYMBOL(blk_alloc_queue); 1845 EXPORT_SYMBOL(blk_alloc_queue);
1846 1846
1847 static struct kobj_type queue_ktype; 1847 static struct kobj_type queue_ktype;
1848 1848
1849 struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) 1849 struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
1850 { 1850 {
1851 struct request_queue *q; 1851 struct request_queue *q;
1852 1852
1853 q = kmem_cache_alloc_node(requestq_cachep, 1853 q = kmem_cache_alloc_node(requestq_cachep,
1854 gfp_mask | __GFP_ZERO, node_id); 1854 gfp_mask | __GFP_ZERO, node_id);
1855 if (!q) 1855 if (!q)
1856 return NULL; 1856 return NULL;
1857 1857
1858 init_timer(&q->unplug_timer); 1858 init_timer(&q->unplug_timer);
1859 1859
1860 kobject_set_name(&q->kobj, "%s", "queue"); 1860 kobject_set_name(&q->kobj, "%s", "queue");
1861 q->kobj.ktype = &queue_ktype; 1861 q->kobj.ktype = &queue_ktype;
1862 kobject_init(&q->kobj); 1862 kobject_init(&q->kobj);
1863 1863
1864 q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug; 1864 q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
1865 q->backing_dev_info.unplug_io_data = q; 1865 q->backing_dev_info.unplug_io_data = q;
1866 1866
1867 mutex_init(&q->sysfs_lock); 1867 mutex_init(&q->sysfs_lock);
1868 1868
1869 return q; 1869 return q;
1870 } 1870 }
1871 EXPORT_SYMBOL(blk_alloc_queue_node); 1871 EXPORT_SYMBOL(blk_alloc_queue_node);
1872 1872
1873 /** 1873 /**
1874 * blk_init_queue - prepare a request queue for use with a block device 1874 * blk_init_queue - prepare a request queue for use with a block device
1875 * @rfn: The function to be called to process requests that have been 1875 * @rfn: The function to be called to process requests that have been
1876 * placed on the queue. 1876 * placed on the queue.
1877 * @lock: Request queue spin lock 1877 * @lock: Request queue spin lock
1878 * 1878 *
1879 * Description: 1879 * Description:
1880 * If a block device wishes to use the standard request handling procedures, 1880 * If a block device wishes to use the standard request handling procedures,
1881 * which sorts requests and coalesces adjacent requests, then it must 1881 * which sorts requests and coalesces adjacent requests, then it must
1882 * call blk_init_queue(). The function @rfn will be called when there 1882 * call blk_init_queue(). The function @rfn will be called when there
1883 * are requests on the queue that need to be processed. If the device 1883 * are requests on the queue that need to be processed. If the device
1884 * supports plugging, then @rfn may not be called immediately when requests 1884 * supports plugging, then @rfn may not be called immediately when requests
1885 * are available on the queue, but may be called at some time later instead. 1885 * are available on the queue, but may be called at some time later instead.
1886 * Plugged queues are generally unplugged when a buffer belonging to one 1886 * Plugged queues are generally unplugged when a buffer belonging to one
1887 * of the requests on the queue is needed, or due to memory pressure. 1887 * of the requests on the queue is needed, or due to memory pressure.
1888 * 1888 *
1889 * @rfn is not required, or even expected, to remove all requests off the 1889 * @rfn is not required, or even expected, to remove all requests off the
1890 * queue, but only as many as it can handle at a time. If it does leave 1890 * queue, but only as many as it can handle at a time. If it does leave
1891 * requests on the queue, it is responsible for arranging that the requests 1891 * requests on the queue, it is responsible for arranging that the requests
1892 * get dealt with eventually. 1892 * get dealt with eventually.
1893 * 1893 *
1894 * The queue spin lock must be held while manipulating the requests on the 1894 * The queue spin lock must be held while manipulating the requests on the
1895 * request queue; this lock will be taken also from interrupt context, so irq 1895 * request queue; this lock will be taken also from interrupt context, so irq
1896 * disabling is needed for it. 1896 * disabling is needed for it.
1897 * 1897 *
1898 * Function returns a pointer to the initialized request queue, or NULL if 1898 * Function returns a pointer to the initialized request queue, or NULL if
1899 * it didn't succeed. 1899 * it didn't succeed.
1900 * 1900 *
1901 * Note: 1901 * Note:
1902 * blk_init_queue() must be paired with a blk_cleanup_queue() call 1902 * blk_init_queue() must be paired with a blk_cleanup_queue() call
1903 * when the block device is deactivated (such as at module unload). 1903 * when the block device is deactivated (such as at module unload).
1904 **/ 1904 **/
1905 1905
1906 struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) 1906 struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
1907 { 1907 {
1908 return blk_init_queue_node(rfn, lock, -1); 1908 return blk_init_queue_node(rfn, lock, -1);
1909 } 1909 }
1910 EXPORT_SYMBOL(blk_init_queue); 1910 EXPORT_SYMBOL(blk_init_queue);
1911 1911
1912 struct request_queue * 1912 struct request_queue *
1913 blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) 1913 blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
1914 { 1914 {
1915 struct request_queue *q = blk_alloc_queue_node(GFP_KERNEL, node_id); 1915 struct request_queue *q = blk_alloc_queue_node(GFP_KERNEL, node_id);
1916 1916
1917 if (!q) 1917 if (!q)
1918 return NULL; 1918 return NULL;
1919 1919
1920 q->node = node_id; 1920 q->node = node_id;
1921 if (blk_init_free_list(q)) { 1921 if (blk_init_free_list(q)) {
1922 kmem_cache_free(requestq_cachep, q); 1922 kmem_cache_free(requestq_cachep, q);
1923 return NULL; 1923 return NULL;
1924 } 1924 }
1925 1925
1926 /* 1926 /*
1927 * if caller didn't supply a lock, they get per-queue locking with 1927 * if caller didn't supply a lock, they get per-queue locking with
1928 * our embedded lock 1928 * our embedded lock
1929 */ 1929 */
1930 if (!lock) { 1930 if (!lock) {
1931 spin_lock_init(&q->__queue_lock); 1931 spin_lock_init(&q->__queue_lock);
1932 lock = &q->__queue_lock; 1932 lock = &q->__queue_lock;
1933 } 1933 }
1934 1934
1935 q->request_fn = rfn; 1935 q->request_fn = rfn;
1936 q->prep_rq_fn = NULL; 1936 q->prep_rq_fn = NULL;
1937 q->unplug_fn = generic_unplug_device; 1937 q->unplug_fn = generic_unplug_device;
1938 q->queue_flags = (1 << QUEUE_FLAG_CLUSTER); 1938 q->queue_flags = (1 << QUEUE_FLAG_CLUSTER);
1939 q->queue_lock = lock; 1939 q->queue_lock = lock;
1940 1940
1941 blk_queue_segment_boundary(q, 0xffffffff); 1941 blk_queue_segment_boundary(q, 0xffffffff);
1942 1942
1943 blk_queue_make_request(q, __make_request); 1943 blk_queue_make_request(q, __make_request);
1944 blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE); 1944 blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);
1945 1945
1946 blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS); 1946 blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
1947 blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS); 1947 blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
1948 1948
1949 q->sg_reserved_size = INT_MAX; 1949 q->sg_reserved_size = INT_MAX;
1950 1950
1951 /* 1951 /*
1952 * all done 1952 * all done
1953 */ 1953 */
1954 if (!elevator_init(q, NULL)) { 1954 if (!elevator_init(q, NULL)) {
1955 blk_queue_congestion_threshold(q); 1955 blk_queue_congestion_threshold(q);
1956 return q; 1956 return q;
1957 } 1957 }
1958 1958
1959 blk_put_queue(q); 1959 blk_put_queue(q);
1960 return NULL; 1960 return NULL;
1961 } 1961 }
1962 EXPORT_SYMBOL(blk_init_queue_node); 1962 EXPORT_SYMBOL(blk_init_queue_node);
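A rough illustration of the pairing described in the kernel-doc above: a driver using the standard request handling sets up its queue with blk_init_queue() and tears it down with blk_cleanup_queue(). The mydev_* names are hypothetical, not part of this patch; the request handler itself is sketched further down, next to blk_requeue_request().

/*
 * Minimal sketch, assuming a driver-supplied request handler and lock.
 */
static void mydev_request_fn(struct request_queue *q);	/* defined by the driver */
static DEFINE_SPINLOCK(mydev_lock);

static struct request_queue *mydev_setup_queue(void)
{
	struct request_queue *q;

	q = blk_init_queue(mydev_request_fn, &mydev_lock);
	if (!q)
		return NULL;		/* allocation or elevator init failed */

	/* paired with blk_cleanup_queue() when the device goes away */
	return q;
}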
1963 1963
1964 int blk_get_queue(struct request_queue *q) 1964 int blk_get_queue(struct request_queue *q)
1965 { 1965 {
1966 if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { 1966 if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
1967 kobject_get(&q->kobj); 1967 kobject_get(&q->kobj);
1968 return 0; 1968 return 0;
1969 } 1969 }
1970 1970
1971 return 1; 1971 return 1;
1972 } 1972 }
1973 1973
1974 EXPORT_SYMBOL(blk_get_queue); 1974 EXPORT_SYMBOL(blk_get_queue);
1975 1975
1976 static inline void blk_free_request(struct request_queue *q, struct request *rq) 1976 static inline void blk_free_request(struct request_queue *q, struct request *rq)
1977 { 1977 {
1978 if (rq->cmd_flags & REQ_ELVPRIV) 1978 if (rq->cmd_flags & REQ_ELVPRIV)
1979 elv_put_request(q, rq); 1979 elv_put_request(q, rq);
1980 mempool_free(rq, q->rq.rq_pool); 1980 mempool_free(rq, q->rq.rq_pool);
1981 } 1981 }
1982 1982
1983 static struct request * 1983 static struct request *
1984 blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask) 1984 blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask)
1985 { 1985 {
1986 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); 1986 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
1987 1987
1988 if (!rq) 1988 if (!rq)
1989 return NULL; 1989 return NULL;
1990 1990
1991 /* 1991 /*
1992 * first three bits are identical in rq->cmd_flags and bio->bi_rw, 1992 * first three bits are identical in rq->cmd_flags and bio->bi_rw,
1993 * see bio.h and blkdev.h 1993 * see bio.h and blkdev.h
1994 */ 1994 */
1995 rq->cmd_flags = rw | REQ_ALLOCED; 1995 rq->cmd_flags = rw | REQ_ALLOCED;
1996 1996
1997 if (priv) { 1997 if (priv) {
1998 if (unlikely(elv_set_request(q, rq, gfp_mask))) { 1998 if (unlikely(elv_set_request(q, rq, gfp_mask))) {
1999 mempool_free(rq, q->rq.rq_pool); 1999 mempool_free(rq, q->rq.rq_pool);
2000 return NULL; 2000 return NULL;
2001 } 2001 }
2002 rq->cmd_flags |= REQ_ELVPRIV; 2002 rq->cmd_flags |= REQ_ELVPRIV;
2003 } 2003 }
2004 2004
2005 return rq; 2005 return rq;
2006 } 2006 }
2007 2007
2008 /* 2008 /*
2009 * ioc_batching returns true if the ioc is a valid batching request and 2009 * ioc_batching returns true if the ioc is a valid batching request and
2010 * should be given priority access to a request. 2010 * should be given priority access to a request.
2011 */ 2011 */
2012 static inline int ioc_batching(struct request_queue *q, struct io_context *ioc) 2012 static inline int ioc_batching(struct request_queue *q, struct io_context *ioc)
2013 { 2013 {
2014 if (!ioc) 2014 if (!ioc)
2015 return 0; 2015 return 0;
2016 2016
2017 /* 2017 /*
2018 * Make sure the process is able to allocate at least 1 request 2018 * Make sure the process is able to allocate at least 1 request
2019 * even if the batch times out, otherwise we could theoretically 2019 * even if the batch times out, otherwise we could theoretically
2020 * lose wakeups. 2020 * lose wakeups.
2021 */ 2021 */
2022 return ioc->nr_batch_requests == q->nr_batching || 2022 return ioc->nr_batch_requests == q->nr_batching ||
2023 (ioc->nr_batch_requests > 0 2023 (ioc->nr_batch_requests > 0
2024 && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME)); 2024 && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
2025 } 2025 }
2026 2026
2027 /* 2027 /*
2028 * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This 2028 * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This
2029 * will cause the process to be a "batcher" on all queues in the system. This 2029 * will cause the process to be a "batcher" on all queues in the system. This
2030 * is the behaviour we want though - once it gets a wakeup it should be given 2030 * is the behaviour we want though - once it gets a wakeup it should be given
2031 * a nice run. 2031 * a nice run.
2032 */ 2032 */
2033 static void ioc_set_batching(struct request_queue *q, struct io_context *ioc) 2033 static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
2034 { 2034 {
2035 if (!ioc || ioc_batching(q, ioc)) 2035 if (!ioc || ioc_batching(q, ioc))
2036 return; 2036 return;
2037 2037
2038 ioc->nr_batch_requests = q->nr_batching; 2038 ioc->nr_batch_requests = q->nr_batching;
2039 ioc->last_waited = jiffies; 2039 ioc->last_waited = jiffies;
2040 } 2040 }
2041 2041
2042 static void __freed_request(struct request_queue *q, int rw) 2042 static void __freed_request(struct request_queue *q, int rw)
2043 { 2043 {
2044 struct request_list *rl = &q->rq; 2044 struct request_list *rl = &q->rq;
2045 2045
2046 if (rl->count[rw] < queue_congestion_off_threshold(q)) 2046 if (rl->count[rw] < queue_congestion_off_threshold(q))
2047 blk_clear_queue_congested(q, rw); 2047 blk_clear_queue_congested(q, rw);
2048 2048
2049 if (rl->count[rw] + 1 <= q->nr_requests) { 2049 if (rl->count[rw] + 1 <= q->nr_requests) {
2050 if (waitqueue_active(&rl->wait[rw])) 2050 if (waitqueue_active(&rl->wait[rw]))
2051 wake_up(&rl->wait[rw]); 2051 wake_up(&rl->wait[rw]);
2052 2052
2053 blk_clear_queue_full(q, rw); 2053 blk_clear_queue_full(q, rw);
2054 } 2054 }
2055 } 2055 }
2056 2056
2057 /* 2057 /*
2058 * A request has just been released. Account for it, update the full and 2058 * A request has just been released. Account for it, update the full and
2059 * congestion status, wake up any waiters. Called under q->queue_lock. 2059 * congestion status, wake up any waiters. Called under q->queue_lock.
2060 */ 2060 */
2061 static void freed_request(struct request_queue *q, int rw, int priv) 2061 static void freed_request(struct request_queue *q, int rw, int priv)
2062 { 2062 {
2063 struct request_list *rl = &q->rq; 2063 struct request_list *rl = &q->rq;
2064 2064
2065 rl->count[rw]--; 2065 rl->count[rw]--;
2066 if (priv) 2066 if (priv)
2067 rl->elvpriv--; 2067 rl->elvpriv--;
2068 2068
2069 __freed_request(q, rw); 2069 __freed_request(q, rw);
2070 2070
2071 if (unlikely(rl->starved[rw ^ 1])) 2071 if (unlikely(rl->starved[rw ^ 1]))
2072 __freed_request(q, rw ^ 1); 2072 __freed_request(q, rw ^ 1);
2073 } 2073 }
2074 2074
2075 #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) 2075 #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist)
2076 /* 2076 /*
2077 * Get a free request, queue_lock must be held. 2077 * Get a free request, queue_lock must be held.
2078 * Returns NULL on failure, with queue_lock held. 2078 * Returns NULL on failure, with queue_lock held.
2079 * Returns !NULL on success, with queue_lock *not held*. 2079 * Returns !NULL on success, with queue_lock *not held*.
2080 */ 2080 */
2081 static struct request *get_request(struct request_queue *q, int rw_flags, 2081 static struct request *get_request(struct request_queue *q, int rw_flags,
2082 struct bio *bio, gfp_t gfp_mask) 2082 struct bio *bio, gfp_t gfp_mask)
2083 { 2083 {
2084 struct request *rq = NULL; 2084 struct request *rq = NULL;
2085 struct request_list *rl = &q->rq; 2085 struct request_list *rl = &q->rq;
2086 struct io_context *ioc = NULL; 2086 struct io_context *ioc = NULL;
2087 const int rw = rw_flags & 0x01; 2087 const int rw = rw_flags & 0x01;
2088 int may_queue, priv; 2088 int may_queue, priv;
2089 2089
2090 may_queue = elv_may_queue(q, rw_flags); 2090 may_queue = elv_may_queue(q, rw_flags);
2091 if (may_queue == ELV_MQUEUE_NO) 2091 if (may_queue == ELV_MQUEUE_NO)
2092 goto rq_starved; 2092 goto rq_starved;
2093 2093
2094 if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) { 2094 if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) {
2095 if (rl->count[rw]+1 >= q->nr_requests) { 2095 if (rl->count[rw]+1 >= q->nr_requests) {
2096 ioc = current_io_context(GFP_ATOMIC, q->node); 2096 ioc = current_io_context(GFP_ATOMIC, q->node);
2097 /* 2097 /*
2098 * The queue will fill after this allocation, so set 2098 * The queue will fill after this allocation, so set
2099 * it as full, and mark this process as "batching". 2099 * it as full, and mark this process as "batching".
2100 * This process will be allowed to complete a batch of 2100 * This process will be allowed to complete a batch of
2101 * requests, others will be blocked. 2101 * requests, others will be blocked.
2102 */ 2102 */
2103 if (!blk_queue_full(q, rw)) { 2103 if (!blk_queue_full(q, rw)) {
2104 ioc_set_batching(q, ioc); 2104 ioc_set_batching(q, ioc);
2105 blk_set_queue_full(q, rw); 2105 blk_set_queue_full(q, rw);
2106 } else { 2106 } else {
2107 if (may_queue != ELV_MQUEUE_MUST 2107 if (may_queue != ELV_MQUEUE_MUST
2108 && !ioc_batching(q, ioc)) { 2108 && !ioc_batching(q, ioc)) {
2109 /* 2109 /*
2110 * The queue is full and the allocating 2110 * The queue is full and the allocating
2111 * process is not a "batcher", and not 2111 * process is not a "batcher", and not
2112 * exempted by the IO scheduler 2112 * exempted by the IO scheduler
2113 */ 2113 */
2114 goto out; 2114 goto out;
2115 } 2115 }
2116 } 2116 }
2117 } 2117 }
2118 blk_set_queue_congested(q, rw); 2118 blk_set_queue_congested(q, rw);
2119 } 2119 }
2120 2120
2121 /* 2121 /*
2122 * Only allow batching queuers to allocate up to 50% over the defined 2122 * Only allow batching queuers to allocate up to 50% over the defined
2123 * limit of requests, otherwise we could have thousands of requests 2123 * limit of requests, otherwise we could have thousands of requests
2124 * allocated with any setting of ->nr_requests 2124 * allocated with any setting of ->nr_requests
2125 */ 2125 */
2126 if (rl->count[rw] >= (3 * q->nr_requests / 2)) 2126 if (rl->count[rw] >= (3 * q->nr_requests / 2))
2127 goto out; 2127 goto out;
2128 2128
2129 rl->count[rw]++; 2129 rl->count[rw]++;
2130 rl->starved[rw] = 0; 2130 rl->starved[rw] = 0;
2131 2131
2132 priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); 2132 priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
2133 if (priv) 2133 if (priv)
2134 rl->elvpriv++; 2134 rl->elvpriv++;
2135 2135
2136 spin_unlock_irq(q->queue_lock); 2136 spin_unlock_irq(q->queue_lock);
2137 2137
2138 rq = blk_alloc_request(q, rw_flags, priv, gfp_mask); 2138 rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);
2139 if (unlikely(!rq)) { 2139 if (unlikely(!rq)) {
2140 /* 2140 /*
2141 * Allocation failed presumably due to memory. Undo anything 2141 * Allocation failed presumably due to memory. Undo anything
2142 * we might have messed up. 2142 * we might have messed up.
2143 * 2143 *
2144 * Allocating task should really be put onto the front of the 2144 * Allocating task should really be put onto the front of the
2145 * wait queue, but this is pretty rare. 2145 * wait queue, but this is pretty rare.
2146 */ 2146 */
2147 spin_lock_irq(q->queue_lock); 2147 spin_lock_irq(q->queue_lock);
2148 freed_request(q, rw, priv); 2148 freed_request(q, rw, priv);
2149 2149
2150 /* 2150 /*
2151 * in the very unlikely event that allocation failed and no 2151 * in the very unlikely event that allocation failed and no
2152 * requests for this direction were pending, mark us starved 2152 * requests for this direction were pending, mark us starved
2153 * so that freeing of a request in the other direction will 2153 * so that freeing of a request in the other direction will
2154 * notice us. Another possible fix would be to split the 2154 * notice us. Another possible fix would be to split the
2155 * rq mempool into READ and WRITE 2155 * rq mempool into READ and WRITE
2156 */ 2156 */
2157 rq_starved: 2157 rq_starved:
2158 if (unlikely(rl->count[rw] == 0)) 2158 if (unlikely(rl->count[rw] == 0))
2159 rl->starved[rw] = 1; 2159 rl->starved[rw] = 1;
2160 2160
2161 goto out; 2161 goto out;
2162 } 2162 }
2163 2163
2164 /* 2164 /*
2165 * ioc may be NULL here, and ioc_batching will be false. That's 2165 * ioc may be NULL here, and ioc_batching will be false. That's
2166 * OK, if the queue is under the request limit then requests need 2166 * OK, if the queue is under the request limit then requests need
2167 * not count toward the nr_batch_requests limit. There will always 2167 * not count toward the nr_batch_requests limit. There will always
2168 * be some limit enforced by BLK_BATCH_TIME. 2168 * be some limit enforced by BLK_BATCH_TIME.
2169 */ 2169 */
2170 if (ioc_batching(q, ioc)) 2170 if (ioc_batching(q, ioc))
2171 ioc->nr_batch_requests--; 2171 ioc->nr_batch_requests--;
2172 2172
2173 rq_init(q, rq); 2173 rq_init(q, rq);
2174 2174
2175 blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ); 2175 blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ);
2176 out: 2176 out:
2177 return rq; 2177 return rq;
2178 } 2178 }
2179 2179
2180 /* 2180 /*
2181 * No available requests for this queue, unplug the device and wait for some 2181 * No available requests for this queue, unplug the device and wait for some
2182 * requests to become available. 2182 * requests to become available.
2183 * 2183 *
2184 * Called with q->queue_lock held, and returns with it unlocked. 2184 * Called with q->queue_lock held, and returns with it unlocked.
2185 */ 2185 */
2186 static struct request *get_request_wait(struct request_queue *q, int rw_flags, 2186 static struct request *get_request_wait(struct request_queue *q, int rw_flags,
2187 struct bio *bio) 2187 struct bio *bio)
2188 { 2188 {
2189 const int rw = rw_flags & 0x01; 2189 const int rw = rw_flags & 0x01;
2190 struct request *rq; 2190 struct request *rq;
2191 2191
2192 rq = get_request(q, rw_flags, bio, GFP_NOIO); 2192 rq = get_request(q, rw_flags, bio, GFP_NOIO);
2193 while (!rq) { 2193 while (!rq) {
2194 DEFINE_WAIT(wait); 2194 DEFINE_WAIT(wait);
2195 struct request_list *rl = &q->rq; 2195 struct request_list *rl = &q->rq;
2196 2196
2197 prepare_to_wait_exclusive(&rl->wait[rw], &wait, 2197 prepare_to_wait_exclusive(&rl->wait[rw], &wait,
2198 TASK_UNINTERRUPTIBLE); 2198 TASK_UNINTERRUPTIBLE);
2199 2199
2200 rq = get_request(q, rw_flags, bio, GFP_NOIO); 2200 rq = get_request(q, rw_flags, bio, GFP_NOIO);
2201 2201
2202 if (!rq) { 2202 if (!rq) {
2203 struct io_context *ioc; 2203 struct io_context *ioc;
2204 2204
2205 blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ); 2205 blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ);
2206 2206
2207 __generic_unplug_device(q); 2207 __generic_unplug_device(q);
2208 spin_unlock_irq(q->queue_lock); 2208 spin_unlock_irq(q->queue_lock);
2209 io_schedule(); 2209 io_schedule();
2210 2210
2211 /* 2211 /*
2212 * After sleeping, we become a "batching" process and 2212 * After sleeping, we become a "batching" process and
2213 * will be able to allocate at least one request, and 2213 * will be able to allocate at least one request, and
2214 * up to a big batch of them for a small period of time. 2214 * up to a big batch of them for a small period of time.
2215 * See ioc_batching, ioc_set_batching 2215 * See ioc_batching, ioc_set_batching
2216 */ 2216 */
2217 ioc = current_io_context(GFP_NOIO, q->node); 2217 ioc = current_io_context(GFP_NOIO, q->node);
2218 ioc_set_batching(q, ioc); 2218 ioc_set_batching(q, ioc);
2219 2219
2220 spin_lock_irq(q->queue_lock); 2220 spin_lock_irq(q->queue_lock);
2221 } 2221 }
2222 finish_wait(&rl->wait[rw], &wait); 2222 finish_wait(&rl->wait[rw], &wait);
2223 } 2223 }
2224 2224
2225 return rq; 2225 return rq;
2226 } 2226 }
2227 2227
2228 struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) 2228 struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
2229 { 2229 {
2230 struct request *rq; 2230 struct request *rq;
2231 2231
2232 BUG_ON(rw != READ && rw != WRITE); 2232 BUG_ON(rw != READ && rw != WRITE);
2233 2233
2234 spin_lock_irq(q->queue_lock); 2234 spin_lock_irq(q->queue_lock);
2235 if (gfp_mask & __GFP_WAIT) { 2235 if (gfp_mask & __GFP_WAIT) {
2236 rq = get_request_wait(q, rw, NULL); 2236 rq = get_request_wait(q, rw, NULL);
2237 } else { 2237 } else {
2238 rq = get_request(q, rw, NULL, gfp_mask); 2238 rq = get_request(q, rw, NULL, gfp_mask);
2239 if (!rq) 2239 if (!rq)
2240 spin_unlock_irq(q->queue_lock); 2240 spin_unlock_irq(q->queue_lock);
2241 } 2241 }
2242 /* q->queue_lock is unlocked at this point */ 2242 /* q->queue_lock is unlocked at this point */
2243 2243
2244 return rq; 2244 return rq;
2245 } 2245 }
2246 EXPORT_SYMBOL(blk_get_request); 2246 EXPORT_SYMBOL(blk_get_request);
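A minimal sketch of the allocate/release pairing: with a __GFP_WAIT mask the call goes through get_request_wait() above and sleeps until a request is available, otherwise it may return NULL. mydev_alloc_cmd() is a hypothetical helper, not part of this patch.

static int mydev_alloc_cmd(struct request_queue *q)
{
	struct request *rq;

	rq = blk_get_request(q, READ, GFP_KERNEL);
	if (!rq)
		return -ENOMEM;

	/* ... fill in the request and hand it to blk_execute_rq() ... */

	blk_put_request(rq);
	return 0;
}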
2247 2247
2248 /** 2248 /**
2249 * blk_start_queueing - initiate dispatch of requests to device 2249 * blk_start_queueing - initiate dispatch of requests to device
2250 * @q: request queue to kick into gear 2250 * @q: request queue to kick into gear
2251 * 2251 *
2252 * This is basically a helper to remove the need to know whether a queue 2252 * This is basically a helper to remove the need to know whether a queue
2253 * is plugged or not if someone just wants to initiate dispatch of requests 2253 * is plugged or not if someone just wants to initiate dispatch of requests
2254 * for this queue. 2254 * for this queue.
2255 * 2255 *
2256 * The queue lock must be held with interrupts disabled. 2256 * The queue lock must be held with interrupts disabled.
2257 */ 2257 */
2258 void blk_start_queueing(struct request_queue *q) 2258 void blk_start_queueing(struct request_queue *q)
2259 { 2259 {
2260 if (!blk_queue_plugged(q)) 2260 if (!blk_queue_plugged(q))
2261 q->request_fn(q); 2261 q->request_fn(q);
2262 else 2262 else
2263 __generic_unplug_device(q); 2263 __generic_unplug_device(q);
2264 } 2264 }
2265 EXPORT_SYMBOL(blk_start_queueing); 2265 EXPORT_SYMBOL(blk_start_queueing);
2266 2266
2267 /** 2267 /**
2268 * blk_requeue_request - put a request back on queue 2268 * blk_requeue_request - put a request back on queue
2269 * @q: request queue where request should be inserted 2269 * @q: request queue where request should be inserted
2270 * @rq: request to be inserted 2270 * @rq: request to be inserted
2271 * 2271 *
2272 * Description: 2272 * Description:
2273 * Drivers often keep queueing requests until the hardware cannot accept 2273 * Drivers often keep queueing requests until the hardware cannot accept
2274 * more, when that condition happens we need to put the request back 2274 * more, when that condition happens we need to put the request back
2275 * on the queue. Must be called with queue lock held. 2275 * on the queue. Must be called with queue lock held.
2276 */ 2276 */
2277 void blk_requeue_request(struct request_queue *q, struct request *rq) 2277 void blk_requeue_request(struct request_queue *q, struct request *rq)
2278 { 2278 {
2279 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); 2279 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
2280 2280
2281 if (blk_rq_tagged(rq)) 2281 if (blk_rq_tagged(rq))
2282 blk_queue_end_tag(q, rq); 2282 blk_queue_end_tag(q, rq);
2283 2283
2284 elv_requeue_request(q, rq); 2284 elv_requeue_request(q, rq);
2285 } 2285 }
2286 2286
2287 EXPORT_SYMBOL(blk_requeue_request); 2287 EXPORT_SYMBOL(blk_requeue_request);
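The situation the kernel-doc describes typically looks like the request_fn fragment below. This is a sketch only: elv_next_request() and blkdev_dequeue_request() are the existing dequeue helpers, while mydev_send_to_hw() is a placeholder. request_fn is entered with the queue lock held, which is also what blk_requeue_request() expects.

static void mydev_request_fn(struct request_queue *q)
{
	struct request *rq;

	while ((rq = elv_next_request(q)) != NULL) {
		blkdev_dequeue_request(rq);
		if (mydev_send_to_hw(rq) == -EBUSY) {
			/* hardware can't take more right now: put it back */
			blk_requeue_request(q, rq);
			break;
		}
	}
}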
2288 2288
2289 /** 2289 /**
2290 * blk_insert_request - insert a special request in to a request queue 2290 * blk_insert_request - insert a special request in to a request queue
2291 * @q: request queue where request should be inserted 2291 * @q: request queue where request should be inserted
2292 * @rq: request to be inserted 2292 * @rq: request to be inserted
2293 * @at_head: insert request at head or tail of queue 2293 * @at_head: insert request at head or tail of queue
2294 * @data: private data 2294 * @data: private data
2295 * 2295 *
2296 * Description: 2296 * Description:
2297 * Many block devices need to execute commands asynchronously, so they don't 2297 * Many block devices need to execute commands asynchronously, so they don't
2298 * block the whole kernel from preemption during request execution. This is 2298 * block the whole kernel from preemption during request execution. This is
2299 * accomplished normally by inserting artificial requests tagged as 2299 * accomplished normally by inserting artificial requests tagged as
2300 * REQ_SPECIAL in to the corresponding request queue, and letting them be 2300 * REQ_SPECIAL in to the corresponding request queue, and letting them be
2301 * scheduled for actual execution by the request queue. 2301 * scheduled for actual execution by the request queue.
2302 * 2302 *
2303 * We have the option of inserting at the head or the tail of the queue. 2303 * We have the option of inserting at the head or the tail of the queue.
2304 * Typically we use the tail for new ioctls and so forth. We use the head 2304 * Typically we use the tail for new ioctls and so forth. We use the head
2305 * of the queue for things like a QUEUE_FULL message from a device, or a 2305 * of the queue for things like a QUEUE_FULL message from a device, or a
2306 * host that is unable to accept a particular command. 2306 * host that is unable to accept a particular command.
2307 */ 2307 */
2308 void blk_insert_request(struct request_queue *q, struct request *rq, 2308 void blk_insert_request(struct request_queue *q, struct request *rq,
2309 int at_head, void *data) 2309 int at_head, void *data)
2310 { 2310 {
2311 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; 2311 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
2312 unsigned long flags; 2312 unsigned long flags;
2313 2313
2314 /* 2314 /*
2315 * tell I/O scheduler that this isn't a regular read/write (ie it 2315 * tell I/O scheduler that this isn't a regular read/write (ie it
2316 * must not attempt merges on this) and that it acts as a soft 2316 * must not attempt merges on this) and that it acts as a soft
2317 * barrier 2317 * barrier
2318 */ 2318 */
2319 rq->cmd_type = REQ_TYPE_SPECIAL; 2319 rq->cmd_type = REQ_TYPE_SPECIAL;
2320 rq->cmd_flags |= REQ_SOFTBARRIER; 2320 rq->cmd_flags |= REQ_SOFTBARRIER;
2321 2321
2322 rq->special = data; 2322 rq->special = data;
2323 2323
2324 spin_lock_irqsave(q->queue_lock, flags); 2324 spin_lock_irqsave(q->queue_lock, flags);
2325 2325
2326 /* 2326 /*
2327 * If command is tagged, release the tag 2327 * If command is tagged, release the tag
2328 */ 2328 */
2329 if (blk_rq_tagged(rq)) 2329 if (blk_rq_tagged(rq))
2330 blk_queue_end_tag(q, rq); 2330 blk_queue_end_tag(q, rq);
2331 2331
2332 drive_stat_acct(rq, rq->nr_sectors, 1); 2332 drive_stat_acct(rq, rq->nr_sectors, 1);
2333 __elv_add_request(q, rq, where, 0); 2333 __elv_add_request(q, rq, where, 0);
2334 blk_start_queueing(q); 2334 blk_start_queueing(q);
2335 spin_unlock_irqrestore(q->queue_lock, flags); 2335 spin_unlock_irqrestore(q->queue_lock, flags);
2336 } 2336 }
2337 2337
2338 EXPORT_SYMBOL(blk_insert_request); 2338 EXPORT_SYMBOL(blk_insert_request);
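Sketch of the usage pattern described above, with cmd standing in for whatever per-command state the driver wants handed back via rq->special; the helper name is hypothetical.

static int mydev_issue_private_cmd(struct request_queue *q, void *cmd)
{
	struct request *rq;

	rq = blk_get_request(q, READ, GFP_KERNEL);
	if (!rq)
		return -ENOMEM;

	/* head insertion, e.g. to clear a QUEUE_FULL condition quickly */
	blk_insert_request(q, rq, 1, cmd);
	return 0;
}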
2339 2339
2340 static int __blk_rq_unmap_user(struct bio *bio) 2340 static int __blk_rq_unmap_user(struct bio *bio)
2341 { 2341 {
2342 int ret = 0; 2342 int ret = 0;
2343 2343
2344 if (bio) { 2344 if (bio) {
2345 if (bio_flagged(bio, BIO_USER_MAPPED)) 2345 if (bio_flagged(bio, BIO_USER_MAPPED))
2346 bio_unmap_user(bio); 2346 bio_unmap_user(bio);
2347 else 2347 else
2348 ret = bio_uncopy_user(bio); 2348 ret = bio_uncopy_user(bio);
2349 } 2349 }
2350 2350
2351 return ret; 2351 return ret;
2352 } 2352 }
2353 2353
2354 int blk_rq_append_bio(struct request_queue *q, struct request *rq, 2354 int blk_rq_append_bio(struct request_queue *q, struct request *rq,
2355 struct bio *bio) 2355 struct bio *bio)
2356 { 2356 {
2357 if (!rq->bio) 2357 if (!rq->bio)
2358 blk_rq_bio_prep(q, rq, bio); 2358 blk_rq_bio_prep(q, rq, bio);
2359 else if (!ll_back_merge_fn(q, rq, bio)) 2359 else if (!ll_back_merge_fn(q, rq, bio))
2360 return -EINVAL; 2360 return -EINVAL;
2361 else { 2361 else {
2362 rq->biotail->bi_next = bio; 2362 rq->biotail->bi_next = bio;
2363 rq->biotail = bio; 2363 rq->biotail = bio;
2364 2364
2365 rq->data_len += bio->bi_size; 2365 rq->data_len += bio->bi_size;
2366 } 2366 }
2367 return 0; 2367 return 0;
2368 } 2368 }
2369 EXPORT_SYMBOL(blk_rq_append_bio); 2369 EXPORT_SYMBOL(blk_rq_append_bio);
2370 2370
2371 static int __blk_rq_map_user(struct request_queue *q, struct request *rq, 2371 static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
2372 void __user *ubuf, unsigned int len) 2372 void __user *ubuf, unsigned int len)
2373 { 2373 {
2374 unsigned long uaddr; 2374 unsigned long uaddr;
2375 struct bio *bio, *orig_bio; 2375 struct bio *bio, *orig_bio;
2376 int reading, ret; 2376 int reading, ret;
2377 2377
2378 reading = rq_data_dir(rq) == READ; 2378 reading = rq_data_dir(rq) == READ;
2379 2379
2380 /* 2380 /*
2381 * if alignment requirement is satisfied, map in user pages for 2381 * if alignment requirement is satisfied, map in user pages for
2382 * direct dma. else, set up kernel bounce buffers 2382 * direct dma. else, set up kernel bounce buffers
2383 */ 2383 */
2384 uaddr = (unsigned long) ubuf; 2384 uaddr = (unsigned long) ubuf;
2385 if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q))) 2385 if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q)))
2386 bio = bio_map_user(q, NULL, uaddr, len, reading); 2386 bio = bio_map_user(q, NULL, uaddr, len, reading);
2387 else 2387 else
2388 bio = bio_copy_user(q, uaddr, len, reading); 2388 bio = bio_copy_user(q, uaddr, len, reading);
2389 2389
2390 if (IS_ERR(bio)) 2390 if (IS_ERR(bio))
2391 return PTR_ERR(bio); 2391 return PTR_ERR(bio);
2392 2392
2393 orig_bio = bio; 2393 orig_bio = bio;
2394 blk_queue_bounce(q, &bio); 2394 blk_queue_bounce(q, &bio);
2395 2395
2396 /* 2396 /*
2397 * We link the bounce buffer in and could have to traverse it 2397 * We link the bounce buffer in and could have to traverse it
2398 * later so we have to get a ref to prevent it from being freed 2398 * later so we have to get a ref to prevent it from being freed
2399 */ 2399 */
2400 bio_get(bio); 2400 bio_get(bio);
2401 2401
2402 ret = blk_rq_append_bio(q, rq, bio); 2402 ret = blk_rq_append_bio(q, rq, bio);
2403 if (!ret) 2403 if (!ret)
2404 return bio->bi_size; 2404 return bio->bi_size;
2405 2405
2406 /* if it was bounced we must call the end io function */ 2406 /* if it was bounced we must call the end io function */
2407 bio_endio(bio, 0); 2407 bio_endio(bio, 0);
2408 __blk_rq_unmap_user(orig_bio); 2408 __blk_rq_unmap_user(orig_bio);
2409 bio_put(bio); 2409 bio_put(bio);
2410 return ret; 2410 return ret;
2411 } 2411 }
2412 2412
2413 /** 2413 /**
2414 * blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage 2414 * blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage
2415 * @q: request queue where request should be inserted 2415 * @q: request queue where request should be inserted
2416 * @rq: request structure to fill 2416 * @rq: request structure to fill
2417 * @ubuf: the user buffer 2417 * @ubuf: the user buffer
2418 * @len: length of user data 2418 * @len: length of user data
2419 * 2419 *
2420 * Description: 2420 * Description:
2421 * Data will be mapped directly for zero copy io, if possible. Otherwise 2421 * Data will be mapped directly for zero copy io, if possible. Otherwise
2422 * a kernel bounce buffer is used. 2422 * a kernel bounce buffer is used.
2423 * 2423 *
2424 * A matching blk_rq_unmap_user() must be issued at the end of io, while 2424 * A matching blk_rq_unmap_user() must be issued at the end of io, while
2425 * still in process context. 2425 * still in process context.
2426 * 2426 *
2427 * Note: The mapped bio may need to be bounced through blk_queue_bounce() 2427 * Note: The mapped bio may need to be bounced through blk_queue_bounce()
2428 * before being submitted to the device, as pages mapped may be out of 2428 * before being submitted to the device, as pages mapped may be out of
2429 * reach. It's the caller's responsibility to make sure this happens. The 2429 * reach. It's the caller's responsibility to make sure this happens. The
2430 * original bio must be passed back in to blk_rq_unmap_user() for proper 2430 * original bio must be passed back in to blk_rq_unmap_user() for proper
2431 * unmapping. 2431 * unmapping.
2432 */ 2432 */
2433 int blk_rq_map_user(struct request_queue *q, struct request *rq, 2433 int blk_rq_map_user(struct request_queue *q, struct request *rq,
2434 void __user *ubuf, unsigned long len) 2434 void __user *ubuf, unsigned long len)
2435 { 2435 {
2436 unsigned long bytes_read = 0; 2436 unsigned long bytes_read = 0;
2437 struct bio *bio = NULL; 2437 struct bio *bio = NULL;
2438 int ret; 2438 int ret;
2439 2439
2440 if (len > (q->max_hw_sectors << 9)) 2440 if (len > (q->max_hw_sectors << 9))
2441 return -EINVAL; 2441 return -EINVAL;
2442 if (!len || !ubuf) 2442 if (!len || !ubuf)
2443 return -EINVAL; 2443 return -EINVAL;
2444 2444
2445 while (bytes_read != len) { 2445 while (bytes_read != len) {
2446 unsigned long map_len, end, start; 2446 unsigned long map_len, end, start;
2447 2447
2448 map_len = min_t(unsigned long, len - bytes_read, BIO_MAX_SIZE); 2448 map_len = min_t(unsigned long, len - bytes_read, BIO_MAX_SIZE);
2449 end = ((unsigned long)ubuf + map_len + PAGE_SIZE - 1) 2449 end = ((unsigned long)ubuf + map_len + PAGE_SIZE - 1)
2450 >> PAGE_SHIFT; 2450 >> PAGE_SHIFT;
2451 start = (unsigned long)ubuf >> PAGE_SHIFT; 2451 start = (unsigned long)ubuf >> PAGE_SHIFT;
2452 2452
2453 /* 2453 /*
2454 * A bad offset could cause us to require BIO_MAX_PAGES + 1 2454 * A bad offset could cause us to require BIO_MAX_PAGES + 1
2455 * pages. If this happens we just lower the requested 2455 * pages. If this happens we just lower the requested
2456 * mapping len by a page so that we can fit 2456 * mapping len by a page so that we can fit
2457 */ 2457 */
2458 if (end - start > BIO_MAX_PAGES) 2458 if (end - start > BIO_MAX_PAGES)
2459 map_len -= PAGE_SIZE; 2459 map_len -= PAGE_SIZE;
2460 2460
2461 ret = __blk_rq_map_user(q, rq, ubuf, map_len); 2461 ret = __blk_rq_map_user(q, rq, ubuf, map_len);
2462 if (ret < 0) 2462 if (ret < 0)
2463 goto unmap_rq; 2463 goto unmap_rq;
2464 if (!bio) 2464 if (!bio)
2465 bio = rq->bio; 2465 bio = rq->bio;
2466 bytes_read += ret; 2466 bytes_read += ret;
2467 ubuf += ret; 2467 ubuf += ret;
2468 } 2468 }
2469 2469
2470 rq->buffer = rq->data = NULL; 2470 rq->buffer = rq->data = NULL;
2471 return 0; 2471 return 0;
2472 unmap_rq: 2472 unmap_rq:
2473 blk_rq_unmap_user(bio); 2473 blk_rq_unmap_user(bio);
2474 return ret; 2474 return ret;
2475 } 2475 }
2476 2476
2477 EXPORT_SYMBOL(blk_rq_map_user); 2477 EXPORT_SYMBOL(blk_rq_map_user);
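Putting the rules from the kernel-doc together, an SG_IO-style caller looks roughly like this (error handling trimmed, names hypothetical). Note that the original rq->bio is saved before execution and passed to blk_rq_unmap_user(), exactly as required above.

static int mydev_pc_io(struct request_queue *q, struct gendisk *disk,
		       void __user *ubuf, unsigned long len)
{
	struct request *rq;
	struct bio *bio;
	int err;

	rq = blk_get_request(q, READ, GFP_KERNEL);
	rq->cmd_type = REQ_TYPE_BLOCK_PC;

	err = blk_rq_map_user(q, rq, ubuf, len);
	if (err)
		goto out;

	bio = rq->bio;			/* remember the original bio */
	err = blk_execute_rq(q, disk, rq, 0);
	blk_rq_unmap_user(bio);		/* unmap using the saved bio */
out:
	blk_put_request(rq);
	return err;
}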
2478 2478
2479 /** 2479 /**
2480 * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage 2480 * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage
2481 * @q: request queue where request should be inserted 2481 * @q: request queue where request should be inserted
2482 * @rq: request to map data to 2482 * @rq: request to map data to
2483 * @iov: pointer to the iovec 2483 * @iov: pointer to the iovec
2484 * @iov_count: number of elements in the iovec 2484 * @iov_count: number of elements in the iovec
2485 * @len: I/O byte count 2485 * @len: I/O byte count
2486 * 2486 *
2487 * Description: 2487 * Description:
2488 * Data will be mapped directly for zero copy io, if possible. Otherwise 2488 * Data will be mapped directly for zero copy io, if possible. Otherwise
2489 * a kernel bounce buffer is used. 2489 * a kernel bounce buffer is used.
2490 * 2490 *
2491 * A matching blk_rq_unmap_user() must be issued at the end of io, while 2491 * A matching blk_rq_unmap_user() must be issued at the end of io, while
2492 * still in process context. 2492 * still in process context.
2493 * 2493 *
2494 * Note: The mapped bio may need to be bounced through blk_queue_bounce() 2494 * Note: The mapped bio may need to be bounced through blk_queue_bounce()
2495 * before being submitted to the device, as pages mapped may be out of 2495 * before being submitted to the device, as pages mapped may be out of
2496 * reach. It's the caller's responsibility to make sure this happens. The 2496 * reach. It's the caller's responsibility to make sure this happens. The
2497 * original bio must be passed back in to blk_rq_unmap_user() for proper 2497 * original bio must be passed back in to blk_rq_unmap_user() for proper
2498 * unmapping. 2498 * unmapping.
2499 */ 2499 */
2500 int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, 2500 int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
2501 struct sg_iovec *iov, int iov_count, unsigned int len) 2501 struct sg_iovec *iov, int iov_count, unsigned int len)
2502 { 2502 {
2503 struct bio *bio; 2503 struct bio *bio;
2504 2504
2505 if (!iov || iov_count <= 0) 2505 if (!iov || iov_count <= 0)
2506 return -EINVAL; 2506 return -EINVAL;
2507 2507
2508 /* we don't allow misaligned data like bio_map_user() does. If the 2508 /* we don't allow misaligned data like bio_map_user() does. If the
2509 * user is using sg, they're expected to know the alignment constraints 2509 * user is using sg, they're expected to know the alignment constraints
2510 * and respect them accordingly */ 2510 * and respect them accordingly */
2511 bio = bio_map_user_iov(q, NULL, iov, iov_count, rq_data_dir(rq)== READ); 2511 bio = bio_map_user_iov(q, NULL, iov, iov_count, rq_data_dir(rq)== READ);
2512 if (IS_ERR(bio)) 2512 if (IS_ERR(bio))
2513 return PTR_ERR(bio); 2513 return PTR_ERR(bio);
2514 2514
2515 if (bio->bi_size != len) { 2515 if (bio->bi_size != len) {
2516 bio_endio(bio, 0); 2516 bio_endio(bio, 0);
2517 bio_unmap_user(bio); 2517 bio_unmap_user(bio);
2518 return -EINVAL; 2518 return -EINVAL;
2519 } 2519 }
2520 2520
2521 bio_get(bio); 2521 bio_get(bio);
2522 blk_rq_bio_prep(q, rq, bio); 2522 blk_rq_bio_prep(q, rq, bio);
2523 rq->buffer = rq->data = NULL; 2523 rq->buffer = rq->data = NULL;
2524 return 0; 2524 return 0;
2525 } 2525 }
2526 2526
2527 EXPORT_SYMBOL(blk_rq_map_user_iov); 2527 EXPORT_SYMBOL(blk_rq_map_user_iov);
2528 2528
2529 /** 2529 /**
2530 * blk_rq_unmap_user - unmap a request with user data 2530 * blk_rq_unmap_user - unmap a request with user data
2531 * @bio: start of bio list 2531 * @bio: start of bio list
2532 * 2532 *
2533 * Description: 2533 * Description:
2534 * Unmap a rq previously mapped by blk_rq_map_user(). The caller must 2534 * Unmap a rq previously mapped by blk_rq_map_user(). The caller must
2535 * supply the original rq->bio from the blk_rq_map_user() return, since 2535 * supply the original rq->bio from the blk_rq_map_user() return, since
2536 * the io completion may have changed rq->bio. 2536 * the io completion may have changed rq->bio.
2537 */ 2537 */
2538 int blk_rq_unmap_user(struct bio *bio) 2538 int blk_rq_unmap_user(struct bio *bio)
2539 { 2539 {
2540 struct bio *mapped_bio; 2540 struct bio *mapped_bio;
2541 int ret = 0, ret2; 2541 int ret = 0, ret2;
2542 2542
2543 while (bio) { 2543 while (bio) {
2544 mapped_bio = bio; 2544 mapped_bio = bio;
2545 if (unlikely(bio_flagged(bio, BIO_BOUNCED))) 2545 if (unlikely(bio_flagged(bio, BIO_BOUNCED)))
2546 mapped_bio = bio->bi_private; 2546 mapped_bio = bio->bi_private;
2547 2547
2548 ret2 = __blk_rq_unmap_user(mapped_bio); 2548 ret2 = __blk_rq_unmap_user(mapped_bio);
2549 if (ret2 && !ret) 2549 if (ret2 && !ret)
2550 ret = ret2; 2550 ret = ret2;
2551 2551
2552 mapped_bio = bio; 2552 mapped_bio = bio;
2553 bio = bio->bi_next; 2553 bio = bio->bi_next;
2554 bio_put(mapped_bio); 2554 bio_put(mapped_bio);
2555 } 2555 }
2556 2556
2557 return ret; 2557 return ret;
2558 } 2558 }
2559 2559
2560 EXPORT_SYMBOL(blk_rq_unmap_user); 2560 EXPORT_SYMBOL(blk_rq_unmap_user);
2561 2561
2562 /** 2562 /**
2563 * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage 2563 * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage
2564 * @q: request queue where request should be inserted 2564 * @q: request queue where request should be inserted
2565 * @rq: request to fill 2565 * @rq: request to fill
2566 * @kbuf: the kernel buffer 2566 * @kbuf: the kernel buffer
2567 * @len: length of user data 2567 * @len: length of user data
2568 * @gfp_mask: memory allocation flags 2568 * @gfp_mask: memory allocation flags
2569 */ 2569 */
2570 int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, 2570 int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
2571 unsigned int len, gfp_t gfp_mask) 2571 unsigned int len, gfp_t gfp_mask)
2572 { 2572 {
2573 struct bio *bio; 2573 struct bio *bio;
2574 2574
2575 if (len > (q->max_hw_sectors << 9)) 2575 if (len > (q->max_hw_sectors << 9))
2576 return -EINVAL; 2576 return -EINVAL;
2577 if (!len || !kbuf) 2577 if (!len || !kbuf)
2578 return -EINVAL; 2578 return -EINVAL;
2579 2579
2580 bio = bio_map_kern(q, kbuf, len, gfp_mask); 2580 bio = bio_map_kern(q, kbuf, len, gfp_mask);
2581 if (IS_ERR(bio)) 2581 if (IS_ERR(bio))
2582 return PTR_ERR(bio); 2582 return PTR_ERR(bio);
2583 2583
2584 if (rq_data_dir(rq) == WRITE) 2584 if (rq_data_dir(rq) == WRITE)
2585 bio->bi_rw |= (1 << BIO_RW); 2585 bio->bi_rw |= (1 << BIO_RW);
2586 2586
2587 blk_rq_bio_prep(q, rq, bio); 2587 blk_rq_bio_prep(q, rq, bio);
2588 blk_queue_bounce(q, &rq->bio); 2588 blk_queue_bounce(q, &rq->bio);
2589 rq->buffer = rq->data = NULL; 2589 rq->buffer = rq->data = NULL;
2590 return 0; 2590 return 0;
2591 } 2591 }
2592 2592
2593 EXPORT_SYMBOL(blk_rq_map_kern); 2593 EXPORT_SYMBOL(blk_rq_map_kern);
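For data already in kernel memory the pattern is simpler, since there is no unmap step. A sketch with a hypothetical helper name and error handling trimmed; kbuf/len come from the caller.

static int mydev_kern_cmd(struct request_queue *q, struct gendisk *disk,
			  void *kbuf, unsigned int len)
{
	struct request *rq;
	int err;

	rq = blk_get_request(q, READ, GFP_KERNEL);
	rq->cmd_type = REQ_TYPE_BLOCK_PC;

	err = blk_rq_map_kern(q, rq, kbuf, len, GFP_KERNEL);
	if (!err)
		err = blk_execute_rq(q, disk, rq, 0);

	blk_put_request(rq);
	return err;
}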
2594 2594
2595 /** 2595 /**
2596 * blk_execute_rq_nowait - insert a request into queue for execution 2596 * blk_execute_rq_nowait - insert a request into queue for execution
2597 * @q: queue to insert the request in 2597 * @q: queue to insert the request in
2598 * @bd_disk: matching gendisk 2598 * @bd_disk: matching gendisk
2599 * @rq: request to insert 2599 * @rq: request to insert
2600 * @at_head: insert request at head or tail of queue 2600 * @at_head: insert request at head or tail of queue
2601 * @done: I/O completion handler 2601 * @done: I/O completion handler
2602 * 2602 *
2603 * Description: 2603 * Description:
2604 * Insert a fully prepared request at the back of the io scheduler queue 2604 * Insert a fully prepared request at the back of the io scheduler queue
2605 * for execution. Don't wait for completion. 2605 * for execution. Don't wait for completion.
2606 */ 2606 */
2607 void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, 2607 void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
2608 struct request *rq, int at_head, 2608 struct request *rq, int at_head,
2609 rq_end_io_fn *done) 2609 rq_end_io_fn *done)
2610 { 2610 {
2611 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; 2611 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
2612 2612
2613 rq->rq_disk = bd_disk; 2613 rq->rq_disk = bd_disk;
2614 rq->cmd_flags |= REQ_NOMERGE; 2614 rq->cmd_flags |= REQ_NOMERGE;
2615 rq->end_io = done; 2615 rq->end_io = done;
2616 WARN_ON(irqs_disabled()); 2616 WARN_ON(irqs_disabled());
2617 spin_lock_irq(q->queue_lock); 2617 spin_lock_irq(q->queue_lock);
2618 __elv_add_request(q, rq, where, 1); 2618 __elv_add_request(q, rq, where, 1);
2619 __generic_unplug_device(q); 2619 __generic_unplug_device(q);
2620 spin_unlock_irq(q->queue_lock); 2620 spin_unlock_irq(q->queue_lock);
2621 } 2621 }
2622 EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); 2622 EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
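An asynchronous caller supplies its own rq_end_io_fn; the done callback owns the final reference, mirroring what blk_end_sync_rq() below does for the synchronous case. Sketch only, with placeholder names.

static void mydev_cmd_done(struct request *rq, int error)
{
	/* inspect rq->errors or driver-private state, then drop the reference */
	__blk_put_request(rq->q, rq);
}

static void mydev_submit_async(struct request_queue *q,
			       struct gendisk *disk, struct request *rq)
{
	rq->end_io_data = NULL;		/* or driver-private completion data */
	blk_execute_rq_nowait(q, disk, rq, 0, mydev_cmd_done);
}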
2623 2623
2624 /** 2624 /**
2625 * blk_execute_rq - insert a request into queue for execution 2625 * blk_execute_rq - insert a request into queue for execution
2626 * @q: queue to insert the request in 2626 * @q: queue to insert the request in
2627 * @bd_disk: matching gendisk 2627 * @bd_disk: matching gendisk
2628 * @rq: request to insert 2628 * @rq: request to insert
2629 * @at_head: insert request at head or tail of queue 2629 * @at_head: insert request at head or tail of queue
2630 * 2630 *
2631 * Description: 2631 * Description:
2632 * Insert a fully prepared request at the back of the io scheduler queue 2632 * Insert a fully prepared request at the back of the io scheduler queue
2633 * for execution and wait for completion. 2633 * for execution and wait for completion.
2634 */ 2634 */
2635 int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk, 2635 int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
2636 struct request *rq, int at_head) 2636 struct request *rq, int at_head)
2637 { 2637 {
2638 DECLARE_COMPLETION_ONSTACK(wait); 2638 DECLARE_COMPLETION_ONSTACK(wait);
2639 char sense[SCSI_SENSE_BUFFERSIZE]; 2639 char sense[SCSI_SENSE_BUFFERSIZE];
2640 int err = 0; 2640 int err = 0;
2641 2641
2642 /* 2642 /*
2643 * we need an extra reference to the request, so we can look at 2643 * we need an extra reference to the request, so we can look at
2644 * it after io completion 2644 * it after io completion
2645 */ 2645 */
2646 rq->ref_count++; 2646 rq->ref_count++;
2647 2647
2648 if (!rq->sense) { 2648 if (!rq->sense) {
2649 memset(sense, 0, sizeof(sense)); 2649 memset(sense, 0, sizeof(sense));
2650 rq->sense = sense; 2650 rq->sense = sense;
2651 rq->sense_len = 0; 2651 rq->sense_len = 0;
2652 } 2652 }
2653 2653
2654 rq->end_io_data = &wait; 2654 rq->end_io_data = &wait;
2655 blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq); 2655 blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq);
2656 wait_for_completion(&wait); 2656 wait_for_completion(&wait);
2657 2657
2658 if (rq->errors) 2658 if (rq->errors)
2659 err = -EIO; 2659 err = -EIO;
2660 2660
2661 return err; 2661 return err;
2662 } 2662 }
2663 2663
2664 EXPORT_SYMBOL(blk_execute_rq); 2664 EXPORT_SYMBOL(blk_execute_rq);
2665 2665
2666 /** 2666 /**
2667 * blkdev_issue_flush - queue a flush 2667 * blkdev_issue_flush - queue a flush
2668 * @bdev: blockdev to issue flush for 2668 * @bdev: blockdev to issue flush for
2669 * @error_sector: error sector 2669 * @error_sector: error sector
2670 * 2670 *
2671 * Description: 2671 * Description:
2672 * Issue a flush for the block device in question. Caller can supply 2672 * Issue a flush for the block device in question. Caller can supply
2673 * room for storing the error offset in case of a flush error, if they 2673 * room for storing the error offset in case of a flush error, if they
2674 * wish to. Caller must run wait_for_completion() on its own. 2674 * wish to. Caller must run wait_for_completion() on its own.
2675 */ 2675 */
2676 int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) 2676 int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
2677 { 2677 {
2678 struct request_queue *q; 2678 struct request_queue *q;
2679 2679
2680 if (bdev->bd_disk == NULL) 2680 if (bdev->bd_disk == NULL)
2681 return -ENXIO; 2681 return -ENXIO;
2682 2682
2683 q = bdev_get_queue(bdev); 2683 q = bdev_get_queue(bdev);
2684 if (!q) 2684 if (!q)
2685 return -ENXIO; 2685 return -ENXIO;
2686 if (!q->issue_flush_fn) 2686 if (!q->issue_flush_fn)
2687 return -EOPNOTSUPP; 2687 return -EOPNOTSUPP;
2688 2688
2689 return q->issue_flush_fn(q, bdev->bd_disk, error_sector); 2689 return q->issue_flush_fn(q, bdev->bd_disk, error_sector);
2690 } 2690 }
2691 2691
2692 EXPORT_SYMBOL(blkdev_issue_flush); 2692 EXPORT_SYMBOL(blkdev_issue_flush);
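A caller-side sketch of the flush interface (hypothetical wrapper); -EOPNOTSUPP just means the queue has no issue_flush_fn and is usually not treated as an error.

static int mydev_flush_cache(struct block_device *bdev)
{
	sector_t error_sector;
	int err;

	err = blkdev_issue_flush(bdev, &error_sector);
	if (err == -EOPNOTSUPP)
		err = 0;		/* device has no cache flush support */

	return err;
}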
2693 2693
2694 static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io) 2694 static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io)
2695 { 2695 {
2696 int rw = rq_data_dir(rq); 2696 int rw = rq_data_dir(rq);
2697 2697
2698 if (!blk_fs_request(rq) || !rq->rq_disk) 2698 if (!blk_fs_request(rq) || !rq->rq_disk)
2699 return; 2699 return;
2700 2700
2701 if (!new_io) { 2701 if (!new_io) {
2702 __disk_stat_inc(rq->rq_disk, merges[rw]); 2702 __disk_stat_inc(rq->rq_disk, merges[rw]);
2703 } else { 2703 } else {
2704 disk_round_stats(rq->rq_disk); 2704 disk_round_stats(rq->rq_disk);
2705 rq->rq_disk->in_flight++; 2705 rq->rq_disk->in_flight++;
2706 } 2706 }
2707 } 2707 }
2708 2708
2709 /* 2709 /*
2710 * add-request adds a request to the linked list. 2710 * add-request adds a request to the linked list.
2711 * queue lock is held and interrupts disabled, as we muck with the 2711 * queue lock is held and interrupts disabled, as we muck with the
2712 * request queue list. 2712 * request queue list.
2713 */ 2713 */
2714 static inline void add_request(struct request_queue * q, struct request * req) 2714 static inline void add_request(struct request_queue * q, struct request * req)
2715 { 2715 {
2716 drive_stat_acct(req, req->nr_sectors, 1); 2716 drive_stat_acct(req, req->nr_sectors, 1);
2717 2717
2718 /* 2718 /*
2719 * elevator indicated where it wants this request to be 2719 * elevator indicated where it wants this request to be
2720 * inserted at elevator_merge time 2720 * inserted at elevator_merge time
2721 */ 2721 */
2722 __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); 2722 __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
2723 } 2723 }
2724 2724
2725 /* 2725 /*
2726 * disk_round_stats() - Round off the performance stats on a struct 2726 * disk_round_stats() - Round off the performance stats on a struct
2727 * disk_stats. 2727 * disk_stats.
2728 * 2728 *
2729 * The average IO queue length and utilisation statistics are maintained 2729 * The average IO queue length and utilisation statistics are maintained
2730 * by observing the current state of the queue length and the amount of 2730 * by observing the current state of the queue length and the amount of
2731 * time it has been in this state for. 2731 * time it has been in this state for.
2732 * 2732 *
2733 * Normally, that accounting is done on IO completion, but that can result 2733 * Normally, that accounting is done on IO completion, but that can result
2734 * in more than a second's worth of IO being accounted for within any one 2734 * in more than a second's worth of IO being accounted for within any one
2735 * second, leading to >100% utilisation. To deal with that, we call this 2735 * second, leading to >100% utilisation. To deal with that, we call this
2736 * function to do a round-off before returning the results when reading 2736 * function to do a round-off before returning the results when reading
2737 * /proc/diskstats. This accounts immediately for all queue usage up to 2737 * /proc/diskstats. This accounts immediately for all queue usage up to
2738 * the current jiffies and restarts the counters again. 2738 * the current jiffies and restarts the counters again.
2739 */ 2739 */
2740 void disk_round_stats(struct gendisk *disk) 2740 void disk_round_stats(struct gendisk *disk)
2741 { 2741 {
2742 unsigned long now = jiffies; 2742 unsigned long now = jiffies;
2743 2743
2744 if (now == disk->stamp) 2744 if (now == disk->stamp)
2745 return; 2745 return;
2746 2746
2747 if (disk->in_flight) { 2747 if (disk->in_flight) {
2748 __disk_stat_add(disk, time_in_queue, 2748 __disk_stat_add(disk, time_in_queue,
2749 disk->in_flight * (now - disk->stamp)); 2749 disk->in_flight * (now - disk->stamp));
2750 __disk_stat_add(disk, io_ticks, (now - disk->stamp)); 2750 __disk_stat_add(disk, io_ticks, (now - disk->stamp));
2751 } 2751 }
2752 disk->stamp = now; 2752 disk->stamp = now;
2753 } 2753 }
2754 2754
2755 EXPORT_SYMBOL_GPL(disk_round_stats); 2755 EXPORT_SYMBOL_GPL(disk_round_stats);
2756 2756
2757 /* 2757 /*
2758 * queue lock must be held 2758 * queue lock must be held
2759 */ 2759 */
2760 void __blk_put_request(struct request_queue *q, struct request *req) 2760 void __blk_put_request(struct request_queue *q, struct request *req)
2761 { 2761 {
2762 if (unlikely(!q)) 2762 if (unlikely(!q))
2763 return; 2763 return;
2764 if (unlikely(--req->ref_count)) 2764 if (unlikely(--req->ref_count))
2765 return; 2765 return;
2766 2766
2767 elv_completed_request(q, req); 2767 elv_completed_request(q, req);
2768 2768
2769 /* 2769 /*
2770 * Request may not have originated from ll_rw_blk. If not, 2770 * Request may not have originated from ll_rw_blk. If not,
2771 * it didn't come out of our reserved rq pools 2771 * it didn't come out of our reserved rq pools
2772 */ 2772 */
2773 if (req->cmd_flags & REQ_ALLOCED) { 2773 if (req->cmd_flags & REQ_ALLOCED) {
2774 int rw = rq_data_dir(req); 2774 int rw = rq_data_dir(req);
2775 int priv = req->cmd_flags & REQ_ELVPRIV; 2775 int priv = req->cmd_flags & REQ_ELVPRIV;
2776 2776
2777 BUG_ON(!list_empty(&req->queuelist)); 2777 BUG_ON(!list_empty(&req->queuelist));
2778 BUG_ON(!hlist_unhashed(&req->hash)); 2778 BUG_ON(!hlist_unhashed(&req->hash));
2779 2779
2780 blk_free_request(q, req); 2780 blk_free_request(q, req);
2781 freed_request(q, rw, priv); 2781 freed_request(q, rw, priv);
2782 } 2782 }
2783 } 2783 }
2784 2784
2785 EXPORT_SYMBOL_GPL(__blk_put_request); 2785 EXPORT_SYMBOL_GPL(__blk_put_request);
2786 2786
2787 void blk_put_request(struct request *req) 2787 void blk_put_request(struct request *req)
2788 { 2788 {
2789 unsigned long flags; 2789 unsigned long flags;
2790 struct request_queue *q = req->q; 2790 struct request_queue *q = req->q;
2791 2791
2792 /* 2792 /*
2793 * Gee, IDE calls in w/ NULL q. Fix IDE and remove the 2793 * Gee, IDE calls in w/ NULL q. Fix IDE and remove the
2794 * following if (q) test. 2794 * following if (q) test.
2795 */ 2795 */
2796 if (q) { 2796 if (q) {
2797 spin_lock_irqsave(q->queue_lock, flags); 2797 spin_lock_irqsave(q->queue_lock, flags);
2798 __blk_put_request(q, req); 2798 __blk_put_request(q, req);
2799 spin_unlock_irqrestore(q->queue_lock, flags); 2799 spin_unlock_irqrestore(q->queue_lock, flags);
2800 } 2800 }
2801 } 2801 }
2802 2802
2803 EXPORT_SYMBOL(blk_put_request); 2803 EXPORT_SYMBOL(blk_put_request);
2804 2804
2805 /** 2805 /**
2806 * blk_end_sync_rq - executes a completion event on a request 2806 * blk_end_sync_rq - executes a completion event on a request
2807 * @rq: request to complete 2807 * @rq: request to complete
2808 * @error: end io status of the request 2808 * @error: end io status of the request
2809 */ 2809 */
2810 void blk_end_sync_rq(struct request *rq, int error) 2810 void blk_end_sync_rq(struct request *rq, int error)
2811 { 2811 {
2812 struct completion *waiting = rq->end_io_data; 2812 struct completion *waiting = rq->end_io_data;
2813 2813
2814 rq->end_io_data = NULL; 2814 rq->end_io_data = NULL;
2815 __blk_put_request(rq->q, rq); 2815 __blk_put_request(rq->q, rq);
2816 2816
2817 /* 2817 /*
2818 * complete last, if this is a stack request the process (and thus 2818 * complete last, if this is a stack request the process (and thus
2819 * the rq pointer) could be invalid right after this complete() 2819 * the rq pointer) could be invalid right after this complete()
2820 */ 2820 */
2821 complete(waiting); 2821 complete(waiting);
2822 } 2822 }
2823 EXPORT_SYMBOL(blk_end_sync_rq); 2823 EXPORT_SYMBOL(blk_end_sync_rq);
2824 2824
2825 /* 2825 /*
2826 * Has to be called with the request spinlock acquired 2826 * Has to be called with the request spinlock acquired
2827 */ 2827 */
2828 static int attempt_merge(struct request_queue *q, struct request *req, 2828 static int attempt_merge(struct request_queue *q, struct request *req,
2829 struct request *next) 2829 struct request *next)
2830 { 2830 {
2831 if (!rq_mergeable(req) || !rq_mergeable(next)) 2831 if (!rq_mergeable(req) || !rq_mergeable(next))
2832 return 0; 2832 return 0;
2833 2833
2834 /* 2834 /*
2835 * not contiguous 2835 * not contiguous
2836 */ 2836 */
2837 if (req->sector + req->nr_sectors != next->sector) 2837 if (req->sector + req->nr_sectors != next->sector)
2838 return 0; 2838 return 0;
2839 2839
2840 if (rq_data_dir(req) != rq_data_dir(next) 2840 if (rq_data_dir(req) != rq_data_dir(next)
2841 || req->rq_disk != next->rq_disk 2841 || req->rq_disk != next->rq_disk
2842 || next->special) 2842 || next->special)
2843 return 0; 2843 return 0;
2844 2844
2845 /* 2845 /*
2846 * If we are allowed to merge, then append bio list 2846 * If we are allowed to merge, then append bio list
2847 * from next to rq and release next. merge_requests_fn 2847 * from next to rq and release next. merge_requests_fn
2848 * will have updated segment counts, update sector 2848 * will have updated segment counts, update sector
2849 * counts here. 2849 * counts here.
2850 */ 2850 */
2851 if (!ll_merge_requests_fn(q, req, next)) 2851 if (!ll_merge_requests_fn(q, req, next))
2852 return 0; 2852 return 0;
2853 2853
2854 /* 2854 /*
2855 * At this point we have either done a back merge 2855 * At this point we have either done a back merge
2856 * or front merge. We need the smaller start_time of 2856 * or front merge. We need the smaller start_time of
2857 * the merged requests to be the current request 2857 * the merged requests to be the current request
2858 * for accounting purposes. 2858 * for accounting purposes.
2859 */ 2859 */
2860 if (time_after(req->start_time, next->start_time)) 2860 if (time_after(req->start_time, next->start_time))
2861 req->start_time = next->start_time; 2861 req->start_time = next->start_time;
2862 2862
2863 req->biotail->bi_next = next->bio; 2863 req->biotail->bi_next = next->bio;
2864 req->biotail = next->biotail; 2864 req->biotail = next->biotail;
2865 2865
2866 req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; 2866 req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
2867 2867
2868 elv_merge_requests(q, req, next); 2868 elv_merge_requests(q, req, next);
2869 2869
2870 if (req->rq_disk) { 2870 if (req->rq_disk) {
2871 disk_round_stats(req->rq_disk); 2871 disk_round_stats(req->rq_disk);
2872 req->rq_disk->in_flight--; 2872 req->rq_disk->in_flight--;
2873 } 2873 }
2874 2874
2875 req->ioprio = ioprio_best(req->ioprio, next->ioprio); 2875 req->ioprio = ioprio_best(req->ioprio, next->ioprio);
2876 2876
2877 __blk_put_request(q, next); 2877 __blk_put_request(q, next);
2878 return 1; 2878 return 1;
2879 } 2879 }
2880 2880
2881 static inline int attempt_back_merge(struct request_queue *q, 2881 static inline int attempt_back_merge(struct request_queue *q,
2882 struct request *rq) 2882 struct request *rq)
2883 { 2883 {
2884 struct request *next = elv_latter_request(q, rq); 2884 struct request *next = elv_latter_request(q, rq);
2885 2885
2886 if (next) 2886 if (next)
2887 return attempt_merge(q, rq, next); 2887 return attempt_merge(q, rq, next);
2888 2888
2889 return 0; 2889 return 0;
2890 } 2890 }
2891 2891
2892 static inline int attempt_front_merge(struct request_queue *q, 2892 static inline int attempt_front_merge(struct request_queue *q,
2893 struct request *rq) 2893 struct request *rq)
2894 { 2894 {
2895 struct request *prev = elv_former_request(q, rq); 2895 struct request *prev = elv_former_request(q, rq);
2896 2896
2897 if (prev) 2897 if (prev)
2898 return attempt_merge(q, prev, rq); 2898 return attempt_merge(q, prev, rq);
2899 2899
2900 return 0; 2900 return 0;
2901 } 2901 }
2902 2902
2903 static void init_request_from_bio(struct request *req, struct bio *bio) 2903 static void init_request_from_bio(struct request *req, struct bio *bio)
2904 { 2904 {
2905 req->cmd_type = REQ_TYPE_FS; 2905 req->cmd_type = REQ_TYPE_FS;
2906 2906
2907 /* 2907 /*
2908 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) 2908 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
2909 */ 2909 */
2910 if (bio_rw_ahead(bio) || bio_failfast(bio)) 2910 if (bio_rw_ahead(bio) || bio_failfast(bio))
2911 req->cmd_flags |= REQ_FAILFAST; 2911 req->cmd_flags |= REQ_FAILFAST;
2912 2912
2913 /* 2913 /*
2914 * REQ_BARRIER implies no merging, but let's make it explicit 2914 * REQ_BARRIER implies no merging, but let's make it explicit
2915 */ 2915 */
2916 if (unlikely(bio_barrier(bio))) 2916 if (unlikely(bio_barrier(bio)))
2917 req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE); 2917 req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
2918 2918
2919 if (bio_sync(bio)) 2919 if (bio_sync(bio))
2920 req->cmd_flags |= REQ_RW_SYNC; 2920 req->cmd_flags |= REQ_RW_SYNC;
2921 if (bio_rw_meta(bio)) 2921 if (bio_rw_meta(bio))
2922 req->cmd_flags |= REQ_RW_META; 2922 req->cmd_flags |= REQ_RW_META;
2923 2923
2924 req->errors = 0; 2924 req->errors = 0;
2925 req->hard_sector = req->sector = bio->bi_sector; 2925 req->hard_sector = req->sector = bio->bi_sector;
2926 req->ioprio = bio_prio(bio); 2926 req->ioprio = bio_prio(bio);
2927 req->start_time = jiffies; 2927 req->start_time = jiffies;
2928 blk_rq_bio_prep(req->q, req, bio); 2928 blk_rq_bio_prep(req->q, req, bio);
2929 } 2929 }
2930 2930
2931 static int __make_request(struct request_queue *q, struct bio *bio) 2931 static int __make_request(struct request_queue *q, struct bio *bio)
2932 { 2932 {
2933 struct request *req; 2933 struct request *req;
2934 int el_ret, nr_sectors, barrier, err; 2934 int el_ret, nr_sectors, barrier, err;
2935 const unsigned short prio = bio_prio(bio); 2935 const unsigned short prio = bio_prio(bio);
2936 const int sync = bio_sync(bio); 2936 const int sync = bio_sync(bio);
2937 int rw_flags; 2937 int rw_flags;
2938 2938
2939 nr_sectors = bio_sectors(bio); 2939 nr_sectors = bio_sectors(bio);
2940 2940
2941 /* 2941 /*
2942 * low level driver can indicate that it wants pages above a 2942 * low level driver can indicate that it wants pages above a
2943 * certain limit bounced to low memory (ie for highmem, or even 2943 * certain limit bounced to low memory (ie for highmem, or even
2944 * ISA dma in theory) 2944 * ISA dma in theory)
2945 */ 2945 */
2946 blk_queue_bounce(q, &bio); 2946 blk_queue_bounce(q, &bio);
2947 2947
2948 barrier = bio_barrier(bio); 2948 barrier = bio_barrier(bio);
2949 if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) { 2949 if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
2950 err = -EOPNOTSUPP; 2950 err = -EOPNOTSUPP;
2951 goto end_io; 2951 goto end_io;
2952 } 2952 }
2953 2953
2954 spin_lock_irq(q->queue_lock); 2954 spin_lock_irq(q->queue_lock);
2955 2955
2956 if (unlikely(barrier) || elv_queue_empty(q)) 2956 if (unlikely(barrier) || elv_queue_empty(q))
2957 goto get_rq; 2957 goto get_rq;
2958 2958
2959 el_ret = elv_merge(q, &req, bio); 2959 el_ret = elv_merge(q, &req, bio);
2960 switch (el_ret) { 2960 switch (el_ret) {
2961 case ELEVATOR_BACK_MERGE: 2961 case ELEVATOR_BACK_MERGE:
2962 BUG_ON(!rq_mergeable(req)); 2962 BUG_ON(!rq_mergeable(req));
2963 2963
2964 if (!ll_back_merge_fn(q, req, bio)) 2964 if (!ll_back_merge_fn(q, req, bio))
2965 break; 2965 break;
2966 2966
2967 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); 2967 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
2968 2968
2969 req->biotail->bi_next = bio; 2969 req->biotail->bi_next = bio;
2970 req->biotail = bio; 2970 req->biotail = bio;
2971 req->nr_sectors = req->hard_nr_sectors += nr_sectors; 2971 req->nr_sectors = req->hard_nr_sectors += nr_sectors;
2972 req->ioprio = ioprio_best(req->ioprio, prio); 2972 req->ioprio = ioprio_best(req->ioprio, prio);
2973 drive_stat_acct(req, nr_sectors, 0); 2973 drive_stat_acct(req, nr_sectors, 0);
2974 if (!attempt_back_merge(q, req)) 2974 if (!attempt_back_merge(q, req))
2975 elv_merged_request(q, req, el_ret); 2975 elv_merged_request(q, req, el_ret);
2976 goto out; 2976 goto out;
2977 2977
2978 case ELEVATOR_FRONT_MERGE: 2978 case ELEVATOR_FRONT_MERGE:
2979 BUG_ON(!rq_mergeable(req)); 2979 BUG_ON(!rq_mergeable(req));
2980 2980
2981 if (!ll_front_merge_fn(q, req, bio)) 2981 if (!ll_front_merge_fn(q, req, bio))
2982 break; 2982 break;
2983 2983
2984 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); 2984 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
2985 2985
2986 bio->bi_next = req->bio; 2986 bio->bi_next = req->bio;
2987 req->bio = bio; 2987 req->bio = bio;
2988 2988
2989 /* 2989 /*
2990 * may not be valid. if the low level driver said 2990 * may not be valid. if the low level driver said
2991 * it didn't need a bounce buffer then it better 2991 * it didn't need a bounce buffer then it better
2992 * not touch req->buffer either... 2992 * not touch req->buffer either...
2993 */ 2993 */
2994 req->buffer = bio_data(bio); 2994 req->buffer = bio_data(bio);
2995 req->current_nr_sectors = bio_cur_sectors(bio); 2995 req->current_nr_sectors = bio_cur_sectors(bio);
2996 req->hard_cur_sectors = req->current_nr_sectors; 2996 req->hard_cur_sectors = req->current_nr_sectors;
2997 req->sector = req->hard_sector = bio->bi_sector; 2997 req->sector = req->hard_sector = bio->bi_sector;
2998 req->nr_sectors = req->hard_nr_sectors += nr_sectors; 2998 req->nr_sectors = req->hard_nr_sectors += nr_sectors;
2999 req->ioprio = ioprio_best(req->ioprio, prio); 2999 req->ioprio = ioprio_best(req->ioprio, prio);
3000 drive_stat_acct(req, nr_sectors, 0); 3000 drive_stat_acct(req, nr_sectors, 0);
3001 if (!attempt_front_merge(q, req)) 3001 if (!attempt_front_merge(q, req))
3002 elv_merged_request(q, req, el_ret); 3002 elv_merged_request(q, req, el_ret);
3003 goto out; 3003 goto out;
3004 3004
3005 /* ELEVATOR_NO_MERGE: elevator says don't/can't merge. */ 3005 /* ELEVATOR_NO_MERGE: elevator says don't/can't merge. */
3006 default: 3006 default:
3007 ; 3007 ;
3008 } 3008 }
3009 3009
3010 get_rq: 3010 get_rq:
3011 /* 3011 /*
3012 * This sync check and mask will be re-done in init_request_from_bio(), 3012 * This sync check and mask will be re-done in init_request_from_bio(),
3013 * but we need to set it earlier to expose the sync flag to the 3013 * but we need to set it earlier to expose the sync flag to the
3014 * rq allocator and io schedulers. 3014 * rq allocator and io schedulers.
3015 */ 3015 */
3016 rw_flags = bio_data_dir(bio); 3016 rw_flags = bio_data_dir(bio);
3017 if (sync) 3017 if (sync)
3018 rw_flags |= REQ_RW_SYNC; 3018 rw_flags |= REQ_RW_SYNC;
3019 3019
3020 /* 3020 /*
3021 * Grab a free request. This may sleep but cannot fail. 3021 * Grab a free request. This may sleep but cannot fail.
3022 * Returns with the queue unlocked. 3022 * Returns with the queue unlocked.
3023 */ 3023 */
3024 req = get_request_wait(q, rw_flags, bio); 3024 req = get_request_wait(q, rw_flags, bio);
3025 3025
3026 /* 3026 /*
3027 * After dropping the lock and possibly sleeping here, our request 3027 * After dropping the lock and possibly sleeping here, our request
3028 * may now be mergeable after it had proven unmergeable (above). 3028 * may now be mergeable after it had proven unmergeable (above).
3029 * We don't worry about that case for efficiency. It won't happen 3029 * We don't worry about that case for efficiency. It won't happen
3030 * often, and the elevators are able to handle it. 3030 * often, and the elevators are able to handle it.
3031 */ 3031 */
3032 init_request_from_bio(req, bio); 3032 init_request_from_bio(req, bio);
3033 3033
3034 spin_lock_irq(q->queue_lock); 3034 spin_lock_irq(q->queue_lock);
3035 if (elv_queue_empty(q)) 3035 if (elv_queue_empty(q))
3036 blk_plug_device(q); 3036 blk_plug_device(q);
3037 add_request(q, req); 3037 add_request(q, req);
3038 out: 3038 out:
3039 if (sync) 3039 if (sync)
3040 __generic_unplug_device(q); 3040 __generic_unplug_device(q);
3041 3041
3042 spin_unlock_irq(q->queue_lock); 3042 spin_unlock_irq(q->queue_lock);
3043 return 0; 3043 return 0;
3044 3044
3045 end_io: 3045 end_io:
3046 bio_endio(bio, err); 3046 bio_endio(bio, err);
3047 return 0; 3047 return 0;
3048 } 3048 }
3049 3049
3050 /* 3050 /*
3051 * If bio->bi_dev is a partition, remap the location 3051 * If bio->bi_dev is a partition, remap the location
3052 */ 3052 */
3053 static inline void blk_partition_remap(struct bio *bio) 3053 static inline void blk_partition_remap(struct bio *bio)
3054 { 3054 {
3055 struct block_device *bdev = bio->bi_bdev; 3055 struct block_device *bdev = bio->bi_bdev;
3056 3056
3057 if (bdev != bdev->bd_contains) { 3057 if (bdev != bdev->bd_contains) {
3058 struct hd_struct *p = bdev->bd_part; 3058 struct hd_struct *p = bdev->bd_part;
3059 const int rw = bio_data_dir(bio); 3059 const int rw = bio_data_dir(bio);
3060 3060
3061 p->sectors[rw] += bio_sectors(bio); 3061 p->sectors[rw] += bio_sectors(bio);
3062 p->ios[rw]++; 3062 p->ios[rw]++;
3063 3063
3064 bio->bi_sector += p->start_sect; 3064 bio->bi_sector += p->start_sect;
3065 bio->bi_bdev = bdev->bd_contains; 3065 bio->bi_bdev = bdev->bd_contains;
3066 3066
3067 blk_add_trace_remap(bdev_get_queue(bio->bi_bdev), bio, 3067 blk_add_trace_remap(bdev_get_queue(bio->bi_bdev), bio,
3068 bdev->bd_dev, bio->bi_sector, 3068 bdev->bd_dev, bio->bi_sector,
3069 bio->bi_sector - p->start_sect); 3069 bio->bi_sector - p->start_sect);
3070 } 3070 }
3071 } 3071 }
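The remap itself is one addition of the partition offset; a tiny stand-alone sketch (not from the patch, numbers invented) of the same arithmetic:

#include <stdio.h>

int main(void)
{
        unsigned long long start_sect = 1000;   /* p->start_sect: partition offset */
        unsigned long long bi_sector  = 50;     /* sector relative to the partition */

        bi_sector += start_sect;                /* what blk_partition_remap() does */
        printf("sector on whole disk: %llu\n", bi_sector);     /* 1050 */
        return 0;
}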
3072 3072
3073 static void handle_bad_sector(struct bio *bio) 3073 static void handle_bad_sector(struct bio *bio)
3074 { 3074 {
3075 char b[BDEVNAME_SIZE]; 3075 char b[BDEVNAME_SIZE];
3076 3076
3077 printk(KERN_INFO "attempt to access beyond end of device\n"); 3077 printk(KERN_INFO "attempt to access beyond end of device\n");
3078 printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", 3078 printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",
3079 bdevname(bio->bi_bdev, b), 3079 bdevname(bio->bi_bdev, b),
3080 bio->bi_rw, 3080 bio->bi_rw,
3081 (unsigned long long)bio->bi_sector + bio_sectors(bio), 3081 (unsigned long long)bio->bi_sector + bio_sectors(bio),
3082 (long long)(bio->bi_bdev->bd_inode->i_size >> 9)); 3082 (long long)(bio->bi_bdev->bd_inode->i_size >> 9));
3083 3083
3084 set_bit(BIO_EOF, &bio->bi_flags); 3084 set_bit(BIO_EOF, &bio->bi_flags);
3085 } 3085 }
3086 3086
3087 #ifdef CONFIG_FAIL_MAKE_REQUEST 3087 #ifdef CONFIG_FAIL_MAKE_REQUEST
3088 3088
3089 static DECLARE_FAULT_ATTR(fail_make_request); 3089 static DECLARE_FAULT_ATTR(fail_make_request);
3090 3090
3091 static int __init setup_fail_make_request(char *str) 3091 static int __init setup_fail_make_request(char *str)
3092 { 3092 {
3093 return setup_fault_attr(&fail_make_request, str); 3093 return setup_fault_attr(&fail_make_request, str);
3094 } 3094 }
3095 __setup("fail_make_request=", setup_fail_make_request); 3095 __setup("fail_make_request=", setup_fail_make_request);
3096 3096
3097 static int should_fail_request(struct bio *bio) 3097 static int should_fail_request(struct bio *bio)
3098 { 3098 {
3099 if ((bio->bi_bdev->bd_disk->flags & GENHD_FL_FAIL) || 3099 if ((bio->bi_bdev->bd_disk->flags & GENHD_FL_FAIL) ||
3100 (bio->bi_bdev->bd_part && bio->bi_bdev->bd_part->make_it_fail)) 3100 (bio->bi_bdev->bd_part && bio->bi_bdev->bd_part->make_it_fail))
3101 return should_fail(&fail_make_request, bio->bi_size); 3101 return should_fail(&fail_make_request, bio->bi_size);
3102 3102
3103 return 0; 3103 return 0;
3104 } 3104 }
3105 3105
3106 static int __init fail_make_request_debugfs(void) 3106 static int __init fail_make_request_debugfs(void)
3107 { 3107 {
3108 return init_fault_attr_dentries(&fail_make_request, 3108 return init_fault_attr_dentries(&fail_make_request,
3109 "fail_make_request"); 3109 "fail_make_request");
3110 } 3110 }
3111 3111
3112 late_initcall(fail_make_request_debugfs); 3112 late_initcall(fail_make_request_debugfs);
3113 3113
3114 #else /* CONFIG_FAIL_MAKE_REQUEST */ 3114 #else /* CONFIG_FAIL_MAKE_REQUEST */
3115 3115
3116 static inline int should_fail_request(struct bio *bio) 3116 static inline int should_fail_request(struct bio *bio)
3117 { 3117 {
3118 return 0; 3118 return 0;
3119 } 3119 }
3120 3120
3121 #endif /* CONFIG_FAIL_MAKE_REQUEST */ 3121 #endif /* CONFIG_FAIL_MAKE_REQUEST */
3122 3122
3123 /** 3123 /**
3124 * generic_make_request: hand a buffer to its device driver for I/O 3124 * generic_make_request: hand a buffer to its device driver for I/O
3125 * @bio: The bio describing the location in memory and on the device. 3125 * @bio: The bio describing the location in memory and on the device.
3126 * 3126 *
3127 * generic_make_request() is used to make I/O requests of block 3127 * generic_make_request() is used to make I/O requests of block
3128 * devices. It is passed a &struct bio, which describes the I/O that needs 3128 * devices. It is passed a &struct bio, which describes the I/O that needs
3129 * to be done. 3129 * to be done.
3130 * 3130 *
3131 * generic_make_request() does not return any status. The 3131 * generic_make_request() does not return any status. The
3132 * success/failure status of the request, along with notification of 3132 * success/failure status of the request, along with notification of
3133 * completion, is delivered asynchronously through the bio->bi_end_io 3133 * completion, is delivered asynchronously through the bio->bi_end_io
3134 * function described (one day) elsewhere. 3134 * function described (one day) elsewhere.
3135 * 3135 *
3136 * The caller of generic_make_request must make sure that bi_io_vec 3136 * The caller of generic_make_request must make sure that bi_io_vec
3137 * are set to describe the memory buffer, and that bi_dev and bi_sector are 3137 * are set to describe the memory buffer, and that bi_dev and bi_sector are
3138 * set to describe the device address, and the 3138 * set to describe the device address, and the
3139 * bi_end_io and optionally bi_private are set to describe how 3139 * bi_end_io and optionally bi_private are set to describe how
3140 * completion notification should be signaled. 3140 * completion notification should be signaled.
3141 * 3141 *
3142 * generic_make_request and the drivers it calls may use bi_next if this 3142 * generic_make_request and the drivers it calls may use bi_next if this
3143 * bio happens to be merged with someone else, and may change bi_dev and 3143 * bio happens to be merged with someone else, and may change bi_dev and
3144 * bi_sector for remaps as it sees fit. So the values of these fields 3144 * bi_sector for remaps as it sees fit. So the values of these fields
3145 * should NOT be depended on after the call to generic_make_request. 3145 * should NOT be depended on after the call to generic_make_request.
3146 */ 3146 */
3147 static inline void __generic_make_request(struct bio *bio) 3147 static inline void __generic_make_request(struct bio *bio)
3148 { 3148 {
3149 struct request_queue *q; 3149 struct request_queue *q;
3150 sector_t maxsector; 3150 sector_t maxsector;
3151 sector_t old_sector; 3151 sector_t old_sector;
3152 int ret, nr_sectors = bio_sectors(bio); 3152 int ret, nr_sectors = bio_sectors(bio);
3153 dev_t old_dev; 3153 dev_t old_dev;
3154 3154
3155 might_sleep(); 3155 might_sleep();
3156 /* Test device or partition size, when known. */ 3156 /* Test device or partition size, when known. */
3157 maxsector = bio->bi_bdev->bd_inode->i_size >> 9; 3157 maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
3158 if (maxsector) { 3158 if (maxsector) {
3159 sector_t sector = bio->bi_sector; 3159 sector_t sector = bio->bi_sector;
3160 3160
3161 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { 3161 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
3162 /* 3162 /*
3163 * This may well happen - the kernel calls bread() 3163 * This may well happen - the kernel calls bread()
3164 * without checking the size of the device, e.g., when 3164 * without checking the size of the device, e.g., when
3165 * mounting a device. 3165 * mounting a device.
3166 */ 3166 */
3167 handle_bad_sector(bio); 3167 handle_bad_sector(bio);
3168 goto end_io; 3168 goto end_io;
3169 } 3169 }
3170 } 3170 }
3171 3171
3172 /* 3172 /*
3173 * Resolve the mapping until finished. (drivers are 3173 * Resolve the mapping until finished. (drivers are
3174 * still free to implement/resolve their own stacking 3174 * still free to implement/resolve their own stacking
3175 * by explicitly returning 0) 3175 * by explicitly returning 0)
3176 * 3176 *
3177 * NOTE: we don't repeat the blk_size check for each new device. 3177 * NOTE: we don't repeat the blk_size check for each new device.
3178 * Stacking drivers are expected to know what they are doing. 3178 * Stacking drivers are expected to know what they are doing.
3179 */ 3179 */
3180 old_sector = -1; 3180 old_sector = -1;
3181 old_dev = 0; 3181 old_dev = 0;
3182 do { 3182 do {
3183 char b[BDEVNAME_SIZE]; 3183 char b[BDEVNAME_SIZE];
3184 3184
3185 q = bdev_get_queue(bio->bi_bdev); 3185 q = bdev_get_queue(bio->bi_bdev);
3186 if (!q) { 3186 if (!q) {
3187 printk(KERN_ERR 3187 printk(KERN_ERR
3188 "generic_make_request: Trying to access " 3188 "generic_make_request: Trying to access "
3189 "nonexistent block-device %s (%Lu)\n", 3189 "nonexistent block-device %s (%Lu)\n",
3190 bdevname(bio->bi_bdev, b), 3190 bdevname(bio->bi_bdev, b),
3191 (long long) bio->bi_sector); 3191 (long long) bio->bi_sector);
3192 end_io: 3192 end_io:
3193 bio_endio(bio, -EIO); 3193 bio_endio(bio, -EIO);
3194 break; 3194 break;
3195 } 3195 }
3196 3196
3197 if (unlikely(nr_sectors > q->max_hw_sectors)) { 3197 if (unlikely(nr_sectors > q->max_hw_sectors)) {
3198 printk("bio too big device %s (%u > %u)\n", 3198 printk("bio too big device %s (%u > %u)\n",
3199 bdevname(bio->bi_bdev, b), 3199 bdevname(bio->bi_bdev, b),
3200 bio_sectors(bio), 3200 bio_sectors(bio),
3201 q->max_hw_sectors); 3201 q->max_hw_sectors);
3202 goto end_io; 3202 goto end_io;
3203 } 3203 }
3204 3204
3205 if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) 3205 if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
3206 goto end_io; 3206 goto end_io;
3207 3207
3208 if (should_fail_request(bio)) 3208 if (should_fail_request(bio))
3209 goto end_io; 3209 goto end_io;
3210 3210
3211 /* 3211 /*
3212 * If this device has partitions, remap block n 3212 * If this device has partitions, remap block n
3213 * of partition p to block n+start(p) of the disk. 3213 * of partition p to block n+start(p) of the disk.
3214 */ 3214 */
3215 blk_partition_remap(bio); 3215 blk_partition_remap(bio);
3216 3216
3217 if (old_sector != -1) 3217 if (old_sector != -1)
3218 blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, 3218 blk_add_trace_remap(q, bio, old_dev, bio->bi_sector,
3219 old_sector); 3219 old_sector);
3220 3220
3221 blk_add_trace_bio(q, bio, BLK_TA_QUEUE); 3221 blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
3222 3222
3223 old_sector = bio->bi_sector; 3223 old_sector = bio->bi_sector;
3224 old_dev = bio->bi_bdev->bd_dev; 3224 old_dev = bio->bi_bdev->bd_dev;
3225 3225
3226 maxsector = bio->bi_bdev->bd_inode->i_size >> 9; 3226 maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
3227 if (maxsector) { 3227 if (maxsector) {
3228 sector_t sector = bio->bi_sector; 3228 sector_t sector = bio->bi_sector;
3229 3229
3230 if (maxsector < nr_sectors || 3230 if (maxsector < nr_sectors ||
3231 maxsector - nr_sectors < sector) { 3231 maxsector - nr_sectors < sector) {
3232 /* 3232 /*
3233 * This may well happen - partitions are not 3233 * This may well happen - partitions are not
3234 * checked to make sure they are within the size 3234 * checked to make sure they are within the size
3235 * of the whole device. 3235 * of the whole device.
3236 */ 3236 */
3237 handle_bad_sector(bio); 3237 handle_bad_sector(bio);
3238 goto end_io; 3238 goto end_io;
3239 } 3239 }
3240 } 3240 }
3241 3241
3242 ret = q->make_request_fn(q, bio); 3242 ret = q->make_request_fn(q, bio);
3243 } while (ret); 3243 } while (ret);
3244 } 3244 }
3245 3245
3246 /* 3246 /*
3247 * We only want one ->make_request_fn to be active at a time, 3247 * We only want one ->make_request_fn to be active at a time,
3248 * else stack usage with stacked devices could be a problem. 3248 * else stack usage with stacked devices could be a problem.
3249 * So use current->bio_{list,tail} to keep a list of requests 3249 * So use current->bio_{list,tail} to keep a list of requests
3250 * submitted by a make_request_fn function. 3250 * submitted by a make_request_fn function.
3251 * current->bio_tail is also used as a flag to say if 3251 * current->bio_tail is also used as a flag to say if
3252 * generic_make_request is currently active in this task or not. 3252 * generic_make_request is currently active in this task or not.
3253 * If it is NULL, then no make_request is active. If it is non-NULL, 3253 * If it is NULL, then no make_request is active. If it is non-NULL,
3254 * then a make_request is active, and new requests should be added 3254 * then a make_request is active, and new requests should be added
3255 * at the tail 3255 * at the tail
3256 */ 3256 */
3257 void generic_make_request(struct bio *bio) 3257 void generic_make_request(struct bio *bio)
3258 { 3258 {
3259 if (current->bio_tail) { 3259 if (current->bio_tail) {
3260 /* make_request is active */ 3260 /* make_request is active */
3261 *(current->bio_tail) = bio; 3261 *(current->bio_tail) = bio;
3262 bio->bi_next = NULL; 3262 bio->bi_next = NULL;
3263 current->bio_tail = &bio->bi_next; 3263 current->bio_tail = &bio->bi_next;
3264 return; 3264 return;
3265 } 3265 }
3266 /* following loop may be a bit non-obvious, and so deserves some 3266 /* following loop may be a bit non-obvious, and so deserves some
3267 * explanation. 3267 * explanation.
3268 * Before entering the loop, bio->bi_next is NULL (as all callers 3268 * Before entering the loop, bio->bi_next is NULL (as all callers
3269 * ensure that) so we have a list with a single bio. 3269 * ensure that) so we have a list with a single bio.
3270 * We pretend that we have just taken it off a longer list, so 3270 * We pretend that we have just taken it off a longer list, so
3271 * we assign bio_list to the next (which is NULL) and bio_tail 3271 * we assign bio_list to the next (which is NULL) and bio_tail
3272 * to &bio_list, thus initialising the bio_list of new bios to be 3272 * to &bio_list, thus initialising the bio_list of new bios to be
3273 * added. __generic_make_request may indeed add some more bios 3273 * added. __generic_make_request may indeed add some more bios
3274 * through a recursive call to generic_make_request. If it 3274 * through a recursive call to generic_make_request. If it
3275 * did, we find a non-NULL value in bio_list and re-enter the loop 3275 * did, we find a non-NULL value in bio_list and re-enter the loop
3276 * from the top. In this case we really did just take the bio 3276 * from the top. In this case we really did just take the bio
3277 * off the top of the list (no pretending) and so fix up bio_list and 3277 * off the top of the list (no pretending) and so fix up bio_list and
3278 * bio_tail or bi_next, and call into __generic_make_request again. 3278 * bio_tail or bi_next, and call into __generic_make_request again.
3279 * 3279 *
3280 * The loop was structured like this to make only one call to 3280 * The loop was structured like this to make only one call to
3281 * __generic_make_request (which is important as it is large and 3281 * __generic_make_request (which is important as it is large and
3282 * inlined) and to keep the structure simple. 3282 * inlined) and to keep the structure simple.
3283 */ 3283 */
3284 BUG_ON(bio->bi_next); 3284 BUG_ON(bio->bi_next);
3285 do { 3285 do {
3286 current->bio_list = bio->bi_next; 3286 current->bio_list = bio->bi_next;
3287 if (bio->bi_next == NULL) 3287 if (bio->bi_next == NULL)
3288 current->bio_tail = &current->bio_list; 3288 current->bio_tail = &current->bio_list;
3289 else 3289 else
3290 bio->bi_next = NULL; 3290 bio->bi_next = NULL;
3291 __generic_make_request(bio); 3291 __generic_make_request(bio);
3292 bio = current->bio_list; 3292 bio = current->bio_list;
3293 } while (bio); 3293 } while (bio);
3294 current->bio_tail = NULL; /* deactivate */ 3294 current->bio_tail = NULL; /* deactivate */
3295 } 3295 }
3296 3296
3297 EXPORT_SYMBOL(generic_make_request); 3297 EXPORT_SYMBOL(generic_make_request);
3298 3298
3299 /** 3299 /**
3300 * submit_bio: submit a bio to the block device layer for I/O 3300 * submit_bio: submit a bio to the block device layer for I/O
3301 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) 3301 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
3302 * @bio: The &struct bio which describes the I/O 3302 * @bio: The &struct bio which describes the I/O
3303 * 3303 *
3304 * submit_bio() is very similar in purpose to generic_make_request(), and 3304 * submit_bio() is very similar in purpose to generic_make_request(), and
3305 * uses that function to do most of the work. Both are fairly rough 3305 * uses that function to do most of the work. Both are fairly rough
3306 * interfaces, @bio must be presetup and ready for I/O. 3306 * interfaces, @bio must be presetup and ready for I/O.
3307 * 3307 *
3308 */ 3308 */
3309 void submit_bio(int rw, struct bio *bio) 3309 void submit_bio(int rw, struct bio *bio)
3310 { 3310 {
3311 int count = bio_sectors(bio); 3311 int count = bio_sectors(bio);
3312 3312
3313 BIO_BUG_ON(!bio->bi_size); 3313 BIO_BUG_ON(!bio->bi_size);
3314 BIO_BUG_ON(!bio->bi_io_vec); 3314 BIO_BUG_ON(!bio->bi_io_vec);
3315 bio->bi_rw |= rw; 3315 bio->bi_rw |= rw;
3316 if (rw & WRITE) { 3316 if (rw & WRITE) {
3317 count_vm_events(PGPGOUT, count); 3317 count_vm_events(PGPGOUT, count);
3318 } else { 3318 } else {
3319 task_io_account_read(bio->bi_size); 3319 task_io_account_read(bio->bi_size);
3320 count_vm_events(PGPGIN, count); 3320 count_vm_events(PGPGIN, count);
3321 } 3321 }
3322 3322
3323 if (unlikely(block_dump)) { 3323 if (unlikely(block_dump)) {
3324 char b[BDEVNAME_SIZE]; 3324 char b[BDEVNAME_SIZE];
3325 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n", 3325 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
3326 current->comm, current->pid, 3326 current->comm, current->pid,
3327 (rw & WRITE) ? "WRITE" : "READ", 3327 (rw & WRITE) ? "WRITE" : "READ",
3328 (unsigned long long)bio->bi_sector, 3328 (unsigned long long)bio->bi_sector,
3329 bdevname(bio->bi_bdev,b)); 3329 bdevname(bio->bi_bdev,b));
3330 } 3330 }
3331 3331
3332 generic_make_request(bio); 3332 generic_make_request(bio);
3333 } 3333 }
3334 3334
3335 EXPORT_SYMBOL(submit_bio); 3335 EXPORT_SYMBOL(submit_bio);
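To tie the kerneldoc above together, a minimal illustrative sketch (not part of this patch) of what a submit_bio()/generic_make_request() caller sets up: bi_bdev and bi_sector for the device address, bi_io_vec via bio_add_page(), and bi_end_io for asynchronous completion. The callback name, the target device and the page are invented for the example; the bi_end_io signature follows this tree, where bio_endio() takes the bio plus an error code.

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/mm.h>

static void my_end_io(struct bio *bio, int error)
{
        /* called asynchronously once the I/O completes (or fails) */
        bio_put(bio);
}

static int read_one_page(struct block_device *bdev, sector_t sector,
                         struct page *page)
{
        struct bio *bio = bio_alloc(GFP_KERNEL, 1);

        if (!bio)
                return -ENOMEM;

        bio->bi_bdev = bdev;            /* may be a partition; remapped later */
        bio->bi_sector = sector;        /* 512-byte sector on that bdev */
        bio->bi_end_io = my_end_io;     /* completion notification */
        bio_add_page(bio, page, PAGE_SIZE, 0);  /* fills bi_io_vec and bi_size */

        submit_bio(READ, bio);          /* hands off to generic_make_request() */
        return 0;
}

As the kerneldoc warns, bi_sector and bi_bdev must not be relied upon after submission, since remapping may have changed them.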
3336 3336
3337 static void blk_recalc_rq_sectors(struct request *rq, int nsect) 3337 static void blk_recalc_rq_sectors(struct request *rq, int nsect)
3338 { 3338 {
3339 if (blk_fs_request(rq)) { 3339 if (blk_fs_request(rq)) {
3340 rq->hard_sector += nsect; 3340 rq->hard_sector += nsect;
3341 rq->hard_nr_sectors -= nsect; 3341 rq->hard_nr_sectors -= nsect;
3342 3342
3343 /* 3343 /*
3344 * Move the I/O submission pointers ahead if required. 3344 * Move the I/O submission pointers ahead if required.
3345 */ 3345 */
3346 if ((rq->nr_sectors >= rq->hard_nr_sectors) && 3346 if ((rq->nr_sectors >= rq->hard_nr_sectors) &&
3347 (rq->sector <= rq->hard_sector)) { 3347 (rq->sector <= rq->hard_sector)) {
3348 rq->sector = rq->hard_sector; 3348 rq->sector = rq->hard_sector;
3349 rq->nr_sectors = rq->hard_nr_sectors; 3349 rq->nr_sectors = rq->hard_nr_sectors;
3350 rq->hard_cur_sectors = bio_cur_sectors(rq->bio); 3350 rq->hard_cur_sectors = bio_cur_sectors(rq->bio);
3351 rq->current_nr_sectors = rq->hard_cur_sectors; 3351 rq->current_nr_sectors = rq->hard_cur_sectors;
3352 rq->buffer = bio_data(rq->bio); 3352 rq->buffer = bio_data(rq->bio);
3353 } 3353 }
3354 3354
3355 /* 3355 /*
3356 * if total number of sectors is less than the first segment 3356 * if total number of sectors is less than the first segment
3357 * size, something has gone terribly wrong 3357 * size, something has gone terribly wrong
3358 */ 3358 */
3359 if (rq->nr_sectors < rq->current_nr_sectors) { 3359 if (rq->nr_sectors < rq->current_nr_sectors) {
3360 printk("blk: request botched\n"); 3360 printk("blk: request botched\n");
3361 rq->nr_sectors = rq->current_nr_sectors; 3361 rq->nr_sectors = rq->current_nr_sectors;
3362 } 3362 }
3363 } 3363 }
3364 } 3364 }
3365 3365
3366 static int __end_that_request_first(struct request *req, int uptodate, 3366 static int __end_that_request_first(struct request *req, int uptodate,
3367 int nr_bytes) 3367 int nr_bytes)
3368 { 3368 {
3369 int total_bytes, bio_nbytes, error, next_idx = 0; 3369 int total_bytes, bio_nbytes, error, next_idx = 0;
3370 struct bio *bio; 3370 struct bio *bio;
3371 3371
3372 blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE); 3372 blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE);
3373 3373
3374 /* 3374 /*
3375 * extend uptodate bool to allow < 0 value to be direct io error 3375 * extend uptodate bool to allow < 0 value to be direct io error
3376 */ 3376 */
3377 error = 0; 3377 error = 0;
3378 if (end_io_error(uptodate)) 3378 if (end_io_error(uptodate))
3379 error = !uptodate ? -EIO : uptodate; 3379 error = !uptodate ? -EIO : uptodate;
3380 3380
3381 /* 3381 /*
3382 * for a REQ_BLOCK_PC request, we want to carry any eventual 3382 * for a REQ_BLOCK_PC request, we want to carry any eventual
3383 * sense key with us all the way through 3383 * sense key with us all the way through
3384 */ 3384 */
3385 if (!blk_pc_request(req)) 3385 if (!blk_pc_request(req))
3386 req->errors = 0; 3386 req->errors = 0;
3387 3387
3388 if (!uptodate) { 3388 if (!uptodate) {
3389 if (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET)) 3389 if (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))
3390 printk("end_request: I/O error, dev %s, sector %llu\n", 3390 printk("end_request: I/O error, dev %s, sector %llu\n",
3391 req->rq_disk ? req->rq_disk->disk_name : "?", 3391 req->rq_disk ? req->rq_disk->disk_name : "?",
3392 (unsigned long long)req->sector); 3392 (unsigned long long)req->sector);
3393 } 3393 }
3394 3394
3395 if (blk_fs_request(req) && req->rq_disk) { 3395 if (blk_fs_request(req) && req->rq_disk) {
3396 const int rw = rq_data_dir(req); 3396 const int rw = rq_data_dir(req);
3397 3397
3398 disk_stat_add(req->rq_disk, sectors[rw], nr_bytes >> 9); 3398 disk_stat_add(req->rq_disk, sectors[rw], nr_bytes >> 9);
3399 } 3399 }
3400 3400
3401 total_bytes = bio_nbytes = 0; 3401 total_bytes = bio_nbytes = 0;
3402 while ((bio = req->bio) != NULL) { 3402 while ((bio = req->bio) != NULL) {
3403 int nbytes; 3403 int nbytes;
3404 3404
3405 if (nr_bytes >= bio->bi_size) { 3405 if (nr_bytes >= bio->bi_size) {
3406 req->bio = bio->bi_next; 3406 req->bio = bio->bi_next;
3407 nbytes = bio->bi_size; 3407 nbytes = bio->bi_size;
3408 req_bio_endio(req, bio, nbytes, error); 3408 req_bio_endio(req, bio, nbytes, error);
3409 next_idx = 0; 3409 next_idx = 0;
3410 bio_nbytes = 0; 3410 bio_nbytes = 0;
3411 } else { 3411 } else {
3412 int idx = bio->bi_idx + next_idx; 3412 int idx = bio->bi_idx + next_idx;
3413 3413
3414 if (unlikely(bio->bi_idx >= bio->bi_vcnt)) { 3414 if (unlikely(bio->bi_idx >= bio->bi_vcnt)) {
3415 blk_dump_rq_flags(req, "__end_that"); 3415 blk_dump_rq_flags(req, "__end_that");
3416 printk("%s: bio idx %d >= vcnt %d\n", 3416 printk("%s: bio idx %d >= vcnt %d\n",
3417 __FUNCTION__, 3417 __FUNCTION__,
3418 bio->bi_idx, bio->bi_vcnt); 3418 bio->bi_idx, bio->bi_vcnt);
3419 break; 3419 break;
3420 } 3420 }
3421 3421
3422 nbytes = bio_iovec_idx(bio, idx)->bv_len; 3422 nbytes = bio_iovec_idx(bio, idx)->bv_len;
3423 BIO_BUG_ON(nbytes > bio->bi_size); 3423 BIO_BUG_ON(nbytes > bio->bi_size);
3424 3424
3425 /* 3425 /*
3426 * not a complete bvec done 3426 * not a complete bvec done
3427 */ 3427 */
3428 if (unlikely(nbytes > nr_bytes)) { 3428 if (unlikely(nbytes > nr_bytes)) {
3429 bio_nbytes += nr_bytes; 3429 bio_nbytes += nr_bytes;
3430 total_bytes += nr_bytes; 3430 total_bytes += nr_bytes;
3431 break; 3431 break;
3432 } 3432 }
3433 3433
3434 /* 3434 /*
3435 * advance to the next vector 3435 * advance to the next vector
3436 */ 3436 */
3437 next_idx++; 3437 next_idx++;
3438 bio_nbytes += nbytes; 3438 bio_nbytes += nbytes;
3439 } 3439 }
3440 3440
3441 total_bytes += nbytes; 3441 total_bytes += nbytes;
3442 nr_bytes -= nbytes; 3442 nr_bytes -= nbytes;
3443 3443
3444 if ((bio = req->bio)) { 3444 if ((bio = req->bio)) {
3445 /* 3445 /*
3446 * end more in this run, or just return 'not-done' 3446 * end more in this run, or just return 'not-done'
3447 */ 3447 */
3448 if (unlikely(nr_bytes <= 0)) 3448 if (unlikely(nr_bytes <= 0))
3449 break; 3449 break;
3450 } 3450 }
3451 } 3451 }
3452 3452
3453 /* 3453 /*
3454 * completely done 3454 * completely done
3455 */ 3455 */
3456 if (!req->bio) 3456 if (!req->bio)
3457 return 0; 3457 return 0;
3458 3458
3459 /* 3459 /*
3460 * if the request wasn't completed, update state 3460 * if the request wasn't completed, update state
3461 */ 3461 */
3462 if (bio_nbytes) { 3462 if (bio_nbytes) {
3463 req_bio_endio(req, bio, bio_nbytes, error); 3463 req_bio_endio(req, bio, bio_nbytes, error);
3464 bio->bi_idx += next_idx; 3464 bio->bi_idx += next_idx;
3465 bio_iovec(bio)->bv_offset += nr_bytes; 3465 bio_iovec(bio)->bv_offset += nr_bytes;
3466 bio_iovec(bio)->bv_len -= nr_bytes; 3466 bio_iovec(bio)->bv_len -= nr_bytes;
3467 } 3467 }
3468 3468
3469 blk_recalc_rq_sectors(req, total_bytes >> 9); 3469 blk_recalc_rq_sectors(req, total_bytes >> 9);
3470 blk_recalc_rq_segments(req); 3470 blk_recalc_rq_segments(req);
3471 return 1; 3471 return 1;
3472 } 3472 }
3473 3473
3474 /** 3474 /**
3475 * end_that_request_first - end I/O on a request 3475 * end_that_request_first - end I/O on a request
3476 * @req: the request being processed 3476 * @req: the request being processed
3477 * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error 3477 * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error
3478 * @nr_sectors: number of sectors to end I/O on 3478 * @nr_sectors: number of sectors to end I/O on
3479 * 3479 *
3480 * Description: 3480 * Description:
3481 * Ends I/O on a number of sectors attached to @req, and sets it up 3481 * Ends I/O on a number of sectors attached to @req, and sets it up
3482 * for the next range of segments (if any) in the cluster. 3482 * for the next range of segments (if any) in the cluster.
3483 * 3483 *
3484 * Return: 3484 * Return:
3485 * 0 - we are done with this request, call end_that_request_last() 3485 * 0 - we are done with this request, call end_that_request_last()
3486 * 1 - still buffers pending for this request 3486 * 1 - still buffers pending for this request
3487 **/ 3487 **/
3488 int end_that_request_first(struct request *req, int uptodate, int nr_sectors) 3488 int end_that_request_first(struct request *req, int uptodate, int nr_sectors)
3489 { 3489 {
3490 return __end_that_request_first(req, uptodate, nr_sectors << 9); 3490 return __end_that_request_first(req, uptodate, nr_sectors << 9);
3491 } 3491 }
3492 3492
3493 EXPORT_SYMBOL(end_that_request_first); 3493 EXPORT_SYMBOL(end_that_request_first);
3494 3494
3495 /** 3495 /**
3496 * end_that_request_chunk - end I/O on a request 3496 * end_that_request_chunk - end I/O on a request
3497 * @req: the request being processed 3497 * @req: the request being processed
3498 * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error 3498 * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error
3499 * @nr_bytes: number of bytes to complete 3499 * @nr_bytes: number of bytes to complete
3500 * 3500 *
3501 * Description: 3501 * Description:
3502 * Ends I/O on a number of bytes attached to @req, and sets it up 3502 * Ends I/O on a number of bytes attached to @req, and sets it up
3503 * for the next range of segments (if any). Like end_that_request_first(), 3503 * for the next range of segments (if any). Like end_that_request_first(),
3504 * but deals with bytes instead of sectors. 3504 * but deals with bytes instead of sectors.
3505 * 3505 *
3506 * Return: 3506 * Return:
3507 * 0 - we are done with this request, call end_that_request_last() 3507 * 0 - we are done with this request, call end_that_request_last()
3508 * 1 - still buffers pending for this request 3508 * 1 - still buffers pending for this request
3509 **/ 3509 **/
3510 int end_that_request_chunk(struct request *req, int uptodate, int nr_bytes) 3510 int end_that_request_chunk(struct request *req, int uptodate, int nr_bytes)
3511 { 3511 {
3512 return __end_that_request_first(req, uptodate, nr_bytes); 3512 return __end_that_request_first(req, uptodate, nr_bytes);
3513 } 3513 }
3514 3514
3515 EXPORT_SYMBOL(end_that_request_chunk); 3515 EXPORT_SYMBOL(end_that_request_chunk);
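A hedged sketch (not from the patch) of how a driver typically pairs the two stages above for a partial transfer: end_that_request_chunk() for the data part, then, only when it returns 0, dequeue and end_that_request_last() under the queue lock, mirroring what __end_request() below does. bytes_done is an assumption, and the sketch assumes the driver left the request on the queue until now.

#include <linux/blkdev.h>

static void my_finish_some_bytes(struct request_queue *q, struct request *rq,
                                 int uptodate, unsigned int bytes_done)
{
        unsigned long flags;

        if (end_that_request_chunk(rq, uptodate, bytes_done))
                return;                 /* buffers still pending, rq lives on */

        /* everything transferred: final bookkeeping needs the queue lock */
        spin_lock_irqsave(q->queue_lock, flags);
        blkdev_dequeue_request(rq);     /* rq was left queued until now */
        add_disk_randomness(rq->rq_disk);
        end_that_request_last(rq, uptodate);
        spin_unlock_irqrestore(q->queue_lock, flags);
}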
3516 3516
3517 /* 3517 /*
3518 * splice the completion data to a local structure and complete the 3518 * splice the completion data to a local structure and complete the
3519 * queued requests from there 3519 * queued requests from there
3520 */ 3520 */
3521 static void blk_done_softirq(struct softirq_action *h) 3521 static void blk_done_softirq(struct softirq_action *h)
3522 { 3522 {
3523 struct list_head *cpu_list, local_list; 3523 struct list_head *cpu_list, local_list;
3524 3524
3525 local_irq_disable(); 3525 local_irq_disable();
3526 cpu_list = &__get_cpu_var(blk_cpu_done); 3526 cpu_list = &__get_cpu_var(blk_cpu_done);
3527 list_replace_init(cpu_list, &local_list); 3527 list_replace_init(cpu_list, &local_list);
3528 local_irq_enable(); 3528 local_irq_enable();
3529 3529
3530 while (!list_empty(&local_list)) { 3530 while (!list_empty(&local_list)) {
3531 struct request *rq = list_entry(local_list.next, struct request, donelist); 3531 struct request *rq = list_entry(local_list.next, struct request, donelist);
3532 3532
3533 list_del_init(&rq->donelist); 3533 list_del_init(&rq->donelist);
3534 rq->q->softirq_done_fn(rq); 3534 rq->q->softirq_done_fn(rq);
3535 } 3535 }
3536 } 3536 }
3537 3537
3538 static int __cpuinit blk_cpu_notify(struct notifier_block *self, unsigned long action, 3538 static int __cpuinit blk_cpu_notify(struct notifier_block *self, unsigned long action,
3539 void *hcpu) 3539 void *hcpu)
3540 { 3540 {
3541 /* 3541 /*
3542 * If a CPU goes away, splice its entries to the current CPU 3542 * If a CPU goes away, splice its entries to the current CPU
3543 * and trigger a run of the softirq 3543 * and trigger a run of the softirq
3544 */ 3544 */
3545 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { 3545 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
3546 int cpu = (unsigned long) hcpu; 3546 int cpu = (unsigned long) hcpu;
3547 3547
3548 local_irq_disable(); 3548 local_irq_disable();
3549 list_splice_init(&per_cpu(blk_cpu_done, cpu), 3549 list_splice_init(&per_cpu(blk_cpu_done, cpu),
3550 &__get_cpu_var(blk_cpu_done)); 3550 &__get_cpu_var(blk_cpu_done));
3551 raise_softirq_irqoff(BLOCK_SOFTIRQ); 3551 raise_softirq_irqoff(BLOCK_SOFTIRQ);
3552 local_irq_enable(); 3552 local_irq_enable();
3553 } 3553 }
3554 3554
3555 return NOTIFY_OK; 3555 return NOTIFY_OK;
3556 } 3556 }
3557 3557
3558 3558
3559 static struct notifier_block blk_cpu_notifier __cpuinitdata = { 3559 static struct notifier_block blk_cpu_notifier __cpuinitdata = {
3560 .notifier_call = blk_cpu_notify, 3560 .notifier_call = blk_cpu_notify,
3561 }; 3561 };
3562 3562
3563 /** 3563 /**
3564 * blk_complete_request - end I/O on a request 3564 * blk_complete_request - end I/O on a request
3565 * @req: the request being processed 3565 * @req: the request being processed
3566 * 3566 *
3567 * Description: 3567 * Description:
3568 * Ends all I/O on a request. It does not handle partial completions, 3568 * Ends all I/O on a request. It does not handle partial completions,
3569 * unless the driver actually implements this in its completion callback 3569 * unless the driver actually implements this in its completion callback
3570 * through requeueing. The actual completion happens out-of-order, 3570 * through requeueing. The actual completion happens out-of-order,
3571 * through a softirq handler. The user must have registered a completion 3571 * through a softirq handler. The user must have registered a completion
3572 * callback through blk_queue_softirq_done(). 3572 * callback through blk_queue_softirq_done().
3573 **/ 3573 **/
3574 3574
3575 void blk_complete_request(struct request *req) 3575 void blk_complete_request(struct request *req)
3576 { 3576 {
3577 struct list_head *cpu_list; 3577 struct list_head *cpu_list;
3578 unsigned long flags; 3578 unsigned long flags;
3579 3579
3580 BUG_ON(!req->q->softirq_done_fn); 3580 BUG_ON(!req->q->softirq_done_fn);
3581 3581
3582 local_irq_save(flags); 3582 local_irq_save(flags);
3583 3583
3584 cpu_list = &__get_cpu_var(blk_cpu_done); 3584 cpu_list = &__get_cpu_var(blk_cpu_done);
3585 list_add_tail(&req->donelist, cpu_list); 3585 list_add_tail(&req->donelist, cpu_list);
3586 raise_softirq_irqoff(BLOCK_SOFTIRQ); 3586 raise_softirq_irqoff(BLOCK_SOFTIRQ);
3587 3587
3588 local_irq_restore(flags); 3588 local_irq_restore(flags);
3589 } 3589 }
3590 3590
3591 EXPORT_SYMBOL(blk_complete_request); 3591 EXPORT_SYMBOL(blk_complete_request);
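A hedged sketch (not in this patch) of the usual pairing: the driver registers a softirq completion handler once at setup with blk_queue_softirq_done(), its hard-IRQ handler only calls blk_complete_request(), and the heavier work runs later in BLOCK_SOFTIRQ context. All my_* names and the per-device structure are invented; the request is assumed to be a filesystem request that the driver's request_fn already dequeued.

#include <linux/blkdev.h>
#include <linux/interrupt.h>

struct my_dev {
        struct request_queue *queue;
        struct request *current_rq;     /* request the hardware is working on */
};

static void my_softirq_done(struct request *rq)
{
        struct request_queue *q = rq->q;
        unsigned long flags;

        /* data part, safe without the queue lock; fs request: all sectors */
        end_that_request_chunk(rq, 1, rq->hard_nr_sectors << 9);

        /* final bookkeeping needs the queue lock (see end_that_request_last) */
        spin_lock_irqsave(q->queue_lock, flags);
        end_that_request_last(rq, 1);
        spin_unlock_irqrestore(q->queue_lock, flags);
}

static irqreturn_t my_isr(int irq, void *data)
{
        struct my_dev *dev = data;

        /* cheap: just queues the request for the BLOCK_SOFTIRQ handler */
        blk_complete_request(dev->current_rq);
        return IRQ_HANDLED;
}

/* at probe time: blk_queue_softirq_done(dev->queue, my_softirq_done); */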
3592 3592
3593 /* 3593 /*
3594 * queue lock must be held 3594 * queue lock must be held
3595 */ 3595 */
3596 void end_that_request_last(struct request *req, int uptodate) 3596 void end_that_request_last(struct request *req, int uptodate)
3597 { 3597 {
3598 struct gendisk *disk = req->rq_disk; 3598 struct gendisk *disk = req->rq_disk;
3599 int error; 3599 int error;
3600 3600
3601 /* 3601 /*
3602 * extend uptodate bool to allow < 0 value to be direct io error 3602 * extend uptodate bool to allow < 0 value to be direct io error
3603 */ 3603 */
3604 error = 0; 3604 error = 0;
3605 if (end_io_error(uptodate)) 3605 if (end_io_error(uptodate))
3606 error = !uptodate ? -EIO : uptodate; 3606 error = !uptodate ? -EIO : uptodate;
3607 3607
3608 if (unlikely(laptop_mode) && blk_fs_request(req)) 3608 if (unlikely(laptop_mode) && blk_fs_request(req))
3609 laptop_io_completion(); 3609 laptop_io_completion();
3610 3610
3611 /* 3611 /*
3612 * Account IO completion. bar_rq isn't accounted as a normal 3612 * Account IO completion. bar_rq isn't accounted as a normal
3613 * IO on queueing nor completion. Accounting the containing 3613 * IO on queueing nor completion. Accounting the containing
3614 * request is enough. 3614 * request is enough.
3615 */ 3615 */
3616 if (disk && blk_fs_request(req) && req != &req->q->bar_rq) { 3616 if (disk && blk_fs_request(req) && req != &req->q->bar_rq) {
3617 unsigned long duration = jiffies - req->start_time; 3617 unsigned long duration = jiffies - req->start_time;
3618 const int rw = rq_data_dir(req); 3618 const int rw = rq_data_dir(req);
3619 3619
3620 __disk_stat_inc(disk, ios[rw]); 3620 __disk_stat_inc(disk, ios[rw]);
3621 __disk_stat_add(disk, ticks[rw], duration); 3621 __disk_stat_add(disk, ticks[rw], duration);
3622 disk_round_stats(disk); 3622 disk_round_stats(disk);
3623 disk->in_flight--; 3623 disk->in_flight--;
3624 } 3624 }
3625 if (req->end_io) 3625 if (req->end_io)
3626 req->end_io(req, error); 3626 req->end_io(req, error);
3627 else 3627 else
3628 __blk_put_request(req->q, req); 3628 __blk_put_request(req->q, req);
3629 } 3629 }
3630 3630
3631 EXPORT_SYMBOL(end_that_request_last); 3631 EXPORT_SYMBOL(end_that_request_last);
3632 3632
3633 void end_request(struct request *req, int uptodate) 3633 static inline void __end_request(struct request *rq, int uptodate,
3634 unsigned int nr_bytes, int dequeue)
3634 { 3635 {
3635 if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) { 3636 if (!end_that_request_chunk(rq, uptodate, nr_bytes)) {
3636 add_disk_randomness(req->rq_disk); 3637 if (dequeue)
3637 blkdev_dequeue_request(req); 3638 blkdev_dequeue_request(rq);
3638 end_that_request_last(req, uptodate); 3639 add_disk_randomness(rq->rq_disk);
3640 end_that_request_last(rq, uptodate);
3639 } 3641 }
3640 } 3642 }
3641 3643
3644 static unsigned int rq_byte_size(struct request *rq)
3645 {
3646 if (blk_fs_request(rq))
3647 return rq->hard_nr_sectors << 9;
3648
3649 return rq->data_len;
3650 }
3651
3652 /**
3653 * end_queued_request - end all I/O on a queued request
3654 * @rq: the request being processed
3655 * @uptodate: error value or 0/1 uptodate flag
3656 *
3657 * Description:
3658 * Ends all I/O on a request, and removes it from the block layer queues.
3659 * Not suitable for normal IO completion, unless the driver still has
3660 * the request attached to the block layer.
3661 *
3662 **/
3663 void end_queued_request(struct request *rq, int uptodate)
3664 {
3665 __end_request(rq, uptodate, rq_byte_size(rq), 1);
3666 }
3667 EXPORT_SYMBOL(end_queued_request);
3668
3669 /**
3670 * end_dequeued_request - end all I/O on a dequeued request
3671 * @rq: the request being processed
3672 * @uptodate: error value or 0/1 uptodate flag
3673 *
3674 * Description:
3675 * Ends all I/O on a request. The request must already have been
3676 * dequeued using blkdev_dequeue_request(), as is normally the case
3677 * for most drivers.
3678 *
3679 **/
3680 void end_dequeued_request(struct request *rq, int uptodate)
3681 {
3682 __end_request(rq, uptodate, rq_byte_size(rq), 0);
3683 }
3684 EXPORT_SYMBOL(end_dequeued_request);
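As an illustration of why the dequeued variant exists (a sketch under assumptions, not code from the patch): many drivers pull requests off the queue in their request_fn before starting the transfer, and for those the queue-oriented helpers are the wrong tool. my_xfer() is a hypothetical placeholder for the real data transfer.

#include <linux/blkdev.h>

static int my_xfer(struct request *rq);         /* hypothetical: does the I/O */

static void my_request_fn(struct request_queue *q)
{
        struct request *rq;

        /* request_fn is entered with q->queue_lock held, irqs off */
        while ((rq = elv_next_request(q)) != NULL) {
                int err;

                blkdev_dequeue_request(rq);     /* rq is now off the queue */

                spin_unlock_irq(q->queue_lock);
                err = my_xfer(rq);              /* actual transfer */
                spin_lock_irq(q->queue_lock);

                /* rq was dequeued above, so use the dequeued helper */
                end_dequeued_request(rq, err ? 0 : 1);
        }
}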
3685
3686
3687 /**
3688 * end_request - end I/O on the current segment of the request
3689 * @rq: the request being processed
3690 * @uptodate: error value or 0/1 uptodate flag
3691 *
3692 * Description:
3693 * Ends I/O on the current segment of a request. If that is the only
3694 * remaining segment, the request is also completed and freed.
3695 *
3696 * This is a remnant of how older block drivers handled IO completions.
3697 * Modern drivers typically end IO on the full request in one go, unless
3698 * they have a residual value to account for. For that case this function
3699 * isn't really useful, unless the residual just happens to be the
3700 * full current segment. In other words, don't use this function in new
3701 * code. Either use end_queued_request()/end_dequeued_request(), or
3702 * end_that_request_chunk() (along with end_that_request_last()) for
3703 * partial completions.
3704 *
3705 **/
3706 void end_request(struct request *req, int uptodate)
3707 {
3708 __end_request(req, uptodate, req->hard_cur_sectors << 9, 1);
3709 }
3642 EXPORT_SYMBOL(end_request); 3710 EXPORT_SYMBOL(end_request);
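For contrast, a sketch (not from the patch) of the older segment-at-a-time style that end_request() is kept around for: the request stays on the queue, and each pass moves req->current_nr_sectors starting at req->sector through req->buffer. my_do_segment() is a hypothetical placeholder.

#include <linux/blkdev.h>

static void my_do_segment(char *buf, sector_t sector,
                          unsigned int nr_sectors, int write);  /* hypothetical */

static void my_old_style_request_fn(struct request_queue *q)
{
        struct request *req;

        while ((req = elv_next_request(q)) != NULL) {
                if (!blk_fs_request(req)) {
                        end_request(req, 0);    /* fail non-fs requests */
                        continue;
                }
                my_do_segment(req->buffer, req->sector,
                              req->current_nr_sectors, rq_data_dir(req));
                /* advances to the next segment, or dequeues and frees req */
                end_request(req, 1);
        }
}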
3643 3711
3644 static void blk_rq_bio_prep(struct request_queue *q, struct request *rq, 3712 static void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
3645 struct bio *bio) 3713 struct bio *bio)
3646 { 3714 {
3647 /* first two bits are identical in rq->cmd_flags and bio->bi_rw */ 3715 /* first two bits are identical in rq->cmd_flags and bio->bi_rw */
3648 rq->cmd_flags |= (bio->bi_rw & 3); 3716 rq->cmd_flags |= (bio->bi_rw & 3);
3649 3717
3650 rq->nr_phys_segments = bio_phys_segments(q, bio); 3718 rq->nr_phys_segments = bio_phys_segments(q, bio);
3651 rq->nr_hw_segments = bio_hw_segments(q, bio); 3719 rq->nr_hw_segments = bio_hw_segments(q, bio);
3652 rq->current_nr_sectors = bio_cur_sectors(bio); 3720 rq->current_nr_sectors = bio_cur_sectors(bio);
3653 rq->hard_cur_sectors = rq->current_nr_sectors; 3721 rq->hard_cur_sectors = rq->current_nr_sectors;
3654 rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio); 3722 rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
3655 rq->buffer = bio_data(bio); 3723 rq->buffer = bio_data(bio);
3656 rq->data_len = bio->bi_size; 3724 rq->data_len = bio->bi_size;
3657 3725
3658 rq->bio = rq->biotail = bio; 3726 rq->bio = rq->biotail = bio;
3659 3727
3660 if (bio->bi_bdev) 3728 if (bio->bi_bdev)
3661 rq->rq_disk = bio->bi_bdev->bd_disk; 3729 rq->rq_disk = bio->bi_bdev->bd_disk;
3662 } 3730 }
3663 3731
3664 int kblockd_schedule_work(struct work_struct *work) 3732 int kblockd_schedule_work(struct work_struct *work)
3665 { 3733 {
3666 return queue_work(kblockd_workqueue, work); 3734 return queue_work(kblockd_workqueue, work);
3667 } 3735 }
3668 3736
3669 EXPORT_SYMBOL(kblockd_schedule_work); 3737 EXPORT_SYMBOL(kblockd_schedule_work);
3670 3738
3671 void kblockd_flush_work(struct work_struct *work) 3739 void kblockd_flush_work(struct work_struct *work)
3672 { 3740 {
3673 cancel_work_sync(work); 3741 cancel_work_sync(work);
3674 } 3742 }
3675 EXPORT_SYMBOL(kblockd_flush_work); 3743 EXPORT_SYMBOL(kblockd_flush_work);
3676 3744
3677 int __init blk_dev_init(void) 3745 int __init blk_dev_init(void)
3678 { 3746 {
3679 int i; 3747 int i;
3680 3748
3681 kblockd_workqueue = create_workqueue("kblockd"); 3749 kblockd_workqueue = create_workqueue("kblockd");
3682 if (!kblockd_workqueue) 3750 if (!kblockd_workqueue)
3683 panic("Failed to create kblockd\n"); 3751 panic("Failed to create kblockd\n");
3684 3752
3685 request_cachep = kmem_cache_create("blkdev_requests", 3753 request_cachep = kmem_cache_create("blkdev_requests",
3686 sizeof(struct request), 0, SLAB_PANIC, NULL); 3754 sizeof(struct request), 0, SLAB_PANIC, NULL);
3687 3755
3688 requestq_cachep = kmem_cache_create("blkdev_queue", 3756 requestq_cachep = kmem_cache_create("blkdev_queue",
3689 sizeof(struct request_queue), 0, SLAB_PANIC, NULL); 3757 sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
3690 3758
3691 iocontext_cachep = kmem_cache_create("blkdev_ioc", 3759 iocontext_cachep = kmem_cache_create("blkdev_ioc",
3692 sizeof(struct io_context), 0, SLAB_PANIC, NULL); 3760 sizeof(struct io_context), 0, SLAB_PANIC, NULL);
3693 3761
3694 for_each_possible_cpu(i) 3762 for_each_possible_cpu(i)
3695 INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); 3763 INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
3696 3764
3697 open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL); 3765 open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL);
3698 register_hotcpu_notifier(&blk_cpu_notifier); 3766 register_hotcpu_notifier(&blk_cpu_notifier);
3699 3767
3700 blk_max_low_pfn = max_low_pfn - 1; 3768 blk_max_low_pfn = max_low_pfn - 1;
3701 blk_max_pfn = max_pfn - 1; 3769 blk_max_pfn = max_pfn - 1;
3702 3770
3703 return 0; 3771 return 0;
3704 } 3772 }
3705 3773
3706 /* 3774 /*
3707 * IO Context helper functions 3775 * IO Context helper functions
3708 */ 3776 */
3709 void put_io_context(struct io_context *ioc) 3777 void put_io_context(struct io_context *ioc)
3710 { 3778 {
3711 if (ioc == NULL) 3779 if (ioc == NULL)
3712 return; 3780 return;
3713 3781
3714 BUG_ON(atomic_read(&ioc->refcount) == 0); 3782 BUG_ON(atomic_read(&ioc->refcount) == 0);
3715 3783
3716 if (atomic_dec_and_test(&ioc->refcount)) { 3784 if (atomic_dec_and_test(&ioc->refcount)) {
3717 struct cfq_io_context *cic; 3785 struct cfq_io_context *cic;
3718 3786
3719 rcu_read_lock(); 3787 rcu_read_lock();
3720 if (ioc->aic && ioc->aic->dtor) 3788 if (ioc->aic && ioc->aic->dtor)
3721 ioc->aic->dtor(ioc->aic); 3789 ioc->aic->dtor(ioc->aic);
3722 if (ioc->cic_root.rb_node != NULL) { 3790 if (ioc->cic_root.rb_node != NULL) {
3723 struct rb_node *n = rb_first(&ioc->cic_root); 3791 struct rb_node *n = rb_first(&ioc->cic_root);
3724 3792
3725 cic = rb_entry(n, struct cfq_io_context, rb_node); 3793 cic = rb_entry(n, struct cfq_io_context, rb_node);
3726 cic->dtor(ioc); 3794 cic->dtor(ioc);
3727 } 3795 }
3728 rcu_read_unlock(); 3796 rcu_read_unlock();
3729 3797
3730 kmem_cache_free(iocontext_cachep, ioc); 3798 kmem_cache_free(iocontext_cachep, ioc);
3731 } 3799 }
3732 } 3800 }
3733 EXPORT_SYMBOL(put_io_context); 3801 EXPORT_SYMBOL(put_io_context);
3734 3802
3735 /* Called by the exiting task */ 3803 /* Called by the exiting task */
3736 void exit_io_context(void) 3804 void exit_io_context(void)
3737 { 3805 {
3738 struct io_context *ioc; 3806 struct io_context *ioc;
3739 struct cfq_io_context *cic; 3807 struct cfq_io_context *cic;
3740 3808
3741 task_lock(current); 3809 task_lock(current);
3742 ioc = current->io_context; 3810 ioc = current->io_context;
3743 current->io_context = NULL; 3811 current->io_context = NULL;
3744 task_unlock(current); 3812 task_unlock(current);
3745 3813
3746 ioc->task = NULL; 3814 ioc->task = NULL;
3747 if (ioc->aic && ioc->aic->exit) 3815 if (ioc->aic && ioc->aic->exit)
3748 ioc->aic->exit(ioc->aic); 3816 ioc->aic->exit(ioc->aic);
3749 if (ioc->cic_root.rb_node != NULL) { 3817 if (ioc->cic_root.rb_node != NULL) {
3750 cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node); 3818 cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node);
3751 cic->exit(ioc); 3819 cic->exit(ioc);
3752 } 3820 }
3753 3821
3754 put_io_context(ioc); 3822 put_io_context(ioc);
3755 } 3823 }
3756 3824
3757 /* 3825 /*
3758 * If the current task has no IO context then create one and initialise it. 3826 * If the current task has no IO context then create one and initialise it.
3759 * Otherwise, return its existing IO context. 3827 * Otherwise, return its existing IO context.
3760 * 3828 *
3761 * This returned IO context doesn't have a specifically elevated refcount, 3829 * This returned IO context doesn't have a specifically elevated refcount,
3762 * but since the current task itself holds a reference, the context can be 3830 * but since the current task itself holds a reference, the context can be
3763 * used in general code, so long as it stays within `current` context. 3831 * used in general code, so long as it stays within `current` context.
3764 */ 3832 */
3765 static struct io_context *current_io_context(gfp_t gfp_flags, int node) 3833 static struct io_context *current_io_context(gfp_t gfp_flags, int node)
3766 { 3834 {
3767 struct task_struct *tsk = current; 3835 struct task_struct *tsk = current;
3768 struct io_context *ret; 3836 struct io_context *ret;
3769 3837
3770 ret = tsk->io_context; 3838 ret = tsk->io_context;
3771 if (likely(ret)) 3839 if (likely(ret))
3772 return ret; 3840 return ret;
3773 3841
3774 ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node); 3842 ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
3775 if (ret) { 3843 if (ret) {
3776 atomic_set(&ret->refcount, 1); 3844 atomic_set(&ret->refcount, 1);
3777 ret->task = current; 3845 ret->task = current;
3778 ret->ioprio_changed = 0; 3846 ret->ioprio_changed = 0;
3779 ret->last_waited = jiffies; /* doesn't matter... */ 3847 ret->last_waited = jiffies; /* doesn't matter... */
3780 ret->nr_batch_requests = 0; /* because this is 0 */ 3848 ret->nr_batch_requests = 0; /* because this is 0 */
3781 ret->aic = NULL; 3849 ret->aic = NULL;
3782 ret->cic_root.rb_node = NULL; 3850 ret->cic_root.rb_node = NULL;
3783 ret->ioc_data = NULL; 3851 ret->ioc_data = NULL;
3784 /* make sure set_task_ioprio() sees the settings above */ 3852 /* make sure set_task_ioprio() sees the settings above */
3785 smp_wmb(); 3853 smp_wmb();
3786 tsk->io_context = ret; 3854 tsk->io_context = ret;
3787 } 3855 }
3788 3856
3789 return ret; 3857 return ret;
3790 } 3858 }
3791 3859
3792 /* 3860 /*
3793 * If the current task has no IO context then create one and initialise it. 3861 * If the current task has no IO context then create one and initialise it.
3794 * If it does have a context, take a ref on it. 3862 * If it does have a context, take a ref on it.
3795 * 3863 *
3796 * This is always called in the context of the task which submitted the I/O. 3864 * This is always called in the context of the task which submitted the I/O.
3797 */ 3865 */
3798 struct io_context *get_io_context(gfp_t gfp_flags, int node) 3866 struct io_context *get_io_context(gfp_t gfp_flags, int node)
3799 { 3867 {
3800 struct io_context *ret; 3868 struct io_context *ret;
3801 ret = current_io_context(gfp_flags, node); 3869 ret = current_io_context(gfp_flags, node);
3802 if (likely(ret)) 3870 if (likely(ret))
3803 atomic_inc(&ret->refcount); 3871 atomic_inc(&ret->refcount);
3804 return ret; 3872 return ret;
3805 } 3873 }
3806 EXPORT_SYMBOL(get_io_context); 3874 EXPORT_SYMBOL(get_io_context);
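A short sketch (assumptions only, not from the patch) of the pairing implied above: get_io_context() takes its own reference and must be matched by put_io_context(), whereas a context obtained through current_io_context() is merely borrowed from the task.

#include <linux/blkdev.h>

static void my_peek_at_io_context(void)
{
        struct io_context *ioc = get_io_context(GFP_KERNEL, -1);

        if (!ioc)
                return;

        /* ... inspect ioc (e.g. ioc->nr_batch_requests) ... */

        put_io_context(ioc);    /* drop the reference get_io_context() took */
}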
3807 3875
3808 void copy_io_context(struct io_context **pdst, struct io_context **psrc) 3876 void copy_io_context(struct io_context **pdst, struct io_context **psrc)
3809 { 3877 {
3810 struct io_context *src = *psrc; 3878 struct io_context *src = *psrc;
3811 struct io_context *dst = *pdst; 3879 struct io_context *dst = *pdst;
3812 3880
3813 if (src) { 3881 if (src) {
3814 BUG_ON(atomic_read(&src->refcount) == 0); 3882 BUG_ON(atomic_read(&src->refcount) == 0);
3815 atomic_inc(&src->refcount); 3883 atomic_inc(&src->refcount);
3816 put_io_context(dst); 3884 put_io_context(dst);
3817 *pdst = src; 3885 *pdst = src;
3818 } 3886 }
3819 } 3887 }
3820 EXPORT_SYMBOL(copy_io_context); 3888 EXPORT_SYMBOL(copy_io_context);
3821 3889
3822 void swap_io_context(struct io_context **ioc1, struct io_context **ioc2) 3890 void swap_io_context(struct io_context **ioc1, struct io_context **ioc2)
3823 { 3891 {
3824 struct io_context *temp; 3892 struct io_context *temp;
3825 temp = *ioc1; 3893 temp = *ioc1;
3826 *ioc1 = *ioc2; 3894 *ioc1 = *ioc2;
3827 *ioc2 = temp; 3895 *ioc2 = temp;
3828 } 3896 }
3829 EXPORT_SYMBOL(swap_io_context); 3897 EXPORT_SYMBOL(swap_io_context);
3830 3898
3831 /* 3899 /*
3832 * sysfs parts below 3900 * sysfs parts below
3833 */ 3901 */
3834 struct queue_sysfs_entry { 3902 struct queue_sysfs_entry {
3835 struct attribute attr; 3903 struct attribute attr;
3836 ssize_t (*show)(struct request_queue *, char *); 3904 ssize_t (*show)(struct request_queue *, char *);
3837 ssize_t (*store)(struct request_queue *, const char *, size_t); 3905 ssize_t (*store)(struct request_queue *, const char *, size_t);
3838 }; 3906 };
3839 3907
3840 static ssize_t 3908 static ssize_t
3841 queue_var_show(unsigned int var, char *page) 3909 queue_var_show(unsigned int var, char *page)
3842 { 3910 {
3843 return sprintf(page, "%d\n", var); 3911 return sprintf(page, "%d\n", var);
3844 } 3912 }
3845 3913
3846 static ssize_t 3914 static ssize_t
3847 queue_var_store(unsigned long *var, const char *page, size_t count) 3915 queue_var_store(unsigned long *var, const char *page, size_t count)
3848 { 3916 {
3849 char *p = (char *) page; 3917 char *p = (char *) page;
3850 3918
3851 *var = simple_strtoul(p, &p, 10); 3919 *var = simple_strtoul(p, &p, 10);
3852 return count; 3920 return count;
3853 } 3921 }
3854 3922
3855 static ssize_t queue_requests_show(struct request_queue *q, char *page) 3923 static ssize_t queue_requests_show(struct request_queue *q, char *page)
3856 { 3924 {
3857 return queue_var_show(q->nr_requests, (page)); 3925 return queue_var_show(q->nr_requests, (page));
3858 } 3926 }
3859 3927
3860 static ssize_t 3928 static ssize_t
3861 queue_requests_store(struct request_queue *q, const char *page, size_t count) 3929 queue_requests_store(struct request_queue *q, const char *page, size_t count)
3862 { 3930 {
3863 struct request_list *rl = &q->rq; 3931 struct request_list *rl = &q->rq;
3864 unsigned long nr; 3932 unsigned long nr;
3865 int ret = queue_var_store(&nr, page, count); 3933 int ret = queue_var_store(&nr, page, count);
3866 if (nr < BLKDEV_MIN_RQ) 3934 if (nr < BLKDEV_MIN_RQ)
3867 nr = BLKDEV_MIN_RQ; 3935 nr = BLKDEV_MIN_RQ;
3868 3936
3869 spin_lock_irq(q->queue_lock); 3937 spin_lock_irq(q->queue_lock);
3870 q->nr_requests = nr; 3938 q->nr_requests = nr;
3871 blk_queue_congestion_threshold(q); 3939 blk_queue_congestion_threshold(q);
3872 3940
3873 if (rl->count[READ] >= queue_congestion_on_threshold(q)) 3941 if (rl->count[READ] >= queue_congestion_on_threshold(q))
3874 blk_set_queue_congested(q, READ); 3942 blk_set_queue_congested(q, READ);
3875 else if (rl->count[READ] < queue_congestion_off_threshold(q)) 3943 else if (rl->count[READ] < queue_congestion_off_threshold(q))
3876 blk_clear_queue_congested(q, READ); 3944 blk_clear_queue_congested(q, READ);
3877 3945
3878 if (rl->count[WRITE] >= queue_congestion_on_threshold(q)) 3946 if (rl->count[WRITE] >= queue_congestion_on_threshold(q))
3879 blk_set_queue_congested(q, WRITE); 3947 blk_set_queue_congested(q, WRITE);
3880 else if (rl->count[WRITE] < queue_congestion_off_threshold(q)) 3948 else if (rl->count[WRITE] < queue_congestion_off_threshold(q))
3881 blk_clear_queue_congested(q, WRITE); 3949 blk_clear_queue_congested(q, WRITE);
3882 3950
3883 if (rl->count[READ] >= q->nr_requests) { 3951 if (rl->count[READ] >= q->nr_requests) {
3884 blk_set_queue_full(q, READ); 3952 blk_set_queue_full(q, READ);
3885 } else if (rl->count[READ]+1 <= q->nr_requests) { 3953 } else if (rl->count[READ]+1 <= q->nr_requests) {
3886 blk_clear_queue_full(q, READ); 3954 blk_clear_queue_full(q, READ);
3887 wake_up(&rl->wait[READ]); 3955 wake_up(&rl->wait[READ]);
3888 } 3956 }
3889 3957
3890 if (rl->count[WRITE] >= q->nr_requests) { 3958 if (rl->count[WRITE] >= q->nr_requests) {
3891 blk_set_queue_full(q, WRITE); 3959 blk_set_queue_full(q, WRITE);
3892 } else if (rl->count[WRITE]+1 <= q->nr_requests) { 3960 } else if (rl->count[WRITE]+1 <= q->nr_requests) {
3893 blk_clear_queue_full(q, WRITE); 3961 blk_clear_queue_full(q, WRITE);
3894 wake_up(&rl->wait[WRITE]); 3962 wake_up(&rl->wait[WRITE]);
3895 } 3963 }
3896 spin_unlock_irq(q->queue_lock); 3964 spin_unlock_irq(q->queue_lock);
3897 return ret; 3965 return ret;
3898 } 3966 }
3899 3967
3900 static ssize_t queue_ra_show(struct request_queue *q, char *page) 3968 static ssize_t queue_ra_show(struct request_queue *q, char *page)
3901 { 3969 {
3902 int ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10); 3970 int ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10);
3903 3971
3904 return queue_var_show(ra_kb, (page)); 3972 return queue_var_show(ra_kb, (page));
3905 } 3973 }
3906 3974
3907 static ssize_t 3975 static ssize_t
3908 queue_ra_store(struct request_queue *q, const char *page, size_t count) 3976 queue_ra_store(struct request_queue *q, const char *page, size_t count)
3909 { 3977 {
3910 unsigned long ra_kb; 3978 unsigned long ra_kb;
3911 ssize_t ret = queue_var_store(&ra_kb, page, count); 3979 ssize_t ret = queue_var_store(&ra_kb, page, count);
3912 3980
3913 spin_lock_irq(q->queue_lock); 3981 spin_lock_irq(q->queue_lock);
3914 q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10); 3982 q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10);
3915 spin_unlock_irq(q->queue_lock); 3983 spin_unlock_irq(q->queue_lock);
3916 3984
3917 return ret; 3985 return ret;
3918 } 3986 }
3919 3987
3920 static ssize_t queue_max_sectors_show(struct request_queue *q, char *page) 3988 static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)
3921 { 3989 {
3922 int max_sectors_kb = q->max_sectors >> 1; 3990 int max_sectors_kb = q->max_sectors >> 1;
3923 3991
3924 return queue_var_show(max_sectors_kb, (page)); 3992 return queue_var_show(max_sectors_kb, (page));
3925 } 3993 }
3926 3994
3927 static ssize_t 3995 static ssize_t
3928 queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) 3996 queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
3929 { 3997 {
3930 unsigned long max_sectors_kb, 3998 unsigned long max_sectors_kb,
3931 max_hw_sectors_kb = q->max_hw_sectors >> 1, 3999 max_hw_sectors_kb = q->max_hw_sectors >> 1,
3932 page_kb = 1 << (PAGE_CACHE_SHIFT - 10); 4000 page_kb = 1 << (PAGE_CACHE_SHIFT - 10);
3933 ssize_t ret = queue_var_store(&max_sectors_kb, page, count); 4001 ssize_t ret = queue_var_store(&max_sectors_kb, page, count);
3934 int ra_kb; 4002 int ra_kb;
3935 4003
3936 if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb) 4004 if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb)
3937 return -EINVAL; 4005 return -EINVAL;
3938 /* 4006 /*
3939 * Take the queue lock to update the readahead and max_sectors 4007 * Take the queue lock to update the readahead and max_sectors
3940 * values synchronously: 4008 * values synchronously:
3941 */ 4009 */
3942 spin_lock_irq(q->queue_lock); 4010 spin_lock_irq(q->queue_lock);
3943 /* 4011 /*
3944 * Trim readahead window as well, if necessary: 4012 * Trim readahead window as well, if necessary:
3945 */ 4013 */
3946 ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10); 4014 ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10);
3947 if (ra_kb > max_sectors_kb) 4015 if (ra_kb > max_sectors_kb)
3948 q->backing_dev_info.ra_pages = 4016 q->backing_dev_info.ra_pages =
3949 max_sectors_kb >> (PAGE_CACHE_SHIFT - 10); 4017 max_sectors_kb >> (PAGE_CACHE_SHIFT - 10);
3950 4018
3951 q->max_sectors = max_sectors_kb << 1; 4019 q->max_sectors = max_sectors_kb << 1;
3952 spin_unlock_irq(q->queue_lock); 4020 spin_unlock_irq(q->queue_lock);
3953 4021
3954 return ret; 4022 return ret;
3955 } 4023 }
3956 4024
3957 static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page) 4025 static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
3958 { 4026 {
3959 int max_hw_sectors_kb = q->max_hw_sectors >> 1; 4027 int max_hw_sectors_kb = q->max_hw_sectors >> 1;
3960 4028
3961 return queue_var_show(max_hw_sectors_kb, (page)); 4029 return queue_var_show(max_hw_sectors_kb, (page));
3962 } 4030 }
3963 4031
3964 4032
3965 static struct queue_sysfs_entry queue_requests_entry = { 4033 static struct queue_sysfs_entry queue_requests_entry = {
3966 .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, 4034 .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
3967 .show = queue_requests_show, 4035 .show = queue_requests_show,
3968 .store = queue_requests_store, 4036 .store = queue_requests_store,
3969 }; 4037 };
3970 4038
3971 static struct queue_sysfs_entry queue_ra_entry = { 4039 static struct queue_sysfs_entry queue_ra_entry = {
3972 .attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR }, 4040 .attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR },
3973 .show = queue_ra_show, 4041 .show = queue_ra_show,
3974 .store = queue_ra_store, 4042 .store = queue_ra_store,
3975 }; 4043 };
3976 4044
3977 static struct queue_sysfs_entry queue_max_sectors_entry = { 4045 static struct queue_sysfs_entry queue_max_sectors_entry = {
3978 .attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR }, 4046 .attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR },
3979 .show = queue_max_sectors_show, 4047 .show = queue_max_sectors_show,
3980 .store = queue_max_sectors_store, 4048 .store = queue_max_sectors_store,
3981 }; 4049 };
3982 4050
3983 static struct queue_sysfs_entry queue_max_hw_sectors_entry = { 4051 static struct queue_sysfs_entry queue_max_hw_sectors_entry = {
3984 .attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO }, 4052 .attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO },
3985 .show = queue_max_hw_sectors_show, 4053 .show = queue_max_hw_sectors_show,
3986 }; 4054 };
3987 4055
3988 static struct queue_sysfs_entry queue_iosched_entry = { 4056 static struct queue_sysfs_entry queue_iosched_entry = {
3989 .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR }, 4057 .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR },
3990 .show = elv_iosched_show, 4058 .show = elv_iosched_show,
3991 .store = elv_iosched_store, 4059 .store = elv_iosched_store,
3992 }; 4060 };
3993 4061
3994 static struct attribute *default_attrs[] = { 4062 static struct attribute *default_attrs[] = {
3995 &queue_requests_entry.attr, 4063 &queue_requests_entry.attr,
3996 &queue_ra_entry.attr, 4064 &queue_ra_entry.attr,
3997 &queue_max_hw_sectors_entry.attr, 4065 &queue_max_hw_sectors_entry.attr,
3998 &queue_max_sectors_entry.attr, 4066 &queue_max_sectors_entry.attr,
3999 &queue_iosched_entry.attr, 4067 &queue_iosched_entry.attr,
4000 NULL, 4068 NULL,
4001 }; 4069 };
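The default_attrs[] table above is the extension point for queue attributes. As an illustration only (not part of this commit), a read-only attribute exposing the hardware sector size would follow the same show/entry pattern and then be added to that table; the hw_sector_size name here is hypothetical:

static ssize_t queue_hw_sector_size_show(struct request_queue *q, char *page)
{
	return queue_var_show(q->hardsect_size, page);
}

static struct queue_sysfs_entry queue_hw_sector_size_entry = {
	.attr = {.name = "hw_sector_size", .mode = S_IRUGO },
	.show = queue_hw_sector_size_show,
};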
4002 4070
4003 #define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr) 4071 #define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr)
4004 4072
4005 static ssize_t 4073 static ssize_t
4006 queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page) 4074 queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
4007 { 4075 {
4008 struct queue_sysfs_entry *entry = to_queue(attr); 4076 struct queue_sysfs_entry *entry = to_queue(attr);
4009 struct request_queue *q = 4077 struct request_queue *q =
4010 container_of(kobj, struct request_queue, kobj); 4078 container_of(kobj, struct request_queue, kobj);
4011 ssize_t res; 4079 ssize_t res;
4012 4080
4013 if (!entry->show) 4081 if (!entry->show)
4014 return -EIO; 4082 return -EIO;
4015 mutex_lock(&q->sysfs_lock); 4083 mutex_lock(&q->sysfs_lock);
4016 if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { 4084 if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) {
4017 mutex_unlock(&q->sysfs_lock); 4085 mutex_unlock(&q->sysfs_lock);
4018 return -ENOENT; 4086 return -ENOENT;
4019 } 4087 }
4020 res = entry->show(q, page); 4088 res = entry->show(q, page);
4021 mutex_unlock(&q->sysfs_lock); 4089 mutex_unlock(&q->sysfs_lock);
4022 return res; 4090 return res;
4023 } 4091 }
4024 4092
4025 static ssize_t 4093 static ssize_t
4026 queue_attr_store(struct kobject *kobj, struct attribute *attr, 4094 queue_attr_store(struct kobject *kobj, struct attribute *attr,
4027 const char *page, size_t length) 4095 const char *page, size_t length)
4028 { 4096 {
4029 struct queue_sysfs_entry *entry = to_queue(attr); 4097 struct queue_sysfs_entry *entry = to_queue(attr);
4030 struct request_queue *q = container_of(kobj, struct request_queue, kobj); 4098 struct request_queue *q = container_of(kobj, struct request_queue, kobj);
4031 4099
4032 ssize_t res; 4100 ssize_t res;
4033 4101
4034 if (!entry->store) 4102 if (!entry->store)
4035 return -EIO; 4103 return -EIO;
4036 mutex_lock(&q->sysfs_lock); 4104 mutex_lock(&q->sysfs_lock);
4037 if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { 4105 if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) {
4038 mutex_unlock(&q->sysfs_lock); 4106 mutex_unlock(&q->sysfs_lock);
4039 return -ENOENT; 4107 return -ENOENT;
4040 } 4108 }
4041 res = entry->store(q, page, length); 4109 res = entry->store(q, page, length);
4042 mutex_unlock(&q->sysfs_lock); 4110 mutex_unlock(&q->sysfs_lock);
4043 return res; 4111 return res;
4044 } 4112 }
4045 4113
4046 static struct sysfs_ops queue_sysfs_ops = { 4114 static struct sysfs_ops queue_sysfs_ops = {
4047 .show = queue_attr_show, 4115 .show = queue_attr_show,
4048 .store = queue_attr_store, 4116 .store = queue_attr_store,
4049 }; 4117 };
4050 4118
4051 static struct kobj_type queue_ktype = { 4119 static struct kobj_type queue_ktype = {
4052 .sysfs_ops = &queue_sysfs_ops, 4120 .sysfs_ops = &queue_sysfs_ops,
4053 .default_attrs = default_attrs, 4121 .default_attrs = default_attrs,
4054 .release = blk_release_queue, 4122 .release = blk_release_queue,
4055 }; 4123 };
4056 4124
4057 int blk_register_queue(struct gendisk *disk) 4125 int blk_register_queue(struct gendisk *disk)
4058 { 4126 {
4059 int ret; 4127 int ret;
4060 4128
4061 struct request_queue *q = disk->queue; 4129 struct request_queue *q = disk->queue;
4062 4130
4063 if (!q || !q->request_fn) 4131 if (!q || !q->request_fn)
4064 return -ENXIO; 4132 return -ENXIO;
4065 4133
4066 q->kobj.parent = kobject_get(&disk->kobj); 4134 q->kobj.parent = kobject_get(&disk->kobj);
4067 4135
4068 ret = kobject_add(&q->kobj); 4136 ret = kobject_add(&q->kobj);
4069 if (ret < 0) 4137 if (ret < 0)
4070 return ret; 4138 return ret;
4071 4139
4072 kobject_uevent(&q->kobj, KOBJ_ADD); 4140 kobject_uevent(&q->kobj, KOBJ_ADD);
4073 4141
4074 ret = elv_register_queue(q); 4142 ret = elv_register_queue(q);
4075 if (ret) { 4143 if (ret) {
4076 kobject_uevent(&q->kobj, KOBJ_REMOVE); 4144 kobject_uevent(&q->kobj, KOBJ_REMOVE);
4077 kobject_del(&q->kobj); 4145 kobject_del(&q->kobj);
4078 return ret; 4146 return ret;
4079 } 4147 }
4080 4148
4081 return 0; 4149 return 0;
4082 } 4150 }
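blk_register_queue() is normally reached through add_disk() rather than called directly by drivers. A hedged sketch of the usual driver-side setup, with hypothetical example_* names:

static DEFINE_SPINLOCK(example_lock);

static void example_request_fn(struct request_queue *q)
{
	/* pull requests off the queue and hand them to the hardware */
}

static int example_attach_disk(struct gendisk *disk)
{
	struct request_queue *q;

	q = blk_init_queue(example_request_fn, &example_lock);
	if (!q)
		return -ENOMEM;

	disk->queue = q;
	add_disk(disk);		/* ends up registering the queue kobject shown above */
	return 0;
}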
4083 4151
4084 void blk_unregister_queue(struct gendisk *disk) 4152 void blk_unregister_queue(struct gendisk *disk)
4085 { 4153 {
4086 struct request_queue *q = disk->queue; 4154 struct request_queue *q = disk->queue;
4087 4155
4088 if (q && q->request_fn) { 4156 if (q && q->request_fn) {
4089 elv_unregister_queue(q); 4157 elv_unregister_queue(q);
4090 4158
4091 kobject_uevent(&q->kobj, KOBJ_REMOVE); 4159 kobject_uevent(&q->kobj, KOBJ_REMOVE);
4092 kobject_del(&q->kobj); 4160 kobject_del(&q->kobj);
4093 kobject_put(&disk->kobj); 4161 kobject_put(&disk->kobj);
4094 } 4162 }
4095 } 4163 }
4096 4164
include/linux/blkdev.h
1 #ifndef _LINUX_BLKDEV_H 1 #ifndef _LINUX_BLKDEV_H
2 #define _LINUX_BLKDEV_H 2 #define _LINUX_BLKDEV_H
3 3
4 #ifdef CONFIG_BLOCK 4 #ifdef CONFIG_BLOCK
5 5
6 #include <linux/sched.h> 6 #include <linux/sched.h>
7 #include <linux/major.h> 7 #include <linux/major.h>
8 #include <linux/genhd.h> 8 #include <linux/genhd.h>
9 #include <linux/list.h> 9 #include <linux/list.h>
10 #include <linux/timer.h> 10 #include <linux/timer.h>
11 #include <linux/workqueue.h> 11 #include <linux/workqueue.h>
12 #include <linux/pagemap.h> 12 #include <linux/pagemap.h>
13 #include <linux/backing-dev.h> 13 #include <linux/backing-dev.h>
14 #include <linux/wait.h> 14 #include <linux/wait.h>
15 #include <linux/mempool.h> 15 #include <linux/mempool.h>
16 #include <linux/bio.h> 16 #include <linux/bio.h>
17 #include <linux/module.h> 17 #include <linux/module.h>
18 #include <linux/stringify.h> 18 #include <linux/stringify.h>
19 #include <linux/bsg.h> 19 #include <linux/bsg.h>
20 20
21 #include <asm/scatterlist.h> 21 #include <asm/scatterlist.h>
22 22
23 struct scsi_ioctl_command; 23 struct scsi_ioctl_command;
24 24
25 struct request_queue; 25 struct request_queue;
26 typedef struct request_queue request_queue_t __deprecated; 26 typedef struct request_queue request_queue_t __deprecated;
27 struct elevator_queue; 27 struct elevator_queue;
28 typedef struct elevator_queue elevator_t; 28 typedef struct elevator_queue elevator_t;
29 struct request_pm_state; 29 struct request_pm_state;
30 struct blk_trace; 30 struct blk_trace;
31 struct request; 31 struct request;
32 struct sg_io_hdr; 32 struct sg_io_hdr;
33 33
34 #define BLKDEV_MIN_RQ 4 34 #define BLKDEV_MIN_RQ 4
35 #define BLKDEV_MAX_RQ 128 /* Default maximum */ 35 #define BLKDEV_MAX_RQ 128 /* Default maximum */
36 36
37 /* 37 /*
38 * This is the per-process anticipatory I/O scheduler state. 38 * This is the per-process anticipatory I/O scheduler state.
39 */ 39 */
40 struct as_io_context { 40 struct as_io_context {
41 spinlock_t lock; 41 spinlock_t lock;
42 42
43 void (*dtor)(struct as_io_context *aic); /* destructor */ 43 void (*dtor)(struct as_io_context *aic); /* destructor */
44 void (*exit)(struct as_io_context *aic); /* called on task exit */ 44 void (*exit)(struct as_io_context *aic); /* called on task exit */
45 45
46 unsigned long state; 46 unsigned long state;
47 atomic_t nr_queued; /* queued reads & sync writes */ 47 atomic_t nr_queued; /* queued reads & sync writes */
48 atomic_t nr_dispatched; /* number of requests gone to the drivers */ 48 atomic_t nr_dispatched; /* number of requests gone to the drivers */
49 49
50 /* IO History tracking */ 50 /* IO History tracking */
51 /* Thinktime */ 51 /* Thinktime */
52 unsigned long last_end_request; 52 unsigned long last_end_request;
53 unsigned long ttime_total; 53 unsigned long ttime_total;
54 unsigned long ttime_samples; 54 unsigned long ttime_samples;
55 unsigned long ttime_mean; 55 unsigned long ttime_mean;
56 /* Layout pattern */ 56 /* Layout pattern */
57 unsigned int seek_samples; 57 unsigned int seek_samples;
58 sector_t last_request_pos; 58 sector_t last_request_pos;
59 u64 seek_total; 59 u64 seek_total;
60 sector_t seek_mean; 60 sector_t seek_mean;
61 }; 61 };
62 62
63 struct cfq_queue; 63 struct cfq_queue;
64 struct cfq_io_context { 64 struct cfq_io_context {
65 struct rb_node rb_node; 65 struct rb_node rb_node;
66 void *key; 66 void *key;
67 67
68 struct cfq_queue *cfqq[2]; 68 struct cfq_queue *cfqq[2];
69 69
70 struct io_context *ioc; 70 struct io_context *ioc;
71 71
72 unsigned long last_end_request; 72 unsigned long last_end_request;
73 sector_t last_request_pos; 73 sector_t last_request_pos;
74 74
75 unsigned long ttime_total; 75 unsigned long ttime_total;
76 unsigned long ttime_samples; 76 unsigned long ttime_samples;
77 unsigned long ttime_mean; 77 unsigned long ttime_mean;
78 78
79 unsigned int seek_samples; 79 unsigned int seek_samples;
80 u64 seek_total; 80 u64 seek_total;
81 sector_t seek_mean; 81 sector_t seek_mean;
82 82
83 struct list_head queue_list; 83 struct list_head queue_list;
84 84
85 void (*dtor)(struct io_context *); /* destructor */ 85 void (*dtor)(struct io_context *); /* destructor */
86 void (*exit)(struct io_context *); /* called on task exit */ 86 void (*exit)(struct io_context *); /* called on task exit */
87 }; 87 };
88 88
89 /* 89 /*
90 * This is the per-process I/O subsystem state. It is refcounted and 90 * This is the per-process I/O subsystem state. It is refcounted and
91 * kmalloc'ed. Currently all fields are modified in process io context 91 * kmalloc'ed. Currently all fields are modified in process io context
92 * (apart from the atomic refcount), so require no locking. 92 * (apart from the atomic refcount), so require no locking.
93 */ 93 */
94 struct io_context { 94 struct io_context {
95 atomic_t refcount; 95 atomic_t refcount;
96 struct task_struct *task; 96 struct task_struct *task;
97 97
98 unsigned int ioprio_changed; 98 unsigned int ioprio_changed;
99 99
100 /* 100 /*
101 * For request batching 101 * For request batching
102 */ 102 */
103 unsigned long last_waited; /* Time last woken after wait for request */ 103 unsigned long last_waited; /* Time last woken after wait for request */
104 int nr_batch_requests; /* Number of requests left in the batch */ 104 int nr_batch_requests; /* Number of requests left in the batch */
105 105
106 struct as_io_context *aic; 106 struct as_io_context *aic;
107 struct rb_root cic_root; 107 struct rb_root cic_root;
108 void *ioc_data; 108 void *ioc_data;
109 }; 109 };
110 110
111 void put_io_context(struct io_context *ioc); 111 void put_io_context(struct io_context *ioc);
112 void exit_io_context(void); 112 void exit_io_context(void);
113 struct io_context *get_io_context(gfp_t gfp_flags, int node); 113 struct io_context *get_io_context(gfp_t gfp_flags, int node);
114 void copy_io_context(struct io_context **pdst, struct io_context **psrc); 114 void copy_io_context(struct io_context **pdst, struct io_context **psrc);
115 void swap_io_context(struct io_context **ioc1, struct io_context **ioc2); 115 void swap_io_context(struct io_context **ioc1, struct io_context **ioc2);
116 116
117 struct request; 117 struct request;
118 typedef void (rq_end_io_fn)(struct request *, int); 118 typedef void (rq_end_io_fn)(struct request *, int);
119 119
120 struct request_list { 120 struct request_list {
121 int count[2]; 121 int count[2];
122 int starved[2]; 122 int starved[2];
123 int elvpriv; 123 int elvpriv;
124 mempool_t *rq_pool; 124 mempool_t *rq_pool;
125 wait_queue_head_t wait[2]; 125 wait_queue_head_t wait[2];
126 }; 126 };
127 127
128 /* 128 /*
129 * request command types 129 * request command types
130 */ 130 */
131 enum rq_cmd_type_bits { 131 enum rq_cmd_type_bits {
132 REQ_TYPE_FS = 1, /* fs request */ 132 REQ_TYPE_FS = 1, /* fs request */
133 REQ_TYPE_BLOCK_PC, /* scsi command */ 133 REQ_TYPE_BLOCK_PC, /* scsi command */
134 REQ_TYPE_SENSE, /* sense request */ 134 REQ_TYPE_SENSE, /* sense request */
135 REQ_TYPE_PM_SUSPEND, /* suspend request */ 135 REQ_TYPE_PM_SUSPEND, /* suspend request */
136 REQ_TYPE_PM_RESUME, /* resume request */ 136 REQ_TYPE_PM_RESUME, /* resume request */
137 REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ 137 REQ_TYPE_PM_SHUTDOWN, /* shutdown request */
138 REQ_TYPE_FLUSH, /* flush request */ 138 REQ_TYPE_FLUSH, /* flush request */
139 REQ_TYPE_SPECIAL, /* driver defined type */ 139 REQ_TYPE_SPECIAL, /* driver defined type */
140 REQ_TYPE_LINUX_BLOCK, /* generic block layer message */ 140 REQ_TYPE_LINUX_BLOCK, /* generic block layer message */
141 /* 141 /*
142 * for ATA/ATAPI devices. this really doesn't belong here, ide should 142 * for ATA/ATAPI devices. this really doesn't belong here, ide should
143 * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver 143 * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver
144 * private REQ_LB opcodes to differentiate what type of request this is 144 * private REQ_LB opcodes to differentiate what type of request this is
145 */ 145 */
146 REQ_TYPE_ATA_CMD, 146 REQ_TYPE_ATA_CMD,
147 REQ_TYPE_ATA_TASK, 147 REQ_TYPE_ATA_TASK,
148 REQ_TYPE_ATA_TASKFILE, 148 REQ_TYPE_ATA_TASKFILE,
149 REQ_TYPE_ATA_PC, 149 REQ_TYPE_ATA_PC,
150 }; 150 };
151 151
152 /* 152 /*
153 * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being 153 * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being
154 * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a 154 * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a
155 * SCSI cdb). 155 * SCSI cdb).
156 * 156 *
157 * 0x00 -> 0x3f are driver private, to be used for whatever purpose they need, 157 * 0x00 -> 0x3f are driver private, to be used for whatever purpose they need,
158 * typically to differentiate REQ_TYPE_SPECIAL requests. 158 * typically to differentiate REQ_TYPE_SPECIAL requests.
159 * 159 *
160 */ 160 */
161 enum { 161 enum {
162 /* 162 /*
163 * just examples for now 163 * just examples for now
164 */ 164 */
165 REQ_LB_OP_EJECT = 0x40, /* eject request */ 165 REQ_LB_OP_EJECT = 0x40, /* eject request */
166 REQ_LB_OP_FLUSH = 0x41, /* flush device */ 166 REQ_LB_OP_FLUSH = 0x41, /* flush device */
167 }; 167 };
168 168
169 /* 169 /*
170 * request type modified bits. first three bits match BIO_RW* bits, important 170 * request type modified bits. first three bits match BIO_RW* bits, important
171 */ 171 */
172 enum rq_flag_bits { 172 enum rq_flag_bits {
173 __REQ_RW, /* not set, read. set, write */ 173 __REQ_RW, /* not set, read. set, write */
174 __REQ_FAILFAST, /* no low level driver retries */ 174 __REQ_FAILFAST, /* no low level driver retries */
175 __REQ_SORTED, /* elevator knows about this request */ 175 __REQ_SORTED, /* elevator knows about this request */
176 __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ 176 __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */
177 __REQ_HARDBARRIER, /* may not be passed by drive either */ 177 __REQ_HARDBARRIER, /* may not be passed by drive either */
178 __REQ_FUA, /* forced unit access */ 178 __REQ_FUA, /* forced unit access */
179 __REQ_NOMERGE, /* don't touch this for merging */ 179 __REQ_NOMERGE, /* don't touch this for merging */
180 __REQ_STARTED, /* drive already may have started this one */ 180 __REQ_STARTED, /* drive already may have started this one */
181 __REQ_DONTPREP, /* don't call prep for this one */ 181 __REQ_DONTPREP, /* don't call prep for this one */
182 __REQ_QUEUED, /* uses queueing */ 182 __REQ_QUEUED, /* uses queueing */
183 __REQ_ELVPRIV, /* elevator private data attached */ 183 __REQ_ELVPRIV, /* elevator private data attached */
184 __REQ_FAILED, /* set if the request failed */ 184 __REQ_FAILED, /* set if the request failed */
185 __REQ_QUIET, /* don't worry about errors */ 185 __REQ_QUIET, /* don't worry about errors */
186 __REQ_PREEMPT, /* set for "ide_preempt" requests */ 186 __REQ_PREEMPT, /* set for "ide_preempt" requests */
187 __REQ_ORDERED_COLOR, /* is before or after barrier */ 187 __REQ_ORDERED_COLOR, /* is before or after barrier */
188 __REQ_RW_SYNC, /* request is sync (O_DIRECT) */ 188 __REQ_RW_SYNC, /* request is sync (O_DIRECT) */
189 __REQ_ALLOCED, /* request came from our alloc pool */ 189 __REQ_ALLOCED, /* request came from our alloc pool */
190 __REQ_RW_META, /* metadata io request */ 190 __REQ_RW_META, /* metadata io request */
191 __REQ_NR_BITS, /* stops here */ 191 __REQ_NR_BITS, /* stops here */
192 }; 192 };
193 193
194 #define REQ_RW (1 << __REQ_RW) 194 #define REQ_RW (1 << __REQ_RW)
195 #define REQ_FAILFAST (1 << __REQ_FAILFAST) 195 #define REQ_FAILFAST (1 << __REQ_FAILFAST)
196 #define REQ_SORTED (1 << __REQ_SORTED) 196 #define REQ_SORTED (1 << __REQ_SORTED)
197 #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) 197 #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER)
198 #define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER) 198 #define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER)
199 #define REQ_FUA (1 << __REQ_FUA) 199 #define REQ_FUA (1 << __REQ_FUA)
200 #define REQ_NOMERGE (1 << __REQ_NOMERGE) 200 #define REQ_NOMERGE (1 << __REQ_NOMERGE)
201 #define REQ_STARTED (1 << __REQ_STARTED) 201 #define REQ_STARTED (1 << __REQ_STARTED)
202 #define REQ_DONTPREP (1 << __REQ_DONTPREP) 202 #define REQ_DONTPREP (1 << __REQ_DONTPREP)
203 #define REQ_QUEUED (1 << __REQ_QUEUED) 203 #define REQ_QUEUED (1 << __REQ_QUEUED)
204 #define REQ_ELVPRIV (1 << __REQ_ELVPRIV) 204 #define REQ_ELVPRIV (1 << __REQ_ELVPRIV)
205 #define REQ_FAILED (1 << __REQ_FAILED) 205 #define REQ_FAILED (1 << __REQ_FAILED)
206 #define REQ_QUIET (1 << __REQ_QUIET) 206 #define REQ_QUIET (1 << __REQ_QUIET)
207 #define REQ_PREEMPT (1 << __REQ_PREEMPT) 207 #define REQ_PREEMPT (1 << __REQ_PREEMPT)
208 #define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) 208 #define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR)
209 #define REQ_RW_SYNC (1 << __REQ_RW_SYNC) 209 #define REQ_RW_SYNC (1 << __REQ_RW_SYNC)
210 #define REQ_ALLOCED (1 << __REQ_ALLOCED) 210 #define REQ_ALLOCED (1 << __REQ_ALLOCED)
211 #define REQ_RW_META (1 << __REQ_RW_META) 211 #define REQ_RW_META (1 << __REQ_RW_META)
212 212
213 #define BLK_MAX_CDB 16 213 #define BLK_MAX_CDB 16
214 214
215 /* 215 /*
216 * try to put the fields that are referenced together in the same cacheline 216 * try to put the fields that are referenced together in the same cacheline
217 */ 217 */
218 struct request { 218 struct request {
219 struct list_head queuelist; 219 struct list_head queuelist;
220 struct list_head donelist; 220 struct list_head donelist;
221 221
222 struct request_queue *q; 222 struct request_queue *q;
223 223
224 unsigned int cmd_flags; 224 unsigned int cmd_flags;
225 enum rq_cmd_type_bits cmd_type; 225 enum rq_cmd_type_bits cmd_type;
226 226
227 /* Maintain bio traversal state for part by part I/O submission. 227 /* Maintain bio traversal state for part by part I/O submission.
228 * hard_* are block layer internals, no driver should touch them! 228 * hard_* are block layer internals, no driver should touch them!
229 */ 229 */
230 230
231 sector_t sector; /* next sector to submit */ 231 sector_t sector; /* next sector to submit */
232 sector_t hard_sector; /* next sector to complete */ 232 sector_t hard_sector; /* next sector to complete */
233 unsigned long nr_sectors; /* no. of sectors left to submit */ 233 unsigned long nr_sectors; /* no. of sectors left to submit */
234 unsigned long hard_nr_sectors; /* no. of sectors left to complete */ 234 unsigned long hard_nr_sectors; /* no. of sectors left to complete */
235 /* no. of sectors left to submit in the current segment */ 235 /* no. of sectors left to submit in the current segment */
236 unsigned int current_nr_sectors; 236 unsigned int current_nr_sectors;
237 237
238 /* no. of sectors left to complete in the current segment */ 238 /* no. of sectors left to complete in the current segment */
239 unsigned int hard_cur_sectors; 239 unsigned int hard_cur_sectors;
240 240
241 struct bio *bio; 241 struct bio *bio;
242 struct bio *biotail; 242 struct bio *biotail;
243 243
244 struct hlist_node hash; /* merge hash */ 244 struct hlist_node hash; /* merge hash */
245 /* 245 /*
246 * The rb_node is only used inside the io scheduler, requests 246 * The rb_node is only used inside the io scheduler, requests
247 * are pruned when moved to the dispatch queue. So let the 247 * are pruned when moved to the dispatch queue. So let the
248 * completion_data share space with the rb_node. 248 * completion_data share space with the rb_node.
249 */ 249 */
250 union { 250 union {
251 struct rb_node rb_node; /* sort/lookup */ 251 struct rb_node rb_node; /* sort/lookup */
252 void *completion_data; 252 void *completion_data;
253 }; 253 };
254 254
255 /* 255 /*
256 * two pointers are available for the IO schedulers, if they need 256 * two pointers are available for the IO schedulers, if they need
257 * more they have to dynamically allocate it. 257 * more they have to dynamically allocate it.
258 */ 258 */
259 void *elevator_private; 259 void *elevator_private;
260 void *elevator_private2; 260 void *elevator_private2;
261 261
262 struct gendisk *rq_disk; 262 struct gendisk *rq_disk;
263 unsigned long start_time; 263 unsigned long start_time;
264 264
265 /* Number of scatter-gather DMA addr+len pairs after 265 /* Number of scatter-gather DMA addr+len pairs after
266 * physical address coalescing is performed. 266 * physical address coalescing is performed.
267 */ 267 */
268 unsigned short nr_phys_segments; 268 unsigned short nr_phys_segments;
269 269
270 /* Number of scatter-gather addr+len pairs after 270 /* Number of scatter-gather addr+len pairs after
271 * physical and DMA remapping hardware coalescing is performed. 271 * physical and DMA remapping hardware coalescing is performed.
272 * This is the number of scatter-gather entries the driver 272 * This is the number of scatter-gather entries the driver
273 * will actually have to deal with after DMA mapping is done. 273 * will actually have to deal with after DMA mapping is done.
274 */ 274 */
275 unsigned short nr_hw_segments; 275 unsigned short nr_hw_segments;
276 276
277 unsigned short ioprio; 277 unsigned short ioprio;
278 278
279 void *special; 279 void *special;
280 char *buffer; 280 char *buffer;
281 281
282 int tag; 282 int tag;
283 int errors; 283 int errors;
284 284
285 int ref_count; 285 int ref_count;
286 286
287 /* 287 /*
288 * when request is used as a packet command carrier 288 * when request is used as a packet command carrier
289 */ 289 */
290 unsigned int cmd_len; 290 unsigned int cmd_len;
291 unsigned char cmd[BLK_MAX_CDB]; 291 unsigned char cmd[BLK_MAX_CDB];
292 292
293 unsigned int data_len; 293 unsigned int data_len;
294 unsigned int sense_len; 294 unsigned int sense_len;
295 void *data; 295 void *data;
296 void *sense; 296 void *sense;
297 297
298 unsigned int timeout; 298 unsigned int timeout;
299 int retries; 299 int retries;
300 300
301 /* 301 /*
302 * completion callback. 302 * completion callback.
303 */ 303 */
304 rq_end_io_fn *end_io; 304 rq_end_io_fn *end_io;
305 void *end_io_data; 305 void *end_io_data;
306 306
307 /* for bidi */ 307 /* for bidi */
308 struct request *next_rq; 308 struct request *next_rq;
309 }; 309 };
310 310
311 /* 311 /*
312 * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME 312 * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME
313 * requests. Some step values could eventually be made generic. 313 * requests. Some step values could eventually be made generic.
314 */ 314 */
315 struct request_pm_state 315 struct request_pm_state
316 { 316 {
317 /* PM state machine step value, currently driver specific */ 317 /* PM state machine step value, currently driver specific */
318 int pm_step; 318 int pm_step;
319 /* requested PM state value (S1, S2, S3, S4, ...) */ 319 /* requested PM state value (S1, S2, S3, S4, ...) */
320 u32 pm_state; 320 u32 pm_state;
321 void* data; /* for driver use */ 321 void* data; /* for driver use */
322 }; 322 };
323 323
324 #include <linux/elevator.h> 324 #include <linux/elevator.h>
325 325
326 typedef void (request_fn_proc) (struct request_queue *q); 326 typedef void (request_fn_proc) (struct request_queue *q);
327 typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); 327 typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
328 typedef int (prep_rq_fn) (struct request_queue *, struct request *); 328 typedef int (prep_rq_fn) (struct request_queue *, struct request *);
329 typedef void (unplug_fn) (struct request_queue *); 329 typedef void (unplug_fn) (struct request_queue *);
330 330
331 struct bio_vec; 331 struct bio_vec;
332 typedef int (merge_bvec_fn) (struct request_queue *, struct bio *, struct bio_vec *); 332 typedef int (merge_bvec_fn) (struct request_queue *, struct bio *, struct bio_vec *);
333 typedef int (issue_flush_fn) (struct request_queue *, struct gendisk *, sector_t *); 333 typedef int (issue_flush_fn) (struct request_queue *, struct gendisk *, sector_t *);
334 typedef void (prepare_flush_fn) (struct request_queue *, struct request *); 334 typedef void (prepare_flush_fn) (struct request_queue *, struct request *);
335 typedef void (softirq_done_fn)(struct request *); 335 typedef void (softirq_done_fn)(struct request *);
336 336
337 enum blk_queue_state { 337 enum blk_queue_state {
338 Queue_down, 338 Queue_down,
339 Queue_up, 339 Queue_up,
340 }; 340 };
341 341
342 struct blk_queue_tag { 342 struct blk_queue_tag {
343 struct request **tag_index; /* map of busy tags */ 343 struct request **tag_index; /* map of busy tags */
344 unsigned long *tag_map; /* bit map of free/busy tags */ 344 unsigned long *tag_map; /* bit map of free/busy tags */
345 struct list_head busy_list; /* fifo list of busy tags */ 345 struct list_head busy_list; /* fifo list of busy tags */
346 int busy; /* current depth */ 346 int busy; /* current depth */
347 int max_depth; /* what we will send to device */ 347 int max_depth; /* what we will send to device */
348 int real_max_depth; /* what the array can hold */ 348 int real_max_depth; /* what the array can hold */
349 atomic_t refcnt; /* map can be shared */ 349 atomic_t refcnt; /* map can be shared */
350 }; 350 };
351 351
352 struct request_queue 352 struct request_queue
353 { 353 {
354 /* 354 /*
355 * Together with queue_head for cacheline sharing 355 * Together with queue_head for cacheline sharing
356 */ 356 */
357 struct list_head queue_head; 357 struct list_head queue_head;
358 struct request *last_merge; 358 struct request *last_merge;
359 elevator_t *elevator; 359 elevator_t *elevator;
360 360
361 /* 361 /*
362 * the queue request freelist, one for reads and one for writes 362 * the queue request freelist, one for reads and one for writes
363 */ 363 */
364 struct request_list rq; 364 struct request_list rq;
365 365
366 request_fn_proc *request_fn; 366 request_fn_proc *request_fn;
367 make_request_fn *make_request_fn; 367 make_request_fn *make_request_fn;
368 prep_rq_fn *prep_rq_fn; 368 prep_rq_fn *prep_rq_fn;
369 unplug_fn *unplug_fn; 369 unplug_fn *unplug_fn;
370 merge_bvec_fn *merge_bvec_fn; 370 merge_bvec_fn *merge_bvec_fn;
371 issue_flush_fn *issue_flush_fn; 371 issue_flush_fn *issue_flush_fn;
372 prepare_flush_fn *prepare_flush_fn; 372 prepare_flush_fn *prepare_flush_fn;
373 softirq_done_fn *softirq_done_fn; 373 softirq_done_fn *softirq_done_fn;
374 374
375 /* 375 /*
376 * Dispatch queue sorting 376 * Dispatch queue sorting
377 */ 377 */
378 sector_t end_sector; 378 sector_t end_sector;
379 struct request *boundary_rq; 379 struct request *boundary_rq;
380 380
381 /* 381 /*
382 * Auto-unplugging state 382 * Auto-unplugging state
383 */ 383 */
384 struct timer_list unplug_timer; 384 struct timer_list unplug_timer;
385 int unplug_thresh; /* After this many requests */ 385 int unplug_thresh; /* After this many requests */
386 unsigned long unplug_delay; /* After this many jiffies */ 386 unsigned long unplug_delay; /* After this many jiffies */
387 struct work_struct unplug_work; 387 struct work_struct unplug_work;
388 388
389 struct backing_dev_info backing_dev_info; 389 struct backing_dev_info backing_dev_info;
390 390
391 /* 391 /*
392 * The queue owner gets to use this for whatever they like. 392 * The queue owner gets to use this for whatever they like.
393 * ll_rw_blk doesn't touch it. 393 * ll_rw_blk doesn't touch it.
394 */ 394 */
395 void *queuedata; 395 void *queuedata;
396 396
397 /* 397 /*
398 * queue needs bounce pages for pages above this limit 398 * queue needs bounce pages for pages above this limit
399 */ 399 */
400 unsigned long bounce_pfn; 400 unsigned long bounce_pfn;
401 gfp_t bounce_gfp; 401 gfp_t bounce_gfp;
402 402
403 /* 403 /*
404 * various queue flags, see QUEUE_* below 404 * various queue flags, see QUEUE_* below
405 */ 405 */
406 unsigned long queue_flags; 406 unsigned long queue_flags;
407 407
408 /* 408 /*
409 * protects queue structures from reentrancy. ->__queue_lock should 409 * protects queue structures from reentrancy. ->__queue_lock should
410 * _never_ be used directly, it is queue private. always use 410 * _never_ be used directly, it is queue private. always use
411 * ->queue_lock. 411 * ->queue_lock.
412 */ 412 */
413 spinlock_t __queue_lock; 413 spinlock_t __queue_lock;
414 spinlock_t *queue_lock; 414 spinlock_t *queue_lock;
415 415
416 /* 416 /*
417 * queue kobject 417 * queue kobject
418 */ 418 */
419 struct kobject kobj; 419 struct kobject kobj;
420 420
421 /* 421 /*
422 * queue settings 422 * queue settings
423 */ 423 */
424 unsigned long nr_requests; /* Max # of requests */ 424 unsigned long nr_requests; /* Max # of requests */
425 unsigned int nr_congestion_on; 425 unsigned int nr_congestion_on;
426 unsigned int nr_congestion_off; 426 unsigned int nr_congestion_off;
427 unsigned int nr_batching; 427 unsigned int nr_batching;
428 428
429 unsigned int max_sectors; 429 unsigned int max_sectors;
430 unsigned int max_hw_sectors; 430 unsigned int max_hw_sectors;
431 unsigned short max_phys_segments; 431 unsigned short max_phys_segments;
432 unsigned short max_hw_segments; 432 unsigned short max_hw_segments;
433 unsigned short hardsect_size; 433 unsigned short hardsect_size;
434 unsigned int max_segment_size; 434 unsigned int max_segment_size;
435 435
436 unsigned long seg_boundary_mask; 436 unsigned long seg_boundary_mask;
437 unsigned int dma_alignment; 437 unsigned int dma_alignment;
438 438
439 struct blk_queue_tag *queue_tags; 439 struct blk_queue_tag *queue_tags;
440 440
441 unsigned int nr_sorted; 441 unsigned int nr_sorted;
442 unsigned int in_flight; 442 unsigned int in_flight;
443 443
444 /* 444 /*
445 * sg stuff 445 * sg stuff
446 */ 446 */
447 unsigned int sg_timeout; 447 unsigned int sg_timeout;
448 unsigned int sg_reserved_size; 448 unsigned int sg_reserved_size;
449 int node; 449 int node;
450 #ifdef CONFIG_BLK_DEV_IO_TRACE 450 #ifdef CONFIG_BLK_DEV_IO_TRACE
451 struct blk_trace *blk_trace; 451 struct blk_trace *blk_trace;
452 #endif 452 #endif
453 /* 453 /*
454 * reserved for flush operations 454 * reserved for flush operations
455 */ 455 */
456 unsigned int ordered, next_ordered, ordseq; 456 unsigned int ordered, next_ordered, ordseq;
457 int orderr, ordcolor; 457 int orderr, ordcolor;
458 struct request pre_flush_rq, bar_rq, post_flush_rq; 458 struct request pre_flush_rq, bar_rq, post_flush_rq;
459 struct request *orig_bar_rq; 459 struct request *orig_bar_rq;
460 460
461 struct mutex sysfs_lock; 461 struct mutex sysfs_lock;
462 462
463 #if defined(CONFIG_BLK_DEV_BSG) 463 #if defined(CONFIG_BLK_DEV_BSG)
464 struct bsg_class_device bsg_dev; 464 struct bsg_class_device bsg_dev;
465 #endif 465 #endif
466 }; 466 };
467 467
468 #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ 468 #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */
469 #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ 469 #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */
470 #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ 470 #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */
471 #define QUEUE_FLAG_READFULL 3 /* read queue has been filled */ 471 #define QUEUE_FLAG_READFULL 3 /* read queue has been filled */
472 #define QUEUE_FLAG_WRITEFULL 4 /* write queue has been filled */ 472 #define QUEUE_FLAG_WRITEFULL 4 /* write queue has been filled */
473 #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ 473 #define QUEUE_FLAG_DEAD 5 /* queue being torn down */
474 #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ 474 #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */
475 #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ 475 #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */
476 #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ 476 #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */
477 #define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */ 477 #define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */
478 478
479 enum { 479 enum {
480 /* 480 /*
481 * Hardbarrier is supported with one of the following methods. 481 * Hardbarrier is supported with one of the following methods.
482 * 482 *
483 * NONE : hardbarrier unsupported 483 * NONE : hardbarrier unsupported
484 * DRAIN : ordering by draining is enough 484 * DRAIN : ordering by draining is enough
485 * DRAIN_FLUSH : ordering by draining w/ pre and post flushes 485 * DRAIN_FLUSH : ordering by draining w/ pre and post flushes
486 * DRAIN_FUA : ordering by draining w/ pre flush and FUA write 486 * DRAIN_FUA : ordering by draining w/ pre flush and FUA write
487 * TAG : ordering by tag is enough 487 * TAG : ordering by tag is enough
488 * TAG_FLUSH : ordering by tag w/ pre and post flushes 488 * TAG_FLUSH : ordering by tag w/ pre and post flushes
489 * TAG_FUA : ordering by tag w/ pre flush and FUA write 489 * TAG_FUA : ordering by tag w/ pre flush and FUA write
490 */ 490 */
491 QUEUE_ORDERED_NONE = 0x00, 491 QUEUE_ORDERED_NONE = 0x00,
492 QUEUE_ORDERED_DRAIN = 0x01, 492 QUEUE_ORDERED_DRAIN = 0x01,
493 QUEUE_ORDERED_TAG = 0x02, 493 QUEUE_ORDERED_TAG = 0x02,
494 494
495 QUEUE_ORDERED_PREFLUSH = 0x10, 495 QUEUE_ORDERED_PREFLUSH = 0x10,
496 QUEUE_ORDERED_POSTFLUSH = 0x20, 496 QUEUE_ORDERED_POSTFLUSH = 0x20,
497 QUEUE_ORDERED_FUA = 0x40, 497 QUEUE_ORDERED_FUA = 0x40,
498 498
499 QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | 499 QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN |
500 QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH, 500 QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH,
501 QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | 501 QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN |
502 QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA, 502 QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA,
503 QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG | 503 QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG |
504 QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH, 504 QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH,
505 QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG | 505 QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG |
506 QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA, 506 QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA,
507 507
508 /* 508 /*
509 * Ordered operation sequence 509 * Ordered operation sequence
510 */ 510 */
511 QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */ 511 QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */
512 QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */ 512 QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */
513 QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */ 513 QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */
514 QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */ 514 QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */
515 QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */ 515 QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */
516 QUEUE_ORDSEQ_DONE = 0x20, 516 QUEUE_ORDSEQ_DONE = 0x20,
517 }; 517 };
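A driver fronting a device with a volatile write cache would typically select one of the flush-based modes at queue setup. A hedged sketch, assuming the three-argument blk_queue_ordered() of this kernel and using hypothetical example_* names:

static void example_prepare_flush(struct request_queue *q, struct request *rq)
{
	/*
	 * Fill in rq so the driver's request_fn recognises it as a cache
	 * flush; real drivers encode this via cmd_type/cmd[] in their own way.
	 */
}

static void example_init_ordering(struct request_queue *q)
{
	blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH, example_prepare_flush);
}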
518 518
519 #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) 519 #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
520 #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) 520 #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
521 #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) 521 #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
522 #define blk_queue_flushing(q) ((q)->ordseq) 522 #define blk_queue_flushing(q) ((q)->ordseq)
523 523
524 #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS) 524 #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS)
525 #define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC) 525 #define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC)
526 #define blk_special_request(rq) ((rq)->cmd_type == REQ_TYPE_SPECIAL) 526 #define blk_special_request(rq) ((rq)->cmd_type == REQ_TYPE_SPECIAL)
527 #define blk_sense_request(rq) ((rq)->cmd_type == REQ_TYPE_SENSE) 527 #define blk_sense_request(rq) ((rq)->cmd_type == REQ_TYPE_SENSE)
528 528
529 #define blk_noretry_request(rq) ((rq)->cmd_flags & REQ_FAILFAST) 529 #define blk_noretry_request(rq) ((rq)->cmd_flags & REQ_FAILFAST)
530 #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) 530 #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED)
531 531
532 #define blk_account_rq(rq) (blk_rq_started(rq) && blk_fs_request(rq)) 532 #define blk_account_rq(rq) (blk_rq_started(rq) && blk_fs_request(rq))
533 533
534 #define blk_pm_suspend_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND) 534 #define blk_pm_suspend_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND)
535 #define blk_pm_resume_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_RESUME) 535 #define blk_pm_resume_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_RESUME)
536 #define blk_pm_request(rq) \ 536 #define blk_pm_request(rq) \
537 (blk_pm_suspend_request(rq) || blk_pm_resume_request(rq)) 537 (blk_pm_suspend_request(rq) || blk_pm_resume_request(rq))
538 538
539 #define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED) 539 #define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED)
540 #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) 540 #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER)
541 #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) 541 #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA)
542 #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) 542 #define blk_bidi_rq(rq) ((rq)->next_rq != NULL)
543 543
544 #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) 544 #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist)
545 545
546 #define rq_data_dir(rq) ((rq)->cmd_flags & 1) 546 #define rq_data_dir(rq) ((rq)->cmd_flags & 1)
547 547
548 /* 548 /*
549 * We regard a request as sync, if it's a READ or a SYNC write. 549 * We regard a request as sync, if it's a READ or a SYNC write.
550 */ 550 */
551 #define rq_is_sync(rq) (rq_data_dir((rq)) == READ || (rq)->cmd_flags & REQ_RW_SYNC) 551 #define rq_is_sync(rq) (rq_data_dir((rq)) == READ || (rq)->cmd_flags & REQ_RW_SYNC)
552 #define rq_is_meta(rq) ((rq)->cmd_flags & REQ_RW_META) 552 #define rq_is_meta(rq) ((rq)->cmd_flags & REQ_RW_META)
553 553
554 static inline int blk_queue_full(struct request_queue *q, int rw) 554 static inline int blk_queue_full(struct request_queue *q, int rw)
555 { 555 {
556 if (rw == READ) 556 if (rw == READ)
557 return test_bit(QUEUE_FLAG_READFULL, &q->queue_flags); 557 return test_bit(QUEUE_FLAG_READFULL, &q->queue_flags);
558 return test_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); 558 return test_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags);
559 } 559 }
560 560
561 static inline void blk_set_queue_full(struct request_queue *q, int rw) 561 static inline void blk_set_queue_full(struct request_queue *q, int rw)
562 { 562 {
563 if (rw == READ) 563 if (rw == READ)
564 set_bit(QUEUE_FLAG_READFULL, &q->queue_flags); 564 set_bit(QUEUE_FLAG_READFULL, &q->queue_flags);
565 else 565 else
566 set_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); 566 set_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags);
567 } 567 }
568 568
569 static inline void blk_clear_queue_full(struct request_queue *q, int rw) 569 static inline void blk_clear_queue_full(struct request_queue *q, int rw)
570 { 570 {
571 if (rw == READ) 571 if (rw == READ)
572 clear_bit(QUEUE_FLAG_READFULL, &q->queue_flags); 572 clear_bit(QUEUE_FLAG_READFULL, &q->queue_flags);
573 else 573 else
574 clear_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); 574 clear_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags);
575 } 575 }
576 576
577 577
578 /* 578 /*
579 * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may 579 * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may
580 * it already be started by driver. 580 * it already be started by driver.
581 */ 581 */
582 #define RQ_NOMERGE_FLAGS \ 582 #define RQ_NOMERGE_FLAGS \
583 (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) 583 (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER)
584 #define rq_mergeable(rq) \ 584 #define rq_mergeable(rq) \
585 (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq))) 585 (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq)))
586 586
587 /* 587 /*
588 * q->prep_rq_fn return values 588 * q->prep_rq_fn return values
589 */ 589 */
590 #define BLKPREP_OK 0 /* serve it */ 590 #define BLKPREP_OK 0 /* serve it */
591 #define BLKPREP_KILL 1 /* fatal error, kill */ 591 #define BLKPREP_KILL 1 /* fatal error, kill */
592 #define BLKPREP_DEFER 2 /* leave on queue */ 592 #define BLKPREP_DEFER 2 /* leave on queue */
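These are the values a driver's prep_rq_fn hands back: BLKPREP_OK issues the request, BLKPREP_DEFER leaves it on the queue for a later retry, and BLKPREP_KILL makes the block layer fail and complete it. A hedged sketch of such a prep function, with hypothetical example_* names and fields:

struct example_dev {
	int resetting;			/* hypothetical per-device state */
};

static int example_prep_rq_fn(struct request_queue *q, struct request *rq)
{
	struct example_dev *dev = q->queuedata;

	if (!blk_fs_request(rq))
		return BLKPREP_KILL;	/* unsupported: fail and complete it */

	if (dev->resetting)
		return BLKPREP_DEFER;	/* leave it queued, try again later */

	rq->cmd_flags |= REQ_DONTPREP;	/* prepared once, skip prep next time */
	return BLKPREP_OK;
}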
593 593
594 extern unsigned long blk_max_low_pfn, blk_max_pfn; 594 extern unsigned long blk_max_low_pfn, blk_max_pfn;
595 595
596 /* 596 /*
597 * standard bounce addresses: 597 * standard bounce addresses:
598 * 598 *
599 * BLK_BOUNCE_HIGH : bounce all highmem pages 599 * BLK_BOUNCE_HIGH : bounce all highmem pages
600 * BLK_BOUNCE_ANY : don't bounce anything 600 * BLK_BOUNCE_ANY : don't bounce anything
601 * BLK_BOUNCE_ISA : bounce pages above ISA DMA boundary 601 * BLK_BOUNCE_ISA : bounce pages above ISA DMA boundary
602 */ 602 */
603 #define BLK_BOUNCE_HIGH ((u64)blk_max_low_pfn << PAGE_SHIFT) 603 #define BLK_BOUNCE_HIGH ((u64)blk_max_low_pfn << PAGE_SHIFT)
604 #define BLK_BOUNCE_ANY ((u64)blk_max_pfn << PAGE_SHIFT) 604 #define BLK_BOUNCE_ANY ((u64)blk_max_pfn << PAGE_SHIFT)
605 #define BLK_BOUNCE_ISA (ISA_DMA_THRESHOLD) 605 #define BLK_BOUNCE_ISA (ISA_DMA_THRESHOLD)
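Drivers pass one of these limits to blk_queue_bounce_limit() during queue setup; for example, hardware that cannot DMA to highmem would use BLK_BOUNCE_HIGH. A small sketch (the example_set_bounce() wrapper is hypothetical):

static void example_set_bounce(struct request_queue *q)
{
	/* bounce any highmem pages through lowmem before the hardware sees them */
	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
}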
606 606
607 /* 607 /*
608 * default timeout for SG_IO if none specified 608 * default timeout for SG_IO if none specified
609 */ 609 */
610 #define BLK_DEFAULT_SG_TIMEOUT (60 * HZ) 610 #define BLK_DEFAULT_SG_TIMEOUT (60 * HZ)
611 611
612 #ifdef CONFIG_BOUNCE 612 #ifdef CONFIG_BOUNCE
613 extern int init_emergency_isa_pool(void); 613 extern int init_emergency_isa_pool(void);
614 extern void blk_queue_bounce(struct request_queue *q, struct bio **bio); 614 extern void blk_queue_bounce(struct request_queue *q, struct bio **bio);
615 #else 615 #else
616 static inline int init_emergency_isa_pool(void) 616 static inline int init_emergency_isa_pool(void)
617 { 617 {
618 return 0; 618 return 0;
619 } 619 }
620 static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio) 620 static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
621 { 621 {
622 } 622 }
623 #endif /* CONFIG_MMU */ 623 #endif /* CONFIG_MMU */
624 624
625 struct req_iterator { 625 struct req_iterator {
626 int i; 626 int i;
627 struct bio *bio; 627 struct bio *bio;
628 }; 628 };
629 629
630 /* This should not be used directly - use rq_for_each_segment */ 630 /* This should not be used directly - use rq_for_each_segment */
631 #define __rq_for_each_bio(_bio, rq) \ 631 #define __rq_for_each_bio(_bio, rq) \
632 if ((rq->bio)) \ 632 if ((rq->bio)) \
633 for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) 633 for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next)
634 634
635 #define rq_for_each_segment(bvl, _rq, _iter) \ 635 #define rq_for_each_segment(bvl, _rq, _iter) \
636 __rq_for_each_bio(_iter.bio, _rq) \ 636 __rq_for_each_bio(_iter.bio, _rq) \
637 bio_for_each_segment(bvl, _iter.bio, _iter.i) 637 bio_for_each_segment(bvl, _iter.bio, _iter.i)
638 638
639 #define rq_iter_last(rq, _iter) \ 639 #define rq_iter_last(rq, _iter) \
640 (_iter.bio->bi_next == NULL && _iter.i == _iter.bio->bi_vcnt-1) 640 (_iter.bio->bi_next == NULL && _iter.i == _iter.bio->bi_vcnt-1)
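rq_for_each_segment() is the intended way to walk a request's data without touching its bios directly. A minimal sketch that totals the bytes in a request (the example_rq_bytes() helper is hypothetical):

static unsigned int example_rq_bytes(struct request *rq)
{
	struct req_iterator iter;
	struct bio_vec *bvec;
	unsigned int bytes = 0;

	rq_for_each_segment(bvec, rq, iter)
		bytes += bvec->bv_len;		/* one segment's worth of data */

	return bytes;
}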
641 641
642 extern int blk_register_queue(struct gendisk *disk); 642 extern int blk_register_queue(struct gendisk *disk);
643 extern void blk_unregister_queue(struct gendisk *disk); 643 extern void blk_unregister_queue(struct gendisk *disk);
644 extern void register_disk(struct gendisk *dev); 644 extern void register_disk(struct gendisk *dev);
645 extern void generic_make_request(struct bio *bio); 645 extern void generic_make_request(struct bio *bio);
646 extern void blk_put_request(struct request *); 646 extern void blk_put_request(struct request *);
647 extern void __blk_put_request(struct request_queue *, struct request *); 647 extern void __blk_put_request(struct request_queue *, struct request *);
648 extern void blk_end_sync_rq(struct request *rq, int error); 648 extern void blk_end_sync_rq(struct request *rq, int error);
649 extern struct request *blk_get_request(struct request_queue *, int, gfp_t); 649 extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
650 extern void blk_insert_request(struct request_queue *, struct request *, int, void *); 650 extern void blk_insert_request(struct request_queue *, struct request *, int, void *);
651 extern void blk_requeue_request(struct request_queue *, struct request *); 651 extern void blk_requeue_request(struct request_queue *, struct request *);
652 extern void blk_plug_device(struct request_queue *); 652 extern void blk_plug_device(struct request_queue *);
653 extern int blk_remove_plug(struct request_queue *); 653 extern int blk_remove_plug(struct request_queue *);
654 extern void blk_recount_segments(struct request_queue *, struct bio *); 654 extern void blk_recount_segments(struct request_queue *, struct bio *);
655 extern int scsi_cmd_ioctl(struct file *, struct request_queue *, 655 extern int scsi_cmd_ioctl(struct file *, struct request_queue *,
656 struct gendisk *, unsigned int, void __user *); 656 struct gendisk *, unsigned int, void __user *);
657 extern int sg_scsi_ioctl(struct file *, struct request_queue *, 657 extern int sg_scsi_ioctl(struct file *, struct request_queue *,
658 struct gendisk *, struct scsi_ioctl_command __user *); 658 struct gendisk *, struct scsi_ioctl_command __user *);
659 659
660 /* 660 /*
661 * Temporary export, until SCSI gets fixed up. 661 * Temporary export, until SCSI gets fixed up.
662 */ 662 */
663 extern int blk_rq_append_bio(struct request_queue *q, struct request *rq, 663 extern int blk_rq_append_bio(struct request_queue *q, struct request *rq,
664 struct bio *bio); 664 struct bio *bio);
665 665
666 /* 666 /*
667 * A queue has just exited congestion. Note this in the global counter of 667 * A queue has just exited congestion. Note this in the global counter of
668 * congested queues, and wake up anyone who was waiting for requests to be 668 * congested queues, and wake up anyone who was waiting for requests to be
669 * put back. 669 * put back.
670 */ 670 */
671 static inline void blk_clear_queue_congested(struct request_queue *q, int rw) 671 static inline void blk_clear_queue_congested(struct request_queue *q, int rw)
672 { 672 {
673 clear_bdi_congested(&q->backing_dev_info, rw); 673 clear_bdi_congested(&q->backing_dev_info, rw);
674 } 674 }
675 675
676 /* 676 /*
677 * A queue has just entered congestion. Flag that in the queue's VM-visible 677 * A queue has just entered congestion. Flag that in the queue's VM-visible
678 * state flags and increment the global counter of congested queues. 678 * state flags and increment the global counter of congested queues.
679 */ 679 */
680 static inline void blk_set_queue_congested(struct request_queue *q, int rw) 680 static inline void blk_set_queue_congested(struct request_queue *q, int rw)
681 { 681 {
682 set_bdi_congested(&q->backing_dev_info, rw); 682 set_bdi_congested(&q->backing_dev_info, rw);
683 } 683 }
684 684
685 extern void blk_start_queue(struct request_queue *q); 685 extern void blk_start_queue(struct request_queue *q);
686 extern void blk_stop_queue(struct request_queue *q); 686 extern void blk_stop_queue(struct request_queue *q);
687 extern void blk_sync_queue(struct request_queue *q); 687 extern void blk_sync_queue(struct request_queue *q);
688 extern void __blk_stop_queue(struct request_queue *q); 688 extern void __blk_stop_queue(struct request_queue *q);
689 extern void blk_run_queue(struct request_queue *); 689 extern void blk_run_queue(struct request_queue *);
690 extern void blk_start_queueing(struct request_queue *); 690 extern void blk_start_queueing(struct request_queue *);
691 extern int blk_rq_map_user(struct request_queue *, struct request *, void __user *, unsigned long); 691 extern int blk_rq_map_user(struct request_queue *, struct request *, void __user *, unsigned long);
692 extern int blk_rq_unmap_user(struct bio *); 692 extern int blk_rq_unmap_user(struct bio *);
693 extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); 693 extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t);
694 extern int blk_rq_map_user_iov(struct request_queue *, struct request *, 694 extern int blk_rq_map_user_iov(struct request_queue *, struct request *,
695 struct sg_iovec *, int, unsigned int); 695 struct sg_iovec *, int, unsigned int);
696 extern int blk_execute_rq(struct request_queue *, struct gendisk *, 696 extern int blk_execute_rq(struct request_queue *, struct gendisk *,
697 struct request *, int); 697 struct request *, int);
698 extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, 698 extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
699 struct request *, int, rq_end_io_fn *); 699 struct request *, int, rq_end_io_fn *);
700 extern int blk_verify_command(unsigned char *, int); 700 extern int blk_verify_command(unsigned char *, int);
701 701
702 static inline struct request_queue *bdev_get_queue(struct block_device *bdev) 702 static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
703 { 703 {
704 return bdev->bd_disk->queue; 704 return bdev->bd_disk->queue;
705 } 705 }
706 706
707 static inline void blk_run_backing_dev(struct backing_dev_info *bdi, 707 static inline void blk_run_backing_dev(struct backing_dev_info *bdi,
708 struct page *page) 708 struct page *page)
709 { 709 {
710 if (bdi && bdi->unplug_io_fn) 710 if (bdi && bdi->unplug_io_fn)
711 bdi->unplug_io_fn(bdi, page); 711 bdi->unplug_io_fn(bdi, page);
712 } 712 }
713 713
714 static inline void blk_run_address_space(struct address_space *mapping) 714 static inline void blk_run_address_space(struct address_space *mapping)
715 { 715 {
716 if (mapping) 716 if (mapping)
717 blk_run_backing_dev(mapping->backing_dev_info, NULL); 717 blk_run_backing_dev(mapping->backing_dev_info, NULL);
718 } 718 }
719 719
720 /* 720 /*
721 * end_request() and friends. Must be called with the request queue spinlock 721 * end_request() and friends. Must be called with the request queue spinlock
722 * acquired. All functions called within end_request() _must_be_ atomic. 722 * acquired. All functions called within end_request() _must_be_ atomic.
723 * 723 *
724 * Several drivers define their own end_request and call 724 * Several drivers define their own end_request and call
725 * end_that_request_first() and end_that_request_last() 725 * end_that_request_first() and end_that_request_last()
726 * for parts of the original function. This prevents 726 * for parts of the original function. This prevents
727 * code duplication in drivers. 727 * code duplication in drivers.
728 */ 728 */
729 extern int end_that_request_first(struct request *, int, int); 729 extern int end_that_request_first(struct request *, int, int);
730 extern int end_that_request_chunk(struct request *, int, int); 730 extern int end_that_request_chunk(struct request *, int, int);
731 extern void end_that_request_last(struct request *, int); 731 extern void end_that_request_last(struct request *, int);
732 extern void end_request(struct request *req, int uptodate); 732 extern void end_request(struct request *, int);
733 extern void end_queued_request(struct request *, int);
734 extern void end_dequeued_request(struct request *, int);
733 extern void blk_complete_request(struct request *); 735 extern void blk_complete_request(struct request *);
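For illustration, a minimal driver-side completion path built from the primitives above might look like the sketch below (my_end_whole_request is a hypothetical name; the queue spinlock is assumed held, as the comment above requires). Judging by their names, the new end_queued_request()/end_dequeued_request() declarations distinguish whether the request is still on the queue when it is finished:

static void my_end_whole_request(struct request *rq, int uptodate)
{
	/* complete all sectors; returns 0 once no I/O remains pending */
	if (!end_that_request_first(rq, uptodate, rq->hard_nr_sectors)) {
		/* still queued: take it off (helper defined just below) */
		blkdev_dequeue_request(rq);
		end_that_request_last(rq, uptodate);
	}
}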
734 736
735 /* 737 /*
736 * end_that_request_first/chunk() takes an uptodate argument. we account 738 * end_that_request_first/chunk() takes an uptodate argument. we account
737 * any value <= as an io error. 0 means -EIO for compatability reasons, 739 * any value <= as an io error. 0 means -EIO for compatability reasons,
738 * any other < 0 value is the direct error type. An uptodate value of 740 * any other < 0 value is the direct error type. An uptodate value of
739 * 1 indicates successful io completion 741 * 1 indicates successful io completion
740 */ 742 */
741 #define end_io_error(uptodate) (unlikely((uptodate) <= 0)) 743 #define end_io_error(uptodate) (unlikely((uptodate) <= 0))
742 744
743 static inline void blkdev_dequeue_request(struct request *req) 745 static inline void blkdev_dequeue_request(struct request *req)
744 { 746 {
745 elv_dequeue_request(req->q, req); 747 elv_dequeue_request(req->q, req);
746 } 748 }
747 749
748 /* 750 /*
749 * Access functions for manipulating queue properties 751 * Access functions for manipulating queue properties
750 */ 752 */
751 extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, 753 extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn,
752 spinlock_t *lock, int node_id); 754 spinlock_t *lock, int node_id);
753 extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); 755 extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *);
754 extern void blk_cleanup_queue(struct request_queue *); 756 extern void blk_cleanup_queue(struct request_queue *);
755 extern void blk_queue_make_request(struct request_queue *, make_request_fn *); 757 extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
756 extern void blk_queue_bounce_limit(struct request_queue *, u64); 758 extern void blk_queue_bounce_limit(struct request_queue *, u64);
757 extern void blk_queue_max_sectors(struct request_queue *, unsigned int); 759 extern void blk_queue_max_sectors(struct request_queue *, unsigned int);
758 extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short); 760 extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short);
759 extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short); 761 extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short);
760 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); 762 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
761 extern void blk_queue_hardsect_size(struct request_queue *, unsigned short); 763 extern void blk_queue_hardsect_size(struct request_queue *, unsigned short);
762 extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); 764 extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b);
763 extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); 765 extern void blk_queue_segment_boundary(struct request_queue *, unsigned long);
764 extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); 766 extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn);
765 extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); 767 extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *);
766 extern void blk_queue_dma_alignment(struct request_queue *, int); 768 extern void blk_queue_dma_alignment(struct request_queue *, int);
767 extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); 769 extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
768 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); 770 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
769 extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *); 771 extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *);
770 extern void blk_queue_issue_flush_fn(struct request_queue *, issue_flush_fn *); 772 extern void blk_queue_issue_flush_fn(struct request_queue *, issue_flush_fn *);
771 extern int blk_do_ordered(struct request_queue *, struct request **); 773 extern int blk_do_ordered(struct request_queue *, struct request **);
772 extern unsigned blk_ordered_cur_seq(struct request_queue *); 774 extern unsigned blk_ordered_cur_seq(struct request_queue *);
773 extern unsigned blk_ordered_req_seq(struct request *); 775 extern unsigned blk_ordered_req_seq(struct request *);
774 extern void blk_ordered_complete_seq(struct request_queue *, unsigned, int); 776 extern void blk_ordered_complete_seq(struct request_queue *, unsigned, int);
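As a hedged example of how these accessors are typically used (mydev_request_fn and mydev_lock belong to an assumed driver, not to this file), an init path might create a queue and program its limits like so, using the constants defined further below:

static DEFINE_SPINLOCK(mydev_lock);

static struct request_queue *mydev_alloc_queue(void)
{
	struct request_queue *q;

	q = blk_init_queue(mydev_request_fn, &mydev_lock);
	if (!q)
		return NULL;

	/* limits the block layer enforces when building and merging requests */
	blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
	blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
	blk_queue_hardsect_size(q, 512);
	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);

	return q;
}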
775 777
776 extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); 778 extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
777 extern void blk_dump_rq_flags(struct request *, char *); 779 extern void blk_dump_rq_flags(struct request *, char *);
778 extern void generic_unplug_device(struct request_queue *); 780 extern void generic_unplug_device(struct request_queue *);
779 extern void __generic_unplug_device(struct request_queue *); 781 extern void __generic_unplug_device(struct request_queue *);
780 extern long nr_blockdev_pages(void); 782 extern long nr_blockdev_pages(void);
781 783
782 int blk_get_queue(struct request_queue *); 784 int blk_get_queue(struct request_queue *);
783 struct request_queue *blk_alloc_queue(gfp_t); 785 struct request_queue *blk_alloc_queue(gfp_t);
784 struct request_queue *blk_alloc_queue_node(gfp_t, int); 786 struct request_queue *blk_alloc_queue_node(gfp_t, int);
785 extern void blk_put_queue(struct request_queue *); 787 extern void blk_put_queue(struct request_queue *);
786 788
787 /* 789 /*
788 * tag stuff 790 * tag stuff
789 */ 791 */
790 #define blk_queue_tag_depth(q) ((q)->queue_tags->busy) 792 #define blk_queue_tag_depth(q) ((q)->queue_tags->busy)
791 #define blk_queue_tag_queue(q) ((q)->queue_tags->busy < (q)->queue_tags->max_depth) 793 #define blk_queue_tag_queue(q) ((q)->queue_tags->busy < (q)->queue_tags->max_depth)
792 #define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED) 794 #define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED)
793 extern int blk_queue_start_tag(struct request_queue *, struct request *); 795 extern int blk_queue_start_tag(struct request_queue *, struct request *);
794 extern struct request *blk_queue_find_tag(struct request_queue *, int); 796 extern struct request *blk_queue_find_tag(struct request_queue *, int);
795 extern void blk_queue_end_tag(struct request_queue *, struct request *); 797 extern void blk_queue_end_tag(struct request_queue *, struct request *);
796 extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *); 798 extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *);
797 extern void blk_queue_free_tags(struct request_queue *); 799 extern void blk_queue_free_tags(struct request_queue *);
798 extern int blk_queue_resize_tags(struct request_queue *, int); 800 extern int blk_queue_resize_tags(struct request_queue *, int);
799 extern void blk_queue_invalidate_tags(struct request_queue *); 801 extern void blk_queue_invalidate_tags(struct request_queue *);
800 extern struct blk_queue_tag *blk_init_tags(int); 802 extern struct blk_queue_tag *blk_init_tags(int);
801 extern void blk_free_tags(struct blk_queue_tag *); 803 extern void blk_free_tags(struct blk_queue_tag *);
802 804
803 static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, 805 static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
804 int tag) 806 int tag)
805 { 807 {
806 if (unlikely(bqt == NULL || tag >= bqt->real_max_depth)) 808 if (unlikely(bqt == NULL || tag >= bqt->real_max_depth))
807 return NULL; 809 return NULL;
808 return bqt->tag_index[tag]; 810 return bqt->tag_index[tag];
809 } 811 }
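A rough sketch of tagged dispatch from a driver's request_fn using the tag helpers above (mydev_issue_to_hw is a placeholder for the driver's hardware submit; the block layer holds the queue lock when calling request_fn):

static void mydev_request_fn(struct request_queue *q)
{
	struct request *rq;

	while ((rq = elv_next_request(q)) != NULL) {
		/* returns non-zero when no tag is free; retry on completion */
		if (blk_queue_start_tag(q, rq))
			break;
		/* on success the request has been dequeued and tagged */
		mydev_issue_to_hw(rq);
	}
}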
810 812
811 extern int blkdev_issue_flush(struct block_device *, sector_t *); 813 extern int blkdev_issue_flush(struct block_device *, sector_t *);
812 814
813 #define MAX_PHYS_SEGMENTS 128 815 #define MAX_PHYS_SEGMENTS 128
814 #define MAX_HW_SEGMENTS 128 816 #define MAX_HW_SEGMENTS 128
815 #define SAFE_MAX_SECTORS 255 817 #define SAFE_MAX_SECTORS 255
816 #define BLK_DEF_MAX_SECTORS 1024 818 #define BLK_DEF_MAX_SECTORS 1024
817 819
818 #define MAX_SEGMENT_SIZE 65536 820 #define MAX_SEGMENT_SIZE 65536
819 821
820 #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) 822 #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist)
821 823
822 static inline int queue_hardsect_size(struct request_queue *q) 824 static inline int queue_hardsect_size(struct request_queue *q)
823 { 825 {
824 int retval = 512; 826 int retval = 512;
825 827
826 if (q && q->hardsect_size) 828 if (q && q->hardsect_size)
827 retval = q->hardsect_size; 829 retval = q->hardsect_size;
828 830
829 return retval; 831 return retval;
830 } 832 }
831 833
832 static inline int bdev_hardsect_size(struct block_device *bdev) 834 static inline int bdev_hardsect_size(struct block_device *bdev)
833 { 835 {
834 return queue_hardsect_size(bdev_get_queue(bdev)); 836 return queue_hardsect_size(bdev_get_queue(bdev));
835 } 837 }
836 838
837 static inline int queue_dma_alignment(struct request_queue *q) 839 static inline int queue_dma_alignment(struct request_queue *q)
838 { 840 {
839 int retval = 511; 841 int retval = 511;
840 842
841 if (q && q->dma_alignment) 843 if (q && q->dma_alignment)
842 retval = q->dma_alignment; 844 retval = q->dma_alignment;
843 845
844 return retval; 846 return retval;
845 } 847 }
846 848
847 /* assumes size > 256 */ 849 /* assumes size > 256 */
848 static inline unsigned int blksize_bits(unsigned int size) 850 static inline unsigned int blksize_bits(unsigned int size)
849 { 851 {
850 unsigned int bits = 8; 852 unsigned int bits = 8;
851 do { 853 do {
852 bits++; 854 bits++;
853 size >>= 1; 855 size >>= 1;
854 } while (size > 256); 856 } while (size > 256);
855 return bits; 857 return bits;
856 } 858 }
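As a worked example of the helper above: for power-of-two sizes it returns log2 of the block size, so blksize_bits(512) evaluates to 9 and blksize_bits(4096) to 12.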
857 859
858 static inline unsigned int block_size(struct block_device *bdev) 860 static inline unsigned int block_size(struct block_device *bdev)
859 { 861 {
860 return bdev->bd_block_size; 862 return bdev->bd_block_size;
861 } 863 }
862 864
863 typedef struct {struct page *v;} Sector; 865 typedef struct {struct page *v;} Sector;
864 866
865 unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *); 867 unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *);
866 868
867 static inline void put_dev_sector(Sector p) 869 static inline void put_dev_sector(Sector p)
868 { 870 {
869 page_cache_release(p.v); 871 page_cache_release(p.v);
870 } 872 }
871 873
872 struct work_struct; 874 struct work_struct;
873 int kblockd_schedule_work(struct work_struct *work); 875 int kblockd_schedule_work(struct work_struct *work);
874 void kblockd_flush_work(struct work_struct *work); 876 void kblockd_flush_work(struct work_struct *work);
875 877
876 #define MODULE_ALIAS_BLOCKDEV(major,minor) \ 878 #define MODULE_ALIAS_BLOCKDEV(major,minor) \
877 MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) 879 MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
878 #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ 880 #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \
879 MODULE_ALIAS("block-major-" __stringify(major) "-*") 881 MODULE_ALIAS("block-major-" __stringify(major) "-*")
880 882
881 883
882 #else /* CONFIG_BLOCK */ 884 #else /* CONFIG_BLOCK */
883 /* 885 /*
884 * stubs for when the block layer is configured out 886 * stubs for when the block layer is configured out
885 */ 887 */
886 #define buffer_heads_over_limit 0 888 #define buffer_heads_over_limit 0
887 889
888 static inline long nr_blockdev_pages(void) 890 static inline long nr_blockdev_pages(void)
889 { 891 {
890 return 0; 892 return 0;
891 } 893 }
892 894
893 static inline void exit_io_context(void) 895 static inline void exit_io_context(void)
894 { 896 {
895 } 897 }
896 898
897 #endif /* CONFIG_BLOCK */ 899 #endif /* CONFIG_BLOCK */
898 900
899 #endif 901 #endif
900 902